From 7ddfe32e30950425cfec99b4edc765275f4f7bf1 Mon Sep 17 00:00:00 2001 From: tastytea Date: Sun, 30 May 2021 21:52:52 +0200 Subject: [PATCH] Move is_whitespace() and urldecode() to helpers. --- src/helpers.cpp | 38 ++++++++++++++++++++++++++++++++++++++ src/helpers.hpp | 16 ++++++++++++++++ src/search.cpp | 20 +++++++------------- src/search.hpp | 3 --- src/zip.cpp | 21 ++------------------- src/zip.hpp | 3 --- 6 files changed, 63 insertions(+), 38 deletions(-) create mode 100644 src/helpers.cpp create mode 100644 src/helpers.hpp diff --git a/src/helpers.cpp b/src/helpers.cpp new file mode 100644 index 0000000..cc11cd6 --- /dev/null +++ b/src/helpers.cpp @@ -0,0 +1,38 @@ +#include "helpers.hpp" + +#include + +#include +#include +#include +#include + +namespace epubgrep::helpers +{ + +bool is_whitespace(const char check) +{ + const std::array whitespace{' ', '\n', '\r', '\t'}; + return std::any_of(whitespace.begin(), whitespace.end(), + [&check](const char ws) { return check == ws; }); +} + +std::string urldecode(const std::string_view url) +{ // RFC 3986, section 2.1. + size_t pos{0}; + size_t lastpos{0}; + std::string decoded; + while ((pos = url.find('%', pos)) != std::string_view::npos) + { + decoded += url.substr(lastpos, pos - lastpos); + decoded += static_cast( + std::stoul(std::string(url.substr(pos + 1, 2)), nullptr, 16)); + pos += 3; + lastpos = pos; + } + decoded += url.substr(lastpos); + + return decoded; +} + +} // namespace epubgrep::helpers diff --git a/src/helpers.hpp b/src/helpers.hpp new file mode 100644 index 0000000..577584a --- /dev/null +++ b/src/helpers.hpp @@ -0,0 +1,16 @@ +#ifndef EPUBGREP_HELPERS_HPP +#define EPUBGREP_HELPERS_HPP + +#include +namespace epubgrep::helpers +{ + +//! Return true if check is whitespace. +[[nodiscard]] bool is_whitespace(char check); + +//! Decode percent-encoded text. Used for restricted characters in URLs. +[[nodiscard]] std::string urldecode(std::string_view url); + +} // namespace epubgrep::helpers + +#endif // EPUBGREP_HELPERS_HPP diff --git a/src/search.cpp b/src/search.cpp index 41e9329..57aa147 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -17,6 +17,7 @@ #include "search.hpp" #include "fs-compat.hpp" +#include "helpers.hpp" #include "zip.hpp" #include @@ -204,7 +205,7 @@ match_context context(const boost::match_results &match, whitespace.end()); if (pos_before != rend_before) { - while (is_whitespace(*pos_before)) + while (helpers::is_whitespace(*pos_before)) { ++pos_before; } @@ -218,7 +219,7 @@ match_context context(const boost::match_results &match, whitespace.end()); if (pos_after != end_after) { - while (is_whitespace(*pos_after)) + while (helpers::is_whitespace(*pos_after)) { ++pos_after; } @@ -230,11 +231,11 @@ match_context context(const boost::match_results &match, const std::string before_reversed(rbegin_before, pos_before); string before(before_reversed.rbegin(), before_reversed.rend()); std::string after(begin_after, pos_after); - while (is_whitespace(*before.begin())) + while (helpers::is_whitespace(*before.begin())) { before.erase(0, 1); } - while (is_whitespace(*after.rbegin())) + while (helpers::is_whitespace(*after.rbegin())) { after.erase(after.size() - 1); } @@ -250,11 +251,11 @@ string headline(const std::string_view prefix) pos += 3; string result{prefix.substr(pos, prefix.find('<', pos) - pos)}; - while (is_whitespace(*result.begin())) + while (helpers::is_whitespace(*result.begin())) { result.erase(0, 1); } - while (is_whitespace(*result.rbegin())) + while (helpers::is_whitespace(*result.rbegin())) { result.erase(result.size() - 1); } @@ -277,11 +278,4 @@ string page(const std::string_view prefix) return {}; } -bool is_whitespace(const char check) -{ - const std::array whitespace{' ', '\n', '\r', '\t'}; - return std::any_of(whitespace.begin(), whitespace.end(), - [&check](const char ws) { return check == ws; }); -} - } // namespace epubgrep::search diff --git a/src/search.hpp b/src/search.hpp index 5abd1e3..0981729 100644 --- a/src/search.hpp +++ b/src/search.hpp @@ -71,9 +71,6 @@ context(const boost::match_results &match, //! Return current page if possible. [[nodiscard]] std::string page(std::string_view prefix); -//! Return true if check is whitespace. -[[nodiscard]] bool is_whitespace(char check); - } // namespace epubgrep::search #endif // EPUBGREP_SEARCH_HPP diff --git a/src/zip.cpp b/src/zip.cpp index 8cc1d57..c2e5594 100644 --- a/src/zip.cpp +++ b/src/zip.cpp @@ -17,6 +17,7 @@ #include "zip.hpp" #include "fs-compat.hpp" +#include "helpers.hpp" #include #include @@ -210,7 +211,7 @@ std::vector list_spine(const fs::path &filepath) const auto &idref{itemref.attribute("idref").value()}; const auto &item{manifest.find_child_by_attribute("id", idref)}; const std::string href{ - urldecode(item.attribute("href").value())}; + helpers::urldecode(item.attribute("href").value())}; if (href[0] != '/') { spine_filepaths.emplace_back( @@ -241,22 +242,4 @@ std::vector list_spine(const fs::path &filepath) return spine_filepaths; } -std::string urldecode(const std::string_view url) -{ // RFC 3986, section 2.1. - size_t pos{0}; - size_t lastpos{0}; - std::string decoded; - while ((pos = url.find('%', pos)) != std::string_view::npos) - { - decoded += url.substr(lastpos, pos - lastpos); - decoded += static_cast( - std::stoul(std::string(url.substr(pos + 1, 2)), nullptr, 16)); - pos += 3; - lastpos = pos; - } - decoded += url.substr(lastpos); - - return decoded; -} - } // namespace epubgrep::zip diff --git a/src/zip.hpp b/src/zip.hpp index c0de8f1..05f04b0 100644 --- a/src/zip.hpp +++ b/src/zip.hpp @@ -46,9 +46,6 @@ void close_file(struct archive *zipfile, const fs::path &filepath); //! Returns the files in the EPUB “spine” (all pages that are actually text). [[nodiscard]] std::vector list_spine(const fs::path &filepath); -//! Decode percent-encoding. Used for restricted characters in URLs. -[[nodiscard]] std::string urldecode(std::string_view url); - //! It's std::runtime_error, but with another name. class exception : public std::runtime_error {