Move is_whitespace() and urldecode() to helpers.

2021-05-30 21:52:52 +02:00 · 2021-05-30 21:52:52 +02:00 · 7ddfe32e30
parent af9563e669
commit 7ddfe32e30
6 changed files with 63 additions and 38 deletions
--- a/src/helpers.cpp
+++ b/src/helpers.cpp
@ -0,0 +1,38 @@
+#include "helpers.hpp"
+
+#include <boost/regex.hpp>
+
+#include <algorithm>
+#include <array>
+#include <codecvt>
+#include <locale>
+#include <map>
+#include <string_view>
+
+namespace epubgrep::helpers
+{
+
+bool is_whitespace(const char check)
+{
+    // Only space, line feed, carriage return and tab count as whitespace.
+    switch (check)
+    {
+    case ' ':
+    case '\n':
+    case '\r':
+    case '\t':
+    {
+        return true;
+    }
+    default:
+    {
+        return false;
+    }
+    }
+}
+
+std::string urldecode(const std::string_view url)
+{ // RFC 3986, section 2.1.
+    // Decodes every "%XY" triplet, where X and Y are hexadecimal digits, into
+    // the octet it encodes. A '%' that is not followed by two hexadecimal
+    // digits is malformed; it is copied through unchanged instead of raising
+    // an exception, because the input comes from files we do not control.
+
+    // Numeric value of a hexadecimal digit, or -1 if c is not one.
+    const auto hex_value{[](const char c) -> int
+                         {
+                             if (c >= '0' && c <= '9')
+                             {
+                                 return c - '0';
+                             }
+                             if (c >= 'a' && c <= 'f')
+                             {
+                                 return c - 'a' + 10;
+                             }
+                             if (c >= 'A' && c <= 'F')
+                             {
+                                 return c - 'A' + 10;
+                             }
+                             return -1;
+                         }};
+
+    std::string decoded;
+    decoded.reserve(url.size());
+    for (size_t pos{0}; pos < url.size(); ++pos)
+    {
+        // An escape needs two more characters after the '%'.
+        if (url[pos] == '%' && url.size() - pos > 2)
+        {
+            const int high{hex_value(url[pos + 1])};
+            const int low{hex_value(url[pos + 2])};
+            if (high >= 0 && low >= 0)
+            {
+                decoded += static_cast<char>(high * 16 + low);
+                pos += 2; // Skip the two digits; the loop skips the '%'.
+                continue;
+            }
+        }
+        decoded += url[pos];
+    }
+
+    return decoded;
+}
+
+} // namespace epubgrep::helpers
--- a/src/helpers.hpp
+++ b/src/helpers.hpp
@ -0,0 +1,16 @@
+#ifndef EPUBGREP_HELPERS_HPP
+#define EPUBGREP_HELPERS_HPP
+
+#include <string>
+#include <string_view>
+
+namespace epubgrep::helpers
+{
+
+//! Return true if check is whitespace (space, line feed, carriage return or
+//! tab).
+[[nodiscard]] bool is_whitespace(char check);
+
+//! Decode percent-encoded text (RFC 3986, section 2.1). Used for restricted
+//! characters in URLs.
+[[nodiscard]] std::string urldecode(std::string_view url);
+
+} // namespace epubgrep::helpers
+
+#endif // EPUBGREP_HELPERS_HPP
--- a/src/search.cpp
+++ b/src/search.cpp
@ -17,6 +17,7 @@
 #include "search.hpp"

 #include "fs-compat.hpp"
+#include "helpers.hpp"
 #include "zip.hpp"

 #include <boost/regex.hpp>
@ -204,7 +205,7 @@ match_context context(const boost::match_results<string::const_iterator> &match,
                                            whitespace.end());
            if (pos_before != rend_before)
            {
-                while (is_whitespace(*pos_before))
+                while (helpers::is_whitespace(*pos_before))
                {
                    ++pos_before;
                }
@ -218,7 +219,7 @@ match_context context(const boost::match_results<string::const_iterator> &match,
                                           whitespace.end());
            if (pos_after != end_after)
            {
-                while (is_whitespace(*pos_after))
+                while (helpers::is_whitespace(*pos_after))
                {
                    ++pos_after;
                }
@ -230,11 +231,11 @@ match_context context(const boost::match_results<string::const_iterator> &match,
    const std::string before_reversed(rbegin_before, pos_before);
    string before(before_reversed.rbegin(), before_reversed.rend());
    std::string after(begin_after, pos_after);
-    while (is_whitespace(*before.begin()))
+    while (helpers::is_whitespace(*before.begin()))
    {
        before.erase(0, 1);
    }
-    while (is_whitespace(*after.rbegin()))
+    while (helpers::is_whitespace(*after.rbegin()))
    {
        after.erase(after.size() - 1);
    }
@ -250,11 +251,11 @@ string headline(const std::string_view prefix)
        pos += 3;
        string result{prefix.substr(pos, prefix.find('<', pos) - pos)};

-        while (is_whitespace(*result.begin()))
+        while (helpers::is_whitespace(*result.begin()))
        {
            result.erase(0, 1);
        }
-        while (is_whitespace(*result.rbegin()))
+        while (helpers::is_whitespace(*result.rbegin()))
        {
            result.erase(result.size() - 1);
        }
@ -277,11 +278,4 @@ string page(const std::string_view prefix)
    return {};
 }

-bool is_whitespace(const char check)
-{
-    const std::array<char, 4> whitespace{' ', '\n', '\r', '\t'};
-    return std::any_of(whitespace.begin(), whitespace.end(),
-                       [&check](const char ws) { return check == ws; });
-}
-
 } // namespace epubgrep::search
--- a/src/search.hpp
+++ b/src/search.hpp
@ -71,9 +71,6 @@ context(const boost::match_results<std::string::const_iterator> &match,
 //! Return current page if possible.
 [[nodiscard]] std::string page(std::string_view prefix);

-//! Return true if check is whitespace.
-[[nodiscard]] bool is_whitespace(char check);
-
 } // namespace epubgrep::search

 #endif // EPUBGREP_SEARCH_HPP
--- a/src/zip.cpp
+++ b/src/zip.cpp
@ -17,6 +17,7 @@
 #include "zip.hpp"

 #include "fs-compat.hpp"
+#include "helpers.hpp"

 #include <archive.h>
 #include <archive_entry.h>
@ -210,7 +211,7 @@ std::vector<std::string> list_spine(const fs::path &filepath)
                const auto &idref{itemref.attribute("idref").value()};
                const auto &item{manifest.find_child_by_attribute("id", idref)};
                const std::string href{
-                    urldecode(item.attribute("href").value())};
+                    helpers::urldecode(item.attribute("href").value())};
                if (href[0] != '/')
                {
                    spine_filepaths.emplace_back(
@ -241,22 +242,4 @@ std::vector<std::string> list_spine(const fs::path &filepath)
    return spine_filepaths;
 }

-std::string urldecode(const std::string_view url)
-{ // RFC 3986, section 2.1.
-    size_t pos{0};
-    size_t lastpos{0};
-    std::string decoded;
-    while ((pos = url.find('%', pos)) != std::string_view::npos)
-    {
-        decoded += url.substr(lastpos, pos - lastpos);
-        decoded += static_cast<char>(
-            std::stoul(std::string(url.substr(pos + 1, 2)), nullptr, 16));
-        pos += 3;
-        lastpos = pos;
-    }
-    decoded += url.substr(lastpos);
-
-    return decoded;
-}
-
 } // namespace epubgrep::zip
--- a/src/zip.hpp
+++ b/src/zip.hpp
@ -46,9 +46,6 @@ void close_file(struct archive *zipfile, const fs::path &filepath);
 //! Returns the files in the EPUB “spine” (all pages that are actually text).
 [[nodiscard]] std::vector<std::string> list_spine(const fs::path &filepath);

-//! Decode percent-encoding. Used for restricted characters in URLs.
-[[nodiscard]] std::string urldecode(std::string_view url);
-
 //! It's std::runtime_error, but with another name.
 class exception : public std::runtime_error
 {