From bb1a43ca927e347662c86488dd4f5f86e60cd15e Mon Sep 17 00:00:00 2001
From: tastytea
Date: Mon, 24 May 2021 16:23:07 +0200
Subject: [PATCH] Move cleanup_text(), document functions.

---
 src/search.cpp | 46 +++++++++++++++++++++++-----------------------
 src/search.hpp |  7 +++++--
 2 files changed, 28 insertions(+), 25 deletions(-)

diff --git a/src/search.cpp b/src/search.cpp
index eb2d11d..71ff97d 100644
--- a/src/search.cpp
+++ b/src/search.cpp
@@ -87,6 +87,29 @@ std::vector search(const fs::path &filepath, std::string_view regex,
     return matches;
 }
 
+void cleanup_text(std::string &text)
+{
+    for (size_t pos{}; pos != std::string::npos; pos = text.find('<', pos))
+    {
+        text.erase(pos, text.find('>', pos) + 1 - pos);
+    }
+
+    for (size_t pos{}; pos != std::string::npos; pos = text.find('\r', pos))
+    {
+        text.replace(pos, 1, "");
+    }
+
+    for (size_t pos{}; pos != std::string::npos; pos = text.find('\n', pos))
+    {
+        text.replace(pos, 1, " ");
+    }
+
+    for (size_t pos{}; pos != std::string::npos; pos = text.find(" ", pos))
+    {
+        text.replace(pos, 2, " ");
+    }
+}
+
 match_context
 context(const boost::match_results &match,
         std::uint64_t words)
@@ -114,27 +137,4 @@ context(const boost::match_results &match,
     return {prefix.substr(pos_before + 2), suffix.substr(0, pos_after - 1)};
 }
 
-void cleanup_text(std::string &text)
-{
-    for (size_t pos{}; pos != std::string::npos; pos = text.find('<', pos))
-    {
-        text.erase(pos, text.find('>', pos) + 1 - pos);
-    }
-
-    for (size_t pos{}; pos != std::string::npos; pos = text.find('\r', pos))
-    {
-        text.replace(pos, 1, "");
-    }
-
-    for (size_t pos{}; pos != std::string::npos; pos = text.find('\n', pos))
-    {
-        text.replace(pos, 1, " ");
-    }
-
-    for (size_t pos{}; pos != std::string::npos; pos = text.find(" ", pos))
-    {
-        text.replace(pos, 2, " ");
-    }
-}
-
 } // namespace epubgrep::search
diff --git a/src/search.hpp b/src/search.hpp
index 21e198e..70572cf 100644
--- a/src/search.hpp
+++ b/src/search.hpp
@@ -57,16 +57,19 @@ struct options
     std::uint64_t context{0};
 };
 
+//! Search file, return matches.
 [[nodiscard]] std::vector search(const fs::path &filepath,
                                  std::string_view regex,
                                  const options &opts);
 
+//! Strip HTML, remove newlines, condense spaces.
+void cleanup_text(std::string &text);
+
+//! Return words before and after the match.
 [[nodiscard]] match_context
 context(const boost::match_results &match,
         std::uint64_t words);
 
-void cleanup_text(std::string &text);
-
 } // namespace epubgrep::search
 
 #endif // EPUBGREP_SEARCH_HPP