From 1979956f03c8bea80b68e2e2b65a75b19045a7be Mon Sep 17 00:00:00 2001 From: tastytea Date: Mon, 24 May 2021 15:35:49 +0200 Subject: [PATCH] Add basic search functionality and context output. --- CMakeLists.txt | 2 +- src/CMakeLists.txt | 3 +- src/main.cpp | 6 ++-- src/search.cpp | 81 +++++++++++++++++++++++++++++++++++++++++++++- src/search.hpp | 10 ++++-- 5 files changed, 94 insertions(+), 8 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 34f4a62..f764439 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -22,7 +22,7 @@ set(CMAKE_CXX_EXTENSIONS OFF) include(cmake/debug_flags.cmake) # All dependencies except test dependencies. -find_package(Boost 1.65.0 REQUIRED COMPONENTS program_options locale) +find_package(Boost 1.65.0 REQUIRED COMPONENTS locale program_options regex) find_package(Gettext REQUIRED) find_package(Filesystem REQUIRED COMPONENTS Final Experimental) find_package(LibArchive 3.2 REQUIRED) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index bf0db77..19ece0c 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -17,8 +17,9 @@ unset(headers_src) target_link_libraries(${PROJECT_NAME}_lib PUBLIC - Boost::program_options Boost::locale + Boost::program_options + Boost::regex std::filesystem LibArchive::LibArchive fmt::fmt) diff --git a/src/main.cpp b/src/main.cpp index dbc773b..d47bd4a 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -85,15 +85,15 @@ int main(int argc, char *argv[]) search::options opts; if (vm.count("basic-regexp") > 0) { - opts.regex = search::regex_type::basic; + opts.regex = search::regex_kind::basic; } if (vm.count("extended-regexp") > 0) { - opts.regex = search::regex_type::extended; + opts.regex = search::regex_kind::extended; } if (vm.count("perl-regexp") > 0) { - opts.regex = search::regex_type::perl; + opts.regex = search::regex_kind::perl; } if (vm.count("grep") > 0) { diff --git a/src/search.cpp b/src/search.cpp index a8aa441..af08d27 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -17,7 +17,12 
@@ #include "search.hpp" #include "fs-compat.hpp" +#include "zip.hpp" +#include + +#include +#include #include #include @@ -27,7 +32,81 @@ namespace epubgrep::search std::vector search(const fs::path &filepath, std::string_view regex, const options &opts) { - return {}; + boost::regex::flag_type flags{}; /* regex syntax flags derived from opts below */ + + switch (opts.regex) + { + case regex_kind::basic: + { + flags = opts.grep_like ? boost::regex::grep : boost::regex::basic; + break; + } + case regex_kind::extended: + { + flags = opts.grep_like ? boost::regex::egrep : boost::regex::extended; + break; + } + case regex_kind::perl: + { + flags = boost::regex::perl; + break; + } + } + + if (opts.ignore_case) + { + flags |= boost::regex::icase; + } + + boost::regex re(regex.data(), regex.size(), flags); /* a string_view need not be NUL-terminated, so pass its length explicitly */ + std::vector matches; + for (const auto &entry : zip::list(filepath)) + { + auto document{zip::read_file(filepath, entry)}; + std::string::const_iterator begin{document.begin()}; + std::string::const_iterator end{document.end()}; + boost::match_results match_result; + while (boost::regex_search(begin, end, match_result, re, + boost::match_not_null)) /* an empty match would leave begin unchanged and loop forever */ + { + match match; // FIXME: Rename variable or struct. 
+ match.filepath = entry; + match.text = match_result[0]; + match.context = context(match_result, opts.context); /* NOTE(review): prefix() starts at begin, so for later matches the leading context cannot reach back past the previous match */ + + matches.emplace_back(match); + begin = match_result[0].second; /* resume right after this match */ + } + } + + return matches; +} + +match_context +context(const boost::match_results &match, + std::uint64_t words) +{ /* Returns the text around match: walks words + 1 spaces outwards in the prefix and in the suffix; the first element is the text before the match, the second the text after it */ + const auto &prefix{match.prefix().str()}; + const auto &suffix{match.suffix().str()}; + size_t pos_before{prefix.length()}; + size_t pos_after{}; + ++words; + + while (words != 0) + { + if (pos_before <= prefix.length()) /* rfind() - 1 wraps past length() once no earlier space is left; stop then instead of restarting from the end */ + { + pos_before = prefix.rfind(' ', pos_before) - 1; + } + if (pos_after != std::string::npos) + { + const auto space{suffix.find(' ', pos_after)}; + pos_after = (space == std::string::npos) ? space : space + 1; /* keep npos so the search does not restart at 0 */ + } + words -= 1; + } + + return {prefix.substr(pos_before + 2), suffix.substr(0, pos_after - 1)}; } } // namespace epubgrep::search diff --git a/src/search.hpp b/src/search.hpp index 8a08600..08e7620 100644 --- a/src/search.hpp +++ b/src/search.hpp @@ -19,6 +19,8 @@ #include "fs-compat.hpp" +#include + #include #include #include @@ -39,7 +41,7 @@ struct match std::string page; //!< The page number, if available. }; -enum class regex_type +enum class regex_kind { basic, extended, @@ -48,7 +50,7 @@ enum class regex_type struct options { - regex_type regex{regex_type::basic}; + regex_kind regex{regex_kind::basic}; bool grep_like{false}; bool ignore_case{false}; bool nostrip{false}; @@ -59,6 +61,10 @@ struct options std::string_view regex, const options &opts); +[[nodiscard]] match_context +context(const boost::match_results &match, + std::uint64_t words); + } // namespace epubgrep::search #endif // EPUBGREP_SEARCH_HPP