/*  This file is part of epubgrep.
 *  Copyright © 2021 tastytea
 *
 *  This program is free software: you can redistribute it and/or modify
 *  it under the terms of the GNU Affero General Public License as published by
 *  the Free Software Foundation, version 3.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 *  GNU Affero General Public License for more details.
 *
 *  You should have received a copy of the GNU Affero General Public License
 *  along with this program. If not, see <https://www.gnu.org/licenses/>.
 */

#include "search.hpp"

#include "book.hpp"
#include "fs-compat.hpp"
#include "helpers.hpp"
#include "log.hpp"
#include "zip.hpp"

#include <boost/regex.hpp>
#include <fmt/format.h>
#include <fmt/ostream.h> // For compatibility with fmt 4.

#include <algorithm>
#include <array>
#include <cstdint>
#include <iterator>
#include <string>
#include <string_view>
#include <vector>

namespace epubgrep::search
{

using fmt::format;
using std::string;

// Search every file of the book at `filepath` for `regex`.
//
// The regex dialect and case sensitivity are taken from `opts` (opts.regex,
// opts.grep_like, opts.ignore_case); opts.raw is forwarded to book::read()
// and opts.context to context(). Returns one entry per non-empty match, in
// the order the files are iterated and the matches are found.
std::vector<match> search(const fs::path &filepath,
                          const std::string_view regex, const settings &opts)
{
    LOG(log::sev::info)
        << format(R"(Starting search in {0:s} using regex "{1:s}")", filepath,
                  regex);

    boost::regex::flag_type flags{};

    switch (opts.regex)
    {
    case options::regex_kind::basic:
    {
        flags = opts.grep_like ? boost::regex::grep : boost::regex::basic;
        break;
    }
    case options::regex_kind::extended:
    {
        flags = opts.grep_like ? boost::regex::egrep : boost::regex::extended;
        break;
    }
    case options::regex_kind::perl:
    {
        flags = boost::regex::perl;
        break;
    }
    }

    if (opts.ignore_case)
    {
        flags |= boost::regex::icase;
    }

    // A string_view is not guaranteed to be NUL-terminated, so pass the
    // pattern as an explicit [begin, end) range instead of a C string.
    const boost::regex re(regex.data(), regex.data() + regex.size(), flags);

    // match_not_null: a zero-length match would leave the search position
    // where it is and make the loop below spin forever.
    const auto first_flags{boost::match_default | boost::match_not_null};
    // match_prev_avail: when resuming after an earlier match, the character
    // before the search position is valid and must be consulted, otherwise
    // `^` and word boundaries would match as if the text started there.
    const auto resume_flags{first_flags | boost::match_prev_avail};

    std::vector<match> matches;

    auto book{book::read(filepath, opts.raw)};
    for (const auto &file : book.files)
    {
        const auto &doc{file.second};
        const string::const_iterator begin_text{doc.text->begin()};
        const string::const_iterator end{doc.text->end()};
        string::const_iterator begin{begin_text};
        boost::match_results<string::const_iterator> match_result;

        while (boost::regex_search(begin, end, match_result, re,
                                   begin == begin_text ? first_flags
                                                       : resume_flags))
        {
            // Fill the new entry in place; this also avoids a local variable
            // that would shadow the `match` type.
            auto &found{matches.emplace_back()};
            found.filepath_epub = filepath;
            found.filepath_inside = file.first;
            found.text = match_result[0];
            found.context = context(match_result, opts.context);

            // Offset of the match from the start of this file's text.
            const auto pos = static_cast<string::size_type>(
                std::distance(begin_text, match_result[0].begin()));
            found.headline = headline(doc, pos);
            found.page = page(doc, pos);

            // Continue right behind this match. The match is never empty
            // (match_not_null), so this always makes progress.
            begin = match_result[0].end();
        }
    }

    return matches;
}

// Extract the text surrounding `match`.
//
// Walks outwards from the match through match.prefix() and match.suffix(),
// `words + 1` times skipping to the next run of whitespace and past it. The
// text covered that way is returned as {before, after}, with the leading
// whitespace of `before` and the trailing whitespace of `after` removed.
// Returns a default-constructed match_context if `words` is 0.
match_context context(const boost::match_results<string::const_iterator> &match,
                      std::uint64_t words)
{
    if (words == 0)
    {
        return {};
    }

    // One extra step, as in the original algorithm; presumably it accounts
    // for the (possibly partial) word directly adjacent to the match.
    ++words;

    // The prefix is walked backwards, from the match towards the text start.
    const auto &rbegin_before{std::reverse_iterator(match.prefix().end())};
    const auto &rend_before{std::reverse_iterator(match.prefix().begin())};
    const auto &begin_after{match.suffix().begin()};
    const auto &end_after{match.suffix().end()};
    auto pos_before{rbegin_before};
    auto pos_after{begin_after};
    const std::array whitespace{' ', '\n', '\r', '\t'};

    while (words != 0)
    {
        if (pos_before != rend_before)
        {
            pos_before = std::find_first_of(pos_before, rend_before,
                                            whitespace.begin(),
                                            whitespace.end());
            while (pos_before != rend_before
                   && helpers::is_whitespace(*pos_before))
            {
                ++pos_before;
            }
        }

        if (pos_after != end_after)
        {
            pos_after = std::find_first_of(pos_after, end_after,
                                           whitespace.begin(),
                                           whitespace.end());
            while (pos_after != end_after
                   && helpers::is_whitespace(*pos_after))
            {
                ++pos_after;
            }
        }

        words -= 1;
    }

    // The reverse range [rbegin_before, pos_before) is the forward range
    // [pos_before.base(), rbegin_before.base()).
    string before(pos_before.base(), rbegin_before.base());
    string after(begin_after, pos_after);

    const auto is_space{[](const char c)
                        { return helpers::is_whitespace(c); }};

    // Both trims are safe on empty and on all-whitespace strings.
    before.erase(before.begin(),
                 std::find_if_not(before.begin(), before.end(), is_space));
    after.erase(std::find_if_not(after.rbegin(), after.rend(), is_space)
                    .base(),
                after.end());

    return {before, after};
}

} // namespace epubgrep::search