/*  This file is part of epubgrep.
 *  Copyright © 2021 tastytea
 *
 *  This program is free software: you can redistribute it and/or modify
 *  it under the terms of the GNU Affero General Public License as published by
 *  the Free Software Foundation, version 3.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU Affero General Public License for more details.
 *
 *  You should have received a copy of the GNU Affero General Public License
 *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

#include "search.hpp"

#include "fs-compat.hpp"
#include "zip.hpp"

#include <boost/regex.hpp>

#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <string>
#include <string_view>
#include <utility>
#include <vector>

namespace epubgrep::search
{

namespace
{

//! Offset in `prefix` where the context in front of a match starts: one past
//! the (`words` + 1)-th space counted from the end, or 0 if `prefix` contains
//! fewer spaces than that.
std::size_t context_begin(const std::string &prefix, const std::uint64_t words)
{
    std::size_t search_end{prefix.size()}; // Spaces at >= search_end are done.
    std::uint64_t spaces_passed{0};

    while (search_end > 0)
    {
        const auto space{prefix.rfind(' ', search_end - 1)};
        if (space == std::string::npos)
        {
            break;
        }
        if (spaces_passed == words)
        {
            return space + 1;
        }
        ++spaces_passed;
        search_end = space;
    }

    return 0;
}

//! Number of characters of `suffix` that belong to the context behind a match:
//! everything in front of the (`words` + 1)-th space, or `npos` (meaning “all
//! of it”) if `suffix` contains fewer spaces than that.
std::size_t context_length(const std::string &suffix,
                           const std::uint64_t words)
{
    std::size_t search_begin{0};
    std::uint64_t spaces_passed{0};

    while (true)
    {
        const auto space{suffix.find(' ', search_begin)};
        if (space == std::string::npos)
        {
            break;
        }
        if (spaces_passed == words)
        {
            return space;
        }
        ++spaces_passed;
        search_begin = space + 1;
    }

    return std::string::npos;
}

} // namespace

//! Search every entry that zip::list() reports for `filepath` for `regex` and
//! return one match per non-empty, non-overlapping hit.
//!
//! `opts.regex`, `opts.grep_like` and `opts.ignore_case` select the regex
//! syntax flags; unless `opts.raw` is set the entry text is passed through
//! cleanup_text() first; `opts.context` is forwarded to context().
std::vector<match> search(const fs::path &filepath, std::string_view regex,
                          const options &opts)
{
    boost::regex::flag_type flags{};

    switch (opts.regex)
    {
    case regex_kind::basic:
    {
        flags = opts.grep_like ? boost::regex::grep : boost::regex::basic;
        break;
    }
    case regex_kind::extended:
    {
        flags = opts.grep_like ? boost::regex::egrep : boost::regex::extended;
        break;
    }
    case regex_kind::perl:
    {
        flags = boost::regex::perl;
        break;
    }
    }

    if (opts.ignore_case)
    {
        flags |= boost::regex::icase;
    }

    // A string_view is not guaranteed to be NUL-terminated, so hand over an
    // explicit character range instead of a C string.
    const boost::regex re(regex.data(), regex.data() + regex.size(), flags);

    std::vector<match> matches;
    for (const auto &entry : zip::list(filepath))
    {
        auto document{zip::read_file(filepath, entry)};
        if (!opts.raw)
        {
            cleanup_text(document);
        }

        std::string::const_iterator begin{document.begin()};
        const std::string::const_iterator end{document.end()};
        boost::match_results<std::string::const_iterator> match_result;

        // match_not_null: an expression that can match the empty sequence
        // (e.g. “a*”) would otherwise never move `begin` forward and this
        // loop would not terminate.
        while (boost::regex_search(begin, end, match_result, re,
                                   boost::match_default
                                       | boost::match_not_null))
        {
            match hit;
            hit.filepath = entry;
            hit.text = match_result[0];
            hit.context = context(match_result, opts.context);
            matches.push_back(std::move(hit));

            // Continue right behind the current match.
            begin = match_result[0].second;
        }
    }

    return matches;
}

//! Return the text surrounding `match`: first the tail of match.prefix(), then
//! the head of match.suffix().
//!
//! Words are delimited by single spaces. Each side contains the (possibly
//! empty) word fragment touching the match plus up to `words` further words.
//! If a side has fewer words than requested, that whole side is returned.
match_context
context(const boost::match_results<std::string::const_iterator> &match,
        std::uint64_t words)
{
    const std::string prefix{match.prefix().str()};
    const std::string suffix{match.suffix().str()};

    return {prefix.substr(context_begin(prefix, words)),
            suffix.substr(0, context_length(suffix, words))};
}

//! Strip markup from `text` in place and normalise its whitespace:
//!   1. remove everything from each '<' up to and including the next '>'
//!      (an unterminated tag swallows the rest of the text),
//!   2. remove carriage returns,
//!   3. turn newlines into spaces,
//!   4. collapse runs of spaces into a single space.
void cleanup_text(std::string &text)
{
    // Only ever erase at a position where a '<' was actually found, so that
    // text in front of the first tag is kept.
    for (auto pos{text.find('<')}; pos != std::string::npos;
         pos = text.find('<', pos))
    {
        const auto closing{text.find('>', pos)};
        if (closing == std::string::npos)
        {
            text.erase(pos);
            break;
        }
        text.erase(pos, closing + 1 - pos);
    }

    text.erase(std::remove(text.begin(), text.end(), '\r'), text.end());

    std::replace(text.begin(), text.end(), '\n', ' ');

    text.erase(std::unique(text.begin(), text.end(),
                           [](const char lhs, const char rhs)
                           { return lhs == ' ' && rhs == ' '; }),
               text.end());
}

} // namespace epubgrep::search