// epubgrep/src/search.hpp

/* This file is part of epubgrep.
* Copyright © 2021 tastytea <tastytea@tastytea.de>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, version 3.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef EPUBGREP_SEARCH_HPP
#define EPUBGREP_SEARCH_HPP
#include "fs-compat.hpp"
#include "options.hpp"
#include <boost/regex.hpp>
#include <cstdint>
#include <string>
#include <string_view>
#include <utility>
#include <vector>
namespace epubgrep::search
{
//! Context around a match, stored as a pair of strings.
//! NOTE(review): presumably `first` holds the text before the match and
//! `second` the text after it; confirm against the implementation of context().
using match_context = std::pair<std::string, std::string>;
//! A single search hit together with information about where it was found.
struct match
{
    //! File path of the EPUB.
    fs::path filepath_epub;

    //! Matched string.
    std::string text;

    //! The context around the match.
    match_context context;

    //! The file path of the matched line.
    std::string filepath_inside;

    //! The last headline, if available.
    std::string headline;

    //! The page number, if available.
    std::string page;
};
//! Options that control how a search is performed.
struct settings
{
    //! Kind of regular expression to use; defaults to basic.
    options::regex_kind regex{options::regex_kind::basic};

    //! Off by default.
    //! NOTE(review): presumably selects grep-like behaviour; confirm against
    //! the implementation of search().
    bool grep_like{false};

    //! Off by default.
    //! NOTE(review): presumably makes matching case-insensitive; confirm.
    bool ignore_case{false};

    //! Off by default.
    //! NOTE(review): presumably searches the raw text without cleanup_text();
    //! confirm against the implementation of search().
    bool raw{false};

    //! Defaults to 0.
    //! NOTE(review): presumably the number of context words handed to
    //! context(); confirm.
    std::uint64_t context{0};
};
//! Search file, return matches.
//!
//! @param filepath Path of the file to search.
//!                 NOTE(review): presumably an EPUB file; confirm.
//! @param regex    The regular expression to search for.
//! @param opts     Settings for this search, see settings.
//!
//! @return The matches that were found. The function is [[nodiscard]], so
//!         callers are expected to use the result.
[[nodiscard]] std::vector<match> search(const fs::path &filepath,
std::string_view regex,
const settings &opts);
//! Strip HTML, remove newlines, condense spaces.
//!
//! @param text The text to clean up. It is taken by non-const reference and
//!             nothing is returned, so the cleaned text is presumably written
//!             back into this argument.
void cleanup_text(std::string &text);
//! Return words before and after the match.
//!
//! @param match Result of a Boost.Regex match over std::string iterators.
//! @param words NOTE(review): presumably the number of words to return on
//!              each side of the match; confirm against the implementation.
//!
//! @return The surrounding words as a match_context pair. The function is
//!         [[nodiscard]], so callers are expected to use the result.
[[nodiscard]] match_context
context(const boost::match_results<std::string::const_iterator> &match,
std::uint64_t words);
//! Return last headline if possible.
//!
//! @param prefix NOTE(review): presumably the text that precedes the match, in
//!               which the headline is looked up; confirm against the
//!               implementation.
//!
//! @return The headline as a string.
//!         NOTE(review): presumably empty when no headline is found; confirm.
[[nodiscard]] std::string headline(std::string_view prefix);
//! Return current page if possible.
//!
//! @param prefix NOTE(review): presumably the text that precedes the match, in
//!               which the page marker is looked up; confirm against the
//!               implementation.
//!
//! @return The page as a string.
//!         NOTE(review): presumably empty when no page is found; confirm.
[[nodiscard]] std::string page(std::string_view prefix);
} // namespace epubgrep::search
#endif // EPUBGREP_SEARCH_HPP