2021-05-24 07:52:36 +02:00
|
|
|
/* This file is part of epubgrep.
 * Copyright © 2021 tastytea <tastytea@tastytea.de>
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as published by
 * the Free Software Foundation, version 3.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
|
|
|
|
|
|
|
|
#ifndef EPUBGREP_SEARCH_HPP
|
|
|
|
#define EPUBGREP_SEARCH_HPP
|
|
|
|
|
|
|
|
#include "fs-compat.hpp"
|
2021-05-27 17:20:00 +02:00
|
|
|
#include "options.hpp"
|
2021-05-24 07:52:36 +02:00
|
|
|
|
2021-05-24 15:35:49 +02:00
|
|
|
#include <boost/regex.hpp>
|
|
|
|
|
2021-05-24 07:52:36 +02:00
|
|
|
#include <cstdint>
|
|
|
|
#include <string>
|
|
|
|
#include <string_view>
|
|
|
|
#include <utility>
|
|
|
|
#include <vector>
|
|
|
|
|
|
|
|
namespace epubgrep::search
|
|
|
|
{
|
|
|
|
|
|
|
|
//! Pair of context strings belonging to one match. Presumably `first` is the
//! text before the match and `second` the text after it (TODO confirm against
//! the implementation of context()).
using match_context = std::pair<std::string, std::string>;
|
|
|
|
|
|
|
|
struct match
|
|
|
|
{
|
2021-06-01 19:15:00 +02:00
|
|
|
fs::path filepath_epub; //!< File path of the EPUB.
|
|
|
|
std::string text; //!< Matched string.
|
|
|
|
match_context context; //!< The context around the match.
|
|
|
|
std::string filepath_inside; //!< The file path of the matched line.
|
|
|
|
std::string headline; //!< The last headline, if available.
|
|
|
|
std::string page; //!< The page number, if available.
|
2021-05-24 07:52:36 +02:00
|
|
|
};
|
|
|
|
|
2021-05-27 17:20:00 +02:00
|
|
|
struct settings
|
2021-05-24 13:13:15 +02:00
|
|
|
{
|
2021-05-27 17:20:00 +02:00
|
|
|
options::regex_kind regex{options::regex_kind::basic};
|
2021-05-24 13:13:15 +02:00
|
|
|
bool grep_like{false};
|
2021-05-24 07:52:36 +02:00
|
|
|
bool ignore_case{false};
|
2021-05-24 16:01:41 +02:00
|
|
|
bool raw{false};
|
2021-05-24 07:52:36 +02:00
|
|
|
std::uint64_t context{0};
|
|
|
|
};
|
|
|
|
|
2021-05-24 16:23:07 +02:00
|
|
|
//! Search file, return matches.
|
2021-05-24 13:00:03 +02:00
|
|
|
[[nodiscard]] std::vector<match> search(const fs::path &filepath,
|
|
|
|
std::string_view regex,
|
2021-05-27 17:20:00 +02:00
|
|
|
const settings &opts);
|
2021-05-24 07:52:36 +02:00
|
|
|
|
2021-05-24 16:23:07 +02:00
|
|
|
//! Clean up a piece of text: strip HTML, remove newlines, condense spaces.
//!
//! @param text The text to clean up. It is taken by non-const reference and
//!             nothing is returned, so the result is written back into it.
void cleanup_text(std::string &text);
|
|
|
|
|
|
|
|
//! Return words before and after the match.
|
2021-05-24 15:35:49 +02:00
|
|
|
[[nodiscard]] match_context
|
|
|
|
context(const boost::match_results<std::string::const_iterator> &match,
|
|
|
|
std::uint64_t words);
|
|
|
|
|
2021-05-24 17:18:10 +02:00
|
|
|
//! Return the last headline, if possible.
//!
//! @param prefix NOTE(review): presumably the text preceding the match, in
//!               which the headline is looked up; confirm in the
//!               implementation.
//!
//! @return The headline as a string. The result must not be discarded.
[[nodiscard]] std::string
headline(std::string_view prefix);
|
|
|
|
|
2021-05-24 18:56:43 +02:00
|
|
|
//! Return the current page, if possible.
//!
//! @param prefix NOTE(review): presumably the text preceding the match, in
//!               which the page marker is looked up; confirm in the
//!               implementation.
//!
//! @return The page as a string. The result must not be discarded.
[[nodiscard]] std::string
page(std::string_view prefix);
|
|
|
|
|
2021-05-24 07:52:36 +02:00
|
|
|
} // namespace epubgrep::search
|
|
|
|
|
|
|
|
#endif // EPUBGREP_SEARCH_HPP
|