From 8ab7d0f6556cb9585ecf24c3beeaeffa7cad65dc Mon Sep 17 00:00:00 2001
From: tastytea
Date: Mon, 24 May 2021 17:18:10 +0200
Subject: [PATCH] Extract headlines.

---
 src/search.cpp | 26 ++++++++++++++++++++++++++
 src/search.hpp |  3 +++
 2 files changed, 29 insertions(+)

diff --git a/src/search.cpp b/src/search.cpp
index 8061566..f27eb63 100644
--- a/src/search.cpp
+++ b/src/search.cpp
@@ -71,6 +71,8 @@ std::vector search(const fs::path &filepath, std::string_view regex,
         std::string::const_iterator begin{document.begin()};
         std::string::const_iterator end{document.end()};
         boost::match_results<std::string::const_iterator> match_result;
+        std::string last_headline;
+
         while (boost::regex_search(begin, end, match_result, re,
                                    boost::match_default))
         {
@@ -78,6 +80,12 @@ std::vector search(const fs::path &filepath, std::string_view regex,
             match.filepath = entry;
             match.text = match_result[0];
             match.context = context(match_result, opts.context);
+            const auto current_headline{headline(match_result.prefix().str())};
+            if (!current_headline.empty())
+            {
+                last_headline = current_headline;
+            }
+            match.headline = last_headline;
             matches.emplace_back(match);
 
             begin = match_result[0].second;
@@ -94,6 +102,7 @@ void cleanup_text(std::string &text)
         // Don't strip headlines. We need them later on.
         if (text[pos + 1] == 'h' || text.substr(pos + 1, 2) == "/h")
         {
+            ++pos;
             continue;
         }
         text.erase(pos, text.find('>', pos) + 1 - pos);
@@ -142,4 +151,21 @@ context(const boost::match_results<std::string::const_iterator> &match,
     return {prefix.substr(pos_before + 2), suffix.substr(0, pos_after - 1)};
 }
 
+[[nodiscard]] std::string headline(const std::string_view prefix)
+{
+    size_t pos{prefix.length()};
+    while ((pos = prefix.rfind("<h", pos)) != std::string_view::npos)
+    {
+        if (boost::regex_match(std::string{prefix.substr(pos, 4)},
+                               boost::regex{"<h[1-6][> ]"}))
+        {
+            pos = prefix.find('>', pos) + 1;
+            return std::string{prefix.substr(pos, prefix.find('<', pos) - pos)};
+        }
+        pos -= 2;
+    }
+
+    return {};
+}
+
 } // namespace epubgrep::search
diff --git a/src/search.hpp b/src/search.hpp
index 70572cf..6510bb3 100644
--- a/src/search.hpp
+++ b/src/search.hpp
@@ -70,6 +70,9 @@ void cleanup_text(std::string &text);
 context(const boost::match_results<std::string::const_iterator> &match,
         std::uint64_t words);
 
+//! Return last headline if possible.
+[[nodiscard]] std::string headline(std::string_view prefix);
+
 } // namespace epubgrep::search
 
 #endif // EPUBGREP_SEARCH_HPP