Add pointer to preferred text version (raw or cleaned) to document.

This commit is contained in:
tastytea 2021-08-20 14:52:34 +02:00
parent d0738891c2
commit b134bd0301
Signed by: tastytea
GPG Key ID: CFC39497F1B26E07
3 changed files with 9 additions and 11 deletions

View File

@@ -23,6 +23,8 @@
#include <boost/regex.hpp> #include <boost/regex.hpp>
#include <algorithm>
#include <memory>
#include <string> #include <string>
#include <string_view> #include <string_view>
#include <vector> #include <vector>
@@ -57,8 +59,9 @@ book read(const fs::path filepath, const bool raw)
else else
{ {
doc.text_raw = zip::read_file(filepath, entry); doc.text_raw = zip::read_file(filepath, entry);
doc.text = std::make_unique<std::string>(doc.text_raw);
} }
current_book.files.emplace_back(entry, doc); current_book.files.emplace_back(entry, std::move(doc));
} }
return current_book; return current_book;
@@ -143,6 +146,7 @@ document process_page(const std::string_view text)
} }
doc.text_cleaned = output; doc.text_cleaned = output;
doc.text = std::make_unique<string>(doc.text_cleaned);
return doc; return doc;
} }

View File

@@ -20,6 +20,7 @@
#include "fs-compat.hpp" #include "fs-compat.hpp"
#include <map> #include <map>
#include <memory>
#include <string> #include <string>
#include <string_view> #include <string_view>
#include <utility> #include <utility>
@@ -35,6 +36,7 @@ struct document
{ {
string text_raw; //!< HTML page string text_raw; //!< HTML page
string text_cleaned; //!< Plain text page string text_cleaned; //!< Plain text page
std::unique_ptr<string> text; //!< Pointer to preferred text version
std::map<size_t, string> headlines; //!< pos, title std::map<size_t, string> headlines; //!< pos, title
std::map<size_t, string> pages; //!< pos, page std::map<size_t, string> pages; //!< pos, page
string language; //!< Page language string language; //!< Page language

View File

@@ -78,16 +78,8 @@ std::vector<match> search(const fs::path &filepath,
for (const auto &file : book.files) for (const auto &file : book.files)
{ {
const auto &doc{file.second}; const auto &doc{file.second};
const auto &text{[&doc, &opts] string::const_iterator begin{doc.text->begin()};
{ string::const_iterator end{doc.text->end()};
if (!opts.raw)
{
return doc.text_cleaned;
}
return doc.text_raw;
}()};
string::const_iterator begin{text.begin()};
string::const_iterator end{text.end()};
auto begin_text{begin}; auto begin_text{begin};
boost::match_results<string::const_iterator> match_result; boost::match_results<string::const_iterator> match_result;