Add an owning pointer (std::unique_ptr) holding a copy of the preferred text version (raw or cleaned) to document.

This commit is contained in:
tastytea 2021-08-20 14:52:34 +02:00
parent d0738891c2
commit b134bd0301
Signed by: tastytea
GPG Key ID: CFC39497F1B26E07
3 changed files with 9 additions and 11 deletions

View File

@ -23,6 +23,8 @@
#include <boost/regex.hpp>
#include <algorithm>
#include <memory>
#include <string>
#include <string_view>
#include <vector>
@ -57,8 +59,9 @@ book read(const fs::path filepath, const bool raw)
else
{
doc.text_raw = zip::read_file(filepath, entry);
doc.text = std::make_unique<std::string>(doc.text_raw);
}
current_book.files.emplace_back(entry, doc);
current_book.files.emplace_back(entry, std::move(doc));
}
return current_book;
@ -143,6 +146,7 @@ document process_page(const std::string_view text)
}
doc.text_cleaned = output;
doc.text = std::make_unique<string>(doc.text_cleaned);
return doc;
}

View File

@ -20,6 +20,7 @@
#include "fs-compat.hpp"
#include <map>
#include <memory>
#include <string>
#include <string_view>
#include <utility>
@ -35,6 +36,7 @@ struct document
{
string text_raw; //!< HTML page
string text_cleaned; //!< Plain text page
std::unique_ptr<string> text; //!< Pointer to preferred text version
std::map<size_t, string> headlines; //!< pos, title
std::map<size_t, string> pages; //!< pos, page
string language; //!< Page language

View File

@ -78,16 +78,8 @@ std::vector<match> search(const fs::path &filepath,
for (const auto &file : book.files)
{
const auto &doc{file.second};
const auto &text{[&doc, &opts]
{
if (!opts.raw)
{
return doc.text_cleaned;
}
return doc.text_raw;
}()};
string::const_iterator begin{text.begin()};
string::const_iterator end{text.end()};
string::const_iterator begin{doc.text->begin()};
string::const_iterator end{doc.text->end()};
auto begin_text{begin};
boost::match_results<string::const_iterator> match_result;