Add pointer to preferred text version (raw or cleaned) to document.
This commit is contained in:
parent
d0738891c2
commit
b134bd0301
@ -23,6 +23,8 @@
|
|||||||
|
|
||||||
#include <boost/regex.hpp>
|
#include <boost/regex.hpp>
|
||||||
|
|
||||||
|
#include <algorithm>
|
||||||
|
#include <memory>
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <string_view>
|
#include <string_view>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
@ -57,8 +59,9 @@ book read(const fs::path filepath, const bool raw)
|
|||||||
else
|
else
|
||||||
{
|
{
|
||||||
doc.text_raw = zip::read_file(filepath, entry);
|
doc.text_raw = zip::read_file(filepath, entry);
|
||||||
|
doc.text = std::make_unique<std::string>(doc.text_raw);
|
||||||
}
|
}
|
||||||
current_book.files.emplace_back(entry, doc);
|
current_book.files.emplace_back(entry, std::move(doc));
|
||||||
}
|
}
|
||||||
|
|
||||||
return current_book;
|
return current_book;
|
||||||
@ -143,6 +146,7 @@ document process_page(const std::string_view text)
|
|||||||
}
|
}
|
||||||
|
|
||||||
doc.text_cleaned = output;
|
doc.text_cleaned = output;
|
||||||
|
doc.text = std::make_unique<string>(doc.text_cleaned);
|
||||||
|
|
||||||
return doc;
|
return doc;
|
||||||
}
|
}
|
||||||
|
@ -20,6 +20,7 @@
|
|||||||
#include "fs-compat.hpp"
|
#include "fs-compat.hpp"
|
||||||
|
|
||||||
#include <map>
|
#include <map>
|
||||||
|
#include <memory>
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <string_view>
|
#include <string_view>
|
||||||
#include <utility>
|
#include <utility>
|
||||||
@ -35,6 +36,7 @@ struct document
|
|||||||
{
|
{
|
||||||
string text_raw; //!< HTML page
|
string text_raw; //!< HTML page
|
||||||
string text_cleaned; //!< Plain text page
|
string text_cleaned; //!< Plain text page
|
||||||
|
std::unique_ptr<string> text; //!< Pointer to preferred text version
|
||||||
std::map<size_t, string> headlines; //!< pos, title
|
std::map<size_t, string> headlines; //!< pos, title
|
||||||
std::map<size_t, string> pages; //!< pos, page
|
std::map<size_t, string> pages; //!< pos, page
|
||||||
string language; //!< Page language
|
string language; //!< Page language
|
||||||
|
@ -78,16 +78,8 @@ std::vector<match> search(const fs::path &filepath,
|
|||||||
for (const auto &file : book.files)
|
for (const auto &file : book.files)
|
||||||
{
|
{
|
||||||
const auto &doc{file.second};
|
const auto &doc{file.second};
|
||||||
const auto &text{[&doc, &opts]
|
string::const_iterator begin{doc.text->begin()};
|
||||||
{
|
string::const_iterator end{doc.text->end()};
|
||||||
if (!opts.raw)
|
|
||||||
{
|
|
||||||
return doc.text_cleaned;
|
|
||||||
}
|
|
||||||
return doc.text_raw;
|
|
||||||
}()};
|
|
||||||
string::const_iterator begin{text.begin()};
|
|
||||||
string::const_iterator end{text.end()};
|
|
||||||
auto begin_text{begin};
|
auto begin_text{begin};
|
||||||
boost::match_results<string::const_iterator> match_result;
|
boost::match_results<string::const_iterator> match_result;
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user