Add language to books, documents and matches.

Currently only the book's language is actually read; documents and matches simply inherit it.
This commit is contained in:
tastytea 2021-08-20 16:52:01 +02:00
parent fca719634a
commit 299063e02c
Signed by: tastytea
GPG Key ID: CFC39497F1B26E07
3 changed files with 34 additions and 5 deletions

View File

@ -44,6 +44,8 @@ book read(const fs::path filepath, const bool raw)
{
using helpers::unescape_html;
DEBUGLOG << "Processing book " << filepath;
std::vector<string> epub_filepaths{[&filepath, raw]
{
if (!raw)
@ -54,9 +56,33 @@ book read(const fs::path filepath, const bool raw)
}()};
book current_book;
// Determine the book's language from the dc:language element of its OPF
// file. The lambda is invoked immediately and its result is stored in
// current_book.language.
current_book.language = [&filepath]() -> string
{
pugi::xml_document xml;
// Look up the path of the OPF file for this book, then read that file's
// contents out of the archive at filepath.
auto opf_file_path{get_opf_file_path(filepath)};
const std::string opf_file{
zip::read_file(filepath, opf_file_path.string())};
// Parse the OPF contents as XML.
const auto result{xml.load_buffer(&opf_file[0], opf_file.size())};
if (result)
{
// Try the unprefixed element names first.
auto lang{
xml.child("package").child("metadata").child("dc:language")};
if (lang == nullptr)
{
// Not found: retry with "opf:"-prefixed package and metadata names.
lang = xml.child("opf:package")
.child("opf:metadata")
.child("dc:language");
}
// NOTE(review): if neither lookup found a node, this presumably yields
// an empty string (pugixml's as_string() default) — confirm.
return lang.text().as_string();
}
// Parsing failed: report no language. Nothing is logged here.
return "";
}();
DEBUGLOG << "Book language detected: " << current_book.language;
for (const auto &entry : epub_filepaths)
{
DEBUGLOG << "Processing " << entry;
DEBUGLOG << "Processing document " << entry;
document doc;
if (!raw)
{
@ -67,6 +93,7 @@ book read(const fs::path filepath, const bool raw)
doc.text_raw = zip::read_file(filepath, entry);
doc.text = std::make_unique<std::string>(doc.text_raw);
}
doc.language = current_book.language; // FIXME: Get language of doc.
current_book.files.emplace_back(entry, std::move(doc));
}
@ -206,7 +233,7 @@ fs::path get_opf_file_path(const fs::path &zipfile)
LOG(log::sev::error) << result.description() << '\n';
return fs::path{};
};
}
std::vector<string> list_spine(const fs::path &filepath)
{

View File

@ -95,6 +95,7 @@ std::vector<match> search(const fs::path &filepath,
std::distance(begin_text, match_result[0].begin()));
match.headline = headline(doc, pos);
match.page = page(doc, pos);
match.language = doc.language; // FIXME: Get language of match.
matches.emplace_back(match);
begin = match_result[0].end();

View File

@ -43,7 +43,8 @@ struct match
std::string filepath_inside; //!< The file path of the matched line.
std::string headline; //!< The last headline, if available.
std::string page; //!< The page number, if available.
};
std::string language; //!< Match language.
} __attribute__((aligned(128)));
struct settings
{
@ -52,14 +53,14 @@ struct settings
bool ignore_case{false};
bool raw{false};
std::uint64_t context{0};
};
} __attribute__((aligned(16)));
struct file_in_epub
{
std::string text;
std::map<size_t, std::string> headlines;
std::map<size_t, std::string> pages;
};
} __attribute__((aligned(128)));
//! Search file, return matches.
[[nodiscard]] std::vector<match> search(const fs::path &filepath,