diff --git a/src/book.cpp b/src/book.cpp index 8a55ec5..0431600 100644 --- a/src/book.cpp +++ b/src/book.cpp @@ -21,7 +21,11 @@ #include "log.hpp" #include "zip.hpp" +#include #include +#include +#include // For compatibility with fmt 4. +#include #include #include @@ -32,6 +36,8 @@ namespace epubgrep::book { +using boost::locale::translate; +using fmt::format; using std::string; book read(const fs::path filepath, const bool raw) @@ -42,7 +48,7 @@ book read(const fs::path filepath, const bool raw) { if (!raw) { - return zip::list_spine(filepath); + return list_spine(filepath); } return zip::list(filepath); }()}; @@ -183,4 +189,80 @@ string page(const document &doc, const size_t pos) return string(last); } +std::vector list_spine(const fs::path &filepath) +{ + const auto opf_file_path{ + [&filepath] + { + pugi::xml_document xml; + const std::string container{ + zip::read_file(filepath, "META-INF/container.xml")}; + const auto result{xml.load_buffer(&container[0], container.size())}; + if (result) + { + return fs::path{xml.child("container") + .child("rootfiles") + .first_child() + .attribute("full-path") + .value()}; + } + LOG(log::sev::error) << result.description() << '\n'; + + return fs::path{}; + }()}; + + std::vector spine_filepaths; + if (!opf_file_path.empty()) + { + DEBUGLOG << "Parsing " << opf_file_path; + pugi::xml_document xml; + const std::string opf_file{ + zip::read_file(filepath, opf_file_path.string())}; + const auto result{xml.load_buffer(&opf_file[0], opf_file.size())}; + if (result) + { + auto manifest{xml.child("package").child("manifest")}; + if (manifest == nullptr) + { + manifest = xml.child("opf:package").child("opf:manifest"); + } + auto spine{xml.child("package").child("spine")}; + if (spine == nullptr) + { + spine = xml.child("opf:package").child("opf:spine"); + } + + for (const auto &itemref : spine) + { + const auto &idref{itemref.attribute("idref").value()}; + const auto &item{manifest.find_child_by_attribute("id", idref)}; + auto href{helpers::urldecode(item.attribute("href").value())}; + if (href[0] != '/') + { + href = (opf_file_path.parent_path() /= href); + } + DEBUGLOG << "Found in spine: " << href; + spine_filepaths.emplace_back(href); + } + } + else + { + LOG(log::sev::error) << "XML: " << result.description() << '\n'; + } + } + + if (opf_file_path.empty() || spine_filepaths.empty()) + { + LOG(log::sev::error) + << format(translate("{0:s} is damaged. Could not read spine. " + "Skipping file.\n") + .str() + .data(), + filepath); + return {}; + } + + return spine_filepaths; +} + } // namespace epubgrep::book diff --git a/src/book.hpp b/src/book.hpp index 16071ae..bcd3ff7 100644 --- a/src/book.hpp +++ b/src/book.hpp @@ -62,6 +62,9 @@ struct book //! Return current page if possible. [[nodiscard]] std::string page(const document &doc, size_t pos); +//! Returns the files in the EPUB “spine” (all pages that are actually text). +[[nodiscard]] std::vector list_spine(const fs::path &filepath); + } // namespace epubgrep::book #endif // EPUBGREP_BOOK_HPP diff --git a/src/zip.cpp b/src/zip.cpp index 1c6f858..6907f8f 100644 --- a/src/zip.cpp +++ b/src/zip.cpp @@ -25,7 +25,6 @@ #include #include #include // For compatibility with fmt 4. -#include #include #include @@ -165,79 +164,4 @@ void close_file(struct archive *zipfile, const fs::path &filepath) } } -std::vector list_spine(const fs::path &filepath) -{ - const auto opf_file_path{ - [&filepath] - { - pugi::xml_document xml; - const std::string container{ - read_file(filepath, "META-INF/container.xml")}; - const auto result{xml.load_buffer(&container[0], container.size())}; - if (result) - { - return fs::path{xml.child("container") - .child("rootfiles") - .first_child() - .attribute("full-path") - .value()}; - } - LOG(log::sev::error) << result.description() << '\n'; - - return fs::path{}; - }()}; - - std::vector spine_filepaths; - if (!opf_file_path.empty()) - { - DEBUGLOG << "Parsing " << opf_file_path; - pugi::xml_document xml; - const std::string opf_file{read_file(filepath, opf_file_path.string())}; - const auto result{xml.load_buffer(&opf_file[0], opf_file.size())}; - if (result) - { - auto manifest{xml.child("package").child("manifest")}; - if (manifest == nullptr) - { - manifest = xml.child("opf:package").child("opf:manifest"); - } - auto spine{xml.child("package").child("spine")}; - if (spine == nullptr) - { - spine = xml.child("opf:package").child("opf:spine"); - } - - for (const auto &itemref : spine) - { - const auto &idref{itemref.attribute("idref").value()}; - const auto &item{manifest.find_child_by_attribute("id", idref)}; - auto href{helpers::urldecode(item.attribute("href").value())}; - if (href[0] != '/') - { - href = (opf_file_path.parent_path() /= href); - } - DEBUGLOG << "Found in spine: " << href; - spine_filepaths.emplace_back(href); - } - } - else - { - LOG(log::sev::error) << "XML: " << result.description() << '\n'; - } - } - - if (opf_file_path.empty() || spine_filepaths.empty()) - { - LOG(log::sev::error) - << format(translate("{0:s} is damaged. Could not read spine. " - "Skipping file.\n") - .str() - .data(), - filepath); - return {}; - } - - return spine_filepaths; -} - } // namespace epubgrep::zip diff --git a/src/zip.hpp b/src/zip.hpp index 05f04b0..e8719a5 100644 --- a/src/zip.hpp +++ b/src/zip.hpp @@ -43,9 +43,6 @@ namespace epubgrep::zip //! Close zip file. void close_file(struct archive *zipfile, const fs::path &filepath); -//! Returns the files in the EPUB “spine” (all pages that are actually text). -[[nodiscard]] std::vector list_spine(const fs::path &filepath); - //! It's std::runtime_error, but with another name. class exception : public std::runtime_error {