Move list_spine() from zip:: to book::.
This commit is contained in:
parent
b134bd0301
commit
d2aff45018
84
src/book.cpp
84
src/book.cpp
|
@ -21,7 +21,11 @@
|
||||||
#include "log.hpp"
|
#include "log.hpp"
|
||||||
#include "zip.hpp"
|
#include "zip.hpp"
|
||||||
|
|
||||||
|
#include <boost/locale/message.hpp>
|
||||||
#include <boost/regex.hpp>
|
#include <boost/regex.hpp>
|
||||||
|
#include <fmt/format.h>
|
||||||
|
#include <fmt/ostream.h> // For compatibility with fmt 4.
|
||||||
|
#include <pugixml.hpp>
|
||||||
|
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#include <memory>
|
#include <memory>
|
||||||
|
@ -32,6 +36,8 @@
|
||||||
namespace epubgrep::book
|
namespace epubgrep::book
|
||||||
{
|
{
|
||||||
|
|
||||||
|
using boost::locale::translate;
|
||||||
|
using fmt::format;
|
||||||
using std::string;
|
using std::string;
|
||||||
|
|
||||||
book read(const fs::path filepath, const bool raw)
|
book read(const fs::path filepath, const bool raw)
|
||||||
|
@ -42,7 +48,7 @@ book read(const fs::path filepath, const bool raw)
|
||||||
{
|
{
|
||||||
if (!raw)
|
if (!raw)
|
||||||
{
|
{
|
||||||
return zip::list_spine(filepath);
|
return list_spine(filepath);
|
||||||
}
|
}
|
||||||
return zip::list(filepath);
|
return zip::list(filepath);
|
||||||
}()};
|
}()};
|
||||||
|
@ -183,4 +189,80 @@ string page(const document &doc, const size_t pos)
|
||||||
return string(last);
|
return string(last);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//! Returns the files in the EPUB “spine” (all pages that are actually text).
//!
//! Reads `META-INF/container.xml` from the archive at `filepath` to find the
//! path of the OPF package file, then maps every `itemref` in the OPF spine to
//! the `href` of the manifest item whose `id` equals the `idref`. Hrefs that
//! do not start with '/' are prefixed with the directory of the OPF file.
//!
//! Spine entries that cannot be resolved to a non-empty href are skipped.
//! If the OPF file can not be located or no entry could be resolved, an error
//! is logged and an empty vector is returned.
std::vector<string> list_spine(const fs::path &filepath)
{
    // Locate the OPF package file via the container description.
    const auto opf_file_path{
        [&filepath]
        {
            pugi::xml_document xml;
            const std::string container{
                zip::read_file(filepath, "META-INF/container.xml")};
            const auto result{
                xml.load_buffer(container.data(), container.size())};
            if (result)
            {
                return fs::path{xml.child("container")
                                    .child("rootfiles")
                                    .first_child()
                                    .attribute("full-path")
                                    .value()};
            }
            LOG(log::sev::error) << result.description() << '\n';

            return fs::path{};
        }()};

    std::vector<std::string> spine_filepaths;
    if (!opf_file_path.empty())
    {
        DEBUGLOG << "Parsing " << opf_file_path;
        pugi::xml_document xml;
        const std::string opf_file{
            zip::read_file(filepath, opf_file_path.string())};
        const auto result{xml.load_buffer(opf_file.data(), opf_file.size())};
        if (result)
        {
            // Some OPF files use an explicit “opf:” prefix on their elements.
            auto manifest{xml.child("package").child("manifest")};
            if (manifest == nullptr)
            {
                manifest = xml.child("opf:package").child("opf:manifest");
            }
            auto spine{xml.child("package").child("spine")};
            if (spine == nullptr)
            {
                spine = xml.child("opf:package").child("opf:spine");
            }

            for (const auto &itemref : spine)
            {
                const auto *idref{itemref.attribute("idref").value()};
                const auto item{manifest.find_child_by_attribute("id", idref)};
                auto href{helpers::urldecode(item.attribute("href").value())};
                if (href.empty())
                {
                    // pugixml returns empty strings for missing nodes and
                    // attributes. Without this check the OPF directory itself
                    // would be added to the list as if it were a document.
                    DEBUGLOG << "Skipping unresolvable spine entry: " << idref;
                    continue;
                }
                if (href[0] != '/')
                {
                    href = (opf_file_path.parent_path() /= href);
                }
                DEBUGLOG << "Found in spine: " << href;
                spine_filepaths.emplace_back(href);
            }
        }
        else
        {
            LOG(log::sev::error) << "XML: " << result.description() << '\n';
        }
    }

    if (opf_file_path.empty() || spine_filepaths.empty())
    {
        LOG(log::sev::error)
            << format(translate("{0:s} is damaged. Could not read spine. "
                                "Skipping file.\n")
                          .str()
                          .data(),
                      filepath);
        return {};
    }

    return spine_filepaths;
}
|
||||||
|
|
||||||
} // namespace epubgrep::book
|
} // namespace epubgrep::book
|
||||||
|
|
|
@ -62,6 +62,9 @@ struct book
|
||||||
//! Return current page if possible.
|
//! Return current page if possible.
|
||||||
[[nodiscard]] std::string page(const document &doc, size_t pos);
|
[[nodiscard]] std::string page(const document &doc, size_t pos);
|
||||||
|
|
||||||
|
//! Returns the files in the EPUB “spine” (all pages that are actually text).
|
||||||
|
[[nodiscard]] std::vector<string> list_spine(const fs::path &filepath);
|
||||||
|
|
||||||
} // namespace epubgrep::book
|
} // namespace epubgrep::book
|
||||||
|
|
||||||
#endif // EPUBGREP_BOOK_HPP
|
#endif // EPUBGREP_BOOK_HPP
|
||||||
|
|
76
src/zip.cpp
76
src/zip.cpp
|
@ -25,7 +25,6 @@
|
||||||
#include <boost/locale/message.hpp>
|
#include <boost/locale/message.hpp>
|
||||||
#include <fmt/format.h>
|
#include <fmt/format.h>
|
||||||
#include <fmt/ostream.h> // For compatibility with fmt 4.
|
#include <fmt/ostream.h> // For compatibility with fmt 4.
|
||||||
#include <pugixml.hpp>
|
|
||||||
|
|
||||||
#include <cstdlib>
|
#include <cstdlib>
|
||||||
#include <cstring>
|
#include <cstring>
|
||||||
|
@ -165,79 +164,4 @@ void close_file(struct archive *zipfile, const fs::path &filepath)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
std::vector<std::string> list_spine(const fs::path &filepath)
|
|
||||||
{
|
|
||||||
const auto opf_file_path{
|
|
||||||
[&filepath]
|
|
||||||
{
|
|
||||||
pugi::xml_document xml;
|
|
||||||
const std::string container{
|
|
||||||
read_file(filepath, "META-INF/container.xml")};
|
|
||||||
const auto result{xml.load_buffer(&container[0], container.size())};
|
|
||||||
if (result)
|
|
||||||
{
|
|
||||||
return fs::path{xml.child("container")
|
|
||||||
.child("rootfiles")
|
|
||||||
.first_child()
|
|
||||||
.attribute("full-path")
|
|
||||||
.value()};
|
|
||||||
}
|
|
||||||
LOG(log::sev::error) << result.description() << '\n';
|
|
||||||
|
|
||||||
return fs::path{};
|
|
||||||
}()};
|
|
||||||
|
|
||||||
std::vector<std::string> spine_filepaths;
|
|
||||||
if (!opf_file_path.empty())
|
|
||||||
{
|
|
||||||
DEBUGLOG << "Parsing " << opf_file_path;
|
|
||||||
pugi::xml_document xml;
|
|
||||||
const std::string opf_file{read_file(filepath, opf_file_path.string())};
|
|
||||||
const auto result{xml.load_buffer(&opf_file[0], opf_file.size())};
|
|
||||||
if (result)
|
|
||||||
{
|
|
||||||
auto manifest{xml.child("package").child("manifest")};
|
|
||||||
if (manifest == nullptr)
|
|
||||||
{
|
|
||||||
manifest = xml.child("opf:package").child("opf:manifest");
|
|
||||||
}
|
|
||||||
auto spine{xml.child("package").child("spine")};
|
|
||||||
if (spine == nullptr)
|
|
||||||
{
|
|
||||||
spine = xml.child("opf:package").child("opf:spine");
|
|
||||||
}
|
|
||||||
|
|
||||||
for (const auto &itemref : spine)
|
|
||||||
{
|
|
||||||
const auto &idref{itemref.attribute("idref").value()};
|
|
||||||
const auto &item{manifest.find_child_by_attribute("id", idref)};
|
|
||||||
auto href{helpers::urldecode(item.attribute("href").value())};
|
|
||||||
if (href[0] != '/')
|
|
||||||
{
|
|
||||||
href = (opf_file_path.parent_path() /= href);
|
|
||||||
}
|
|
||||||
DEBUGLOG << "Found in spine: " << href;
|
|
||||||
spine_filepaths.emplace_back(href);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
LOG(log::sev::error) << "XML: " << result.description() << '\n';
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (opf_file_path.empty() || spine_filepaths.empty())
|
|
||||||
{
|
|
||||||
LOG(log::sev::error)
|
|
||||||
<< format(translate("{0:s} is damaged. Could not read spine. "
|
|
||||||
"Skipping file.\n")
|
|
||||||
.str()
|
|
||||||
.data(),
|
|
||||||
filepath);
|
|
||||||
return {};
|
|
||||||
}
|
|
||||||
|
|
||||||
return spine_filepaths;
|
|
||||||
}
|
|
||||||
|
|
||||||
} // namespace epubgrep::zip
|
} // namespace epubgrep::zip
|
||||||
|
|
|
@ -43,9 +43,6 @@ namespace epubgrep::zip
|
||||||
//! Close zip file.
|
//! Close zip file.
|
||||||
void close_file(struct archive *zipfile, const fs::path &filepath);
|
void close_file(struct archive *zipfile, const fs::path &filepath);
|
||||||
|
|
||||||
//! Returns the files in the EPUB “spine” (all pages that are actually text).
|
|
||||||
[[nodiscard]] std::vector<std::string> list_spine(const fs::path &filepath);
|
|
||||||
|
|
||||||
//! It's std::runtime_error, but with another name.
|
//! It's std::runtime_error, but with another name.
|
||||||
class exception : public std::runtime_error
|
class exception : public std::runtime_error
|
||||||
{
|
{
|
||||||
|
|
Loading…
Reference in New Issue