/*  This file is part of epubgrep.
 *  Copyright © 2021 tastytea
 *
 *  This program is free software: you can redistribute it and/or modify
 *  it under the terms of the GNU Affero General Public License as published by
 *  the Free Software Foundation, version 3.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU Affero General Public License for more details.
 *
 *  You should have received a copy of the GNU Affero General Public
 *  License along with this program.  If not, see
 *  <https://www.gnu.org/licenses/>.
 */

#include "zip.hpp"

#include "fs-compat.hpp"
#include "helpers.hpp"
#include "log.hpp"

#include <archive.h>
#include <archive_entry.h>
#include <boost/locale/message.hpp>
#include <fmt/format.h>
#include <fmt/ostream.h> // For compatibility with fmt 4.
#include <pugixml.hpp>

#include <cstddef>
#include <cstring>
#include <fstream>
#include <memory>
#include <string>
#include <string_view>
#include <vector>

namespace epubgrep::zip
{

using boost::locale::translate;
using fmt::format;

namespace
{

// Frees the archive handle if the owning scope is left by an exception.
// On the regular paths the handle is release()d and handed to close_file(),
// which reports a failing close to the caller.
using archive_guard =
    std::unique_ptr<struct archive, decltype(&archive_read_free)>;

} // namespace

// Returns the path of every entry in the archive at filepath, in archive
// order. Entries whose name libarchive can not provide as UTF-8 are skipped
// with a warning. Exceptions from open_file() and close_file() propagate.
std::vector<std::string> list(const fs::path &filepath)
{
    archive_guard zipfile{open_file(filepath), &archive_read_free};

    struct archive_entry *entry{};
    std::vector<std::string> toc;

    while (archive_read_next_header(zipfile.get(), &entry) == ARCHIVE_OK)
    {
        const auto *in_epub_filepath{archive_entry_pathname_utf8(entry)};
        if (in_epub_filepath == nullptr)
        { // If the encoding is broken, we skip the file.
            LOG(log::sev::warning)
                << format(translate("File in {0:s} is damaged. "
                                    "Skipping in-EPUB file.\n")
                              .str()
                              .data(),
                          filepath.string());
            continue;
        }
        toc.emplace_back(in_epub_filepath);
        DEBUGLOG << "Found in file: " << in_epub_filepath;
        archive_read_data_skip(zipfile.get());
    }

    close_file(zipfile.release(), filepath);

    return toc;
}

// Returns the contents of the entry named entry_path inside the archive at
// filepath.
//
// Throws exception if the entry is found but can not be read completely, and
// exception with code 1 if the missing entry is META-INF/container.xml. Any
// other missing entry is logged as a warning and yields an empty string.
// Exceptions from open_file() and close_file() propagate.
std::string read_file(const fs::path &filepath, std::string_view entry_path)
{
    archive_guard zipfile{open_file(filepath), &archive_read_free};

    struct archive_entry *entry{};

    while (archive_read_next_header(zipfile.get(), &entry) == ARCHIVE_OK)
    {
        const auto *path{archive_entry_pathname_utf8(entry)};
        if (path == nullptr)
        { // If the encoding is broken, we skip the file.
            LOG(log::sev::warning)
                << format(translate("File in {0:s} is damaged. "
                                    "Skipping in-EPUB file.\n")
                              .str()
                              .data(),
                          filepath.string());
            continue;
        }

        // Compare as string_view: entry_path.data() is not guaranteed to be
        // NUL-terminated, so it must not be handed to std::strcmp.
        if (entry_path != path)
        {
            archive_read_data_skip(zipfile.get());
            continue;
        }

        const auto entry_size{archive_entry_size(entry)};
        std::string filecontents;
        bool complete{false};

        // A negative size can not be turned into a buffer length; treat it
        // like a failed read instead of converting it to an unsigned value.
        if (entry_size >= 0)
        {
            const auto length{static_cast<std::size_t>(entry_size)};
            filecontents.resize(length);
            const auto bytes_read{archive_read_data(zipfile.get(),
                                                    filecontents.data(),
                                                    length)};
            complete = (bytes_read >= 0
                        && static_cast<std::size_t>(bytes_read) == length);
        }

        close_file(zipfile.release(), filepath);

        if (!complete)
        {
            throw exception{
                format(translate("Could not read {0:s} in {1:s}.").str(),
                       entry_path, filepath.string())};
        }

        return filecontents;
    }

    close_file(zipfile.release(), filepath);

    if (entry_path == "META-INF/container.xml")
    { // File is probably not an EPUB.
        exception e{format(translate("{0:s} not found in {1:s}.").str(),
                           entry_path, filepath.string())};
        e.code = 1;
        throw exception{e};
    }

    LOG(log::sev::warning)
        << format(translate("{0:s} not found in {1:s}.").str(), entry_path,
                  filepath.string())
        << '\n';

    return {};
}

// Opens the archive at filepath for reading and returns the libarchive
// handle. The caller has to pass the handle to close_file().
//
// Throws std::ios_base::failure if the file can not be opened as a stream,
// and exception with code 1 if libarchive can not open it.
struct archive *open_file(const fs::path &filepath)
{
    // Block size passed to archive_read_open_filename().
    constexpr std::size_t block_size{10240};

    // Throw exception if we can't open the file.
    std::ifstream file;
    file.exceptions(std::ios::failbit);
    file.open(filepath);
    file.close();

    auto *zipfile{archive_read_new()};
    archive_read_support_filter_all(zipfile);
    archive_read_support_format_zip(zipfile);

    const auto result{
        archive_read_open_filename(zipfile, filepath.c_str(), block_size)};
    if (result != ARCHIVE_OK)
    {
        // Free the handle directly: close_file() throws when freeing fails,
        // which would replace the error about the failed open.
        static_cast<void>(archive_read_free(zipfile));

        exception e{format(translate("Could not open {0:s}.").str(),
                           filepath.string())};
        e.code = 1;
        throw exception{e};
    }

    return zipfile;
}

// Frees the libarchive handle zipfile. filepath is only used for the error
// message. Throws exception if libarchive reports an error.
void close_file(struct archive *zipfile, const fs::path &filepath)
{
    const auto result{archive_read_free(zipfile)};
    if (result != ARCHIVE_OK)
    {
        throw exception{format(translate("Could not close {0:s}.").str(),
                               filepath.string())};
    }
}

// Returns the in-archive paths of the documents listed in the spine of the
// EPUB at filepath, in spine order.
//
// The package document is located through META-INF/container.xml. Returns an
// empty vector, after logging an error, if the package document can not be
// found or parsed or if no spine entry could be resolved. Exceptions from
// read_file() propagate.
std::vector<std::string> list_spine(const fs::path &filepath)
{
    const auto opf_file_path{
        [&filepath]
        {
            pugi::xml_document xml;
            const std::string container{
                read_file(filepath, "META-INF/container.xml")};

            const auto result{
                xml.load_buffer(container.data(), container.size())};
            if (result)
            {
                return fs::path{xml.child("container")
                                    .child("rootfiles")
                                    .first_child()
                                    .attribute("full-path")
                                    .value()};
            }
            LOG(log::sev::error) << result.description() << '\n';

            return fs::path{};
        }()};

    std::vector<std::string> spine_filepaths;

    if (!opf_file_path.empty())
    {
        DEBUGLOG << "Parsing " << opf_file_path;

        pugi::xml_document xml;
        const std::string opf_file{read_file(filepath, opf_file_path.string())};

        const auto result{xml.load_buffer(opf_file.data(), opf_file.size())};
        if (result)
        {
            // Element names are tried without and then with the "opf:"
            // prefix.
            auto manifest{xml.child("package").child("manifest")};
            if (!manifest)
            {
                manifest = xml.child("opf:package").child("opf:manifest");
            }
            auto spine{xml.child("package").child("spine")};
            if (!spine)
            {
                spine = xml.child("opf:package").child("opf:spine");
            }

            for (const auto &itemref : spine)
            {
                const auto *idref{itemref.attribute("idref").value()};
                const auto item{manifest.find_child_by_attribute("id", idref)};
                auto href{helpers::urldecode(item.attribute("href").value())};

                // No manifest item for this idref, or an item without href:
                // there is no file to add, and joining an empty href would
                // yield the directory of the package document.
                if (href.empty())
                {
                    continue;
                }

                // Paths not starting with '/' are relative to the directory
                // of the package document.
                if (href[0] != '/')
                {
                    href = (opf_file_path.parent_path() / href).string();
                }
                DEBUGLOG << "Found in spine: " << href;
                spine_filepaths.emplace_back(href);
            }
        }
        else
        {
            LOG(log::sev::error) << "XML: " << result.description() << '\n';
        }
    }

    if (opf_file_path.empty() || spine_filepaths.empty())
    {
        LOG(log::sev::error)
            << format(translate("{0:s} is damaged. Could not read spine. "
                                "Skipping file.\n")
                          .str()
                          .data(),
                      filepath.string());
        return {};
    }

    return spine_filepaths;
}

} // namespace epubgrep::zip