/* This file is part of epubgrep. * Copyright © 2021 tastytea * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU Affero General Public License as published by * the Free Software Foundation, version 3. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Affero General Public License for more details. * * You should have received a copy of the GNU Affero General Public License * along with this program. If not, see . */ #ifndef EPUBGREP_BOOK_HPP #define EPUBGREP_BOOK_HPP #include "fs-compat.hpp" #include #include #include #include #include #include namespace epubgrep::book { using std::string; //! Document inside EPUB. struct document { string text_raw; //!< HTML page string text_cleaned; //!< Plain text page std::unique_ptr text; //!< Pointer to preferred text version std::map headlines; //!< pos, title std::map pages; //!< pos, page string language; //!< Page language } __attribute__((aligned(128))); //! EPUB file. struct book { std::vector> files; //!< filename, file std::vector> toc; //!< title, href string language; //!< Book language } __attribute__((aligned(128))); //! Read and process book. [[nodiscard]] book read(fs::path filepath, bool raw); //! Clean up page and record headlines and page numbers. [[nodiscard]] document process_page(std::string_view text); //! Return last headline if possible. [[nodiscard]] string headline(const document &doc, size_t pos); //! Return current page if possible. [[nodiscard]] string page(const document &doc, size_t pos); //! Returns the file path of the OPF file in the EPUB. [[nodiscard]] fs::path get_opf_file_path(const fs::path &zipfile); //! Returns the files in the EPUB “spine” (all pages that are actually text). [[nodiscard]] std::vector list_spine(const fs::path &filepath); } // namespace epubgrep::book #endif // EPUBGREP_BOOK_HPP