Move book processing into own file.
Some checks failed
continuous-integration/drone/push Build is failing
parent 97fecd37f0
commit 84ef5d1bf3
182 src/book.cpp Normal file
@@ -0,0 +1,182 @@
/* This file is part of epubgrep.
 * Copyright © 2021 tastytea <tastytea@tastytea.de>
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as published by
 * the Free Software Foundation, version 3.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

#include "book.hpp"

#include "fs-compat.hpp"
#include "helpers.hpp"
#include "log.hpp"
#include "zip.hpp"

#include <boost/regex.hpp>

#include <string>
#include <string_view>
#include <vector>

namespace epubgrep::book
{

using std::string;

book read(const fs::path filepath, const bool raw)
{
    using helpers::unescape_html;

    std::vector<string> epub_filepaths{[&filepath, raw]
                                       {
                                           if (!raw)
                                           {
                                               return zip::list_spine(filepath);
                                           }
                                           return zip::list(filepath);
                                       }()};

    book current_book;
    for (const auto &entry : epub_filepaths)
    {
        DEBUGLOG << "Processing " << entry;
        document doc;
        if (!raw)
        {
            doc = process_page(unescape_html(zip::read_file(filepath, entry)));
        }
        else
        {
            doc.text_raw = zip::read_file(filepath, entry);
        }
        current_book.files.insert({entry, doc});
    }

    return current_book;
}

document process_page(const std::string_view text)
{
    string output{text};
    static const boost::regex re_header_start{"<[hH][1-6]"};
    static const boost::regex re_header_end{"</[hH][1-6]"};
    static const boost::regex re_pagebreak{"[^>]+pagebreak[^>]+"
                                           "(title|aria-label)"
                                           "=\"([[:alnum:]]+)\""};

    {
        size_t pos{0};
        while ((pos = output.find_first_of("\n\t\r", pos)) != string::npos)
        {
            if (output[pos] == '\r')
            {
                output.erase(pos, 1);
            }
            else
            {
                output.replace(pos, 1, " ");
            }
        }
    }
    {
        size_t pos{0};
        while ((pos = output.find("  ", pos)) != string::npos)
        {
            output.replace(pos, 2, " ");
        }
    }

    size_t pos{0};
    document doc;
    size_t headline_start{string::npos};
    while ((pos = output.find('<', pos)) != string::npos)
    {
        auto endpos{output.find('>', pos) + 1};

        if (boost::regex_match(output.substr(pos, 3), re_header_start))
        {
            headline_start = pos;
        }
        else if (boost::regex_match(output.substr(pos, 4), re_header_end))
        {
            if (headline_start != string::npos)
            {
                doc.headlines.insert(
                    {headline_start,
                     output.substr(headline_start, pos - headline_start)});
                headline_start = string::npos;
            }
        }
        else if (output.substr(pos, 6) == "<span ")
        {
            boost::match_results<string::const_iterator> match;
            using it_size_t = string::const_iterator::difference_type;
            string::const_iterator begin{output.begin()
                                         + static_cast<it_size_t>(pos)};
            string::const_iterator end{output.begin()
                                       + static_cast<it_size_t>(endpos)};

            if (boost::regex_search(begin, end, match, re_pagebreak))
            {
                doc.pages.insert({pos, match[2].str()});
            }
        }
        else if (output.substr(pos, 7) == "<style "
                 || output.substr(pos, 8) == "<script ")
        {
            if (output.find("/>", pos) > endpos)
            {
                endpos = output.find('>', endpos) + 1;
            }
        }

        output.erase(pos, endpos - pos);
    }

    doc.text_cleaned = output;

    return doc;
}

std::string headline(const document &doc, const size_t pos)
{
    std::string_view last;

    for (const auto &pair : doc.headlines)
    {
        if (pair.first > pos)
        {
            break;
        }
        last = pair.second;
    }

    return string(last);
}

string page(const document &doc, const size_t pos)
{
    std::string_view last;

    for (const auto &pair : doc.pages)
    {
        if (pair.first > pos)
        {
            break;
        }
        last = pair.second;
    }

    return string(last);
}

} // namespace epubgrep::book
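For orientation, a minimal usage sketch of the interface introduced above (not part of the commit; the HTML snippet and the expected values in the comments are illustrative assumptions, verified against the logic of process_page()):

// Sketch only: assumes book.hpp from this commit is on the include path.
#include "book.hpp"

#include <iostream>
#include <string>

int main()
{
    // A tiny page with one headline and one EPUB 3 page-break marker.
    const std::string html{"<h1>Chapter 1</h1><p>Some text.</p>"
                           "<span epub:type=\"pagebreak\" title=\"7\"/>"
                           "<p>More text.</p>"};

    const auto doc{epubgrep::book::process_page(html)};

    // text_cleaned holds the page with all tags stripped.
    std::cout << doc.text_cleaned << '\n';

    // headline()/page() return the last entry at or before the position.
    const auto pos{doc.text_cleaned.size()};
    std::cout << epubgrep::book::headline(doc, pos) << '\n'; // "Chapter 1"
    std::cout << epubgrep::book::page(doc, pos) << '\n';     // "7"
}

headline() and page() walk the position-keyed maps and return the last entry at or before the given offset; this is how src/search.cpp below attributes a match to its chapter heading and page number.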
63 src/book.hpp Normal file
@@ -0,0 +1,63 @@
/* This file is part of epubgrep.
 * Copyright © 2021 tastytea <tastytea@tastytea.de>
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as published by
 * the Free Software Foundation, version 3.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

#ifndef EPUBGREP_BOOK_HPP
#define EPUBGREP_BOOK_HPP

#include "fs-compat.hpp"

#include <map>
#include <string>
#include <string_view>

namespace epubgrep::book
{

using std::string;

//! Document inside EPUB.
struct document
{
    string text_raw;                    //!< HTML page
    string text_cleaned;                //!< Plain text page
    std::map<size_t, string> headlines; //!< pos, title
    std::map<size_t, string> pages;     //!< pos, page
    string language;                    //!< Page language
} __attribute__((aligned(128)));

//! EPUB file.
struct book
{
    std::map<string, document> files; //!< filename, file
    std::map<string, string> toc;     //!< title, href
    string language;                  //!< Book language
} __attribute__((aligned(128)));

//! Read and process book.
[[nodiscard]] book read(fs::path filepath, bool raw);

//! Clean up page and record headlines and page numbers.
[[nodiscard]] document process_page(std::string_view text);

//! Return last headline if possible.
[[nodiscard]] std::string headline(const document &doc, size_t pos);

//! Return current page if possible.
[[nodiscard]] std::string page(const document &doc, size_t pos);

} // namespace epubgrep::book

#endif // EPUBGREP_BOOK_HPP
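A sketch of the call pattern this header is meant for, mirroring what src/search.cpp switches to below (not part of the commit; "example.epub" is a placeholder path):

// Sketch only: reads an EPUB and iterates its per-file documents.
#include "book.hpp"

#include <iostream>

int main()
{
    // raw == false: only spine documents are read and pages are cleaned up.
    const auto current_book{epubgrep::book::read("example.epub", false)};

    for (const auto &file : current_book.files)
    {
        // file.first is the path inside the EPUB, file.second the document.
        std::cout << file.first << ": "
                  << file.second.headlines.size() << " headline(s)\n";
    }
}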
154 src/search.cpp
@@ -16,6 +16,7 @@
 
 #include "search.hpp"
 
+#include "book.hpp"
 #include "fs-compat.hpp"
 #include "helpers.hpp"
 #include "log.hpp"
@@ -73,33 +74,13 @@ std::vector<match> search(const fs::path &filepath,
 
     const boost::regex re(regex.data(), flags);
     std::vector<match> matches;
-    std::vector<string> epub_filepaths{[&opts, &filepath]
-                                       {
-                                           if (!opts.raw)
-                                           {
-                                               return zip::list_spine(filepath);
-                                           }
-                                           return zip::list(filepath);
-                                       }()};
-
-    for (const auto &entry : epub_filepaths)
+    auto book{book::read(filepath, opts.raw)};
+    for (const auto &file : book.files)
     {
-        DEBUGLOG << "Processing " << entry;
-        file_in_epub file;
-        {
-            const auto document{zip::read_file(filepath, entry)};
-            if (!opts.raw)
-            {
-                file = cleanup_text(helpers::unescape_html(document));
-            }
-            else
-            {
-                file.text = document;
-            }
-        }
-
-        string::const_iterator begin{file.text.begin()};
-        string::const_iterator end{file.text.end()};
+        const auto &doc{file.second};
+        const auto &text{doc.text_cleaned};
+        string::const_iterator begin{text.begin()};
+        string::const_iterator end{text.end()};
         auto begin_text{begin};
         boost::match_results<string::const_iterator> match_result;
 
@@ -108,13 +89,13 @@ std::vector<match> search(const fs::path &filepath,
        {
            match match; // FIXME: Rename variable or struct.
            match.filepath_epub = filepath;
-           match.filepath_inside = entry;
+           match.filepath_inside = file.first;
            match.text = match_result[0];
            match.context = context(match_result, opts.context);
            const auto pos = static_cast<size_t>(
                std::distance(begin_text, match_result[0].begin()));
-           match.headline = headline(file, pos);
-           match.page = page(file, pos);
+           match.headline = headline(doc, pos);
+           match.page = page(doc, pos);
 
            matches.emplace_back(match);
            begin = match_result[0].end();
@@ -124,89 +105,6 @@ std::vector<match> search(const fs::path &filepath,
     return matches;
 }
 
-file_in_epub cleanup_text(const std::string_view text)
-{
-    string output{text};
-    static const boost::regex re_header_start{"<[hH][1-6]"};
-    static const boost::regex re_header_end{"</[hH][1-6]"};
-    static const boost::regex re_pagebreak{"[^>]+pagebreak[^>]+"
-                                           "(title|aria-label)"
-                                           "=\"([[:alnum:]]+)\""};
-
-    {
-        size_t pos{0};
-        while ((pos = output.find_first_of("\n\t\r", pos)) != string::npos)
-        {
-            if (output[pos] == '\r')
-            {
-                output.erase(pos, 1);
-            }
-            else
-            {
-                output.replace(pos, 1, " ");
-            }
-        }
-    }
-    {
-        size_t pos{0};
-        while ((pos = output.find("  ", pos)) != string::npos)
-        {
-            output.replace(pos, 2, " ");
-        }
-    }
-
-    size_t pos{0};
-    file_in_epub file;
-    size_t headline_start{string::npos};
-    while ((pos = output.find('<', pos)) != string::npos)
-    {
-        auto endpos{output.find('>', pos) + 1};
-
-        if (boost::regex_match(output.substr(pos, 3), re_header_start))
-        {
-            headline_start = pos;
-        }
-        else if (boost::regex_match(output.substr(pos, 4), re_header_end))
-        {
-            if (headline_start != string::npos)
-            {
-                file.headlines.insert(
-                    {headline_start,
-                     output.substr(headline_start, pos - headline_start)});
-                headline_start = string::npos;
-            }
-        }
-        else if (output.substr(pos, 6) == "<span ")
-        {
-            boost::match_results<string::const_iterator> match;
-            using it_size_t = string::const_iterator::difference_type;
-            string::const_iterator begin{output.begin()
-                                         + static_cast<it_size_t>(pos)};
-            string::const_iterator end{output.begin()
-                                       + static_cast<it_size_t>(endpos)};
-
-            if (boost::regex_search(begin, end, match, re_pagebreak))
-            {
-                file.pages.insert({pos, match[2].str()});
-            }
-        }
-        else if (output.substr(pos, 7) == "<style "
-                 || output.substr(pos, 8) == "<script ")
-        {
-            if (output.find("/>", pos) > endpos)
-            {
-                endpos = output.find('>', endpos) + 1;
-            }
-        }
-
-        output.erase(pos, endpos - pos);
-    }
-
-    file.text = output;
-
-    return file;
-}
-
 match_context context(const boost::match_results<string::const_iterator> &match,
                       std::uint64_t words)
 {
@@ -270,36 +168,4 @@ match_context context(const boost::match_results<string::const_iterator> &match,
     return {before, after};
 }
 
-std::string headline(const file_in_epub &file, const size_t pos)
-{
-    std::string_view last;
-
-    for (const auto &pair : file.headlines)
-    {
-        if (pair.first > pos)
-        {
-            break;
-        }
-        last = pair.second;
-    }
-
-    return string(last);
-}
-
-string page(const file_in_epub &file, const size_t pos)
-{
-    std::string_view last;
-
-    for (const auto &pair : file.pages)
-    {
-        if (pair.first > pos)
-        {
-            break;
-        }
-        last = pair.second;
-    }
-
-    return string(last);
-}
-
 } // namespace epubgrep::search
src/search.hpp
@@ -66,20 +66,11 @@ struct file_in_epub
                                 std::string_view regex,
                                 const settings &opts);
 
-//! Strip HTML, remove newlines, condense spaces.
-[[nodiscard]] file_in_epub cleanup_text(std::string_view text);
-
 //! Return words before and after the match.
 [[nodiscard]] match_context
 context(const boost::match_results<std::string::const_iterator> &match,
         std::uint64_t words);
 
-//! Return last headline if possible.
-[[nodiscard]] std::string headline(const file_in_epub &file, size_t pos);
-
-//! Return current page if possible.
-[[nodiscard]] std::string page(const file_in_epub &file, size_t pos);
-
 } // namespace epubgrep::search
 
 #endif // EPUBGREP_SEARCH_HPP
@@ -1,3 +1,4 @@
+#include "book.hpp"
 #include "fs-compat.hpp"
 #include "search.hpp"
 
@@ -26,7 +27,7 @@ SCENARIO("Searching helpers work as intended")
            text = "Moss";
            try
            {
-               text = epubgrep::search::cleanup_text(text).text;
+               text = epubgrep::book::process_page(text).text_cleaned;
            }
            catch (const std::exception &)
            {
@@ -46,7 +47,7 @@ SCENARIO("Searching helpers work as intended")
            text = "💖\r\r🦝";
            try
            {
-               text = epubgrep::search::cleanup_text(text).text;
+               text = epubgrep::book::process_page(text).text_cleaned;
            }
            catch (const std::exception &)
            {
@@ -66,7 +67,7 @@ SCENARIO("Searching helpers work as intended")
            text = "Moss\n\n\n\n\n\nis good.";
            try
            {
-               text = epubgrep::search::cleanup_text(text).text;
+               text = epubgrep::book::process_page(text).text_cleaned;
            }
            catch (const std::exception &)
            {
@@ -91,8 +92,8 @@ SCENARIO("Searching helpers work as intended")
            text = "… <h3>Soup</h3> …";
            try
            {
-               auto file{epubgrep::search::cleanup_text(text)};
-               text = epubgrep::search::headline(file, text.size());
+               auto file{epubgrep::book::process_page(text)};
+               text = epubgrep::book::headline(file, text.size());
            }
            catch (const std::exception &)
            {
@@ -113,8 +114,8 @@ SCENARIO("Searching helpers work as intended")
                   "road to nowhere</h2> …";
            try
            {
-               auto file{epubgrep::search::cleanup_text(text)};
-               text = epubgrep::search::headline(file, text.size());
+               auto file{epubgrep::book::process_page(text)};
+               text = epubgrep::book::headline(file, text.size());
            }
            catch (const std::exception &)
            {
@@ -134,8 +135,8 @@ SCENARIO("Searching helpers work as intended")
            text = "<html><hr>The long<section>road to nowhere</section>";
            try
            {
-               auto file{epubgrep::search::cleanup_text(text)};
-               text = epubgrep::search::headline(file, text.size());
+               auto file{epubgrep::book::process_page(text)};
+               text = epubgrep::book::headline(file, text.size());
            }
            catch (const std::exception &)
            {
@@ -160,8 +161,8 @@ SCENARIO("Searching helpers work as intended")
            text = R"(… <span epub:type="pagebreak" … title="69"/> …)";
            try
            {
-               auto file{epubgrep::search::cleanup_text(text)};
-               text = epubgrep::search::page(file, text.size());
+               auto file{epubgrep::book::process_page(text)};
+               text = epubgrep::book::page(file, text.size());
            }
            catch (const std::exception &)
            {
@@ -181,8 +182,8 @@ SCENARIO("Searching helpers work as intended")
            text = R"(… <span role="doc-pagebreak" … aria-label="69"/> …)";
            try
            {
-               auto file{epubgrep::search::cleanup_text(text)};
-               text = epubgrep::search::page(file, text.size());
+               auto file{epubgrep::book::process_page(text)};
+               text = epubgrep::book::page(file, text.size());
            }
            catch (const std::exception &)
            {
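The test hunks above only rename the call sites to the new namespace. As a sketch of how an additional case for the page-break capture could look in the same style (hypothetical, not part of the commit; assumes the Catch2 SCENARIO/WHEN/THEN/REQUIRE macros this test file already uses, with the Catch2 header include path depending on the Catch2 version):

// Sketch only: a hypothetical extra test case mirroring the existing ones.
#include "book.hpp"

#include <string>

SCENARIO("book::process_page records page breaks")
{
    WHEN("the page contains a doc-pagebreak span with an aria-label")
    {
        const std::string text{
            R"(Before <span role="doc-pagebreak" aria-label="42"/> after)"};
        const auto doc{epubgrep::book::process_page(text)};

        THEN("page() returns the captured label")
        {
            REQUIRE(epubgrep::book::page(doc, text.size()) == "42");
        }
    }
}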