2021-05-24 21:38:44 +02:00
|
|
|
#include "fs-compat.hpp"
|
|
|
|
#include "search.hpp"
|
|
|
|
|
|
|
|
#include <catch.hpp>
|
|
|
|
|
|
|
|
#include <clocale>
|
|
|
|
#include <exception>
|
|
|
|
#include <string>
|
|
|
|
|
|
|
|
// Behaviour tests for the text helpers in epubgrep::search:
//   * cleanup_text() – modifies the passed string in place,
//   * headline()     – result is assigned back to the string under test,
//   * page()         – result is assigned back to the string under test.
//
// Every WHEN prepares an input, runs the helper(s) through `run_guarded` and
// then checks both that no std::exception escaped and that the resulting text
// matches the expectation.
SCENARIO("Searching helpers work as intended")
{
    GIVEN("Our test zip file")
    {
        const fs::path zipfile{"test.zip"};
        std::setlocale(LC_CTYPE, ""); // Needed for utf-8 support in libarchive.

        // Catch2 re-enters the scenario from the top for every leaf section,
        // so this flag starts out false for each WHEN below.
        bool exception{false};

        // NOTE(review): none of the sections in this scenario read the
        // archive itself; the precondition is kept as it was.
        REQUIRE(fs::exists(zipfile));

        // Runs `action` and records in `exception` whether it threw something
        // derived from std::exception. Any other exception type is not caught
        // here and propagates to the test framework.
        const auto run_guarded = [&exception](const auto &action)
        {
            try
            {
                action();
            }
            catch (const std::exception &)
            {
                exception = true;
            }
        };

        SECTION("cleanup_text() does what it should do")
        {
            std::string text;

            WHEN("Text is a single word")
            {
                text = "Moss";
                run_guarded([&text] { epubgrep::search::cleanup_text(text); });

                THEN("No exception is thrown")
                AND_THEN("The text is unchanged")
                {
                    REQUIRE_FALSE(exception);
                    REQUIRE(text == "Moss");
                }
            }

            WHEN("Text has 2 \\r next to each other in it.")
            {
                text = "💖\r\r🦝";
                run_guarded([&text] { epubgrep::search::cleanup_text(text); });

                THEN("No exception is thrown")
                AND_THEN("The \\r are removed")
                {
                    REQUIRE_FALSE(exception);
                    REQUIRE(text == "💖🦝");
                }
            }

            WHEN("Text has 6 \\n after another in it")
            {
                text = "Moss\n\n\n\n\n\nis good.";
                run_guarded([&text] { epubgrep::search::cleanup_text(text); });

                THEN("No exception is thrown")
                AND_THEN("The 6 \\n are condensed to one space")
                {
                    REQUIRE_FALSE(exception);
                    REQUIRE(text == "Moss is good.");
                }
            }
        }

        SECTION("headline() does what it should do")
        {
            std::string text;

            // Cleans the text first, then replaces it with the extracted
            // headline. If cleanup throws, headline() is never reached.
            const auto extract_headline = [&text]
            {
                epubgrep::search::cleanup_text(text);
                text = epubgrep::search::headline(text);
            };

            WHEN("We have a text with a h3 headline")
            {
                text = "… <h3>Soup</h3> …";
                run_guarded(extract_headline);

                THEN("No exception is thrown")
                AND_THEN("The headline is correctly extracted")
                {
                    REQUIRE_FALSE(exception);
                    REQUIRE(text == "Soup");
                }
            }

            WHEN("There is a <span> in the h2 headline")
            {
                text = "… <h2>The <span class=\"long\">long</span> "
                       "road to nowhere</h2> …";
                run_guarded(extract_headline);

                THEN("No exception is thrown")
                AND_THEN("The headline is correctly extracted")
                {
                    REQUIRE_FALSE(exception);
                    REQUIRE(text == "The long road to nowhere");
                }
            }

            WHEN("There are tags that start with h but are not headlines")
            {
                text = "<html><hr>The long<section>road to nowhere</section>";
                run_guarded(extract_headline);

                THEN("No exception is thrown")
                AND_THEN("No headline is extracted")
                {
                    REQUIRE_FALSE(exception);
                    REQUIRE(text.empty());
                }
            }
        }

        SECTION("page() does what it should do")
        {
            std::string text;

            // Cleans the text first, then replaces it with the extracted page
            // number. If cleanup throws, page() is never reached.
            const auto extract_page = [&text]
            {
                epubgrep::search::cleanup_text(text);
                text = epubgrep::search::page(text);
            };

            WHEN("We have text with epub:type pagebreak number 69")
            {
                text = R"(… <span epub:type="pagebreak" … title="69"/> …)";
                run_guarded(extract_page);

                THEN("No exception is thrown")
                AND_THEN("The page number is correctly extracted")
                {
                    REQUIRE_FALSE(exception);
                    REQUIRE(text == "69");
                }
            }

            WHEN("We have text with doc-pagebreak number 69")
            {
                text = R"(… <span role="doc-pagebreak" … aria-label="69"/> …)";
                run_guarded(extract_page);

                THEN("No exception is thrown")
                AND_THEN("The page number is correctly extracted")
                {
                    REQUIRE_FALSE(exception);
                    REQUIRE(text == "69");
                }
            }
        }
    }
}
|