// epubgrep/tests/test_search_helpers.cpp
//
// 202 lines
// 5.9 KiB
// C++

#include "fs-compat.hpp"
#include "search.hpp"
#include <catch.hpp>
#include <clocale>
#include <exception>
#include <string>
// Behaviour tests for three helpers in epubgrep::search: cleanup_text(),
// headline() and page(). Each WHEN feeds a literal string to the helper(s);
// the nested THEN / AND_THEN then checks that no std::exception was caught
// and that the resulting string equals the expected value.
SCENARIO("Searching helpers work as intended")
{
    GIVEN("Our test zip file")
    {
        const fs::path archive_path{"test.zip"};
        std::setlocale(LC_CTYPE, ""); // Needed for utf-8 support in libarchive.
        bool threw{false};

        REQUIRE(fs::exists(archive_path));

        // Invokes `action` and records in `threw` whether a std::exception
        // came out of it. Other exception types are not caught here.
        const auto guarded = [&threw](auto &&action)
        {
            try
            {
                action();
            }
            catch (const std::exception &)
            {
                threw = true;
            }
        };

        SECTION("cleanup_texts() does what it should do")
        {
            std::string text;

            // The operation under test: cleanup_text() is handed `text` and
            // the outcome is read back from the same variable.
            const auto clean = [&text]
            {
                epubgrep::search::cleanup_text(text);
            };

            WHEN("Text is a single word")
            {
                text = "Moss";
                guarded(clean);

                THEN("No exception is thrown")
                {
                    AND_THEN("The text is unchanged")
                    {
                        REQUIRE_FALSE(threw);
                        REQUIRE(text == "Moss");
                    }
                }
            }

            WHEN("Text has 2 \\r next to each other in it.")
            {
                text = "💖\r\r🦝";
                guarded(clean);

                THEN("No exception is thrown")
                {
                    AND_THEN("The \\r are removed unchanged")
                    {
                        REQUIRE_FALSE(threw);
                        REQUIRE(text == "💖🦝");
                    }
                }
            }

            WHEN("Text has 6 \\n after another in it")
            {
                text = "Moss\n\n\n\n\n\nis good.";
                guarded(clean);

                THEN("No exception is thrown")
                {
                    AND_THEN("The 6 \\n are condensed to one space")
                    {
                        REQUIRE_FALSE(threw);
                        REQUIRE(text == "Moss is good.");
                    }
                }
            }
        }

        SECTION("headline() does what it should do")
        {
            std::string text;

            // Cleans `text` first, then replaces it with whatever headline()
            // returns for the cleaned input.
            const auto extract_headline = [&text]
            {
                epubgrep::search::cleanup_text(text);
                text = epubgrep::search::headline(text);
            };

            WHEN("We have a text with a h3 headline")
            {
                text = "… <h3>Soup</h3> …";
                guarded(extract_headline);

                THEN("No exception is thrown")
                {
                    AND_THEN("The headline is correctly extracted")
                    {
                        REQUIRE_FALSE(threw);
                        REQUIRE(text == "Soup");
                    }
                }
            }

            WHEN("There is a <span> in the h2 headline")
            {
                text = "… <h2>The <span class=\"long\">long</span> "
                       "road to nowhere</h2> …";
                guarded(extract_headline);

                THEN("No exception is thrown")
                {
                    AND_THEN("The headline is correctly extracted")
                    {
                        REQUIRE_FALSE(threw);
                        REQUIRE(text == "The long road to nowhere");
                    }
                }
            }

            WHEN("There are tags that start with h but are not headlines")
            {
                text = "<html><hr>The long<section>road to nowhere</section>";
                guarded(extract_headline);

                THEN("No exception is thrown")
                {
                    AND_THEN("No headline is extracted")
                    {
                        REQUIRE_FALSE(threw);
                        REQUIRE(text.empty());
                    }
                }
            }
        }

        SECTION("page() does what it should do")
        {
            std::string text;

            // Cleans `text` first, then replaces it with whatever page()
            // returns for the cleaned input.
            const auto extract_page = [&text]
            {
                epubgrep::search::cleanup_text(text);
                text = epubgrep::search::page(text);
            };

            WHEN("We have text with epub:type pagebreak number 69")
            {
                text = R"(… <span epub:type="pagebreak" … title="69"/> …)";
                guarded(extract_page);

                THEN("No exception is thrown")
                {
                    AND_THEN("The page number is correctly extracted")
                    {
                        REQUIRE_FALSE(threw);
                        REQUIRE(text == "69");
                    }
                }
            }

            WHEN("We have text with doc-pagebreak number 69")
            {
                text = R"(… <span role="doc-pagebreak" … aria-label="69"/> …)";
                guarded(extract_page);

                THEN("No exception is thrown")
                {
                    AND_THEN("The page number is correctly extracted")
                    {
                        REQUIRE_FALSE(threw);
                        REQUIRE(text == "69");
                    }
                }
            }
        }
    }
}