epubgrep/tests/test_search_zip.cpp
tastytea 00e3edb9f2
Only search files in spine, in the right order.
The spine lists all content documents in their linear reading order. So we're
finally getting our results in the right order! 🎉

Since we skip the images and fonts, which usually make up the most bytes in an
EPUB file, the performance increase is immense. I measured 60-70% in a very
short test.

Closes: #1
2021-05-29 17:34:43 +02:00

138 lines
5.1 KiB
C++
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#include "fs-compat.hpp"
#include "options.hpp"
#include "search.hpp"
#include <catch.hpp>
#include <clocale>
#include <exception>
#include <string>
#include <vector>
SCENARIO("Searching works")
{
GIVEN("Our test zip file")
{
fs::path zipfile{"test.zip"};
std::setlocale(LC_CTYPE, ""); // Needed for utf-8 support in libarchive.
bool exception{false};
REQUIRE(fs::exists(zipfile));
SECTION("search() doesn't fail and returns the right lines")
{
std::vector<epubgrep::search::match> matches;
epubgrep::search::settings opts;
opts.raw = true;
WHEN("We search for ‘📙+\\w? using extended regular expressions")
{
try
{
opts.regex = epubgrep::options::regex_kind::extended;
matches = epubgrep::search::search(zipfile, "📙+\\w?", opts);
}
catch (const std::exception &)
{
exception = true;
}
THEN("No exception is thrown")
AND_THEN("It returns the match correctly")
{
REQUIRE_FALSE(exception);
REQUIRE(matches.at(0).filepath == "test folder/😊");
REQUIRE(matches.at(0).text == "📙");
}
}
WHEN("We search for ‘📗’ with context = 1")
{
try
{
opts.context = 1;
matches = epubgrep::search::search(zipfile, "📗", opts);
}
catch (const std::exception &)
{
exception = true;
}
THEN("No exception is thrown")
AND_THEN("It returns the match correctly")
{
REQUIRE_FALSE(exception);
REQUIRE(matches.at(0).filepath == "test folder/😊");
REQUIRE(matches.at(0).text == "📗");
REQUIRE(matches.at(0).context.first == "📖\n\n📘");
REQUIRE(matches.at(0).context.second == "📙\n");
}
}
WHEN("We search for [ \\n] with context = 1.")
{
try
{
opts.context = 1;
opts.regex = epubgrep::options::regex_kind::perl;
matches = epubgrep::search::search(zipfile, R"([ \n])",
opts);
}
catch (const std::exception &)
{
exception = true;
}
THEN("No exception is thrown")
AND_THEN("It returns the match correctly")
{
// I looked at this a week or so after I've written it, and
// I have come to the realization that this is a tiny bit
// more complicated than strictly required. 😄
// TODO: Rewrite test.zip and tests to be better
// understandable.
REQUIRE_FALSE(exception);
REQUIRE(matches.at(1).filepath == "test folder/test file");
REQUIRE(matches.at(1).text == " ");
REQUIRE(matches.at(1).context.first == "don't");
REQUIRE(matches.at(1).context.second == "want to");
REQUIRE(matches.at(10).filepath == "test folder/😊");
REQUIRE(matches.at(10).text == "\n");
REQUIRE(matches.at(10).context.first == "📖");
REQUIRE(matches.at(10).context.second == "\n📘📗📙\n");
REQUIRE(matches.at(12).filepath == "test folder/😊");
REQUIRE(matches.at(12).text == "\n");
REQUIRE(matches.at(12).context.first == "📘📗📙");
REQUIRE(matches.at(12).context.second.empty());
}
}
WHEN(R"(We search for work\s[\w]+\.\W[\w']+\Wstay )"
R"(using Perl regular expressions. context = 1.)")
{
try
{
opts.context = 1;
opts.regex = epubgrep::options::regex_kind::extended;
matches = epubgrep::search::search(
zipfile, R"(work\s[\w]+\.\W[\w']+\Wstay)", opts);
}
catch (const std::exception &)
{
exception = true;
}
THEN("No exception is thrown")
AND_THEN("It returns the match correctly")
{
REQUIRE_FALSE(exception);
REQUIRE(matches.at(0).filepath == "test folder/test file");
REQUIRE(matches.at(0).text == "work today.\nI'm stay");
REQUIRE(matches.at(0).context.first == "to ");
REQUIRE(matches.at(0).context.second == "ing in");
}
}
}
}
}