epubgrep/src/search.cpp

/*  This file is part of epubgrep.
 *  Copyright © 2021 tastytea <tastytea@tastytea.de>
 *
 *  This program is free software: you can redistribute it and/or modify
 *  it under the terms of the GNU Affero General Public License as published by
 *  the Free Software Foundation, version 3.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU Affero General Public License for more details.
 *
 *  You should have received a copy of the GNU Affero General Public License
 *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

#include "search.hpp"

#include "fs-compat.hpp"
#include "zip.hpp"

#include <boost/regex.hpp>

#include <algorithm>
#include <array>
#include <string>
#include <string_view>
#include <vector>

namespace epubgrep::search
{

using std::string;

// Search one EPUB file for everything that matches a regular expression.
//
// filepath: the EPUB file to open.
// regex:    the pattern; it is interpreted according to opts.regex,
//           opts.grep_like and opts.ignore_case.
// opts:     if opts.raw is set, every file in the archive is searched as-is;
//           otherwise only the files returned by zip::list_spine() are
//           searched, after being passed through cleanup_text().
//           opts.context is handed to context() for each match.
//
// Returns one entry per non-empty match, in the order the files were listed
// and in document order within each file.
std::vector<match> search(const fs::path &filepath,
                          const std::string_view regex, const settings &opts)
{
    boost::regex::flag_type flags{};

    switch (opts.regex)
    {
    case options::regex_kind::basic:
    {
        flags = opts.grep_like ? boost::regex::grep : boost::regex::basic;
        break;
    }
    case options::regex_kind::extended:
    {
        flags = opts.grep_like ? boost::regex::egrep : boost::regex::extended;
        break;
    }
    case options::regex_kind::perl:
    {
        flags = boost::regex::perl;
        break;
    }
    }

    if (opts.ignore_case)
    {
        flags |= boost::regex::icase;
    }

    // A string_view is not guaranteed to be NUL-terminated, so pass the
    // pattern as an explicit range instead of as a C string.
    const boost::regex re(regex.data(), regex.data() + regex.size(), flags);
    std::vector<match> matches;
    std::vector<string> epub_filepaths{[&opts, &filepath]
                                       {
                                           if (!opts.raw)
                                           {
                                               return zip::list_spine(filepath);
                                           }
                                           return zip::list(filepath);
                                       }()};

    for (const auto &entry : epub_filepaths)
    {
        auto document{zip::read_file(filepath, entry)};
        if (!opts.raw)
        {
            cleanup_text(document);
        }

        const std::string_view whole{document};
        string::const_iterator begin{document.begin()};
        const string::const_iterator end{document.end()};
        boost::match_results<string::const_iterator> match_result;
        // Each search starts behind the previous match, so a prefix may not
        // contain a headline or page marker. Remember the last ones seen.
        string last_headline;
        string last_page;
        // An empty match would leave `begin` where it is and the loop would
        // never end; empty matches carry no text, so they are excluded.
        boost::match_flag_type search_flags{boost::match_default
                                            | boost::match_not_null};

        while (boost::regex_search(begin, end, match_result, re, search_flags))
        {
            // Text between the start of this search and the match, as a view
            // into the document (no copy).
            const auto &before{match_result.prefix()};
            const auto offset{
                static_cast<string::size_type>(before.first - document.cbegin())};
            const auto prefix{whole.substr(
                offset, static_cast<string::size_type>(before.length()))};

            auto &hit{matches.emplace_back()};
            hit.epub_filepath = filepath;
            hit.filepath = entry;
            hit.text = match_result[0];
            hit.context = context(match_result, opts.context);

            if (const auto current_headline{headline(prefix)};
                !current_headline.empty())
            {
                last_headline = current_headline;
            }
            hit.headline = last_headline;

            if (const auto current_page{page(prefix)}; !current_page.empty())
            {
                last_page = current_page;
            }
            hit.page = last_page;

            begin = match_result[0].second;
            // `begin` is no longer the start of the text. Tell the regex
            // engine, so that anchors and word boundaries are evaluated
            // against the character in front of `begin`.
            search_flags = search_flags | boost::match_prev_avail
                           | boost::match_not_bob;
        }
    }

    return matches;
}

void cleanup_text(string &text)
{
    static const boost::regex re_header_start{"<[hH][1-6]"};
    static const boost::regex re_header_end{"</[hH][1-6]"};
    static const boost::regex re_pagebreak{".+pagebreak.+(title|aria-label)"
                                           "=\"([[:alnum:]]+)\".*"};

    size_t pos{};
    while ((pos = text.find('<', pos)) != string::npos)
    {
        // Mark headlines. We need them later on.
        string replacement;
        if (boost::regex_match(text.substr(pos, 3), re_header_start))
        {
            replacement = "<H>";
        }
        else if (boost::regex_match(text.substr(pos, 4), re_header_end))
        {
            replacement = "</H>";
        }
        else if (text.substr(pos, 6) == "<span ")
        {
            auto endpos{text.find('>')};
            boost::match_results<const char *> match;
            if (boost::regex_search(text.substr(pos, endpos).data(), match,
                                    re_pagebreak))
            {
                replacement = "<PAGE " + match[2] + ">";
            }
        }
        else if (text.substr(pos, 7) == "<style "
                 || text.substr(pos, 8) == "<script ")
        {
            pos = text.find('>', pos) + 1;
        }

        text.replace(pos, text.find('>', pos) + 1 - pos, replacement);
        pos += replacement.length();
    }

    pos = 0;
    while ((pos = text.find('\r', pos)) != string::npos)
    {
        text.erase(pos, 1);
    }

    pos = 0;
    while ((pos = text.find('\n', pos)) != string::npos)
    {
        text.replace(pos, 1, " ");
    }

    pos = 0;
    while ((pos = text.find("  ", pos)) != string::npos)
    {
        text.replace(pos, 2, " ");
    }
}

// Collect up to `words` words of text on each side of a match.
//
// match: result of a regex search; match.prefix() and match.suffix() delimit
//        the text available in front of and behind the match.
// words: number of context words wanted per side. 0 yields an empty context.
//
// Returns the text before and the text after the match. Whitespace at the
// outer ends is removed, whitespace next to the match is kept.
match_context context(const boost::match_results<string::const_iterator> &match,
                      std::uint64_t words)
{
    if (words == 0)
    {
        return {};
    }

    const auto &before{match.prefix()};
    const auto &after{match.suffix()};

    // The text in front of the match is walked backwards, away from the match.
    const std::reverse_iterator rbegin_before{before.second};
    const std::reverse_iterator rend_before{before.first};
    const auto begin_after{after.first};
    const auto end_after{after.second};

    const auto is_whitespace{
        [](const char check)
        {
            return check == ' ' || check == '\n' || check == '\r'
                   || check == '\t';
        }};

    // Moves to the next whitespace and then past it. Both searches are
    // bounded by `last`, so the result never leaves [first, last].
    const auto next_word{
        [&is_whitespace](const auto first, const auto last)
        {
            return std::find_if_not(std::find_if(first, last, is_whitespace),
                                    last, is_whitespace);
        }};

    auto pos_before{rbegin_before};
    auto pos_after{begin_after};

    // words + 1 steps: the first step only gets past the (possibly partial)
    // word and the whitespace directly next to the match. Counting down
    // instead of incrementing `words` cannot overflow.
    for (auto steps_left{words};; --steps_left)
    {
        pos_before = next_word(pos_before, rend_before);
        pos_after = next_word(pos_after, end_after);

        const bool exhausted{pos_before == rend_before
                             && pos_after == end_after};
        if (steps_left == 0 || exhausted)
        {
            break;
        }
    }

    // [rbegin_before, pos_before) read backwards is the same text as
    // [pos_before.base(), before.second) read forwards.
    string prefix(pos_before.base(), before.second);
    string suffix(begin_after, pos_after);

    prefix.erase(prefix.begin(),
                 std::find_if_not(prefix.begin(), prefix.end(), is_whitespace));
    suffix.erase(
        std::find_if_not(suffix.rbegin(), suffix.rend(), is_whitespace).base(),
        suffix.end());

    return {prefix, suffix};
}

// Return the text that follows the last "<H>" marker in `prefix`, up to the
// next '<' or, if there is none, up to the end of `prefix`. Returns an empty
// string if `prefix` contains no "<H>" marker.
string headline(const std::string_view prefix)
{
    constexpr std::string_view marker{"<H>"};

    const auto marker_pos{prefix.rfind(marker)};
    if (marker_pos == std::string_view::npos)
    {
        return {};
    }

    const auto text_start{marker_pos + marker.length()};
    const auto text_end{prefix.find('<', text_start)};
    // If no '<' follows, text_end is npos and substr() clamps to the end.
    return string{prefix.substr(text_start, text_end - text_start)};
}

// Return the label of the last "<PAGE label>" marker in `prefix`, that is, the
// text between "<PAGE " and the next '>' (or the end of `prefix` if there is
// none). Returns an empty string if `prefix` contains no "<PAGE " marker.
string page(const std::string_view prefix)
{
    constexpr std::string_view marker{"<PAGE "};

    const auto marker_pos{prefix.rfind(marker)};
    if (marker_pos == std::string_view::npos)
    {
        return {};
    }

    const auto label_start{marker_pos + marker.length()};
    const auto label_end{prefix.find('>', label_start)};
    // If no '>' follows, label_end is npos and substr() clamps to the end.
    return string{prefix.substr(label_start, label_end - label_start)};
}

} // namespace epubgrep::search
Add skeleton for search::search(). - Type for matches - Type for options. 2021-05-24 07:52:36 +02:00			`/* This file is part of epubgrep.`
			`* Copyright © 2021 tastytea <tastytea@tastytea.de>`
			`*`
			`* This program is free software: you can redistribute it and/or modify`
			`* it under the terms of the GNU Affero General Public License as published by`
			`* the Free Software Foundation, version 3.`
			`*`
			`* This program is distributed in the hope that it will be useful,`
			`* but WITHOUT ANY WARRANTY; without even the implied warranty of`
			`* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the`
			`* GNU Affero General Public License for more details.`
			`*`
			`* You should have received a copy of the GNU Affero General Public License`
			`* along with this program. If not, see <http://www.gnu.org/licenses/>.`
			`*/`

			`#include "search.hpp"`

			`#include "fs-compat.hpp"`
Add basic search functionality and context output. 2021-05-24 15:35:49 +02:00			`#include "zip.hpp"`
Add skeleton for search::search(). - Type for matches - Type for options. 2021-05-24 07:52:36 +02:00
Add basic search functionality and context output. 2021-05-24 15:35:49 +02:00			`#include <boost/regex.hpp>`

			`#include <algorithm>`
Use iterators in search::context() and don't return extra whitespace Should be easier to understand now. 2021-05-30 13:31:59 +02:00			`#include <array>`
Add basic search functionality and context output. 2021-05-24 15:35:49 +02:00			`#include <string>`
Add skeleton for search::search(). - Type for matches - Type for options. 2021-05-24 07:52:36 +02:00			`#include <string_view>`
			`#include <vector>`

			`namespace epubgrep::search`
			`{`

Import std::string into epubgrep::search namespace. 2021-05-26 18:02:27 +02:00			`using std::string;`

Make regular expressions static variables. Fewer allocations → faster program. About 17% speed increase with 89 books on up to 3 cores. Measured using the average of 4 runs. Before: ~15,5 seconds After: ~12,8 seconds Calls to allocation functions went down from 16.652.583 to 5.059.301. 2021-05-28 19:07:27 +02:00			`std::vector<match> search(const fs::path &filepath,`
			`const std::string_view regex, const settings &opts)`
Add skeleton for search::search(). - Type for matches - Type for options. 2021-05-24 07:52:36 +02:00			`{`
Add basic search functionality and context output. 2021-05-24 15:35:49 +02:00			`boost::regex::flag_type flags{};`

			`switch (opts.regex)`
			`{`
Rework option parsing, change --no-filename. Options are now better accessible, --no-filename accepts the values filesystem, in-epub or all. 2021-05-27 17:20:00 +02:00			`case options::regex_kind::basic:`
Add basic search functionality and context output. 2021-05-24 15:35:49 +02:00			`{`
			`flags = opts.grep_like ? boost::regex::grep : boost::regex::basic;`
			`break;`
			`}`
Rework option parsing, change --no-filename. Options are now better accessible, --no-filename accepts the values filesystem, in-epub or all. 2021-05-27 17:20:00 +02:00			`case options::regex_kind::extended:`
Add basic search functionality and context output. 2021-05-24 15:35:49 +02:00			`{`
			`flags = opts.grep_like ? boost::regex::egrep : boost::regex::extended;`
			`break;`
			`}`
Rework option parsing, change --no-filename. Options are now better accessible, --no-filename accepts the values filesystem, in-epub or all. 2021-05-27 17:20:00 +02:00			`case options::regex_kind::perl:`
Add basic search functionality and context output. 2021-05-24 15:35:49 +02:00			`{`
			`flags = boost::regex::perl;`
			`break;`
			`}`
			`}`

			`if (opts.ignore_case)`
			`{`
			`flags \|= boost::regex::icase;`
			`}`

Make regex const. 2021-05-27 09:46:59 +02:00			`const boost::regex re(regex.data(), flags);`
Add basic search functionality and context output. 2021-05-24 15:35:49 +02:00			`std::vector<match> matches;`
Only search files in spine, in the right order. The spine lists all content documents in their linear reading order. So we're finally getting our results in the right order! 🎉 Since we skip the images and fonts, which usually make up the most bytes in an EPUB file, the performance increase is immense. I measured 60-70% in a very short test. Closes: https://schlomp.space/tastytea/epubgrep/issues/1 2021-05-29 15:50:03 +02:00			`std::vector<string> epub_filepaths{[&opts, &filepath]`
			`{`
			`if (!opts.raw)`
			`{`
			`return zip::list_spine(filepath);`
			`}`
			`return zip::list(filepath);`
			`}()};`

			`for (const auto &entry : epub_filepaths)`
Add basic search functionality and context output. 2021-05-24 15:35:49 +02:00			`{`
			`auto document{zip::read_file(filepath, entry)};`
Clean up text before searching. 2021-05-24 16:01:41 +02:00			`if (!opts.raw)`
			`{`
			`cleanup_text(document);`
			`}`

Import std::string into epubgrep::search namespace. 2021-05-26 18:02:27 +02:00			`string::const_iterator begin{document.begin()};`
			`string::const_iterator end{document.end()};`
			`boost::match_results<string::const_iterator> match_result;`
			`string last_headline;`
			`string last_page;`
Extract headlines. 2021-05-24 17:18:10 +02:00
Add basic search functionality and context output. 2021-05-24 15:35:49 +02:00			`while (boost::regex_search(begin, end, match_result, re,`
			`boost::match_default))`
			`{`
			`match match; // FIXME: Rename variable or struct.`
Print the EPUB file name if more than 1 input file. Change --no-filename to mean: Don't print the EPUB file name. 2021-05-27 14:46:23 +02:00			`match.epub_filepath = filepath;`
Add basic search functionality and context output. 2021-05-24 15:35:49 +02:00			`match.filepath = entry;`
			`match.text = match_result[0];`
			`match.context = context(match_result, opts.context);`
Extract headlines. 2021-05-24 17:18:10 +02:00			`const auto current_headline{headline(match_result.prefix().str())};`
			`if (!current_headline.empty())`
			`{`
			`last_headline = current_headline;`
			`}`
			`match.headline = last_headline;`
Extract page numbers. 2021-05-24 18:56:43 +02:00			`const auto current_page{page(match_result.prefix().str())};`
			`if (!current_page.empty())`
			`{`
			`last_page = current_page;`
			`}`
			`match.page = last_page;`
Add basic search functionality and context output. 2021-05-24 15:35:49 +02:00
			`matches.emplace_back(match);`
			`begin = match_result[0].second;`
			`}`
			`}`

			`return matches;`
			`}`

Import std::string into epubgrep::search namespace. 2021-05-26 18:02:27 +02:00			`void cleanup_text(string &text)`
Move cleanup_text(), document functions. 2021-05-24 16:23:07 +02:00			`{`
Make regular expressions static variables. Fewer allocations → faster program. About 17% speed increase with 89 books on up to 3 cores. Measured using the average of 4 runs. Before: ~15,5 seconds After: ~12,8 seconds Calls to allocation functions went down from 16.652.583 to 5.059.301. 2021-05-28 19:07:27 +02:00			`static const boost::regex re_header_start{"<[hH][1-6]"};`
Fix end-of-headline detection. 2021-05-29 23:00:16 +02:00			`static const boost::regex re_header_end{"</[hH][1-6]"};`
Make regular expressions static variables. Fewer allocations → faster program. About 17% speed increase with 89 books on up to 3 cores. Measured using the average of 4 runs. Before: ~15,5 seconds After: ~12,8 seconds Calls to allocation functions went down from 16.652.583 to 5.059.301. 2021-05-28 19:07:27 +02:00			`static const boost::regex re_pagebreak{".+pagebreak.+(title\|aria-label)"`
			`"=\"([[:alnum:]]+)\".*"};`

Don't replace stuff in search::cleanup_text() if nothing matched. 2021-05-24 19:58:59 +02:00			`size_t pos{};`
Import std::string into epubgrep::search namespace. 2021-05-26 18:02:27 +02:00			`while ((pos = text.find('<', pos)) != string::npos)`
Move cleanup_text(), document functions. 2021-05-24 16:23:07 +02:00			`{`
Wrap headlines in <H> and </H> during cleanup. 2021-05-24 18:08:40 +02:00			`// Mark headlines. We need them later on.`
Import std::string into epubgrep::search namespace. 2021-05-26 18:02:27 +02:00			`string replacement;`
Make regular expressions static variables. Fewer allocations → faster program. About 17% speed increase with 89 books on up to 3 cores. Measured using the average of 4 runs. Before: ~15,5 seconds After: ~12,8 seconds Calls to allocation functions went down from 16.652.583 to 5.059.301. 2021-05-28 19:07:27 +02:00			`if (boost::regex_match(text.substr(pos, 3), re_header_start))`
Don't strip headlines. 2021-05-24 16:37:30 +02:00			`{`
Wrap headlines in <H> and </H> during cleanup. 2021-05-24 18:08:40 +02:00			`replacement = "<H>";`
Don't strip headlines. 2021-05-24 16:37:30 +02:00			`}`
Fix end-of-headline detection. 2021-05-29 23:00:16 +02:00			`else if (boost::regex_match(text.substr(pos, 4), re_header_end))`
Wrap headlines in <H> and </H> during cleanup. 2021-05-24 18:08:40 +02:00			`{`
			`replacement = "</H>";`
			`}`
Remove <style> and <script> snippets. Closes: https://schlomp.space/tastytea/epubgrep/issues/8 2021-05-29 18:49:35 +02:00			`else if (text.substr(pos, 6) == "<span ")`
Extract page numbers. 2021-05-24 18:56:43 +02:00			`{`
			`auto endpos{text.find('>')};`
			`boost::match_results<const char *> match;`
			`if (boost::regex_search(text.substr(pos, endpos).data(), match,`
			`re_pagebreak))`
			`{`
			`replacement = "<PAGE " + match[2] + ">";`
			`}`
			`}`
Remove <style> and <script> snippets. Closes: https://schlomp.space/tastytea/epubgrep/issues/8 2021-05-29 18:49:35 +02:00			`else if (text.substr(pos, 7) == "<style "`
			`\|\| text.substr(pos, 8) == "<script ")`
			`{`
			`pos = text.find('>', pos) + 1;`
			`}`

Wrap headlines in <H> and </H> during cleanup. 2021-05-24 18:08:40 +02:00			`text.replace(pos, text.find('>', pos) + 1 - pos, replacement);`
			`pos += replacement.length();`
Move cleanup_text(), document functions. 2021-05-24 16:23:07 +02:00			`}`

Don't replace stuff in search::cleanup_text() if nothing matched. 2021-05-24 19:58:59 +02:00			`pos = 0;`
Import std::string into epubgrep::search namespace. 2021-05-26 18:02:27 +02:00			`while ((pos = text.find('\r', pos)) != string::npos)`
Move cleanup_text(), document functions. 2021-05-24 16:23:07 +02:00			`{`
Wrap headlines in <H> and </H> during cleanup. 2021-05-24 18:08:40 +02:00			`text.erase(pos, 1);`
Move cleanup_text(), document functions. 2021-05-24 16:23:07 +02:00			`}`

Don't replace stuff in search::cleanup_text() if nothing matched. 2021-05-24 19:58:59 +02:00			`pos = 0;`
Import std::string into epubgrep::search namespace. 2021-05-26 18:02:27 +02:00			`while ((pos = text.find('\n', pos)) != string::npos)`
Move cleanup_text(), document functions. 2021-05-24 16:23:07 +02:00			`{`
			`text.replace(pos, 1, " ");`
			`}`

Don't replace stuff in search::cleanup_text() if nothing matched. 2021-05-24 19:58:59 +02:00			`pos = 0;`
Import std::string into epubgrep::search namespace. 2021-05-26 18:02:27 +02:00			`while ((pos = text.find(" ", pos)) != string::npos)`
Move cleanup_text(), document functions. 2021-05-24 16:23:07 +02:00			`{`
			`text.replace(pos, 2, " ");`
			`}`
			`}`

Import std::string into epubgrep::search namespace. 2021-05-26 18:02:27 +02:00			`match_context context(const boost::match_results<string::const_iterator> &match,`
			`std::uint64_t words)`
Add basic search functionality and context output. 2021-05-24 15:35:49 +02:00			`{`
Fix bugs in search::context(). - Don't add context if words == 0 - Handle beginning / end of text correctly. 2021-05-24 19:57:15 +02:00			`if (words == 0)`
			`{`
			`return {};`
			`}`

Add basic search functionality and context output. 2021-05-24 15:35:49 +02:00			`++words;`

Use iterators in search::context() and don't return extra whitespace Should be easier to understand now. 2021-05-30 13:31:59 +02:00			`const auto &rbegin_before{std::reverse_iterator(match.prefix().end())};`
			`const auto &rend_before{std::reverse_iterator(match.prefix().begin())};`

			`const auto &begin_after{match.suffix().begin()};`
			`const auto &end_after{match.suffix().end()};`

			`auto pos_before{rbegin_before};`
			`auto pos_after{begin_after};`

			`const std::array<char, 4> whitespace{' ', '\n', '\r', '\t'};`
			`auto is_whitespace{`
			`[&whitespace](char check)`
			`{`
			`return std::any_of(whitespace.begin(), whitespace.end(),`
			`[&check](const char ws) { return check == ws; });`
			`}};`

Add basic search functionality and context output. 2021-05-24 15:35:49 +02:00			`while (words != 0)`
			`{`
Use iterators in search::context() and don't return extra whitespace Should be easier to understand now. 2021-05-30 13:31:59 +02:00			`if (pos_before != rend_before)`
Add basic search functionality and context output. 2021-05-24 15:35:49 +02:00			`{`
Use iterators in search::context() and don't return extra whitespace Should be easier to understand now. 2021-05-30 13:31:59 +02:00			`pos_before = std::find_first_of(pos_before, rend_before,`
			`whitespace.begin(),`
			`whitespace.end());`
			`if (pos_before != rend_before)`
Fix bugs in search::context(). - Don't add context if words == 0 - Handle beginning / end of text correctly. 2021-05-24 19:57:15 +02:00			`{`
Use iterators in search::context() and don't return extra whitespace Should be easier to understand now. 2021-05-30 13:31:59 +02:00			`while (is_whitespace(*pos_before))`
			`{`
			`++pos_before;`
			`}`
Fix bugs in search::context(). - Don't add context if words == 0 - Handle beginning / end of text correctly. 2021-05-24 19:57:15 +02:00			`}`
Add basic search functionality and context output. 2021-05-24 15:35:49 +02:00			`}`

Use iterators in search::context() and don't return extra whitespace Should be easier to understand now. 2021-05-30 13:31:59 +02:00			`if (pos_after != end_after)`
Add basic search functionality and context output. 2021-05-24 15:35:49 +02:00			`{`
Use iterators in search::context() and don't return extra whitespace Should be easier to understand now. 2021-05-30 13:31:59 +02:00			`pos_after = std::find_first_of(pos_after, end_after,`
			`whitespace.begin(),`
			`whitespace.end());`
			`if (pos_after != end_after)`
Fix bugs in search::context(). - Don't add context if words == 0 - Handle beginning / end of text correctly. 2021-05-24 19:57:15 +02:00			`{`
Use iterators in search::context() and don't return extra whitespace Should be easier to understand now. 2021-05-30 13:31:59 +02:00			`while (is_whitespace(*pos_after))`
			`{`
			`++pos_after;`
			`}`
Fix bugs in search::context(). - Don't add context if words == 0 - Handle beginning / end of text correctly. 2021-05-24 19:57:15 +02:00			`}`
Add basic search functionality and context output. 2021-05-24 15:35:49 +02:00			`}`
			`words -= 1;`
			`}`

Use iterators in search::context() and don't return extra whitespace Should be easier to understand now. 2021-05-30 13:31:59 +02:00			`const std::string prefix_reversed(rbegin_before, pos_before);`
			`string prefix(prefix_reversed.rbegin(), prefix_reversed.rend());`
			`std::string suffix(begin_after, pos_after);`
			`while (is_whitespace(*prefix.begin()))`
Fix bugs in search::context(). - Don't add context if words == 0 - Handle beginning / end of text correctly. 2021-05-24 19:57:15 +02:00			`{`
Use iterators in search::context() and don't return extra whitespace Should be easier to understand now. 2021-05-30 13:31:59 +02:00			`prefix.erase(0, 1);`
Fix bugs in search::context(). - Don't add context if words == 0 - Handle beginning / end of text correctly. 2021-05-24 19:57:15 +02:00			`}`
Use iterators in search::context() and don't return extra whitespace Should be easier to understand now. 2021-05-30 13:31:59 +02:00			`while (is_whitespace(*suffix.rbegin()))`
Fix bugs in search::context(). - Don't add context if words == 0 - Handle beginning / end of text correctly. 2021-05-24 19:57:15 +02:00			`{`
Use iterators in search::context() and don't return extra whitespace Should be easier to understand now. 2021-05-30 13:31:59 +02:00			`suffix.erase(suffix.size() - 1);`
Fix bugs in search::context(). - Don't add context if words == 0 - Handle beginning / end of text correctly. 2021-05-24 19:57:15 +02:00			`}`

Use iterators in search::context() and don't return extra whitespace Should be easier to understand now. 2021-05-30 13:31:59 +02:00			`return {prefix, suffix};`
Add skeleton for search::search(). - Type for matches - Type for options. 2021-05-24 07:52:36 +02:00			`}`

Import std::string into epubgrep::search namespace. 2021-05-26 18:02:27 +02:00			`string headline(const std::string_view prefix)`
Extract headlines. 2021-05-24 17:18:10 +02:00			`{`
			`size_t pos{prefix.length()};`
Wrap headlines in <H> and </H> during cleanup. 2021-05-24 18:08:40 +02:00			`while ((pos = prefix.rfind("<H>", pos)) != std::string_view::npos)`
Extract headlines. 2021-05-24 17:18:10 +02:00			`{`
Wrap headlines in <H> and </H> during cleanup. 2021-05-24 18:08:40 +02:00			`pos += 3;`
Import std::string into epubgrep::search namespace. 2021-05-26 18:02:27 +02:00			`return string{prefix.substr(pos, prefix.find('<', pos) - pos)};`
Extract headlines. 2021-05-24 17:18:10 +02:00			`}`

			`return {};`
			`}`

Import std::string into epubgrep::search namespace. 2021-05-26 18:02:27 +02:00			`string page(const std::string_view prefix)`
Extract page numbers. 2021-05-24 18:56:43 +02:00			`{`
			`size_t pos{prefix.length()};`
			`while ((pos = prefix.rfind("<PAGE ", pos)) != std::string_view::npos)`
			`{`
			`pos += 6;`
Import std::string into epubgrep::search namespace. 2021-05-26 18:02:27 +02:00			`return string{prefix.substr(pos, prefix.find('>', pos) - pos)};`
Extract page numbers. 2021-05-24 18:56:43 +02:00			`}`

			`return {};`
			`}`

Add skeleton for search::search(). - Type for matches - Type for options. 2021-05-24 07:52:36 +02:00			`} // namespace epubgrep::search`