Add basic search functionality and context output.
This commit is contained in:
parent
4e01032c6f
commit
1979956f03
@ -22,7 +22,7 @@ set(CMAKE_CXX_EXTENSIONS OFF)
|
|||||||
include(cmake/debug_flags.cmake)
|
include(cmake/debug_flags.cmake)
|
||||||
|
|
||||||
# All dependencies except test dependencies.
|
# All dependencies except test dependencies.
|
||||||
find_package(Boost 1.65.0 REQUIRED COMPONENTS program_options locale)
|
find_package(Boost 1.65.0 REQUIRED COMPONENTS locale program_options regex)
|
||||||
find_package(Gettext REQUIRED)
|
find_package(Gettext REQUIRED)
|
||||||
find_package(Filesystem REQUIRED COMPONENTS Final Experimental)
|
find_package(Filesystem REQUIRED COMPONENTS Final Experimental)
|
||||||
find_package(LibArchive 3.2 REQUIRED)
|
find_package(LibArchive 3.2 REQUIRED)
|
||||||
|
@ -17,8 +17,9 @@ unset(headers_src)
|
|||||||
|
|
||||||
target_link_libraries(${PROJECT_NAME}_lib
|
target_link_libraries(${PROJECT_NAME}_lib
|
||||||
PUBLIC
|
PUBLIC
|
||||||
Boost::program_options
|
|
||||||
Boost::locale
|
Boost::locale
|
||||||
|
Boost::program_options
|
||||||
|
Boost::regex
|
||||||
std::filesystem
|
std::filesystem
|
||||||
LibArchive::LibArchive
|
LibArchive::LibArchive
|
||||||
fmt::fmt)
|
fmt::fmt)
|
||||||
|
@ -85,15 +85,15 @@ int main(int argc, char *argv[])
|
|||||||
search::options opts;
|
search::options opts;
|
||||||
if (vm.count("basic-regexp") > 0)
|
if (vm.count("basic-regexp") > 0)
|
||||||
{
|
{
|
||||||
opts.regex = search::regex_type::basic;
|
opts.regex = search::regex_kind::basic;
|
||||||
}
|
}
|
||||||
if (vm.count("extended-regexp") > 0)
|
if (vm.count("extended-regexp") > 0)
|
||||||
{
|
{
|
||||||
opts.regex = search::regex_type::extended;
|
opts.regex = search::regex_kind::extended;
|
||||||
}
|
}
|
||||||
if (vm.count("perl-regexp") > 0)
|
if (vm.count("perl-regexp") > 0)
|
||||||
{
|
{
|
||||||
opts.regex = search::regex_type::perl;
|
opts.regex = search::regex_kind::perl;
|
||||||
}
|
}
|
||||||
if (vm.count("grep") > 0)
|
if (vm.count("grep") > 0)
|
||||||
{
|
{
|
||||||
|
@ -17,7 +17,12 @@
|
|||||||
#include "search.hpp"
|
#include "search.hpp"
|
||||||
|
|
||||||
#include "fs-compat.hpp"
|
#include "fs-compat.hpp"
|
||||||
|
#include "zip.hpp"
|
||||||
|
|
||||||
|
#include <boost/regex.hpp>
|
||||||
|
|
||||||
|
#include <algorithm>
|
||||||
|
#include <string>
|
||||||
#include <string_view>
|
#include <string_view>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
@ -27,7 +32,81 @@ namespace epubgrep::search
|
|||||||
//! Search every file inside the archive at `filepath` for `regex`.
//!
//! The regular expression grammar is selected by `opts.regex` (combined with
//! `opts.grep_like` for the basic and extended grammars), `opts.ignore_case`
//! adds case-insensitive matching. For each hit the archive entry, the
//! matched text and `opts.context` words of surrounding context are recorded.
//!
//! @param filepath Path of the archive to search.
//! @param regex    The regular expression; does not need to be
//!                 null-terminated.
//! @param opts     Search options.
//! @return All matches, in the order they were found.
std::vector<match> search(const fs::path &filepath, std::string_view regex,
                          const options &opts)
{
    boost::regex::flag_type flags{};

    switch (opts.regex)
    {
    case regex_kind::basic:
    {
        flags = opts.grep_like ? boost::regex::grep : boost::regex::basic;
        break;
    }
    case regex_kind::extended:
    {
        flags = opts.grep_like ? boost::regex::egrep : boost::regex::extended;
        break;
    }
    case regex_kind::perl:
    {
        flags = boost::regex::perl;
        break;
    }
    }

    if (opts.ignore_case)
    {
        flags |= boost::regex::icase;
    }

    // A string_view is not guaranteed to be null-terminated, so the length
    // has to be passed along with the pointer.
    const boost::regex re(regex.data(), regex.size(), flags);

    std::vector<match> matches;
    const boost::sregex_iterator no_more_matches;
    for (const auto &entry : zip::list(filepath))
    {
        const auto document{zip::read_file(filepath, entry)};

        // regex_iterator advances past zero-length matches by itself; a
        // hand-written regex_search loop would spin forever on patterns
        // like "a*".
        for (boost::sregex_iterator hit(document.begin(), document.end(), re,
                                        boost::match_default);
             hit != no_more_matches; ++hit)
        {
            // Construct the result in place and fill it in afterwards.
            auto &found{matches.emplace_back()};
            found.filepath = entry;
            found.text = (*hit)[0].str();
            found.context = context(*hit, opts.context);
        }
    }

    return matches;
}
|
||||||
|
|
||||||
|
//! Extract the text surrounding a match.
//!
//! The text before the match is cut at the (`words` + 1)th space counted
//! backwards from the match, the text after it at the (`words` + 1)th space
//! counted forwards. The additional space accounts for the (possibly empty)
//! word fragment directly adjacent to the match. If there are fewer spaces
//! than that, everything available on that side is returned. Each space
//! counts as a separator, consecutive spaces are not merged.
//!
//! @param match The match whose prefix and suffix are inspected.
//! @param words Number of words of context on each side.
//! @return The context before and after the match, in that order.
match_context
context(const boost::match_results<std::string::const_iterator> &match,
        std::uint64_t words)
{
    const std::string prefix{match.prefix().str()};
    const std::string suffix{match.suffix().str()};

    // Index of the first character of the leading context. Stays 0 (whole
    // prefix) if the prefix runs out of spaces.
    std::string::size_type before_start{0};
    {
        std::string::size_type search_from{std::string::npos};
        std::uint64_t remaining{words};
        // The do-while runs words + 1 times without overflowing `words`.
        do
        {
            const auto space{prefix.rfind(' ', search_from)};
            if (space == std::string::npos)
            {
                before_start = 0;
                break;
            }
            before_start = space + 1;
            if (space == 0)
            {
                break; // Nothing left in front of this space.
            }
            search_from = space - 1;
        } while (remaining-- != 0);
    }

    // Length of the trailing context. npos (whole suffix) if the suffix runs
    // out of spaces.
    std::string::size_type after_length{std::string::npos};
    {
        std::string::size_type search_from{0};
        std::uint64_t remaining{words};
        do
        {
            const auto space{suffix.find(' ', search_from)};
            after_length = space;
            if (space == std::string::npos)
            {
                break; // Do not restart the scan from the beginning.
            }
            search_from = space + 1;
        } while (remaining-- != 0);
    }

    return {prefix.substr(before_start), suffix.substr(0, after_length)};
}
|
||||||
|
|
||||||
} // namespace epubgrep::search
|
} // namespace epubgrep::search
|
||||||
|
@ -19,6 +19,8 @@
|
|||||||
|
|
||||||
#include "fs-compat.hpp"
|
#include "fs-compat.hpp"
|
||||||
|
|
||||||
|
#include <boost/regex.hpp>
|
||||||
|
|
||||||
#include <cstdint>
|
#include <cstdint>
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <string_view>
|
#include <string_view>
|
||||||
@ -39,7 +41,7 @@ struct match
|
|||||||
std::string page; //!< The page number, if available.
|
std::string page; //!< The page number, if available.
|
||||||
};
|
};
|
||||||
|
|
||||||
enum class regex_type
|
enum class regex_kind
|
||||||
{
|
{
|
||||||
basic,
|
basic,
|
||||||
extended,
|
extended,
|
||||||
@ -48,7 +50,7 @@ enum class regex_type
|
|||||||
|
|
||||||
struct options
|
struct options
|
||||||
{
|
{
|
||||||
regex_type regex{regex_type::basic};
|
regex_kind regex{regex_kind::basic};
|
||||||
bool grep_like{false};
|
bool grep_like{false};
|
||||||
bool ignore_case{false};
|
bool ignore_case{false};
|
||||||
bool nostrip{false};
|
bool nostrip{false};
|
||||||
@ -59,6 +61,10 @@ struct options
|
|||||||
std::string_view regex,
|
std::string_view regex,
|
||||||
const options &opts);
|
const options &opts);
|
||||||
|
|
||||||
|
//! Return the text before and after a match, limited to roughly `words`
//! space-separated words on each side (plus the word fragment adjacent to
//! the match). Operates on the prefix and suffix of `match`.
[[nodiscard]] match_context
|
||||||
|
context(const boost::match_results<std::string::const_iterator> &match,
|
||||||
|
std::uint64_t words);
|
||||||
|
|
||||||
} // namespace epubgrep::search
|
} // namespace epubgrep::search
|
||||||
|
|
||||||
#endif // EPUBGREP_SEARCH_HPP
|
#endif // EPUBGREP_SEARCH_HPP
|
||||||
|
Loading…
x
Reference in New Issue
Block a user