Move cleanup_text(), document functions.

This commit is contained in:
tastytea 2021-05-24 16:23:07 +02:00
parent 30478f6feb
commit bb1a43ca92
Signed by: tastytea
GPG Key ID: CFC39497F1B26E07
2 changed files with 28 additions and 25 deletions

View File

@ -87,6 +87,29 @@ std::vector<match> search(const fs::path &filepath, std::string_view regex,
return matches;
}
//! Strip HTML tags, remove carriage returns, turn newlines into spaces and
//! condense runs of spaces into a single space.
//!
//! The text is modified in place. Every pass searches first and edits only
//! what it found, so text that does not start with a tag, a line break or a
//! space keeps its leading characters.
//!
//! @param text The text to clean up.
void cleanup_text(std::string &text)
{
    // Remove everything from '<' up to and including the next '>'.
    for (size_t pos{text.find('<')}; pos != std::string::npos;
         pos = text.find('<', pos))
    {
        const size_t tag_end{text.find('>', pos)};
        if (tag_end == std::string::npos)
        {
            // Unterminated tag: drop the rest of the text.
            text.erase(pos);
            break;
        }
        text.erase(pos, tag_end + 1 - pos);
    }

    // Drop carriage returns entirely.
    for (size_t pos{text.find('\r')}; pos != std::string::npos;
         pos = text.find('\r', pos))
    {
        text.erase(pos, 1);
    }

    // Newlines become word separators.
    for (size_t pos{text.find('\n')}; pos != std::string::npos;
         pos = text.find('\n', pos + 1))
    {
        text[pos] = ' ';
    }

    // Keep the first space of every run of spaces and erase the rest.
    for (size_t pos{text.find(' ')}; pos != std::string::npos;
         pos = text.find(' ', pos + 1))
    {
        const size_t first_kept{pos + 1};
        const size_t run_end{text.find_first_not_of(' ', first_kept)};
        const size_t stop{run_end == std::string::npos ? text.size()
                                                       : run_end};
        text.erase(first_kept, stop - first_kept);
    }
}
match_context
context(const boost::match_results<std::string::const_iterator> &match,
std::uint64_t words)
@ -114,27 +137,4 @@ context(const boost::match_results<std::string::const_iterator> &match,
return {prefix.substr(pos_before + 2), suffix.substr(0, pos_after - 1)};
}
//! Strip HTML tags, remove carriage returns, turn newlines into spaces and
//! condense runs of spaces into a single space.
//!
//! The text is modified in place. Every pass searches first and edits only
//! what it found, so text that does not start with a tag, a line break or a
//! space keeps its leading characters.
//!
//! @param text The text to clean up.
void cleanup_text(std::string &text)
{
    // Remove everything from '<' up to and including the next '>'.
    for (size_t pos{text.find('<')}; pos != std::string::npos;
         pos = text.find('<', pos))
    {
        const size_t tag_end{text.find('>', pos)};
        if (tag_end == std::string::npos)
        {
            // Unterminated tag: drop the rest of the text.
            text.erase(pos);
            break;
        }
        text.erase(pos, tag_end + 1 - pos);
    }

    // Drop carriage returns entirely.
    for (size_t pos{text.find('\r')}; pos != std::string::npos;
         pos = text.find('\r', pos))
    {
        text.erase(pos, 1);
    }

    // Newlines become word separators.
    for (size_t pos{text.find('\n')}; pos != std::string::npos;
         pos = text.find('\n', pos + 1))
    {
        text[pos] = ' ';
    }

    // Keep the first space of every run of spaces and erase the rest.
    for (size_t pos{text.find(' ')}; pos != std::string::npos;
         pos = text.find(' ', pos + 1))
    {
        const size_t first_kept{pos + 1};
        const size_t run_end{text.find_first_not_of(' ', first_kept)};
        const size_t stop{run_end == std::string::npos ? text.size()
                                                       : run_end};
        text.erase(first_kept, stop - first_kept);
    }
}
} // namespace epubgrep::search

View File

@ -57,16 +57,19 @@ struct options
std::uint64_t context{0};
};
//! Search file, return matches.
//!
//! @param filepath Path of the file to search.
//! @param regex    Regular expression to search for.
//! @param opts     Options controlling the search (see struct options).
//! @return The matches that were found.
[[nodiscard]] std::vector<match> search(const fs::path &filepath,
std::string_view regex,
const options &opts);
//! Strip HTML, remove newlines, condense spaces.
//!
//! Works in place on @p text: '<'…'>' spans and carriage returns are
//! removed, newlines are replaced by spaces and consecutive spaces are
//! condensed.
//!
//! @param text The text to clean up; modified in place.
void cleanup_text(std::string &text);
//! Return words before and after the match.
//!
//! @param match The regex match whose surroundings are wanted.
//! @param words Presumably the number of words to return on each side of
//!              the match — confirm against the definition.
//! @return The text before and the text after the match.
[[nodiscard]] match_context
context(const boost::match_results<std::string::const_iterator> &match,
std::uint64_t words);
// Redundant re-declaration of cleanup_text(); it is already declared above.
void cleanup_text(std::string &text);
} // namespace epubgrep::search
#endif // EPUBGREP_SEARCH_HPP