Move cleanup_text(), document functions.
This commit is contained in:
parent
30478f6feb
commit
bb1a43ca92
|
@ -87,6 +87,29 @@ std::vector<match> search(const fs::path &filepath, std::string_view regex,
|
|||
return matches;
|
||||
}
|
||||
|
||||
//! Strip HTML tags, remove carriage returns, replace newlines with spaces and
//! condense runs of spaces into a single space. Modifies text in place.
void cleanup_text(std::string &text)
{
    // Remove every "<...>" span. The search happens before the first erase,
    // so text that does not begin with a tag keeps its leading characters.
    for (size_t pos{text.find('<')}; pos != std::string::npos;
         pos = text.find('<', pos))
    {
        const size_t tag_end{text.find('>', pos)};
        if (tag_end == std::string::npos)
        {
            // Unterminated tag: drop everything from '<' to the end.
            text.erase(pos);
            break;
        }
        text.erase(pos, tag_end + 1 - pos);
    }

    // Drop carriage returns entirely.
    for (size_t pos{text.find('\r')}; pos != std::string::npos;
         pos = text.find('\r', pos))
    {
        text.erase(pos, 1);
    }

    // Turn each newline into a space.
    for (size_t pos{text.find('\n')}; pos != std::string::npos;
         pos = text.find('\n', pos + 1))
    {
        text[pos] = ' ';
    }

    // Collapse runs of spaces. Searching again from the same position handles
    // runs longer than two, one space at a time.
    for (size_t pos{text.find("  ")}; pos != std::string::npos;
         pos = text.find("  ", pos))
    {
        text.erase(pos, 1);
    }
}
|
||||
|
||||
match_context
|
||||
context(const boost::match_results<std::string::const_iterator> &match,
|
||||
std::uint64_t words)
|
||||
|
@ -114,27 +137,4 @@ context(const boost::match_results<std::string::const_iterator> &match,
|
|||
return {prefix.substr(pos_before + 2), suffix.substr(0, pos_after - 1)};
|
||||
}
|
||||
|
||||
//! Strip HTML tags, remove carriage returns, replace newlines with spaces and
//! condense runs of spaces into a single space. Modifies text in place.
void cleanup_text(std::string &text)
{
    // Remove every "<...>" span. The search happens before the first erase,
    // so text that does not begin with a tag keeps its leading characters.
    for (size_t pos{text.find('<')}; pos != std::string::npos;
         pos = text.find('<', pos))
    {
        const size_t tag_end{text.find('>', pos)};
        if (tag_end == std::string::npos)
        {
            // Unterminated tag: drop everything from '<' to the end.
            text.erase(pos);
            break;
        }
        text.erase(pos, tag_end + 1 - pos);
    }

    // Drop carriage returns entirely.
    for (size_t pos{text.find('\r')}; pos != std::string::npos;
         pos = text.find('\r', pos))
    {
        text.erase(pos, 1);
    }

    // Turn each newline into a space.
    for (size_t pos{text.find('\n')}; pos != std::string::npos;
         pos = text.find('\n', pos + 1))
    {
        text[pos] = ' ';
    }

    // Collapse runs of spaces. Searching again from the same position handles
    // runs longer than two, one space at a time.
    for (size_t pos{text.find("  ")}; pos != std::string::npos;
         pos = text.find("  ", pos))
    {
        text.erase(pos, 1);
    }
}
|
||||
|
||||
} // namespace epubgrep::search
|
||||
|
|
|
@ -57,16 +57,19 @@ struct options
|
|||
std::uint64_t context{0};
|
||||
};
|
||||
|
||||
//! Search file, return matches.
|
||||
[[nodiscard]] std::vector<match> search(const fs::path &filepath,
|
||||
std::string_view regex,
|
||||
const options &opts);
|
||||
|
||||
//! Strip HTML tags, remove carriage returns, replace newlines with spaces and
//! condense spaces. Modifies text in place.
|
||||
void cleanup_text(std::string &text);
|
||||
|
||||
//! Return words before and after the match.
|
||||
[[nodiscard]] match_context
|
||||
context(const boost::match_results<std::string::const_iterator> &match,
|
||||
std::uint64_t words);
|
||||
|
||||
void cleanup_text(std::string &text);
|
||||
|
||||
} // namespace epubgrep::search
|
||||
|
||||
#endif // EPUBGREP_SEARCH_HPP
|
||||
|
|
Loading…
Reference in New Issue