Move cleanup_text(), document functions.
This commit is contained in:
parent
30478f6feb
commit
bb1a43ca92
|
@ -87,6 +87,29 @@ std::vector<match> search(const fs::path &filepath, std::string_view regex,
|
||||||
return matches;
|
return matches;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//! Strip HTML tags, remove carriage returns, turn newlines into spaces and
//! condense runs of spaces into a single space.
//!
//! The text is modified in place. Text that contains none of these
//! characters is left untouched.
void cleanup_text(std::string &text)
{
    // Remove every "<...>" span. The search has to happen before the first
    // erase, otherwise the beginning of the text would be treated as a tag.
    for (size_t pos{text.find('<')}; pos != std::string::npos;
         pos = text.find('<', pos))
    {
        const size_t tag_end{text.find('>', pos)};
        if (tag_end == std::string::npos)
        {
            // Unterminated tag: drop everything from '<' to the end.
            text.erase(pos);
            break;
        }
        text.erase(pos, tag_end + 1 - pos);
    }

    // Remove carriage returns. After an erase the next candidate sits at the
    // same index, so the search continues from pos.
    for (size_t pos{text.find('\r')}; pos != std::string::npos;
         pos = text.find('\r', pos))
    {
        text.erase(pos, 1);
    }

    // Replace each newline with a space.
    for (size_t pos{text.find('\n')}; pos != std::string::npos;
         pos = text.find('\n', pos + 1))
    {
        text[pos] = ' ';
    }

    // Condense runs of spaces: while two adjacent spaces exist, drop one.
    // Searching again from pos handles runs longer than two.
    for (size_t pos{text.find("  ")}; pos != std::string::npos;
         pos = text.find("  ", pos))
    {
        text.erase(pos, 1);
    }
}
|
||||||
|
|
||||||
match_context
|
match_context
|
||||||
context(const boost::match_results<std::string::const_iterator> &match,
|
context(const boost::match_results<std::string::const_iterator> &match,
|
||||||
std::uint64_t words)
|
std::uint64_t words)
|
||||||
|
@ -114,27 +137,4 @@ context(const boost::match_results<std::string::const_iterator> &match,
|
||||||
return {prefix.substr(pos_before + 2), suffix.substr(0, pos_after - 1)};
|
return {prefix.substr(pos_before + 2), suffix.substr(0, pos_after - 1)};
|
||||||
}
|
}
|
||||||
|
|
||||||
//! Strip HTML tags, remove carriage returns, turn newlines into spaces and
//! condense runs of spaces into a single space.
//!
//! The text is modified in place. Text that contains none of these
//! characters is left untouched.
void cleanup_text(std::string &text)
{
    // Remove every "<...>" span. The search has to happen before the first
    // erase, otherwise the beginning of the text would be treated as a tag.
    for (size_t pos{text.find('<')}; pos != std::string::npos;
         pos = text.find('<', pos))
    {
        const size_t tag_end{text.find('>', pos)};
        if (tag_end == std::string::npos)
        {
            // Unterminated tag: drop everything from '<' to the end.
            text.erase(pos);
            break;
        }
        text.erase(pos, tag_end + 1 - pos);
    }

    // Remove carriage returns. After an erase the next candidate sits at the
    // same index, so the search continues from pos.
    for (size_t pos{text.find('\r')}; pos != std::string::npos;
         pos = text.find('\r', pos))
    {
        text.erase(pos, 1);
    }

    // Replace each newline with a space.
    for (size_t pos{text.find('\n')}; pos != std::string::npos;
         pos = text.find('\n', pos + 1))
    {
        text[pos] = ' ';
    }

    // Condense runs of spaces: while two adjacent spaces exist, drop one.
    // Searching again from pos handles runs longer than two.
    for (size_t pos{text.find("  ")}; pos != std::string::npos;
         pos = text.find("  ", pos))
    {
        text.erase(pos, 1);
    }
}
|
|
||||||
|
|
||||||
} // namespace epubgrep::search
|
} // namespace epubgrep::search
|
||||||
|
|
|
@ -57,16 +57,19 @@ struct options
|
||||||
std::uint64_t context{0};
|
std::uint64_t context{0};
|
||||||
};
|
};
|
||||||
|
|
||||||
|
//! Search file, return matches.
|
||||||
[[nodiscard]] std::vector<match> search(const fs::path &filepath,
|
[[nodiscard]] std::vector<match> search(const fs::path &filepath,
|
||||||
std::string_view regex,
|
std::string_view regex,
|
||||||
const options &opts);
|
const options &opts);
|
||||||
|
|
||||||
|
//! Strip HTML, remove newlines, condense spaces.
|
||||||
|
void cleanup_text(std::string &text);
|
||||||
|
|
||||||
|
//! Return words before and after the match.
|
||||||
[[nodiscard]] match_context
|
[[nodiscard]] match_context
|
||||||
context(const boost::match_results<std::string::const_iterator> &match,
|
context(const boost::match_results<std::string::const_iterator> &match,
|
||||||
std::uint64_t words);
|
std::uint64_t words);
|
||||||
|
|
||||||
void cleanup_text(std::string &text);
|
|
||||||
|
|
||||||
} // namespace epubgrep::search
|
} // namespace epubgrep::search
|
||||||
|
|
||||||
#endif // EPUBGREP_SEARCH_HPP
|
#endif // EPUBGREP_SEARCH_HPP
|
||||||
|
|
Loading…
Reference in New Issue