Don't strip headlines.
This commit is contained in:
parent
bb1a43ca92
commit
972ce1d0fe
|
@ -72,7 +72,8 @@ epubgrep does not operate on lines, but on whole files. This means you can
|
||||||
search for text spanning multiple lines. All newlines will be replaced by
|
search for text spanning multiple lines. All newlines will be replaced by
|
||||||
spaces.
|
spaces.
|
||||||
|
|
||||||
HTML will be stripped unless *--raw* is specified.
|
HTML tags (except headlines) will be stripped and newlines replaced by spaces unless
|
||||||
|
*--raw* is specified.
|
||||||
|
|
||||||
=== Configuration
|
=== Configuration
|
||||||
|
|
||||||
|
|
|
@ -91,6 +91,11 @@ void cleanup_text(std::string &text)
|
||||||
{
|
{
|
||||||
for (size_t pos{}; pos != std::string::npos; pos = text.find('<', pos))
|
for (size_t pos{}; pos != std::string::npos; pos = text.find('<', pos))
|
||||||
{
|
{
|
||||||
|
// Don't strip headlines. We need them later on.
|
||||||
|
if (text[pos + 1] == 'h' || text.substr(pos + 1, 2) == "/h")
|
||||||
|
{
|
||||||
|
continue;
|
||||||
|
}
|
||||||
text.erase(pos, text.find('>', pos) + 1 - pos);
|
text.erase(pos, text.find('>', pos) + 1 - pos);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue