From 972ce1d0fe4426baf882f004ed824b7d11d24545 Mon Sep 17 00:00:00 2001
From: tastytea
Date: Mon, 24 May 2021 16:37:30 +0200
Subject: [PATCH] Don't strip headlines.

---
 man/epubgrep.1.adoc | 3 ++-
 src/search.cpp      | 5 +++++
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/man/epubgrep.1.adoc b/man/epubgrep.1.adoc
index 41f8c08..a90a01c 100644
--- a/man/epubgrep.1.adoc
+++ b/man/epubgrep.1.adoc
@@ -72,7 +72,8 @@ epubgrep does not operate on lines, but on whole files. This means you can
 search for text spanning multiple lines. All newlines will be replaced by
 spaces.
 
-HTML will be stripped unless *--raw* is specified.
+HTML will be stripped (except headlines) and newlines will be removed unless
+*--raw* is specified.
 
 === Configuration
 
diff --git a/src/search.cpp b/src/search.cpp
index 71ff97d..8061566 100644
--- a/src/search.cpp
+++ b/src/search.cpp
@@ -91,6 +91,11 @@ void cleanup_text(std::string &text)
 {
     for (size_t pos{}; pos != std::string::npos; pos = text.find('<', pos))
     {
+        // Don't strip headlines. We need them later on.
+        if (text[pos + 1] == 'h' || text.substr(pos + 1, 2) == "/h")
+        {
+            continue;
+        }
         text.erase(pos, text.find('>', pos) + 1 - pos);
     }
 