diff --git a/man/epubgrep.1.adoc b/man/epubgrep.1.adoc
index 5b36cc2..41f8c08 100644
--- a/man/epubgrep.1.adoc
+++ b/man/epubgrep.1.adoc
@@ -50,7 +50,7 @@ Ignore case distinctions in pattern and data.
 Use additional _PATTERN_ for matching. Can be used more than once.
 
 *-a*, *--raw*::
-Do not strip HTML before searching.
+Do not clean up text before searching. No HTML stripping, no newline removal.
 
 *-C* _NUMBER_, **context* _NUMBER_::
 Print _NUMBER_ words of context around matches.
diff --git a/src/main.cpp b/src/main.cpp
index d47bd4a..f937f76 100644
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -105,7 +105,7 @@ int main(int argc, char *argv[])
                 }
                 if (vm.count("raw") > 0)
                 {
-                    opts.nostrip = true;
+                    opts.raw = true;
                 }
                 opts.context = vm["context"].as<std::uint64_t>();
 
diff --git a/src/options.cpp b/src/options.cpp
index f204296..087f3e4 100644
--- a/src/options.cpp
+++ b/src/options.cpp
@@ -67,7 +67,7 @@ po::variables_map parse_options(int argc, char *argv[])
          ->value_name(translate("PATTERN"))->composing()->required(),
          translate("Use additional PATTERN for matching.").str().data())
         ("raw,a",
-         translate("Do not strip HTML before searching.").str().data())
+         translate("Do not clean up text before searching.").str().data())
         ("context,C", po::value<std::uint64_t>()
          ->value_name(translate("NUMBER"))->default_value(0),
          translate("Print NUMBER words of context around matches.").str().data())
diff --git a/src/search.cpp b/src/search.cpp
index af08d27..eb2d11d 100644
--- a/src/search.cpp
+++ b/src/search.cpp
@@ -63,6 +63,11 @@ std::vector<match> search(const fs::path &filepath, std::string_view regex,
     for (const auto &entry : zip::list(filepath))
     {
         auto document{zip::read_file(filepath, entry)};
+        if (!opts.raw)
+        {
+            cleanup_text(document);
+        }
+
         std::string::const_iterator begin{document.begin()};
         std::string::const_iterator end{document.end()};
         boost::match_results<std::string::const_iterator> match_result;
@@ -109,4 +114,27 @@ context(const boost::match_results<std::string::const_iterator> &match,
     return {prefix.substr(pos_before + 2), suffix.substr(0, pos_after - 1)};
 }
 
+// Cleans text in place: strips <...> tags, removes '\r', turns '\n' into ' '
+// and squeezes runs of spaces. Each pass searches first, so offset 0 is safe.
+void cleanup_text(std::string &text)
+{
+    for (size_t pos{}; (pos = text.find('<', pos)) != std::string::npos;)
+    {
+        const size_t end{text.find('>', pos)}; // npos => cut to end
+        text.erase(pos, end == std::string::npos ? end : end + 1 - pos);
+    }
+    for (size_t pos{}; (pos = text.find('\r', pos)) != std::string::npos;)
+    {
+        text.erase(pos, 1);
+    }
+    for (size_t pos{}; (pos = text.find('\n', pos)) != std::string::npos;)
+    {
+        text[pos] = ' ';
+    }
+    for (size_t pos{}; (pos = text.find("  ", pos)) != std::string::npos;)
+    {
+        text.erase(pos, 1);
+    }
+}
+
 } // namespace epubgrep::search
diff --git a/src/search.hpp b/src/search.hpp
index 08e7620..21e198e 100644
--- a/src/search.hpp
+++ b/src/search.hpp
@@ -53,7 +53,7 @@ struct options
     regex_kind regex{regex_kind::basic};
     bool grep_like{false};
     bool ignore_case{false};
-    bool nostrip{false};
+    bool raw{false};
     std::uint64_t context{0};
 };
 
@@ -65,6 +65,8 @@ struct options
 context(const boost::match_results<std::string::const_iterator> &match,
         std::uint64_t words);
 
+void cleanup_text(std::string &text);
+
 } // namespace epubgrep::search
 
 #endif // EPUBGREP_SEARCH_HPP