add performance section to readme

disable cmake-format for now
clang-tidy: change MinimumVariableNameLength to 2
2022-10-01 20:41:23 +02:00 · 2022-08-30 23:04:22 +02:00 · 2022-08-19 01:41:34 +02:00 · 2022-08-16 21:42:13 +02:00 · 2022-08-16 19:14:19 +02:00 · 2022-08-16 19:03:08 +02:00
32 changed files with 1249 additions and 476 deletions
--- a/.clang-tidy
+++ b/.clang-tidy
@ -1,5 +1,4 @@
-# -*- mode: conf; fill-column: 100; -*-
+# Written for clang-tidy 14.
 # Written for clang-tidy 11.
 ---
 Checks:         '*,
@ -29,7 +28,9 @@ Checks:         '*,
                -fuchsia-multiple-inheritance,
                -llvmlibc*,
                -cppcoreguidelines-avoid-non-const-global-variables,
-                -cert-*-c'
+                -cert-*-c,
                -abseil-string-find-*,
                -altera-*'
 FormatStyle:    file            # Use .clang-format.
 CheckOptions:   # ↓ Clashes with static private member prefix. (static int _var;) ↓
                - { key: readability-identifier-naming.VariableCase,          value: lower_case }
@ -39,9 +40,15 @@ CheckOptions:   # ↓ Clashes with static private member prefix. (static int _va
                - { key: readability-identifier-naming.ProtectedMemberCase,   value: lower_case }
                - { key: readability-identifier-naming.ProtectedMemberPrefix, value: _          }
-                - { key: readability-identifier-naming.ClassCase,             value: lower_case  }
+                - { key: readability-identifier-naming.ClassCase,             value: lower_case }
                - { key: readability-identifier-naming.StructCase,            value: lower_case }
                - { key: readability-identifier-naming.EnumCase,              value: lower_case }
                - { key: readability-identifier-naming.FunctionCase,          value: lower_case }
                - { key: readability-identifier-naming.ParameterCase,         value: lower_case }
                - { key: readability-function-cognitive-complexity.Threshold, value: 30         }
                - { key: readability-identifier-length.MinimumVariableNameLength, value: 2      }
 ...
 # -*- mode: yaml; fill-column: 100; -*-
 # vim: set fenc=utf-8 tw=100 et ft=yaml:
--- a/.cmake-format.json
+++ b/.cmake-format.json
@ -0,0 +1,5 @@
 {
    "format": {
        "disable": true
    }
 }
--- a/.drone.yml
+++ b/.drone.yml
@ -4,9 +4,12 @@ kind: pipeline
 type: docker
 volumes:
- name: debian-package-cache
+- name: deb-package-cache
  host:
-    path: /var/cache/debian-package-cache
+    path: /var/cache/deb-package-cache
 - name: rpm-package-cache
  host:
    path: /var/cache/rpm-package-cache
 trigger:
  event:
@ -14,7 +17,7 @@ trigger:
    - tag
 steps:
- name: GCC 10 / clang 11
+- name: GCC 10 / clang 11 (debug)
  image: debian:bullseye-slim
  pull: always
  environment:
@ -28,19 +31,19 @@ steps:
  - apt-get update -q
  - apt-get install -qq build-essential cmake clang locales
  - apt-get install -qq catch libboost-program-options-dev libboost-locale-dev libboost-regex-dev libboost-log-dev gettext libarchive-dev libfmt-dev asciidoc libpugixml-dev nlohmann-json3-dev
-  - rm -rf build && mkdir -p build && cd build
+  - rm -rf build_deb && mkdir -p build_deb && cd build_deb
-  - cmake -G "Unix Makefiles" -DWITH_TESTS=YES ..
+  - cmake -DCMAKE_BUILD_TYPE=Debug -G "Unix Makefiles" -DWITH_TESTS=YES -DWITH_SANITIZERS=YES ..
  - make VERBOSE=1
  - make install DESTDIR=install
-  - cd tests && ctest -V
+  - ctest -V
-  - cd ../../
+  - cd ../
-  - rm -rf build && mkdir -p build && cd build
+  - rm -rf build_deb && mkdir -p build_deb && cd build_deb
-  - CXX="clang++" cmake -G "Unix Makefiles" -DWITH_TESTS=YES ..
+  - CXX="clang++" cmake -DCMAKE_BUILD_TYPE=Debug -G "Unix Makefiles" -DWITH_TESTS=YES -DWITH_SANITIZERS=YES ..
  - make VERBOSE=1
  - make install DESTDIR=install
-  - cd tests && ctest -V
+  - ctest -V
  volumes:
-  - name: debian-package-cache
+  - name: deb-package-cache
    path: /var/cache/apt/archives
 - name: Download CMake 3.12 installer
@ -69,20 +72,43 @@ steps:
  - apt-get install -qq catch libboost-program-options-dev libboost-locale-dev libboost-regex-dev libboost-log-dev gettext libarchive-dev libfmt-dev asciidoc libpugixml-dev  nlohmann-json-dev
  - sh cmake_installer.sh --skip-license --exclude-subdir --prefix=/usr/local
  - cp /usr/lib/x86_64-linux-gnu/libpugixml* /lib/x86_64-linux-gnu/
-  - rm -rf build && mkdir -p build && cd build
+  - rm -rf build_deb && mkdir -p build_deb && cd build_deb
  - cmake -G "Unix Makefiles" -DWITH_TESTS=YES ..
  - make VERBOSE=1
  - make install DESTDIR=install
-  - cd tests && ctest -V
+  - ctest -V
-  - cd ../../
+  - cd ../
-  - rm -rf build && mkdir -p build && cd build
+  - rm -rf build_deb && mkdir -p build_deb && cd build_deb
  - CXX="clang++" cmake -G "Unix Makefiles" -DWITH_TESTS=YES ..
  - make VERBOSE=1
  - make install DESTDIR=install
-  - cd tests && ctest -V
+  - ctest -V
  volumes:
-  - name: debian-package-cache
+  - name: deb-package-cache
    path: /var/cache/apt/archives
  depends_on:
    - GCC 10 / clang 11 (debug)
    - Download CMake 3.12 installer
 - name: GCC 9
  image: opensuse/leap:15
  pull: always
  environment:
    CXX: g++-9
    CXXFLAGS: -pipe -O2
    LANG: C.UTF-8
  commands:
  - zypper --non-interactive modifyrepo --all --keep-packages
  - zypper --non-interactive install cmake gcc9-c++ rpm-build
  - zypper --non-interactive install Catch2-devel libboost_program_options1_75_0-devel libboost_locale1_75_0-devel libboost_log1_75_0-devel fmt-devel libarchive-devel pugixml-devel nlohmann_json-devel asciidoc
  - rm -rf build_rpm && mkdir -p build_rpm && cd build_rpm
  - cmake -G "Unix Makefiles" -DWITH_TESTS=YES ..
  - make VERBOSE=1
  - make install DESTDIR=install
  - ctest -V
  volumes:
  - name: rpm-package-cache
    path: /var/cache/zypp/packages
 - name: notify
  image: drillster/drone-email
@ -96,6 +122,11 @@ steps:
      from_secret: email_password
  when:
    status: [ changed, failure ]
  depends_on:
    - GCC 10 / clang 11 (debug)
    - Download CMake 3.12 installer
    - GCC 9
    - GCC 8 / clang 6
 ---
 name: Packages x86_64
@ -103,15 +134,40 @@ kind: pipeline
 type: docker
 volumes:
- name: debian-package-cache
+- name: deb-package-cache
  host:
-    path: /var/cache/debian-package-cache
+    path: /var/cache/deb-package-cache
 trigger:
  event:
  - tag
 steps:
 - name: Debian bullseye
  image: debian:bullseye-slim
  pull: always
  environment:
    CXX: g++-10
    CXXFLAGS: -pipe -O2
    DEBIAN_FRONTEND: noninteractive
    LANG: C.UTF-8
  commands:
  - rm /etc/apt/apt.conf.d/docker-clean
  - alias apt-get='rm -f /var/cache/apt/archives/lock && apt-get'
  - apt-get update -q
  - apt-get install -qq build-essential cmake clang locales lsb-release file
  - apt-get install -qq libboost-program-options-dev libboost-locale-dev libboost-regex-dev libboost-log-dev gettext libarchive-dev libfmt-dev libpugixml-dev nlohmann-json3-dev
  - apt-get install -qq --no-install-recommends asciidoc xsltproc
  - rm -rf build_deb && mkdir -p build_deb && cd build_deb
  - cmake -G "Unix Makefiles" -DCMAKE_INSTALL_PREFIX=/usr ..
  - make VERBOSE=1
  - make install DESTDIR=install
  - cpack -G DEB
  - cp -v epubgrep_${DRONE_TAG}-0_amd64_bullseye.deb ..
  volumes:
  - name: deb-package-cache
    path: /var/cache/apt/archives
 - name: Debian buster
  image: debian:buster-slim
  pull: always
@ -125,17 +181,20 @@ steps:
  - rm /etc/apt/apt.conf.d/docker-clean
  - alias apt-get='rm -f /var/cache/apt/archives/lock && apt-get'
  - apt-get update -q
-  - apt-get install -qq build-essential cmake clang locales lsb-release
+  - apt-get install -qq build-essential cmake clang locales lsb-release file
-  - apt-get install -qq libboost-program-options-dev libboost-locale-dev libboost-regex-dev libboost-log-dev gettext libarchive-dev libfmt-dev asciidoc libpugixml-dev  nlohmann-json-dev
+  - apt-get install -qq libboost-program-options-dev libboost-locale-dev libboost-regex-dev libboost-log-dev gettext libarchive-dev libfmt-dev libpugixml-dev nlohmann-json-dev
-  - rm -rf build && mkdir -p build && cd build
+  - apt-get install -qq --no-install-recommends asciidoc xsltproc
  - rm -rf build_deb && mkdir -p build_deb && cd build_deb
  - cmake -G "Unix Makefiles" -DCMAKE_INSTALL_PREFIX=/usr ..
  - make VERBOSE=1
  - make install DESTDIR=install
  - cpack -G DEB
  - cp -v epubgrep_${DRONE_TAG}-0_amd64_buster.deb ..
  volumes:
-  - name: debian-package-cache
+  - name: deb-package-cache
    path: /var/cache/apt/archives
  depends_on:
    - Debian bullseye
 - name: Ubuntu focal
  image: ubuntu:focal
@ -149,17 +208,21 @@ steps:
  - rm /etc/apt/apt.conf.d/docker-clean
  - alias apt-get='rm -f /var/cache/apt/archives/lock && apt-get'
  - apt-get update -q
-  - apt-get install -qq build-essential cmake clang locales lsb-release
+  - apt-get install -qq build-essential cmake clang locales lsb-release file
-  - apt-get install -qq libboost-program-options-dev libboost-locale-dev libboost-regex-dev libboost-log-dev gettext libarchive-dev libfmt-dev asciidoc libpugixml-dev  nlohmann-json3-dev
+  - apt-get install -qq libboost-program-options-dev libboost-locale-dev libboost-regex-dev libboost-log-dev gettext libarchive-dev libfmt-dev libpugixml-dev nlohmann-json3-dev
-  - rm -rf build && mkdir -p build && cd build
+  - apt-get install -qq --no-install-recommends asciidoc xsltproc
  - rm -rf build_deb && mkdir -p build_deb && cd build_deb
  - cmake -G "Unix Makefiles" -DCMAKE_INSTALL_PREFIX=/usr ..
  - make VERBOSE=1
  - make install DESTDIR=install
  - cpack -G DEB
  - cp -v epubgrep_${DRONE_TAG}-0_amd64_focal.deb ..
  volumes:
-  - name: debian-package-cache
+  - name: deb-package-cache
    path: /var/cache/apt/archives
  depends_on:
    - Debian bullseye
    - Debian buster
 - name: Download CMake 3.12 installer
  image: plugins/download
@ -183,19 +246,46 @@ steps:
  - rm /etc/apt/apt.conf.d/docker-clean
  - alias apt-get='rm -f /var/cache/apt/archives/lock && apt-get'
  - apt-get update -q
-  - apt-get install -qq g++-8 build-essential clang locales lsb-release
+  - apt-get install -qq g++-8 build-essential clang locales lsb-release file
-  - apt-get install -qq libboost-program-options-dev libboost-locale-dev libboost-regex-dev libboost-log-dev gettext libarchive-dev libfmt-dev asciidoc libpugixml-dev nlohmann-json-dev
+  - apt-get install -qq libboost-program-options-dev libboost-locale-dev libboost-regex-dev libboost-log-dev gettext libarchive-dev libfmt-dev libpugixml-dev nlohmann-json-dev
  - apt-get install -qq --no-install-recommends asciidoc xsltproc
  - sh cmake_installer.sh --skip-license --exclude-subdir --prefix=/usr/local
  - cp /usr/lib/x86_64-linux-gnu/libpugixml* /lib/x86_64-linux-gnu/
-  - rm -rf build && mkdir -p build && cd build
+  - rm -rf build_deb && mkdir -p build_deb && cd build_deb
  - cmake -G "Unix Makefiles" -DCMAKE_INSTALL_PREFIX=/usr ..
  - make VERBOSE=1
  - make install DESTDIR=install
  - cpack -G DEB
  - cp -v epubgrep_${DRONE_TAG}-0_amd64_bionic.deb ..
  volumes:
-  - name: debian-package-cache
+  - name: deb-package-cache
    path: /var/cache/apt/archives
  depends_on:
    - Debian bullseye
    - Debian buster
    - Ubuntu focal
    - Download CMake 3.12 installer
 - name: openSUSE Leap 15
  image: opensuse/leap:15
  pull: always
  environment:
    CXX: g++-9
    CXXFLAGS: -pipe -O2
    LANG: C.UTF-8
  commands:
  - zypper --non-interactive modifyrepo --all --keep-packages
  - zypper --non-interactive install cmake gcc9-c++ rpm-build lsb-release
  - zypper --non-interactive install libboost_program_options1_75_0-devel libboost_locale1_75_0-devel libboost_log1_75_0-devel fmt-devel libarchive-devel pugixml-devel nlohmann_json-devel asciidoc
  - rm -rf build_rpm && mkdir -p build_rpm && cd build_rpm
  - cmake -G "Unix Makefiles" -DCMAKE_INSTALL_PREFIX=/usr ..
  - make VERBOSE=1
  - make install DESTDIR=install
  - cpack -G RPM
  - cp -v epubgrep-${DRONE_TAG}-0.x86_64.opensuse-$(lsb_release --release --short).rpm ..
  volumes:
  - name: rpm-package-cache
    path: /var/cache/zypp/packages
 - name: gitea_release
  image: plugins/gitea-release
@ -208,10 +298,18 @@ steps:
    prerelease: true
    files:
      - epubgrep_${DRONE_TAG}-0_amd64_buster.deb
      - epubgrep_${DRONE_TAG}-0_amd64_bullseye.deb
      - epubgrep_${DRONE_TAG}-0_amd64_focal.deb
      - epubgrep_${DRONE_TAG}-0_amd64_bionic.deb
      - epubgrep-${DRONE_TAG}-0.x86_64.opensuse-*.rpm
    checksum:
      - sha512
  depends_on:
    - Debian bullseye
    - Debian buster
    - Ubuntu focal
    - Ubuntu bionic
    - openSUSE Leap 15
 - name: notification
  image: drillster/drone-email
@ -225,3 +323,9 @@ steps:
      from_secret: email_password
  when:
    status: [ changed, failure ]
  depends_on:
    - Debian bullseye
    - Debian buster
    - Ubuntu focal
    - Download CMake 3.12 installer
    - Ubuntu bionic
--- a/.gitignore
+++ b/.gitignore
@ -3,3 +3,5 @@
 /examples/example99*
 /translations/*.pot
 /translations/de
 /CMakeUserPresets.json
 /launch.json
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -1,11 +1,11 @@
-cmake_minimum_required(VERSION 3.12...3.18)
+cmake_minimum_required(VERSION 3.12...3.20)
 # Global build options.
 set(CMAKE_BUILD_TYPE "Release" CACHE STRING "The type of build.")
 set(XGETTEXT_CMD "xgettext" CACHE STRING "The command for xgettext.")
 project(epubgrep
-  VERSION 0.5.0
+  VERSION 0.6.2
  DESCRIPTION "Search tool for EPUB e-books"
  HOMEPAGE_URL "https://schlomp.space/tastytea/epubgrep"
  LANGUAGES CXX)
@ -15,6 +15,7 @@ list(APPEND CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/cmake")
 # Project build options.
 option(WITH_TESTS "Compile tests." NO)
 option(FALLBACK_BUNDLED "Fall back to bundled libs." YES)
 option(WITH_SANITIZERS "Use sanitizers in debug builds." NO)
 set(CMAKE_CXX_STANDARD 17)
 set(CMAKE_CXX_STANDARD_REQUIRED ON)
@ -46,6 +47,7 @@ find_package(nlohmann_json REQUIRED CONFIG)
 add_subdirectory(src)
 if(WITH_TESTS)
  include(CTest)
  add_subdirectory(tests)
 endif()
--- a/CMakePresets.json
+++ b/CMakePresets.json
@ -0,0 +1,62 @@
 {
    "version": 2,
    "cmakeMinimumRequired": {
        "major": 3,
        "minor": 20,
        "patch": 0
    },
    "configurePresets": [
        {
            "name": "common",
            "hidden": true,
            "generator": "Unix Makefiles",
            "binaryDir": "build",
            "cacheVariables": {
                "CMAKE_EXPORT_COMPILE_COMMANDS": true
            }
        },
        {
            "name": "dev",
            "displayName": "Developer config",
            "description": "Build with debug symbols and tests enabled",
            "inherits": "common",
            "cacheVariables": {
                "CMAKE_BUILD_TYPE": "Debug",
                "WITH_TESTS": true,
                "WITH_SANITIZERS": false
            }
        },
        {
            "name": "dev_san",
            "displayName": "Developer config, with sanitizers",
            "description": "Build with debug symbols, tests enabled and sanitizers enabled",
            "inherits": "dev",
            "cacheVariables": {
                "WITH_SANITIZERS": true
            }
        },
        {
            "name": "release",
            "displayName": "Release config",
            "description": "Build without debug symbols or tests",
            "inherits": "common",
            "cacheVariables": {
                "CMAKE_BUILD_TYPE": "Release",
                "WITH_TESTS": false
            }
        }
    ],
    "testPresets": [
        {
            "name": "default",
            "configurePreset": "dev",
            "output": {
                "outputOnFailure": true
            },
            "execution": {
                "noTestsAction": "error",
                "stopOnFailure": true
            }
        }
    ]
 }
--- a/CONTRIBUTING.adoc
+++ b/CONTRIBUTING.adoc
@ -63,9 +63,8 @@ directory. Then do the following:
 [source,shell]
 --------------------------------------------------------------------------------
-cd build
+cmake --build build
-cmake --build .
+cd translations
 cd ../translations
 msgmerge --update es.po epubgrep.pot
 --------------------------------------------------------------------------------
--- a/README.adoc
+++ b/README.adoc
@ -64,8 +64,8 @@ sudo apt install epubgrep
 --------------------------------------------------------------------------------
 Replace _[code name]_ with the code name of your installation. Packages are
-available for *buster* (Debian 10), *focal* (Ubuntu 20.04) and *bionic* (Ubuntu
+available for *bullseye* (Debian 11), *buster* (Debian 10), *focal* (Ubuntu
-18.04).
+20.04) and *bionic* (Ubuntu 18.04).
 [TIP]
 If you get the error message that `add-apt-repository` was not found, install
@ -76,7 +76,7 @@ If you get the error message that `add-apt-repository` was not found, install
 ==== Dependencies
 * Tested OS: Linux
-* C\++ compiler with C++17 support (tested: link:{uri-gcc}[GCC] 8/10,
+* C\++ compiler with C++17 support (tested: link:{uri-gcc}[GCC] 8/9/10,
  link:{uri-clang}[clang] 6/11)
 * link:{uri-cmake}[CMake] (at least: 3.12)
 * link:{uri-boost}[Boost] (tested: 1.75.0 / 1.65.0)
@ -94,8 +94,7 @@ If you get the error message that `add-apt-repository` was not found, install
 ===== Install dependencies in Debian or Ubuntu
 Or distributions that are derived from Debian or Ubuntu. You will need at least
-Debian buster (10) or Ubuntu focal (20.04), unless you install a newer version
+Debian buster (10) or Ubuntu focal (20.04).
 of CMake.
 [source,shell]
 --------------------------------------------------------------------------------
@ -105,8 +104,21 @@ sudo apt install build-essential cmake libboost-program-options-dev \
                 nlohmann-json-dev
 --------------------------------------------------------------------------------
-[NOTE]
+[TIP]
-If `nlohmann-json-dev` can not be found, try nlohmann-json3-dev.
+If `nlohmann-json-dev` can not be found, try `nlohmann-json3-dev`.
 ===== Install dependencies in openSUSE
 Tested on openSUSE Leap 15.3.
 [source,shell]
 --------------------------------------------------------------------------------
 sudo zypper install cmake gcc10-c++ rpm-build \
                    libboost_program_options1_75_0-devel \
                    libboost_locale1_75_0-devel libboost_log1_75_0-devel \
                    fmt-devel libarchive-devel pugixml-devel \
                    nlohmann_json-devel asciidoc
 --------------------------------------------------------------------------------
 ==== Get sourcecode
@ -132,18 +144,50 @@ cmake -S . -B build
 cmake --build build --parallel $(nproc --ignore=1)
 --------------------------------------------------------------------------------
-To install, run `sudo cmake --install build`. To run the tests, run `cd
+To install, run `sudo cmake --install build`. To run the tests, run `ctest
-build/tests && ctest`.
+--test-dir build`.
 [TIP]
 If you are using Debian or Ubuntu, or a distribution that is derived from these,
 you can run `cpack -G DEB` in the build directory to generate a .deb-file. You
-can then install it with `apt install ./epubgrep-*.deb`.
+can then install it with `+++apt install ./epubgrep-*.deb+++`.
 If you are using a distribution that uses RPM packages, like openSUSE or Fedora,
 you can generate a package with `cpack -G RPM` and install it with `+++zypper
 install ./epubgrep-*.rpm+++` or `+++dnf install ./epubgrep-*.rpm+++`.
 .CMake options:
 * `-DCMAKE_BUILD_TYPE=Debug` for a debug build.
 * `-DWITH_TESTS=YES` if you want to compile the tests.
 * `-DXGETTEXT_CMD=String` The program to use instead of `xgettext`.
 * `-DFALLBACK_BUNDLED=NO` if you don't want to fall back on bundled libraries.
 * `-DWITH_SANITIZERS=YES` to use sanitizers in debug builds.
 == Similar projects
 * link:https://github.com/phiresky/ripgrep-all[ripgrep-all] can search EPUB
  files and strips HTML, but does not display page numbers or headings.
 * zipgrep from link:http://infozip.sourceforge.net/[unzip] can search EPUB files
  but does not strip HTML and does not display page numbers or headings.
 == Performance
 A test with a directory containing 3333 EPUBs and 6269 files in total showed
 this difference between epubgrep-0.6.2 and ripgrep-all-0.9.6:
 [source,shellsession]
 --------------------------------------------------------------------------------
 % hyperfine "epubgrep 'floor' ~/Books" "rga 'floor' ~/Books"
 Benchmark #1: epubgrep 'floor' ~/Books
  Time (mean ± σ):     167.246 s ±  3.848 s    [User: 176.251 s, System: 79.107 s]
  Range (min … max):   161.533 s … 173.647 s    10 runs
 Benchmark #2: rga 'floor' ~/Books
  Time (mean ± σ):      9.219 s ±  0.506 s    [User: 17.540 s, System: 12.773 s]
  Range (min … max):    8.571 s …  9.923 s    10 runs
 Summary
  'rga 'floor' ~/Books' ran
   18.14 ± 1.08 times faster than 'epubgrep 'floor' ~/Books'
 --------------------------------------------------------------------------------
 include::{uri-base}/raw/branch/main/CONTRIBUTING.adoc[]
--- a/cmake/debug_flags.cmake
+++ b/cmake/debug_flags.cmake
@ -24,9 +24,13 @@ if(CMAKE_CXX_COMPILER_ID MATCHES "GNU" OR CMAKE_CXX_COMPILER_ID MATCHES "Clang"
    "-Wdouble-promotion"
    "-Wformat=2"
    "-ftrapv"
    "-fsanitize=undefined"
    "-Og"
    "-fno-omit-frame-pointer")
  if(WITH_SANITIZERS)
    list(APPEND tmp_CXXFLAGS
      "-fsanitize=undefined"
      "-fsanitize=address")
  endif()
  if(CMAKE_CXX_COMPILER_ID MATCHES "GNU")
    list(APPEND tmp_CXXFLAGS
      "-Wlogical-op"
@ -44,8 +48,11 @@ if(CMAKE_CXX_COMPILER_ID MATCHES "GNU" OR CMAKE_CXX_COMPILER_ID MATCHES "Clang"
  endif()
  add_compile_options("$<$<CONFIG:Debug>:${tmp_CXXFLAGS}>")
-  list(APPEND tmp_LDFLAGS
+  if(WITH_SANITIZERS)
-    "-fsanitize=undefined")
+    list(APPEND tmp_LDFLAGS
      "-fsanitize=undefined"
      "-fsanitize=address")
  endif()
  # add_link_options was introduced in version 3.13.
  if(${CMAKE_VERSION} VERSION_LESS 3.13)
    set(CMAKE_SHARED_LINKER_FLAGS_DEBUG "${tmp_LDFLAGS}")
--- a/cmake/packages.cmake
+++ b/cmake/packages.cmake
@ -6,7 +6,9 @@ set(CPACK_PACKAGE_CONTACT "tastytea <tastytea@tastytea.de>")
 # Should be set automatically, but they are not.
 set(CPACK_PACKAGE_NAME "${PROJECT_NAME}")
 set(CPACK_PACKAGE_VERSION "${PROJECT_VERSION}")
 set(CPACK_PACKAGE_DESCRIPTION_SUMMARY "${CMAKE_PROJECT_DESCRIPTION}")
 # DEB
 # Figure out dependencies automatically.
 set(CPACK_DEBIAN_PACKAGE_SHLIBDEPS ON)
@ -26,4 +28,30 @@ endif()
 set(CPACK_DEBIAN_FILE_NAME
  "${CPACK_PACKAGE_NAME}_${CPACK_PACKAGE_VERSION}-0_${CPACK_DEBIAN_PACKAGE_ARCHITECTURE}_${DEBIAN_CODENAME}.deb")
 # RPM
 set(CPACK_RPM_PACKAGE_LICENSE "AGPL-3")
 # Figure out dependencies automatically.
 set(CPACK_RPM_PACKAGE_AUTOREQ ON)
 # Should be set automatically, but it is not.
 execute_process(COMMAND uname -m
  OUTPUT_VARIABLE CPACK_RPM_PACKAGE_ARCHITECTURE
  OUTPUT_STRIP_TRAILING_WHITESPACE)
 set(CPACK_PACKAGE_FILE_NAME
  "${CPACK_PACKAGE_NAME}-${CPACK_PACKAGE_VERSION}-0.${CPACK_RPM_PACKAGE_ARCHITECTURE}")
 execute_process(COMMAND lsb_release --id --short
  OUTPUT_VARIABLE OS
  OUTPUT_STRIP_TRAILING_WHITESPACE)
 if("${OS}" STREQUAL "openSUSE")
  execute_process(COMMAND lsb_release --release --short
    OUTPUT_VARIABLE OS_RELEASE
    OUTPUT_STRIP_TRAILING_WHITESPACE)
  set(CPACK_PACKAGE_FILE_NAME
    "${CPACK_PACKAGE_NAME}-${CPACK_PACKAGE_VERSION}-0.${CPACK_RPM_PACKAGE_ARCHITECTURE}.opensuse-${OS_RELEASE}")
 endif()
 include(CPack)
--- a/man/epubgrep.1.adoc
+++ b/man/epubgrep.1.adoc
@ -2,7 +2,7 @@
 :doctype:       manpage
 :Author:        tastytea
 :Email:         tastytea@tastytea.de
-:Date:          2021-06-01
+:Date:          2021-07-02
 :Revision:      0.0.0
 :man source:    epubgrep
 :man manual:    General Commands Manual
@ -13,7 +13,7 @@ epubgrep - Search tool for EPUB e-books.
 == SYNOPSIS
-*epubgrep* [_OPTION_]… _PATTERN_ [_FILE_]…
+*epubgrep* [_OPTION_]… _PATTERN_ _FILE_…
 == DESCRIPTION
@ -21,14 +21,44 @@ epubgrep - Search tool for EPUB e-books.
 for command line switches where possible. However, not all grep switches are
 implemented and some additional switches are added.
 This manual is also available at
 <https://man.schlomp.space/tastytea/?program=epubgrep>.
 == EXAMPLES
 .Search for Apple(s) or Orange(s) with 2 words of context around the matches, case insensitively
 [source,shell]
 --------------------------------------------------------------------------------
 epubgrep -PiC2 '(Apple|Orange)s?' file.epub
 --------------------------------------------------------------------------------
 .Extract external hyperlinks
 [source,shell]
 --------------------------------------------------------------------------------
 epubgrep -PC0 --raw --no-filename=all '"http[^"]+"' file.epub | tr -d '"'
 --------------------------------------------------------------------------------
 .Save the search results to an HTML file and output a status message every 20 seconds
 [source,shell]
 --------------------------------------------------------------------------------
 epubgrep -C2 --status --status-interval=20 --html 'Apples' file.epub > result.html
 --------------------------------------------------------------------------------
 == OPTIONS
 === General options
 *-h*, *--help*::
 Display a short help message and exit.
 *-V*, *--version*::
 Show version, copyright and license.
 *--debug*::
 Write debug output to the terminal and log file.
 === Search options
 *-G*, *--basic-regexp*::
 _PATTERN_ is a POSIX basic regular expression. This is the default.
@ -46,25 +76,10 @@ _PATTERN_ is a Perl regular expression.
 *-i*, *--ignore-case*::
 Ignore case distinctions in pattern and data.
 *-e* _PATTERN_, *--regexp* _PATTERN_::
 Use additional _PATTERN_ for matching. Can be used more than once.
 *-a*, *--raw*::
 Do not clean up text before searching. No HTML stripping, no newline removal,
 all files will be read (not just the text documents listed in the spine).
 *-C* _NUMBER_, *--context* _NUMBER_::
 Print _NUMBER_ words of context around matches.
 *--nocolor*::
 Turn off colors and other decorations.
 *--no-filename* _WHICH_::
 Suppress the mentioning of file names on output. _WHICH_ is ‘filesystem’ for the
 file names on your file systems, ‘in-epub’ for the file names inside the EPUB or
 ‘all’. Chapters and page numbers will still be output.
 *-r*, *--recursive*::
 Read all files under each directory, recursively, following symbolic links only
 if they are on the command line. Silently skips directories that are not
@ -74,20 +89,44 @@ readable by the user.
 Read all files under each directory, recursively. Follow all symbolic
 links. Silently skips directories that are not readable by the user.
 *-e* _PATTERN_, *--regexp* _PATTERN_::
 Use additional _PATTERN_ for matching. Can be used more than once.
 === Output options
 *-C* _NUMBER_, *--context* _NUMBER_::
 Print _NUMBER_ words of context around matches.
 *--nocolor*::
 Turn off colors and other decorations.
 *--no-filename* _WHICH_::
 Suppress the mentioning of file names on output. _WHICH_ is ‘filesystem’ for the
 file names on your file systems, ‘in-epub’ for the file names inside the EPUB or
 ‘all’. Chapters and page numbers will still be output.
 *--ignore-archive-errors*::
 Ignore errors about wrong file formats. When you search directories recursively,
 it is likely that there are files which are not EPUB files. This setting
 suppresses errors related to them.
 *--debug*::
 Write debug output to the terminal and log file.
 *--json*::
 Output JSON instead of plain text. JSON will only be output at the end of the
 program. There will be an object named `generator` with the property
 `epubgrep`. The value is the version of the program, as string. The matches are
 in an array named `matches`. I will try not to break the API. 😊
 *--html*::
 Output HTML instead of plain text. HTML will only be output at the end of the
 program.
 *--status*::
 Output status message every *--status-interval* seconds to standard
 error. Default is 30.
 *--status-interval* _NUMBER_::
 Set status message interval to _NUMBER_ seconds.
 == USAGE
 [source,shellsession]
@ -119,15 +158,16 @@ occur more than once are merged.
 ==== Example configuration file
-This example makes epubgrep ignore files which are not EPUB, suppress the file
+This example makes epubgrep always search directories recursively, ignore files
-names on output, print 2 words of context around matches (unless overridden on
+which are not EPUB, not print the file names inside the EPUB, print 2 words of
-the command line) and search for mentions of the words thyme and oregano in
+context around matches (unless overridden on the command line) and search for
-every book.
+mentions of the words thyme and oregano in every book.
 [source,cfg]
 --------------------------------------------------------------------------------
 recursive = 1
 ignore-archive-errors = 1
-no-filename = 1
+no-filename = in-epub
 context = 2
 regexp = [Tt]hyme
 regexp = [Oo]regano
--- a/screenshot.png
+++ b/screenshot.png
--- a/src/book.cpp
+++ b/src/book.cpp
@ -0,0 +1,306 @@
 /*  This file is part of epubgrep.
 *  Copyright © 2021 tastytea <tastytea@tastytea.de>
 *
 *  This program is free software: you can redistribute it and/or modify
 *  it under the terms of the GNU Affero General Public License as published by
 *  the Free Software Foundation, version 3.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU Affero General Public License for more details.
 *
 *  You should have received a copy of the GNU Affero General Public License
 *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
 #include "book.hpp"
 #include "fs-compat.hpp"
 #include "helpers.hpp"
 #include "log.hpp"
 #include "zip.hpp"
 #include <boost/locale/message.hpp>
 #include <boost/regex.hpp>
 #include <fmt/format.h>
 #include <fmt/ostream.h> // For compatibility with fmt 4.
 #include <pugixml.hpp>
 #include <algorithm>
 #include <memory>
 #include <string>
 #include <string_view>
 #include <vector>
 namespace epubgrep::book
 {
 using boost::locale::translate;
 using fmt::format;
 using std::string;
 // Load the EPUB at filepath and return it as a book with one document per
 // file. In raw mode every file in the archive is taken over unmodified,
 // otherwise only the spine is read and each page is cleaned by process_page().
 book read(const fs::path filepath, const bool raw)
 {
    using helpers::unescape_html;
    DEBUGLOG << "Processing book " << filepath;
    std::vector<string> epub_filepaths{raw ? zip::list(filepath)
                                           : list_spine(filepath)};
    // Reads dc:language from the OPF metadata; yields "" if it can't be found.
    const auto detect_language{
        [&filepath]() -> string
        {
            try
            {
                pugi::xml_document opf;
                const auto opf_path{get_opf_file_path(filepath)};
                const std::string opf_content{
                    zip::read_file(filepath, opf_path.string())};
                if (!opf.load_buffer(opf_content.data(), opf_content.size()))
                {
                    return "";
                }
                auto language_node{opf.child("package")
                                       .child("metadata")
                                       .child("dc:language")};
                if (language_node.empty())
                {
                    // Some books prefix the OPF elements with a namespace.
                    language_node = opf.child("opf:package")
                                        .child("opf:metadata")
                                        .child("dc:language");
                }
                return language_node.text().as_string();
            }
            catch (epubgrep::zip::exception &e)
            {
                if (e.code != 1) // 1 == container.xml not found.
                {
                    LOG(log::sev::error) << e.what();
                }
            }
            return "";
        }};
    book result;
    result.language = detect_language();
    DEBUGLOG << "Book language detected: " << result.language;
    for (const auto &inner_path : epub_filepaths)
    {
        DEBUGLOG << "Processing document " << inner_path;
        document current;
        if (raw)
        {
            current.text_raw = zip::read_file(filepath, inner_path);
            current.text = std::make_unique<std::string>(current.text_raw);
        }
        else
        {
            current = process_page(
                unescape_html(zip::read_file(filepath, inner_path)));
        }
        current.language = result.language; // FIXME: Get language of doc.
        result.files.emplace_back(inner_path, std::move(current));
    }
    return result;
 }
 // Strip the markup from an HTML page and remember where headlines and page
 // breaks were.
 //
 // Whitespace is normalised first, then every tag is erased from left to right.
 // The positions stored in doc.headlines and doc.pages are therefore offsets
 // into the text with all earlier tags already removed. The result is stored in
 // doc.text_cleaned and copied into doc.text.
 document process_page(const std::string_view text)
 {
    string output{text};
    static const boost::regex re_header_start{"<[hH][1-6]"};
    static const boost::regex re_header_end{"</[hH][1-6]"};
    static const boost::regex re_pagebreak{"[^>]+pagebreak[^>]+"
                                           "(title|aria-label)"
                                           "=\"([[:alnum:]]+)\""};
    // Offset just past the next '>' at or after from. A tag that is never
    // closed extends to the end of the text; without this clamp find() + 1
    // would wrap around to 0 and yield an end position in front of the tag.
    const auto tag_end{[&output](const size_t from) -> size_t
                       {
                           const auto close{output.find('>', from)};
                           if (close == string::npos)
                           {
                               return output.size();
                           }
                           return close + 1;
                       }};
    // Drop carriage returns, turn newlines and tabs into spaces.
    {
        size_t pos{0};
        while ((pos = output.find_first_of("\n\t\r", pos)) != string::npos)
        {
            if (output[pos] == '\r')
            {
                output.erase(pos, 1);
            }
            else
            {
                output.replace(pos, 1, " ");
            }
        }
    }
    // Collapse runs of spaces. pos is not advanced so that longer runs shrink
    // to a single space.
    {
        size_t pos{0};
        while ((pos = output.find("  ", pos)) != string::npos)
        {
            output.replace(pos, 2, " ");
        }
    }
    size_t pos{0};
    document doc;
    size_t headline_start{string::npos};
    while ((pos = output.find('<', pos)) != string::npos)
    {
        auto endpos{tag_end(pos)};
        if (boost::regex_match(output.substr(pos, 3), re_header_start))
        {
            headline_start = pos;
        }
        else if (boost::regex_match(output.substr(pos, 4), re_header_end))
        {
            if (headline_start != string::npos)
            {
                // Inner tags are already erased, so this is the plain title.
                doc.headlines.insert(
                    {headline_start,
                     output.substr(headline_start, pos - headline_start)});
                headline_start = string::npos;
            }
        }
        else if (output.compare(pos, 6, "<span ") == 0)
        {
            boost::match_results<string::const_iterator> match;
            using it_size_t = string::const_iterator::difference_type;
            // Only search inside this tag. endpos never exceeds the size of
            // output, so both iterators are valid.
            string::const_iterator begin{output.begin()
                                         + static_cast<it_size_t>(pos)};
            string::const_iterator end{output.begin()
                                       + static_cast<it_size_t>(endpos)};
            if (boost::regex_search(begin, end, match, re_pagebreak))
            {
                doc.pages.insert({pos, match[2].str()});
            }
        }
        else if (output.compare(pos, 7, "<style ") == 0
                 || output.compare(pos, 8, "<script ") == 0)
        {
            // Not self-closing: also erase everything up to the next '>'.
            if (output.find("/>", pos) > endpos)
            {
                endpos = tag_end(endpos);
            }
        }
        output.erase(pos, endpos - pos);
    }
    doc.text_cleaned = output;
    doc.text = std::make_unique<string>(doc.text_cleaned);
    return doc;
 }
 // Return the headline with the greatest position that is not behind pos, or
 // an empty string if doc has no headline at or before pos.
 std::string headline(const document &doc, const size_t pos)
 {
    // The map is ordered by position: the wanted entry sits directly in front
    // of the first one that starts after pos.
    auto next{doc.headlines.upper_bound(pos)};
    if (next == doc.headlines.begin())
    {
        return {};
    }
    return (--next)->second;
 }
 // Return the page label with the greatest position that is not behind pos, or
 // an empty string if doc has no page break at or before pos.
 string page(const document &doc, const size_t pos)
 {
    // The map is ordered by position: the wanted entry sits directly in front
    // of the first one that starts after pos.
    auto next{doc.pages.upper_bound(pos)};
    if (next == doc.pages.begin())
    {
        return {};
    }
    return (--next)->second;
 }
 // Read META-INF/container.xml from zipfile and return the full-path attribute
 // of the first child of <rootfiles>. If the XML can't be parsed, the parser
 // message is logged and an empty path is returned.
 fs::path get_opf_file_path(const fs::path &zipfile)
 {
    pugi::xml_document container_doc;
    const std::string container_xml{
        zip::read_file(zipfile, "META-INF/container.xml")};
    const auto parsed{
        container_doc.load_buffer(container_xml.data(), container_xml.size())};
    if (!parsed)
    {
        LOG(log::sev::error) << parsed.description() << '\n';
        return fs::path{};
    }
    const auto rootfile{
        container_doc.child("container").child("rootfiles").first_child()};
    return fs::path{rootfile.attribute("full-path").value()};
 }
 // Return the archive paths of all documents listed in the OPF spine, in spine
 // order.
 //
 // Each <itemref> is resolved through the manifest; hrefs that do not start
 // with '/' are taken relative to the directory of the OPF file. If the OPF
 // file can't be located or no document is found, an error is logged and an
 // empty list is returned.
 std::vector<string> list_spine(const fs::path &filepath)
 {
    auto opf_file_path{get_opf_file_path(filepath)};
    std::vector<std::string> spine_filepaths;
    if (!opf_file_path.empty())
    {
        DEBUGLOG << "Parsing " << opf_file_path;
        pugi::xml_document xml;
        const std::string opf_file{
            zip::read_file(filepath, opf_file_path.string())};
        const auto result{xml.load_buffer(opf_file.data(), opf_file.size())};
        if (result)
        {
            // Some books prefix the OPF elements with a namespace.
            auto manifest{xml.child("package").child("manifest")};
            if (manifest == nullptr)
            {
                manifest = xml.child("opf:package").child("opf:manifest");
            }
            auto spine{xml.child("package").child("spine")};
            if (spine == nullptr)
            {
                spine = xml.child("opf:package").child("opf:spine");
            }
            for (const auto &itemref : spine)
            {
                const auto &idref{itemref.attribute("idref").value()};
                const auto &item{manifest.find_child_by_attribute("id", idref)};
                auto href{helpers::urldecode(item.attribute("href").value())};
                if (href.empty())
                {
                    // Dangling idref or item without href. Joining it with the
                    // OPF directory would add the directory itself as a page.
                    DEBUGLOG << "Skipping spine item without href: " << idref;
                    continue;
                }
                if (href[0] != '/')
                {
                    href = (opf_file_path.parent_path() /= href);
                }
                DEBUGLOG << "Found in spine: " << href;
                spine_filepaths.emplace_back(href);
            }
        }
        else
        {
            LOG(log::sev::error) << "XML: " << result.description() << '\n';
        }
    }
    if (opf_file_path.empty() || spine_filepaths.empty())
    {
        LOG(log::sev::error)
            << format(translate("{0:s} is damaged. Could not read spine. "
                                "Skipping file.\n")
                          .str()
                          .c_str(),
                      filepath.c_str());
        return {};
    }
    return spine_filepaths;
 }
 } // namespace epubgrep::book
--- a/src/book.hpp
+++ b/src/book.hpp
@ -0,0 +1,73 @@
 /*  This file is part of epubgrep.
 *  Copyright © 2021 tastytea <tastytea@tastytea.de>
 *
 *  This program is free software: you can redistribute it and/or modify
 *  it under the terms of the GNU Affero General Public License as published by
 *  the Free Software Foundation, version 3.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU Affero General Public License for more details.
 *
 *  You should have received a copy of the GNU Affero General Public License
 *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
 #ifndef EPUBGREP_BOOK_HPP
 #define EPUBGREP_BOOK_HPP
 #include "fs-compat.hpp"
 #include <map>
 #include <memory>
 #include <string>
 #include <string_view>
 #include <utility>
 #include <vector>
 namespace epubgrep::book
 {
 using std::string;
 //! Document inside EPUB.
 //!
 //! In book.cpp, process_page() fills text_cleaned, headlines and pages, while
 //! read() in raw mode fills text_raw instead. In both cases text receives its
 //! own copy of the version that was filled in.
 //!
 //! The keys of headlines and pages are offsets into the cleaned text. The
 //! headline value is the title with markup removed, the page value is the
 //! title/aria-label of the page break.
 struct document
 {
    string text_raw;                    //!< HTML page
    string text_cleaned;                //!< Plain text page
    std::unique_ptr<string> text;       //!< Pointer to preferred text version
    std::map<size_t, string> headlines; //!< pos, title
    std::map<size_t, string> pages;     //!< pos, page
    string language;                    //!< Page language
 } __attribute__((aligned(128)));       // Compiler extension: 128 byte alignment.
 //! EPUB file.
 //!
 //! files holds the documents in the order read() processed them. language is
 //! the dc:language of the OPF metadata, or empty if it could not be read.
 //! NOTE(review): read() in book.cpp does not populate toc.
 struct book
 {
    std::vector<std::pair<string, document>> files; //!< filename, file
    std::vector<std::pair<string, string>> toc;     //!< title, href
    string language;                                //!< Book language
 } __attribute__((aligned(128)));                   // Compiler extension.
 //! Read and process book.
 //!
 //! @param filepath Path of the EPUB file.
 //! @param raw      If true, take over every file in the archive unmodified;
 //!                 otherwise read only the spine and clean up each page.
 //! @return The book with one document per file and the detected language.
 [[nodiscard]] book read(fs::path filepath, bool raw);
 //! Clean up page and record headlines and page numbers.
 //!
 //! @param text HTML of one page.
 //! @return Document with the text stripped of tags; headline and page
 //!         positions refer to that stripped text.
 [[nodiscard]] document process_page(std::string_view text);
 //! Return last headline if possible.
 //!
 //! @param doc Document to look in.
 //! @param pos Position in the text of the document.
 //! @return The last headline at or before pos, or an empty string.
 [[nodiscard]] string headline(const document &doc, size_t pos);
 //! Return current page if possible.
 //!
 //! @param doc Document to look in.
 //! @param pos Position in the text of the document.
 //! @return The last page label at or before pos, or an empty string.
 [[nodiscard]] string page(const document &doc, size_t pos);
 //! Returns the file path of the OPF file in the EPUB.
 //!
 //! @param zipfile Path of the EPUB file.
 //! @return Path inside the archive, empty if container.xml can't be parsed.
 [[nodiscard]] fs::path get_opf_file_path(const fs::path &zipfile);
 //! Returns the files in the EPUB “spine” (all pages that are actually text).
 //!
 //! @param filepath Path of the EPUB file.
 //! @return Paths inside the archive in spine order, empty on error.
 [[nodiscard]] std::vector<string> list_spine(const fs::path &filepath);
 } // namespace epubgrep::book
 #endif // EPUBGREP_BOOK_HPP
--- a/src/helpers.cpp
+++ b/src/helpers.cpp
@ -43,7 +43,7 @@ std::string unescape_html(const std::string_view html)
    // Source: https://en.wikipedia.org/wiki/List_of_XML_and_HTML_character_
    //         entity_references#Character_entity_references_in_HTML
-    const std::map<std::string_view, char32_t>
+    static const std::map<std::string_view, char32_t>
        names{{"exclamation", 0x0021}, {"quot", 0x0022},    {"percent", 0x0025},
              {"amp", 0x0026},         {"apos", 0x0027},    {"add", 0x002B},
              {"lt", 0x003C},          {"equal", 0x003D},   {"gt", 0x003E},
@ -150,7 +150,7 @@ std::string unescape_html(const std::string_view html)
        try
        {
            const char32_t codepoint{
-                [&match, &names]
+                [&match]
                {
                    // If it doesn't start with a '#' it is a named entity.
                    if (match[1].str()[0] != '#')
@ -184,7 +184,7 @@ std::string unescape_html(const std::string_view html)
 std::string_view get_env(const std::string_view name)
 {
-    const char *env = std::getenv(name.data());
+    const char *env = std::getenv(name.data()); // NOLINT(concurrency-mt-unsafe)
    if (env != nullptr)
    {
        return env;
--- a/src/main.cpp
+++ b/src/main.cpp
@ -28,6 +28,7 @@
 #include <fmt/format.h>
 #include <fmt/ostream.h> // For compatibility with fmt 4.
 #include <chrono>
 #include <clocale>
 #include <cmath>
 #include <cstdint>
@ -45,6 +46,9 @@
 #include <thread>
 #include <vector>
 constexpr int EXIT_FATAL{2}; // NOLINT(readability-identifier-naming)
 // NOLINTNEXTLINE(readability-function-cognitive-complexity)
 int main(int argc, char *argv[])
 {
    using namespace epubgrep;
@ -56,7 +60,7 @@ int main(int argc, char *argv[])
    // locale_generator("").name.c_str() returns "*" instead of "". That's why
    // the global C locale isn't changed. So we have to set it additionally.
-    std::setlocale(LC_ALL, "");
+    std::setlocale(LC_ALL, ""); // NOLINT(concurrency-mt-unsafe)
    boost::locale::generator locale_generator;
    locale_generator.add_messages_path("translations");
    locale_generator.add_messages_path("/usr/share/locale");
@ -77,7 +81,7 @@ int main(int argc, char *argv[])
    { // Exceptions we can't recover from or ones we don't know.
        LOG(log::sev::fatal)
            << e.what() << translate(" (while parsing options)");
-        return EXIT_FAILURE;
+        return EXIT_FATAL;
    }
    if (opts.debug)
@ -122,8 +126,10 @@ int main(int argc, char *argv[])
                }
                LOG(log::sev::error)
-                    << format(translate("Could not open {0:s}: {1:s}").str(),
+                    << format(translate("Could not open {0:s}: {1:s}")
-                              e.path1(), e.what());
+                                  .str()
                                  .c_str(),
                              e.path1().c_str(), e.what());
                return_code = EXIT_FAILURE;
            }
        }
@ -139,6 +145,7 @@ int main(int argc, char *argv[])
    vector<vector<search::match>> matches_all;
    std::mutex mutex_matches_all;
    vector<std::future<int>> futurepool;
    std::atomic<size_t> books_searched{0};
    auto search_file{
        [&opts, &matches_all, &mutex_matches_all,
@ -170,18 +177,25 @@ int main(int argc, char *argv[])
                catch (const std::ifstream::failure &e)
                {
                    LOG(log::sev::error)
-                        << std::strerror(errno)
+                        << std::strerror(errno) // FIXME: Not thread safe.
-                        << format(translate(" (while opening {0:s})").str(),
+                        << format(translate(" (while opening {0:s})")
-                                  filepath);
+                                      .str()
                                      .c_str(),
                                  filepath.c_str());
                    return EXIT_FAILURE;
                }
                catch (const boost::regex_error &e)
                {
                    LOG(log::sev::fatal) << e.what();
                    return EXIT_FATAL;
                }
            }
            return EXIT_SUCCESS;
        }};
    auto futures_cleanup{
-        [&futurepool, &return_code](const bool wait = false)
+        [&futurepool, &return_code, &books_searched](const bool wait = false)
        {
            using namespace std::chrono_literals;
@ -201,6 +215,7 @@ int main(int argc, char *argv[])
                    }
                }
                futurepool.erase(it);
                ++books_searched;
            }
        }};
@ -212,6 +227,27 @@ int main(int argc, char *argv[])
        }()};
    DEBUGLOG << "max_threads = " << max_threads;
    const auto print_status{
        [&opts, &books_searched, &input_files](std::future<bool> cancel)
        {
            if (!opts.status)
            {
                return;
            }
            while (cancel.wait_for(std::chrono::seconds(opts.status_interval))
                   != std::future_status::ready)
            {
                std::cerr << format(translate("{0:d} of {1:d} books searched.")
                                        .str()
                                        .c_str(),
                                    books_searched, input_files.size())
                          << '\n';
            }
            std::cerr << translate("All books searched.") << '\n';
        }};
    std::promise<bool> promise_status;
    std::thread thread_status{print_status, promise_status.get_future()};
    for (const auto &filepath : input_files)
    {
        while (futurepool.size() >= max_threads)
@ -219,11 +255,15 @@ int main(int argc, char *argv[])
            DEBUGLOG << "Attempting to clean up threads";
            futures_cleanup();
        }
        if (return_code == EXIT_FATAL)
        {
            break;
        }
        futurepool.emplace_back(
            std::async(std::launch::async, search_file, filepath));
        DEBUGLOG << "Launched new thread";
-        if (!matches_all.empty() && !opts.json)
+        if (!matches_all.empty() && !opts.json && !opts.html)
        {
            output::print_matches(matches_all[0], opts,
                                  input_files.size() == 1);
@ -233,11 +273,21 @@ int main(int argc, char *argv[])
    }
    DEBUGLOG << "Waiting for remaining threads to finish";
    futures_cleanup(true);
    promise_status.set_value(true);
    thread_status.join();
    if (return_code == EXIT_FATAL)
    {
        return EXIT_FATAL;
    }
    if (opts.json)
    {
        output::json_all(matches_all);
    }
    else if (opts.html)
    {
        output::html_all(matches_all, opts);
    }
    else
    {
        for (const auto &matches : matches_all)
--- a/src/options.cpp
+++ b/src/options.cpp
@ -49,13 +49,19 @@ using std::cout;
 options parse_options(int argc, char *argv[])
 {
    po::options_description options_visible(translate("Available options"));
    // clang-format off
-    options_visible.add_options()
+    po::options_description options_general(translate("General options"));
    options_general.add_options()
        ("help,h",
         translate("Display this help and exit.").str().data())
        ("version,V",
         translate("Display version information and exit.").str().data())
        ("debug",
         translate("Enable debug output.").str().data())
    ;
    po::options_description options_search(translate("Search options"));
    options_search.add_options()
        ("basic-regexp,G",
         translate("PATTERN is a basic regular expression (default).")
         .str().data())
@ -66,14 +72,25 @@ options parse_options(int argc, char *argv[])
         .str().data())
        ("perl-regexp,P",
         translate("PATTERN is a Perl regular expression.").str().data())
        ("ignore-case,i",
         translate("Ignore case distinctions in pattern and data.")
         .str().data())
        ("raw,a",
         translate("Do not clean up text before searching.").str().data())
        ("recursive,r",
         translate("Read all files under each directory, recursively.")
         .str().data())
        ("dereference-recursive,R",
         translate("Read all files under each directory, recursively, "
                   "following symlinks.").str().data())
        ("regexp,e", po::value<std::vector<std::string>>()
         ->value_name(translate("PATTERN"))->composing()->required(),
         translate("Use additional PATTERN for matching.").str().data())
-        ("raw,a",
+    ;
-         translate("Do not clean up text before searching.").str().data())
+
    po::options_description options_output(translate("Output options"));
    options_output.add_options()
        ("context,C", po::value<std::uint64_t>()
         ->value_name(translate("NUMBER"))->default_value(0),
         translate("Print NUMBER words of context around matches.")
@ -83,18 +100,19 @@ options parse_options(int argc, char *argv[])
        ("no-filename",po::value<std::string>()->value_name(translate("WHICH")),
         translate("Suppress the mentioning of file names on output. "
                   "WHICH is ‘filesystem’, ‘in-epub’ or ‘all’.").str().data())
        ("recursive,r",
         translate("Read all files under each directory, recursively.")
         .str().data())
        ("dereference-recursive,R",
         translate("Read all files under each directory, recursively, "
                   "following symlinks.") .str().data())
        ("ignore-archive-errors",
-         translate("Ignore errors about wrong file formats.") .str().data())
+         translate("Ignore errors about wrong file formats.").str().data())
        ("debug",
         translate("Enable debug output.") .str().data())
        ("json",
-         translate("Output JSON instead of plain text.") .str().data())
+         translate("Output JSON instead of plain text.").str().data())
        ("html",
         translate("Output HTML instead of plain text.").str().data())
        ("status",
         translate("Output status message every STATUS-INTERVAL seconds.")
         .str().data())
        ("status-interval", po::value<std::uint64_t>()
         ->value_name(translate("NUMBER"))->default_value(30),
         translate("Set status message interval to NUMBER seconds.")
         .str().data())
    ;
    po::options_description options_hidden("Hidden options");
@ -103,6 +121,12 @@ options parse_options(int argc, char *argv[])
         ->value_name("FILE"), "Input file to search.")
        ;
    // clang-format on
    po::options_description options_visible;
    options_visible.add(options_general)
        .add(options_search)
        .add(options_output);
    po::options_description options_all("Allowed options");
    options_all.add(options_visible).add(options_hidden);
@ -135,7 +159,7 @@ options parse_options(int argc, char *argv[])
    if (vm.count("help") != 0)
    {
-        cout << translate("Usage: epubgrep [OPTION]… PATTERN [FILE]…\n");
+        cout << translate("Usage: epubgrep [OPTION]… PATTERN FILE…\n");
        cout << options_visible;
        cout << translate("\nYou can access the full manual "
                          "with `man epubgrep`.\n");
@ -235,6 +259,9 @@ options parse_again(const po::variables_map &vm)
    opts.ignore_archive_errors = vm.count("ignore-archive-errors") > 0;
    opts.debug = vm.count("debug") > 0;
    opts.json = vm.count("json") > 0;
    opts.html = vm.count("html") > 0;
    opts.status = vm.count("status") > 0;
    opts.status_interval = vm["status-interval"].as<std::uint64_t>();
    if (vm.count("regexp") > 0)
    {
--- a/src/options.hpp
+++ b/src/options.hpp
@ -58,6 +58,9 @@ struct options
    bool ignore_archive_errors{false};
    bool debug{false};
    bool json{false};
    bool html{false};
    bool status{false};
    uint64_t status_interval{0};
    //! For the debug output.
    friend std::ostream &operator<<(std::ostream &out, const options &opts);
--- a/src/output.cpp
+++ b/src/output.cpp
@ -24,6 +24,7 @@
 #include <nlohmann/json.hpp>
 #include <termcolor/termcolor.hpp>
 #include <cstdint>
 #include <iostream>
 #include <sstream>
@ -37,27 +38,22 @@ using std::cout;
 void print_matches(const std::vector<search::match> &matches,
                   const options::options &opts, bool single_file)
 {
-    fs::path last_epub;
+    if (!single_file && !opts.no_fn_fs)
    {
        if (!opts.nocolor)
        {
            cout << termcolor::yellow;
        }
        cout << format(translate("  In {0:s}: \n").str().c_str(),
                       fs::relative(matches[0].filepath_epub).c_str());
        if (!opts.nocolor)
        {
            cout << termcolor::reset;
        }
    }
    for (const auto &match : matches)
    {
        if (!single_file && !opts.no_fn_fs)
        {
            if (match.filepath_epub != last_epub)
            {
                if (!opts.nocolor)
                {
                    cout << termcolor::yellow;
                }
                cout << format(translate("  In {0:s}: \n").str(),
                               fs::relative(match.filepath_epub));
                last_epub = match.filepath_epub;
                if (!opts.nocolor)
                {
                    cout << termcolor::reset;
                }
            }
        }
        std::vector<std::string> metadata;
        if (!opts.no_fn_epub)
        {
@ -82,22 +78,25 @@ void print_matches(const std::vector<search::match> &matches,
        {
            metadata.emplace_back("page " + match.page);
        }
-        if (!opts.nocolor)
+        if (!metadata.empty())
        {
-            cout << termcolor::italic;
+            if (!opts.nocolor)
        }
        for (const auto &part : metadata)
        {
            cout << part;
            if (part != *(metadata.rbegin()))
            {
-                cout << ", ";
+                cout << termcolor::italic;
            }
            for (const auto &part : metadata)
            {
                cout << part;
                if (part != *(metadata.rbegin()))
                {
                    cout << ", ";
                }
            }
            cout << ": ";
            if (!opts.nocolor)
            {
                cout << termcolor::reset;
            }
        }
        cout << ": ";
        if (!opts.nocolor)
        {
            cout << termcolor::reset;
        }
        cout << match.context.first;
        if (!opts.nocolor)
@ -133,7 +132,95 @@ void json_all(const std::vector<std::vector<search::match>> &matches_all)
        }
    }
-    std::cout << json.dump() << '\n';
+    cout << json.dump() << '\n';
 }
 // Print all matches as one HTML document on standard output.
 //
 // Every inner vector of matches_all becomes an <article> with a table; its
 // caption is the path of the EPUB or, if opts.no_fn_fs is set, a running
 // number. The column with the path inside the EPUB is left out if
 // opts.no_fn_epub is set.
 //
 // File names and book contents are not under our control, so everything that
 // is neither a literal nor a translation is escaped before it is written into
 // the markup.
 void html_all(const std::vector<std::vector<search::match>> &matches_all,
               const options::options &opts)
 {
    // Replace the characters that are special in HTML text and attributes.
    const auto escape{[](const std::string &raw)
                      {
                          std::string escaped;
                          escaped.reserve(raw.size());
                          for (const char chr : raw)
                          {
                              switch (chr)
                              {
                              case '&':
                                  escaped += "&amp;";
                                  break;
                              case '<':
                                  escaped += "&lt;";
                                  break;
                              case '>':
                                  escaped += "&gt;";
                                  break;
                              case '"':
                                  escaped += "&quot;";
                                  break;
                              case '\'':
                                  escaped += "&#39;";
                                  break;
                              default:
                                  escaped += chr;
                                  break;
                              }
                          }
                          return escaped;
                      }};
    std::uint64_t count{1};
    cout << "<!DOCTYPE html>\n";
    // Translators: Replace “en” with your language code here.
    cout << format(R"(<html lang="{0:s}">)", translate("en").str());
    cout << "<head><title>epubgrep output</title>"
            "<style>article { margin: 1em; }</style>"
            "</head><body>\n\n";
    for (const auto &matches : matches_all)
    {
        const auto identifier{
            [&opts, count, &matches, &escape]
            {
                if (opts.no_fn_fs)
                {
                    return format(translate("File {0:d}").str(), count);
                }
                return escape(fs::relative(matches[0].filepath_epub).string());
            }()};
        // Start article, table and print table header.
        cout << format(R"(<article aria-labelledby="file_{0:d}">)", count)
             << "\n  <table>\n"
             << format(R"(    <caption id="file_{0:d}">{1:s}</caption>)", count,
                       identifier)
             << '\n'
             << "    <tr>\n";
        if (!opts.no_fn_epub)
        {
            cout << format(R"(      <th id="file_path_{0:d}">{1:s}</th>)",
                           count,
                           translate("File path (in EPUB file)").str().c_str())
                 << '\n';
        }
        cout << format(R"(      <th id="headline_{0:d}">{1:s}</th>)", count,
                       translate("Last headline").str().c_str())
             << '\n'
             << format(R"(      <th id="page_{0:d}">{1:s}</th>)", count,
                       translate("Page number").str().c_str())
             << '\n'
             << format(R"(      <th id="match_{0:d}">{1:s}</th>)", count,
                       translate("Match").str().c_str())
             << "\n    </tr>\n";
        for (const auto &match : matches)
        {
            // lang attribute for cells with book text, empty if unknown.
            const auto lang{[&match, &escape]
                            {
                                if (!match.language.empty())
                                {
                                    return format(R"( lang="{0:s}")",
                                                  escape(match.language));
                                }
                                return std::string{};
                            }()};
            cout << "    <tr>\n";
            if (!opts.no_fn_epub)
            {
                cout << format(
                    R"(      <td headers="file_path_{0:d}">{1:s}</td>)", count,
                    escape(match.filepath_inside))
                     << '\n';
            }
            cout << format(
                R"(      <td headers="headline_{0:d}"{1:s}>{2:s}</td>)", count,
                lang, escape(match.headline))
                 << '\n';
            cout << format(R"(      <td headers="page_{0:d}">{1:s}</td>)",
                           count, escape(match.page))
                 << '\n';
            cout << format(R"(      <td headers="match_{0:d}"{1:s}>{2:s})"
                           R"(<strong>{3:s}</strong>{4:s}</td>)",
                           count, lang, escape(match.context.first),
                           escape(match.text), escape(match.context.second))
                 << '\n';
            cout << "    </tr>\n";
        }
        cout << "  </table>\n</article>\n\n";
        ++count;
    }
    cout << "</body></html>\n";
 }
 } // namespace epubgrep::output
--- a/src/output.hpp
+++ b/src/output.hpp
@ -25,11 +25,17 @@
 namespace epubgrep::output
 {
 // Print the matches of an EPUB.
 void print_matches(const std::vector<search::match> &matches,
                   const options::options &opts, bool single_file);
 //! Print all matches as JSON.
 void json_all(const std::vector<std::vector<search::match>> &matches_all);
 //! Print all matches as HTML.
 void html_all(const std::vector<std::vector<search::match>> &matches_all,
              const options::options &opts);
 } // namespace epubgrep::output
 #endif // EPUBGREP_OUTPUT_HPP
--- a/src/search.cpp
+++ b/src/search.cpp
@ -16,6 +16,7 @@
 #include "search.hpp"
 #include "book.hpp"
 #include "fs-compat.hpp"
 #include "helpers.hpp"
 #include "log.hpp"
@ -27,6 +28,8 @@
 #include <algorithm>
 #include <array>
 #include <iterator>
 #include <memory>
 #include <string>
 #include <string_view>
 #include <vector>
@ -41,8 +44,8 @@ std::vector<match> search(const fs::path &filepath,
                          const std::string_view regex, const settings &opts)
 {
    LOG(log::sev::info)
-        << format(R"(Starting search in {0:s} using regex "{1:s}")", filepath,
+        << format(R"(Starting search in {0:s} using regex "{1:s}")",
-                  regex);
+                  filepath.c_str(), regex);
    boost::regex::flag_type flags{};
    switch (opts.regex)
@ -71,119 +74,37 @@ std::vector<match> search(const fs::path &filepath,
    const boost::regex re(regex.data(), flags);
    std::vector<match> matches;
-    std::vector<string> epub_filepaths{[&opts, &filepath]
+    auto book{book::read(filepath, opts.raw)};
-                                       {
+    for (const auto &file : book.files)
                                           if (!opts.raw)
                                           {
                                               return zip::list_spine(filepath);
                                           }
                                           return zip::list(filepath);
                                       }()};
    for (const auto &entry : epub_filepaths)
    {
-        DEBUGLOG << "Processing " << entry;
+        const auto &doc{file.second};
-        auto document{zip::read_file(filepath, entry)};
+        string::const_iterator begin{doc.text->begin()};
-        if (!opts.raw)
+        string::const_iterator end{doc.text->end()};
-        {
+        auto begin_text{begin};
            cleanup_text(document);
            document = helpers::unescape_html(document);
        }
        string::const_iterator begin{document.begin()};
        string::const_iterator end{document.end()};
        boost::match_results<string::const_iterator> match_result;
        string last_headline;
        string last_page;
        while (boost::regex_search(begin, end, match_result, re,
                                   boost::match_default))
        {
            match match; // FIXME: Rename variable or struct.
            match.filepath_epub = filepath;
-            match.filepath_inside = entry;
+            match.filepath_inside = file.first;
            match.text = match_result[0];
            match.context = context(match_result, opts.context);
-            const auto current_headline{headline(match_result.prefix().str())};
+            const auto pos = static_cast<size_t>(
-            if (!current_headline.empty())
+                std::distance(begin_text, match_result[0].begin()));
-            {
+            match.headline = headline(doc, pos);
-                last_headline = current_headline;
+            match.page = page(doc, pos);
-            }
+            match.language = doc.language; // FIXME: Get language of match.
            match.headline = last_headline;
            const auto current_page{page(match_result.prefix().str())};
            if (!current_page.empty())
            {
                last_page = current_page;
            }
            match.page = last_page;
            matches.emplace_back(match);
-            begin = match_result[0].second;
+            begin = match_result[0].end();
        }
    }
    return matches;
 }
 //! Strip HTML from text in place, remove newlines, condense spaces.
 //!
 //! Opening and closing h1–h6 tags are replaced by the markers "<H>" and
 //! "</H>", <span> tags that describe a page break are replaced by
 //! "<PAGE n>", and every other tag is removed. Afterwards all '\r' are
 //! erased, '\n' and '\t' become spaces and runs of spaces are collapsed.
 void cleanup_text(string &text)
 {
    static const boost::regex re_header_start{"<[hH][1-6]"};
    static const boost::regex re_header_end{"</[hH][1-6]"};
    static const boost::regex re_pagebreak{".+pagebreak.+(title|aria-label)"
                                           "=\"([[:alnum:]]+)\".*"};
    size_t pos{};
    while ((pos = text.find('<', pos)) != string::npos)
    {
        // Mark headlines. We need them later on.
        string replacement;
        if (boost::regex_match(text.substr(pos, 3), re_header_start))
        {
            replacement = "<H>";
        }
        else if (boost::regex_match(text.substr(pos, 4), re_header_end))
        {
            replacement = "</H>";
        }
        else if (text.substr(pos, 6) == "<span ")
        {
            // Look at this tag only: search for its '>' starting at pos and
            // convert the position into a length for substr().
            const auto endpos{text.find('>', pos)};
            // Keep the tag in a named string so that the sub-matches still
            // point to valid memory when they are read below.
            const string tag{text.substr(pos, endpos == string::npos
                                                  ? string::npos
                                                  : endpos + 1 - pos)};
            boost::smatch match;
            if (boost::regex_search(tag, match, re_pagebreak))
            {
                replacement = "<PAGE " + match[2].str() + ">";
            }
        }
        else if (text.substr(pos, 7) == "<style "
                 || text.substr(pos, 8) == "<script ")
        {
            const auto tag_end{text.find('>', pos)};
            if (tag_end == string::npos)
            {
                // Unterminated tag: drop the rest, like the generic path
                // below does. Without this, npos + 1 wraps to 0 and the loop
                // would never terminate.
                text.erase(pos);
                break;
            }
            // Skip the opening tag; the replace below then removes everything
            // up to and including the next '>'.
            pos = tag_end + 1;
        }
        // If there is no further '>', the length wraps around and the rest of
        // the text is removed.
        text.replace(pos, text.find('>', pos) + 1 - pos, replacement);
        pos += replacement.length();
    }
    pos = 0;
    while ((pos = text.find('\r', pos)) != string::npos)
    {
        text.erase(pos, 1);
    }
    pos = 0;
    while ((pos = text.find_first_of("\n\t", pos)) != string::npos)
    {
        text.replace(pos, 1, " ");
    }
    pos = 0;
    // pos is deliberately not advanced, so runs of 3+ spaces shrink to one.
    while ((pos = text.find("  ", pos)) != string::npos)
    {
        text.replace(pos, 2, " ");
    }
 }
 match_context context(const boost::match_results<string::const_iterator> &match,
                      std::uint64_t words)
 {
@ -212,12 +133,10 @@ match_context context(const boost::match_results<string::const_iterator> &match,
            pos_before = std::find_first_of(pos_before, rend_before,
                                            whitespace.begin(),
                                            whitespace.end());
-            if (pos_before != rend_before)
+            while (pos_before != rend_before
                   && helpers::is_whitespace(*pos_before))
            {
-                while (helpers::is_whitespace(*pos_before))
+                ++pos_before;
                {
                    ++pos_before;
                }
            }
        }
@ -226,20 +145,17 @@ match_context context(const boost::match_results<string::const_iterator> &match,
            pos_after = std::find_first_of(pos_after, end_after,
                                           whitespace.begin(),
                                           whitespace.end());
-            if (pos_after != end_after)
+            while (pos_after != end_after && helpers::is_whitespace(*pos_after))
            {
-                while (helpers::is_whitespace(*pos_after))
+                ++pos_after;
                {
                    ++pos_after;
                }
            }
        }
        words -= 1;
    }
-    const std::string before_reversed(rbegin_before, pos_before);
+    const string before_reversed(rbegin_before, pos_before);
    string before(before_reversed.rbegin(), before_reversed.rend());
-    std::string after(begin_after, pos_after);
+    string after(begin_after, pos_after);
    while (helpers::is_whitespace(*before.begin()))
    {
        before.erase(0, 1);
@ -252,39 +168,4 @@ match_context context(const boost::match_results<string::const_iterator> &match,
    return {before, after};
 }
 //! Return the last headline in prefix, if there is one.
 //!
 //! Looks for the last "<H>" marker in prefix and returns the text between it
 //! and the next '<' (or the end of prefix), with leading and trailing
 //! whitespace removed. Returns an empty string if there is no marker.
 string headline(const std::string_view prefix)
 {
    const size_t marker{prefix.rfind("<H>")};
    if (marker == std::string_view::npos)
    {
        return {};
    }
    const size_t start{marker + 3};
    string result{prefix.substr(start, prefix.find('<', start) - start)};
    // Check for emptiness before looking at the first / last character: a
    // headline that is empty or consists only of whitespace would otherwise
    // be dereferenced past its bounds.
    while (!result.empty() && helpers::is_whitespace(result.front()))
    {
        result.erase(0, 1);
    }
    while (!result.empty() && helpers::is_whitespace(result.back()))
    {
        result.pop_back();
    }
    return result;
 }
 //! Return the page named by the last "<PAGE …>" marker in prefix, or an
 //! empty string if prefix contains no such marker.
 string page(const std::string_view prefix)
 {
    constexpr std::string_view marker{"<PAGE "};
    const auto found{prefix.rfind(marker)};
    if (found == std::string_view::npos)
    {
        return {};
    }
    // The page is everything between the marker and the next '>'. If there
    // is no '>', the length wraps around and substr() takes the rest.
    const auto start{found + marker.length()};
    const auto stop{prefix.find('>', start)};
    return string{prefix.substr(start, stop - start)};
 }
 } // namespace epubgrep::search
--- a/src/search.hpp
+++ b/src/search.hpp
@ -22,7 +22,9 @@
 #include <boost/regex.hpp>
 #include <cstddef>
 #include <cstdint>
 #include <map>
 #include <string>
 #include <string_view>
 #include <utility>
@ -41,7 +43,8 @@ struct match
    std::string filepath_inside; //!< The file path of the matched line.
    std::string headline;        //!< The last headline, if available.
    std::string page;            //!< The page number, if available.
-};
+    std::string language;        //!< Match language.
 } __attribute__((aligned(128)));
 struct settings
 {
@ -50,27 +53,25 @@ struct settings
    bool ignore_case{false};
    bool raw{false};
    std::uint64_t context{0};
-};
+} __attribute__((aligned(16)));
 //! Text of one file together with two lookup tables for headlines and pages.
 //! NOTE(review): the size_t keys are presumably positions inside `text` —
 //! confirm against the code that fills the maps.
 struct file_in_epub
 {
    std::string text;                        //!< The text of the file.
    std::map<size_t, std::string> headlines; //!< Headlines, keyed by size_t.
    std::map<size_t, std::string> pages;     //!< Pages, keyed by size_t.
 } __attribute__((aligned(128)));            // Aligned to 128 bytes.
 //! Search file, return matches.
 [[nodiscard]] std::vector<match> search(const fs::path &filepath,
                                        std::string_view regex,
                                        const settings &opts);
 //! Strip HTML, remove newlines, condense spaces.
 void cleanup_text(std::string &text);
 //! Return words before and after the match.
 [[nodiscard]] match_context
 context(const boost::match_results<std::string::const_iterator> &match,
        std::uint64_t words);
 //! Return last headline if possible.
 [[nodiscard]] std::string headline(std::string_view prefix);
 //! Return current page if possible.
 [[nodiscard]] std::string page(std::string_view prefix);
 } // namespace epubgrep::search
 #endif // EPUBGREP_SEARCH_HPP
--- a/src/zip.cpp
+++ b/src/zip.cpp
@ -25,7 +25,6 @@
 #include <boost/locale/message.hpp>
 #include <fmt/format.h>
 #include <fmt/ostream.h> // For compatibility with fmt 4.
 #include <pugixml.hpp>
 #include <cstdlib>
 #include <cstring>
@ -56,8 +55,8 @@ std::vector<std::string> list(const fs::path &filepath)
                << format(translate("File in {0:s} is damaged. "
                                    "Skipping in-EPUB file.\n")
                              .str()
-                              .data(),
+                              .c_str(),
-                          filepath);
+                          filepath.c_str());
            continue;
        }
        toc.emplace_back(in_epub_filepath);
@ -85,7 +84,7 @@ std::string read_file(const fs::path &filepath, std::string_view entry_path)
                                    "Skipping in-EPUB file.\n")
                              .str()
                              .data(),
-                          filepath);
+                          filepath.c_str());
            continue;
        }
        if (std::strcmp(path, entry_path.data()) == 0)
@ -100,9 +99,9 @@ std::string read_file(const fs::path &filepath, std::string_view entry_path)
            {
                close_file(zipfile, filepath);
-                throw exception{
+                throw exception{format(
-                    format(translate("Could not read {0:s} in {1:s}.").str(),
+                    translate("Could not read {0:s} in {1:s}.").str().c_str(),
-                           entry_path, filepath.string())};
+                    entry_path, filepath.string())};
            }
            close_file(zipfile, filepath);
@ -116,7 +115,7 @@ std::string read_file(const fs::path &filepath, std::string_view entry_path)
    if (entry_path == "META-INF/container.xml")
    { // File is probably not an EPUB.
-        exception e{format(translate("{0:s} not found in {1:s}.").str(),
+        exception e{format(translate("{0:s} not found in {1:s}.").str().c_str(),
                           entry_path, filepath.string())};
        e.code = 1;
        throw exception{e};
@ -146,7 +145,7 @@ struct archive *open_file(const fs::path &filepath)
    {
        close_file(zipfile, filepath);
-        exception e{format(translate("Could not open {0:s}.").str(),
+        exception e{format(translate("Could not open {0:s}.").str().c_str(),
                           filepath.string())};
        e.code = 1;
        throw exception{e};
@ -160,84 +159,10 @@ void close_file(struct archive *zipfile, const fs::path &filepath)
    auto result{archive_read_free(zipfile)};
    if (result != ARCHIVE_OK)
    {
-        throw exception{format(translate("Could not close {0:s}.").str(),
+        throw exception{
-                               filepath.string())};
+            format(translate("Could not close {0:s}.").str().c_str(),
                   filepath.string())};
    }
 }
 //! Return the files in the EPUB “spine” (all pages that are actually text).
 //!
 //! Reads META-INF/container.xml to find the OPF file, then resolves every
 //! itemref of the spine to the href of the matching manifest item. Returns
 //! an empty vector (and logs an error) if the OPF file or the spine could
 //! not be read.
 std::vector<std::string> list_spine(const fs::path &filepath)
 {
    const fs::path opf_file_path{
        // Return an owning std::string: the pointer handed out by
        // attribute().value() refers to memory owned by `xml`, which is
        // destroyed as soon as the lambda returns.
        [&filepath]() -> std::string
        {
            pugi::xml_document xml;
            const std::string container{
                read_file(filepath, "META-INF/container.xml")};
            const auto result{xml.load_buffer(&container[0], container.size())};
            if (result)
            {
                return xml.child("container")
                    .child("rootfiles")
                    .first_child()
                    .attribute("full-path")
                    .value();
            }
            LOG(log::sev::error) << result.description() << '\n';
            return "";
        }()};
    std::vector<std::string> spine_filepaths;
    if (!opf_file_path.empty())
    {
        DEBUGLOG << "Parsing " << opf_file_path;
        pugi::xml_document xml;
        const std::string opf_file{read_file(filepath, opf_file_path.string())};
        const auto result{xml.load_buffer(&opf_file[0], opf_file.size())};
        if (result)
        {
            // Try the element names without prefix first, then fall back to
            // the "opf:" prefixed ones.
            auto manifest{xml.child("package").child("manifest")};
            if (manifest == nullptr)
            {
                manifest = xml.child("opf:package").child("opf:manifest");
            }
            auto spine{xml.child("package").child("spine")};
            if (spine == nullptr)
            {
                spine = xml.child("opf:package").child("opf:spine");
            }
            for (const auto &itemref : spine)
            {
                const auto &idref{itemref.attribute("idref").value()};
                const auto &item{manifest.find_child_by_attribute("id", idref)};
                auto href{helpers::urldecode(item.attribute("href").value())};
                // An href that does not start with '/' is resolved against
                // the directory of the OPF file.
                if (href[0] != '/')
                {
                    href = (opf_file_path.parent_path() /= href);
                }
                DEBUGLOG << "Found in spine: " << href;
                spine_filepaths.emplace_back(href);
            }
        }
        else
        {
            LOG(log::sev::error) << "XML: " << result.description() << '\n';
        }
    }
    if (opf_file_path.empty() || spine_filepaths.empty())
    {
        LOG(log::sev::error)
            << format(translate("{0:s} is damaged. Could not read spine. "
                                "Skipping file.\n")
                          .str()
                          .data(),
                      filepath);
        return {};
    }
    return spine_filepaths;
 }
 } // namespace epubgrep::zip
--- a/src/zip.hpp
+++ b/src/zip.hpp
@ -43,9 +43,6 @@ namespace epubgrep::zip
 //! Close zip file.
 void close_file(struct archive *zipfile, const fs::path &filepath);
 //! Returns the files in the EPUB “spine” (all pages that are actually text).
 [[nodiscard]] std::vector<std::string> list_spine(const fs::path &filepath);
 //! It's std::runtime_error, but with another name.
 class exception : public std::runtime_error
 {
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@ -1,5 +1,3 @@
 include(CTest)
 file(GLOB sources_tests CONFIGURE_DEPENDS test_*.cpp)
 file(COPY "test.zip" DESTINATION ${CMAKE_CURRENT_BINARY_DIR})
 file(COPY "test.epub2" DESTINATION ${CMAKE_CURRENT_BINARY_DIR})
@ -7,11 +5,16 @@ file(COPY "test.epub3" DESTINATION ${CMAKE_CURRENT_BINARY_DIR})
 find_package(Catch2 CONFIG)
-if(Catch2_FOUND)                # Catch 2.x
+if(Catch2_FOUND)                # Catch 2.x / 3.x
  include(Catch)
  add_executable(all_tests main.cpp ${sources_tests})
-  target_link_libraries(all_tests
+  if(TARGET Catch2::Catch2WithMain) # Catch 3.x
-    PRIVATE Catch2::Catch2 ${PROJECT_NAME}_lib)
+    target_link_libraries(all_tests
      PRIVATE Catch2::Catch2WithMain ${PROJECT_NAME}_lib)
  else()                        # Catch 2.x
    target_link_libraries(all_tests
      PRIVATE Catch2::Catch2 ${PROJECT_NAME}_lib)
  endif()
  target_include_directories(all_tests PRIVATE "/usr/include/catch2")
  catch_discover_tests(all_tests EXTRA_ARGS "${EXTRA_TEST_ARGS}")
 else()                          # Catch 1.x
--- a/tests/main.cpp
+++ b/tests/main.cpp
@ -1,3 +1,8 @@
 #define CATCH_CONFIG_MAIN
-#include <catch.hpp>
+// catch 3 does not have catch.hpp anymore
 #if __has_include(<catch.hpp>)
 #    include <catch.hpp>
 #else
 #    include <catch_all.hpp>
 #endif
--- a/tests/test_helpers.cpp
+++ b/tests/test_helpers.cpp
@ -1,7 +1,12 @@
 #include "fs-compat.hpp"
 #include "helpers.hpp"
-#include <catch.hpp>
+// catch 3 does not have catch.hpp anymore
 #if __has_include(<catch.hpp>)
 #    include <catch.hpp>
 #else
 #    include <catch_all.hpp>
 #endif
 #include <array>
 #include <exception>
--- a/tests/test_search_epub.cpp
+++ b/tests/test_search_epub.cpp
@ -2,7 +2,12 @@
 #include "options.hpp"
 #include "search.hpp"
-#include <catch.hpp>
+// catch 3 does not have catch.hpp anymore
 #if __has_include(<catch.hpp>)
 #    include <catch.hpp>
 #else
 #    include <catch_all.hpp>
 #endif
 #include <clocale>
 #include <exception>
@ -53,7 +58,7 @@ SCENARIO("Searching EPUB files works")
            {
                try
                {
-                    opts.raw = 1;
+                    opts.raw = true;
                    opts.context = 1;
                    matches = epubgrep::search::search(epubfile, "href", opts);
                }
@ -88,8 +93,7 @@ SCENARIO("Searching EPUB files works")
    GIVEN("Our test EPUB3 file")
    {
        fs::path epubfile{"test.epub3"};
-        std::setlocale(LC_CTYPE,
+        std::setlocale(LC_CTYPE, ""); // Needed for utf-8 support in libarchive.
                       ""); // Needed for utf-8 support in libarchive.
        bool exception{false};
        REQUIRE(fs::exists(epubfile));
@ -128,7 +132,7 @@ SCENARIO("Searching EPUB files works")
                try
                {
                    opts.raw = true;
-                    opts.context = true;
+                    opts.context = 1;
                    matches = epubgrep::search::search(epubfile, "href", opts);
                }
                catch (const std::exception &)
@ -161,6 +165,32 @@ SCENARIO("Searching EPUB files works")
                               R"(media-type="application/xhtml+xml")");
                }
            }
            WHEN("We search for for a phrase at the beginning of the file "
                 "and specify a very high context")
            {
                try
                {
                    opts.context = 69069;
                    matches = epubgrep::search::search(epubfile, "Test for",
                                                       opts);
                }
                catch (const std::exception &)
                {
                    exception = true;
                }
                THEN("No exception is thrown")
                AND_THEN("It returns the match correctly")
                {
                    REQUIRE_FALSE(exception);
                    REQUIRE(matches.at(0).filepath_inside == "start.xhtml");
                    REQUIRE(matches.at(0).text == "Test for");
                    REQUIRE(matches.at(0).headline.empty());
                    REQUIRE(matches.at(0).context.first.empty());
                    REQUIRE(*matches.at(0).context.second.rbegin() == '.');
                }
            }
        }
    }
 }
--- a/tests/test_search_helpers.cpp
+++ b/tests/test_search_helpers.cpp
@ -1,7 +1,13 @@
 #include "book.hpp"
 #include "fs-compat.hpp"
 #include "search.hpp"
-#include <catch.hpp>
+// catch 3 does not have catch.hpp anymore
 #if __has_include(<catch.hpp>)
 #    include <catch.hpp>
 #else
 #    include <catch_all.hpp>
 #endif
 #include <clocale>
 #include <exception>
@ -26,7 +32,7 @@ SCENARIO("Searching helpers work as intended")
                text = "Moss";
                try
                {
-                    epubgrep::search::cleanup_text(text);
+                    text = epubgrep::book::process_page(text).text_cleaned;
                }
                catch (const std::exception &)
                {
@ -46,7 +52,7 @@ SCENARIO("Searching helpers work as intended")
                text = "💖\r\r🦝";
                try
                {
-                    epubgrep::search::cleanup_text(text);
+                    text = epubgrep::book::process_page(text).text_cleaned;
                }
                catch (const std::exception &)
                {
@ -54,7 +60,7 @@ SCENARIO("Searching helpers work as intended")
                }
                THEN("No exception is thrown")
-                AND_THEN("The \\r are removed unchanged")
+                AND_THEN("The \\r are removed")
                {
                    REQUIRE_FALSE(exception);
                    REQUIRE(text == "💖🦝");
@ -66,7 +72,7 @@ SCENARIO("Searching helpers work as intended")
                text = "Moss\n\n\n\n\n\nis good.";
                try
                {
-                    epubgrep::search::cleanup_text(text);
+                    text = epubgrep::book::process_page(text).text_cleaned;
                }
                catch (const std::exception &)
                {
@ -91,8 +97,8 @@ SCENARIO("Searching helpers work as intended")
                text = "… <h3>Soup</h3> …";
                try
                {
-                    epubgrep::search::cleanup_text(text);
+                    auto file{epubgrep::book::process_page(text)};
-                    text = epubgrep::search::headline(text);
+                    text = epubgrep::book::headline(file, text.size());
                }
                catch (const std::exception &)
                {
@ -113,8 +119,8 @@ SCENARIO("Searching helpers work as intended")
                       "road to nowhere</h2> …";
                try
                {
-                    epubgrep::search::cleanup_text(text);
+                    auto file{epubgrep::book::process_page(text)};
-                    text = epubgrep::search::headline(text);
+                    text = epubgrep::book::headline(file, text.size());
                }
                catch (const std::exception &)
                {
@ -134,8 +140,8 @@ SCENARIO("Searching helpers work as intended")
                text = "<html><hr>The long<section>road to nowhere</section>";
                try
                {
-                    epubgrep::search::cleanup_text(text);
+                    auto file{epubgrep::book::process_page(text)};
-                    text = epubgrep::search::headline(text);
+                    text = epubgrep::book::headline(file, text.size());
                }
                catch (const std::exception &)
                {
@ -160,8 +166,8 @@ SCENARIO("Searching helpers work as intended")
                text = R"(… <span epub:type="pagebreak" … title="69"/> …)";
                try
                {
-                    epubgrep::search::cleanup_text(text);
+                    auto file{epubgrep::book::process_page(text)};
-                    text = epubgrep::search::page(text);
+                    text = epubgrep::book::page(file, text.size());
                }
                catch (const std::exception &)
                {
@ -181,8 +187,8 @@ SCENARIO("Searching helpers work as intended")
                text = R"(… <span role="doc-pagebreak" … aria-label="69"/> …)";
                try
                {
-                    epubgrep::search::cleanup_text(text);
+                    auto file{epubgrep::book::process_page(text)};
-                    text = epubgrep::search::page(text);
+                    text = epubgrep::book::page(file, text.size());
                }
                catch (const std::exception &)
                {
--- a/tests/test_search_zip.cpp
+++ b/tests/test_search_zip.cpp
@ -2,10 +2,16 @@
 #include "options.hpp"
 #include "search.hpp"
-#include <catch.hpp>
+// catch 3 does not have catch.hpp anymore
 #if __has_include(<catch.hpp>)
 #    include <catch.hpp>
 #else
 #    include <catch_all.hpp>
 #endif
 #include <clocale>
 #include <exception>
 #include <iostream>
 #include <string>
 #include <vector>
@ -32,8 +38,9 @@ SCENARIO("Searching ZIP files works")
                    opts.regex = epubgrep::options::regex_kind::extended;
                    matches = epubgrep::search::search(zipfile, "📙+\\w?", opts);
                }
-                catch (const std::exception &)
+                catch (const std::exception &e)
                {
                    std::cerr << "EXCEPTION: " << e.what() << '\n';
                    exception = true;
                }
@ -53,8 +60,9 @@ SCENARIO("Searching ZIP files works")
                    opts.context = 1;
                    matches = epubgrep::search::search(zipfile, "📗", opts);
                }
-                catch (const std::exception &)
+                catch (const std::exception &e)
                {
                    std::cerr << "EXCEPTION: " << e.what() << '\n';
                    exception = true;
                }
@ -78,8 +86,9 @@ SCENARIO("Searching ZIP files works")
                    matches = epubgrep::search::search(zipfile, R"([ \n])",
                                                       opts);
                }
-                catch (const std::exception &)
+                catch (const std::exception &e)
                {
                    std::cerr << "EXCEPTION: " << e.what() << '\n';
                    exception = true;
                }
@ -114,12 +123,13 @@ SCENARIO("Searching ZIP files works")
                try
                {
                    opts.context = 1;
-                    opts.regex = epubgrep::options::regex_kind::extended;
+                    opts.regex = epubgrep::options::regex_kind::perl;
                    matches = epubgrep::search::search(
                        zipfile, R"(work\s[\w]+\.\W[\w']+\Wstay)", opts);
                }
-                catch (const std::exception &)
+                catch (const std::exception &e)
                {
                    std::cerr << "EXCEPTION: " << e.what() << '\n';
                    exception = true;
                }
--- a/tests/test_zip.cpp
+++ b/tests/test_zip.cpp
@ -1,7 +1,12 @@
 #include "fs-compat.hpp"
 #include "zip.hpp"
-#include <catch.hpp>
+// catch 3 does not have catch.hpp anymore
 #if __has_include(<catch.hpp>)
 #    include <catch.hpp>
 #else
 #    include <catch_all.hpp>
 #endif
 #include <clocale>
 #include <exception>
--- a/translations/de.po
+++ b/translations/de.po
@ -1,22 +1,28 @@
 msgid ""
 msgstr ""
-"Project-Id-Version: epubgrep 0.5.0\n"
+"Project-Id-Version: epubgrep 0.6.0\n"
 "Report-Msgid-Bugs-To: \n"
-"POT-Creation-Date: 2021-06-02 09:11+0200\n"
+"POT-Creation-Date: 2021-08-20 17:06+0200\n"
-"PO-Revision-Date: 2021-06-02 09:12+0200\n"
+"PO-Revision-Date: 2021-08-20 17:07+0200\n"
 "Last-Translator: tastytea <tastytea@tastytea.de>\n"
 "Language-Team: tastytea <https://schlomp.space/tastytea/epubgrep>\n"
 "Language: de\n"
 "MIME-Version: 1.0\n"
 "Content-Type: text/plain; charset=UTF-8\n"
 "Content-Transfer-Encoding: 8bit\n"
-"X-Generator: Poedit 2.4.3\n"
+"X-Generator: Poedit 3.0\n"
 "X-Poedit-Basepath: ..\n"
 "Plural-Forms: nplurals=2; plural=(n != 1);\n"
 "X-Poedit-SourceCharset: UTF-8\n"
 "X-Poedit-KeywordsList: translate\n"
 "X-Poedit-SearchPath-0: .\n"
 # „Spine“ ist ein Fachbegriff, daher habe ich ihn nicht übersetzt.
 #: src/book.cpp:284
 msgid "{0:s} is damaged. Could not read spine. Skipping file.\n"
 msgstr ""
 "{0:s} ist beschädigt. Konnte „Spine“ nicht lesen. Überspringe Datei.\n"
 #: src/log.cpp:70
 msgid "WARNING"
 msgstr "WARNUNG"
@ -29,21 +35,29 @@ msgstr "FEHLER"
 msgid "FATAL ERROR"
 msgstr "SCHWERER FEHLER"
-#: src/main.cpp:79
+#: src/main.cpp:83
 msgid " (while parsing options)"
 msgstr " (während Optionen interpretiert wurden)"
-#: src/main.cpp:125
+#: src/main.cpp:129
 msgid "Could not open {0:s}: {1:s}"
 msgstr "Konnte {0:s} nicht öffnen: {1:s}"
-#: src/main.cpp:174
+#: src/main.cpp:179
 msgid " (while opening {0:s})"
 msgstr " (während {0:s} durchsucht wurde)"
-#: src/options.cpp:52
+#: src/main.cpp:237
-msgid "Available options"
+msgid "{0:d} of {1:d} books searched."
-msgstr "Verfügbare Optionen"
+msgstr "{0:d} von {1:d} Büchern durchsucht."
 #: src/main.cpp:241
 msgid "All books searched."
 msgstr "Alle Bücher durchsucht."
 #: src/options.cpp:53
 msgid "General options"
 msgstr "Allgemeine Optionen"
 #: src/options.cpp:56
 msgid "Display this help and exit."
@ -54,55 +68,76 @@ msgid "Display version information and exit."
 msgstr "Versionsinformationen ausgeben und beenden."
 #: src/options.cpp:60
 msgid "Enable debug output."
 msgstr "Debug-Ausgabe einschalten."
 #: src/options.cpp:63
 msgid "Search options"
 msgstr "Suchoptionen"
 #: src/options.cpp:66
 msgid "PATTERN is a basic regular expression (default)."
 msgstr "MUSTER ist eine „basic regular expression“ (standard)."
-#: src/options.cpp:63
+#: src/options.cpp:69
 msgid "PATTERN is an extended regular expression."
 msgstr "MUSTER ist eine „extended regular expression“."
-#: src/options.cpp:65
+#: src/options.cpp:71
 msgid "Use grep-variation of regular expressions with -G and -E."
 msgstr "Benutze grep-Variante von regulären Ausdrücken mit -G und -E."
-#: src/options.cpp:68
+#: src/options.cpp:74
 msgid "PATTERN is a Perl regular expression."
 msgstr "MUSTER ist ein regulärer Ausdruck, wie Perl ihn akzeptiert."
-#: src/options.cpp:70
+#: src/options.cpp:77
 msgid "Ignore case distinctions in pattern and data."
 msgstr "Unterschied zwischen Groß- und Kleinschreibung ignorieren."
-#: src/options.cpp:73
+#: src/options.cpp:80
 msgid "PATTERN"
 msgstr "MUSTER"
 #: src/options.cpp:74
 msgid "Use additional PATTERN for matching."
 msgstr "Benutze zusätzliches MUSTER zum Abgleich."
 #: src/options.cpp:76
 msgid "Do not clean up text before searching."
 msgstr "Nicht den Text vor dem suchen säubern."
-#: src/options.cpp:78
+#: src/options.cpp:82
 msgid "Read all files under each directory, recursively."
 msgstr "Lies rekursiv alle Dateien unter jedem Verzeichnis."
 #: src/options.cpp:85
 msgid "Read all files under each directory, recursively, following symlinks."
 msgstr ""
 "Lies rekursiv alle Dateien unter jedem Verzeichnis und folge dabei symlinks."
 #: src/options.cpp:88
 msgid "PATTERN"
 msgstr "MUSTER"
 #: src/options.cpp:89
 msgid "Use additional PATTERN for matching."
 msgstr "Benutze zusätzliches MUSTER zum Abgleich."
 #: src/options.cpp:92
 msgid "Output options"
 msgstr "Ausgabeoptionen"
 #: src/options.cpp:95 src/options.cpp:113
 msgid "NUMBER"
 msgstr "ANZAHL"
-#: src/options.cpp:79
+#: src/options.cpp:96
 msgid "Print NUMBER words of context around matches."
 msgstr "ANZAHL Wörter an Kontext um die Treffer herum ausgeben."
-#: src/options.cpp:81
+#: src/options.cpp:98
 msgid "Turn off colors and other decorations."
 msgstr "Schalte Farben und andere Dekorationen aus."
 # Bezieht sich auf --no-filename.
-#: src/options.cpp:83
+#: src/options.cpp:100
 msgid "WHICH"
 msgstr "WELCHE"
-#: src/options.cpp:84
+#: src/options.cpp:101
 msgid ""
 "Suppress the mentioning of file names on output. WHICH is ‘filesystem’, ‘in-"
 "epub’ or ‘all’."
 @ -110,32 +145,31 @@ msgstr ""
 "Unterdrücke die Erwähnung der Dateinamen in der Ausgabe. WELCHE kann "
 "‚filesystem‘, ‚in-epub‘ oder ‚all‘ sein."
-#: src/options.cpp:87
+#: src/options.cpp:104
 msgid "Read all files under each directory, recursively."
 msgstr "Lies rekursiv alle Dateien unter jedem Verzeichnis."
 #: src/options.cpp:90
 msgid "Read all files under each directory, recursively, following symlinks."
 msgstr ""
 "Lies rekursiv alle Dateien unter jedem Verzeichnis und folge dabei symlinks."
 #: src/options.cpp:93
 msgid "Ignore errors about wrong file formats."
 msgstr "Ignoriere Fehlermeldungen wegen des falschen Dateiformats."
-#: src/options.cpp:95
+#: src/options.cpp:106
 msgid "Enable debug output."
 msgstr "Debug-Ausgabe einschalten."
 #: src/options.cpp:97
 msgid "Output JSON instead of plain text."
 msgstr "Gib JSON statt Klartext aus."
-#: src/options.cpp:138
+#: src/options.cpp:108
-msgid "Usage: epubgrep [OPTION]… PATTERN [FILE]…\n"
+msgid "Output HTML instead of plain text."
-msgstr "Aufruf: epubgrep [OPTION]… MUSTER [DATEI]…\n"
+msgstr "Gib HTML statt Klartext aus."
-#: src/options.cpp:140
+#: src/options.cpp:110
 msgid "Output status message every STATUS-INTERVAL seconds."
 msgstr "Gebe alle STATUS-INTERVAL Sekunden eine Statusmeldung aus."
 #: src/options.cpp:114
 msgid "Set status message interval to NUMBER seconds."
 msgstr "Setze Intervall für Statusmeldungen auf ANZAHL Sekunden."
 #: src/options.cpp:162
 msgid "Usage: epubgrep [OPTION]… PATTERN FILE…\n"
 msgstr "Aufruf: epubgrep [OPTION]… MUSTER DATEI…\n"
 #: src/options.cpp:164
 msgid ""
 "\n"
 "You can access the full manual with `man epubgrep`.\n"
@ -143,7 +177,7 @@ msgstr ""
 "\n"
 "Du kannst mit `man epubgrep` auf das vollständige Handbuch zugreifen.\n"
-#: src/options.cpp:147
+#: src/options.cpp:171
 msgid ""
 "Copyright © 2021 tastytea <tastytea@tastytea.de>\n"
 "License AGPL-3.0-only <https://gnu.org/licenses/agpl.html>.\n"
@ -155,32 +189,51 @@ msgstr ""
 "Für dieses Programm besteht KEINERLEI GARANTIE. Dies ist freie Software,\n"
 "die Sie unter bestimmten Bedingungen weitergeben dürfen.\n"
-#: src/output.cpp:51
+#: src/output.cpp:47
 msgid "  In {0:s}: \n"
 msgstr "  In {0:s}:\n"
-#: src/zip.cpp:56 src/zip.cpp:84
+# Sprache der Benutzeroberfläche.
 #: src/output.cpp:145
 msgid "en"
 msgstr "de"
 #: src/output.cpp:157
 msgid "File {0:d}"
 msgstr "Datei {0:d}"
 #: src/output.cpp:172
 msgid "File path (in EPUB file)"
 msgstr "Dateipfad (innerhalb der EPUB-Datei)"
 #: src/output.cpp:176
 msgid "Last headline"
 msgstr "Letzte Überschrift"
 #: src/output.cpp:179
 msgid "Page number"
 msgstr "Seitennummer"
 #: src/output.cpp:182
 msgid "Match"
 msgstr "Treffer"
 #: src/zip.cpp:55 src/zip.cpp:83
 msgid "File in {0:s} is damaged. Skipping in-EPUB file.\n"
 msgstr "Datei in {0:s} ist beschädigt. Überspringe Datei in der EPUB.\n"
-#: src/zip.cpp:104
+#: src/zip.cpp:103
 msgid "Could not read {0:s} in {1:s}."
 msgstr "Konnte {0:s} in {1:s} nicht lesen."
-#: src/zip.cpp:119 src/zip.cpp:126
+#: src/zip.cpp:118 src/zip.cpp:125
 msgid "{0:s} not found in {1:s}."
 msgstr "{0:s} nicht gefunden in {1:s}."
-#: src/zip.cpp:149
+#: src/zip.cpp:148
 msgid "Could not open {0:s}."
 msgstr "Konnte {0:s} nicht öffnen."
-#: src/zip.cpp:163
+#: src/zip.cpp:162
 msgid "Could not close {0:s}."
 msgstr "Konnte {0:s} nicht schließen."
 # „Spine“ ist ein Fachbegriff, daher habe ich ihn nicht übersetzt.
 #: src/zip.cpp:232
 msgid "{0:s} is damaged. Could not read spine. Skipping file.\n"
 msgstr ""
 "{0:s} ist beschädigt. Konnte „Spine“ nicht lesen. Überspringe Datei.\n"
Author	SHA1	Message	Date
tastytea	449e315397	add performance section to readme continuous-integration/drone/push Build is passing Details	2022-10-01 20:41:23 +02:00
tastytea	7eae29031f	disable cmake-format for now	2022-08-30 23:04:22 +02:00
tastytea	531a409124	clang-tidy: change MinimumVariableNameLength to 2	2022-08-19 01:41:34 +02:00
tastytea	22a50ef661	up the cognitive threshold to 30 continuous-integration/drone/push Build is passing Details	2022-08-16 21:42:13 +02:00
tastytea	94555621d8	fix release upload continuous-integration/drone/push Build is passing Details	2022-08-16 19:14:19 +02:00
tastytea	cfe274f1e1	fix tests (copy paste error) continuous-integration/drone/push Build was killed Details	2022-08-16 19:03:08 +02:00
tastytea	eb4630d738	version bump 0.6.2 continuous-integration/drone/push Build was killed Details botched the 0.6.1 release 😅	2022-08-16 18:38:04 +02:00
tastytea	bbc412db45	add support for testing with catch 3 continuous-integration/drone/push Build was killed Details	2022-08-16 18:35:00 +02:00
tastytea	c0a2f7e779	pass c strings to fmt (…) continuous-integration/drone/push Build was killed Details	2022-08-16 18:15:21 +02:00
tastytea	4b5e6898cd	pass c strings to fmt (and one more) continuous-integration/drone/push Build was killed Details	2022-08-16 18:10:15 +02:00
tastytea	c16265683f	pass c strings to fmt (found some more) continuous-integration/drone/push Build is failing Details	2022-08-16 17:59:03 +02:00
tastytea	d438e2292f	pass c strings to fmt (forgot some) continuous-integration/drone/push Build is passing Details	2022-08-16 17:42:42 +02:00
tastytea	089eac4cfc	CI: install file on Debian and Ubuntu for .dev generation	2022-08-16 17:36:53 +02:00
tastytea	63a8ab2683	pass c strings to fmt continuous-integration/drone/push Build is passing Details boost strings and filesystem paths used to be automatically converted, but that doesn't happen anymore with fmt 9	2022-08-16 16:26:17 +02:00
tastytea	cd03898039	update .clang-tidy continuous-integration/drone/push Build is passing Details	2022-08-16 05:30:59 +02:00
tastytea	550a1143a5	Don't install useless asciidoc dependencies.	2021-12-22 20:22:16 +01:00
tastytea	d1083b7dca	CI: Fix dependencies.	2021-08-21 00:39:03 +02:00
tastytea	1058903def	Add more information about RPMs to readme. continuous-integration/drone/push Build is passing Details	2021-08-21 00:14:04 +02:00
tastytea	5d28b1f4ef	CI: Modify zypper repos more elegantly.	2021-08-20 21:54:22 +02:00
tastytea	bb37e53207	CI: refresh zypper data, resolve build dir conflict. continuous-integration/drone/push Build is passing Details	2021-08-20 21:41:04 +02:00
tastytea	1bddad7083	CI: Fix openSUSE dependencies. continuous-integration/drone/push Build is failing Details	2021-08-20 21:26:41 +02:00
tastytea	7daade6425	CI: Fix sed command.	2021-08-20 21:10:41 +02:00
tastytea	c41f3a2485	CI: Add package generation for openSUSE Leap 15. continuous-integration/drone/push Build was killed Details	2021-08-20 21:07:16 +02:00
tastytea	3e23dc2cd9	CI: Build apt and zypper steps in parallel. continuous-integration/drone/push Build was killed Details	2021-08-20 20:42:05 +02:00
tastytea	9c6dd5ca64	CI: Add rpm package cache.	2021-08-20 20:41:52 +02:00
tastytea	c62799e00f	CI Add openSUSE with GCC 9. continuous-integration/drone/push Build was killed Details	2021-08-20 20:30:17 +02:00
tastytea	636e84408c	Compile with debug flags and sanitizers in CI.	2021-08-20 18:58:23 +02:00
tastytea	ef77a9e4fb	Make sanitizers optional.	2021-08-20 18:54:27 +02:00
tastytea	552df1a49e	Don't crash if language detection fails. continuous-integration/drone/push Build is passing Details If there is no container.xml or something unexpected happens, we just return an empty string.	2021-08-20 17:51:44 +02:00
tastytea	1e0cde8a4b	Fix test, print exceptions.	2021-08-20 17:38:12 +02:00
tastytea	2bede91fb7	Remove some superfluous “std::”. continuous-integration/drone/push Build is failing Details	2021-08-20 17:07:25 +02:00
tastytea	165592982a	Update german translation.	2021-08-20 17:07:12 +02:00
tastytea	b1dcdea95e	Add language attribute to HTML output. Bug: #16	2021-08-20 17:05:06 +02:00
tastytea	299063e02c	Add language to books, documents and matches. Currently only the book's language is actually read and applied down the line.	2021-08-20 16:57:29 +02:00
tastytea	fca719634a	Move OPF file path detection into own function.	2021-08-20 15:35:10 +02:00
tastytea	d2aff45018	Move spine_filepaths() from zip:: to book::.	2021-08-20 15:29:55 +02:00
tastytea	b134bd0301	Add pointer to preferred text version (raw or cleaned) to document.	2021-08-20 15:07:00 +02:00
tastytea	d0738891c2	Ensure the correct order of files and the TOC. continuous-integration/drone/push Build is passing Details	2021-08-17 14:22:28 +02:00
tastytea	b53e99306c	Re-add support for raw text searching.	2021-08-17 13:55:53 +02:00
tastytea	84ef5d1bf3	Move book processing into own file. continuous-integration/drone/push Build is failing Details	2021-08-17 13:05:14 +02:00
tastytea	97fecd37f0	Revert "Remove generator from CMake presets." – it is required. continuous-integration/drone/push Build is passing Details This reverts commit `49de44f729`.	2021-08-05 20:27:17 +02:00
tastytea	e154b62201	Add “Similar projects” to readme. continuous-integration/drone/push Build is passing Details	2021-07-10 12:12:30 +02:00
tastytea	90eb30fa3e	Add sub-headings for option categories in man page.	2021-07-02 14:26:02 +02:00
tastytea	9cc1823b3b	clang-tidy: Set cognitive complexity threshold to 30. 25 is a bit low with a try-catch-block in a for-loop.	2021-06-29 02:09:40 +02:00
tastytea	2489c444df	Add experimental RPM packe config to CMake config.	2021-06-29 02:00:40 +02:00
tastytea	c99c01162d	Silence some clang-tidy warnings. - Thread-unsafe std::getenv and std::setlocale doesn't matter for us. - It is unlikely that we can make main() less complex without making it more complex elsewhere. - Thread-unsafe std::strerror stays unsolved for now.	2021-06-29 01:58:53 +02:00
tastytea	49de44f729	Remove generator from CMake presets.	2021-06-29 01:22:46 +02:00
tastytea	bdcf153b47	Fix usage quick-help. continuous-integration/drone/push Build is passing Details FILE is not optional.	2021-06-26 15:14:57 +02:00
tastytea	57c87ca5e7	Fix typo in german translation. continuous-integration/drone/push Build is passing Details	2021-06-24 19:20:07 +02:00
tastytea	5c0ca46c9d	Version bump 0.6.0. continuous-integration/drone/push Build is passing Details	2021-06-24 18:48:37 +02:00
tastytea	691dea092d	typo. # Previous commits: # `07ec6f7` Update german translation. # `979dc93` Re-order --help messages. # `961deff` Add --status and --status-interval. # `1cf6306` Add linter-exception for EXIT_FATAL.	2021-06-24 18:48:22 +02:00
tastytea	07ec6f789f	Update german translation.	2021-06-24 18:48:08 +02:00
tastytea	979dc9334c	Re-order --help messages. Divided them into 3 categories: - general - search - output Closes: #12	2021-06-24 18:34:07 +02:00
tastytea	961deff41d	Add --status and --status-interval. --status prints a status message to stderr at regular intervals. --status-interval sets the interval for status messages. Closes: #10	2021-06-24 18:13:40 +02:00
tastytea	1cf6306f4b	Add linter-exception for EXIT_FATAL.	2021-06-24 13:16:18 +02:00
tastytea	822bff1955	Don't crash on regex errors. continuous-integration/drone/push Build is passing Details The error will be reported once for each initial thread. Closes: #14	2021-06-24 13:13:49 +02:00
tastytea	18c3d8f58d	Do not show prefix in output if there is no metadata. Previously epubgrep would output “: match” instead of “match”. Closes: #15	2021-06-24 12:53:09 +02:00
tastytea	b2a6f9217b	Add examples to man page.	2021-06-24 12:34:48 +02:00
tastytea	babd7e4f61	Ignore dap-mode file. dap-mode is an Emacs-mode for debugging, the file describes which debugger to use and how to start the program.	2021-06-22 17:23:26 +02:00
tastytea	ed86f3a56d	Add link to HTML version of of the man page.	2021-06-08 20:35:32 +02:00
tastytea	f8270369b6	Make whitespace-reduction a bit more efficient. continuous-integration/drone/push Build is passing Details We now use 2 passes instead of 3.	2021-06-08 17:30:29 +02:00
tastytea	3966b99c3f	Update german translation.	2021-06-08 17:12:26 +02:00
tastytea	37c2fe1bb1	Add HTML output. continuous-integration/drone/push Build is passing Details Prints a simple HTML document with tables wrapped in articles to stdout. Closes: #9	2021-06-08 17:11:32 +02:00
tastytea	a77b90c8b1	Document output::json_all().	2021-06-08 04:26:29 +02:00
tastytea	42e5e52e1b	Update dependencies-on-debian section in readme. continuous-integration/drone/push Build is passing Details There are more hacks required on bionic.	2021-06-07 15:47:03 +02:00
tastytea	1eb763fc37	Mention bullseye-packages in apt-repo.	2021-06-07 15:08:20 +02:00
tastytea	633274e3dd	Version bump 0.5.1.	2021-06-07 15:05:16 +02:00
tastytea	51f8422929	Make HTML entity map static. continuous-integration/drone/push Build is passing Details Drastically reduces allocations.	2021-06-07 01:20:29 +02:00
tastytea	8b5c0d289e	print_matches(): Don't check file name for each match. We only process one file at a time.	2021-06-07 01:09:37 +02:00
tastytea	7b817c42d1	Remove pseudo-HTML bug from man page.	2021-06-07 01:06:51 +02:00
tastytea	be4d8aecd0	Mixed up types.	2021-06-07 00:44:42 +02:00
tastytea	1c8279f96f	Add test that asks for more context than is available. continuous-integration/drone/push Build is passing Details	2021-06-07 00:05:33 +02:00
tastytea	f59c86e20d	Don't search for whitespace beyond the start/end of the text.	2021-06-06 23:48:06 +02:00
tastytea	0470acb00e	Make --raw work again. continuous-integration/drone/push Build is passing Details	2021-06-06 22:37:09 +02:00
tastytea	1e29608c7e	Fix positioning of matches in search::search().	2021-06-06 22:34:52 +02:00
tastytea	5e89a71e00	Update tests for search::headline() and search::page().	2021-06-06 22:11:45 +02:00
tastytea	9708bb69c8	Don't attempt to access a pointer to nowhere.	2021-06-06 21:34:48 +02:00
tastytea	b8431019b7	Don't inject page numbers and headline-markers into the text. continuous-integration/drone/push Build is failing Details The metadata is recorded in position → data pairs. Closes: #13	2021-06-06 21:26:09 +02:00
tastytea	ebb8b63830	Mention inserted page markers in man page. continuous-integration/drone/push Build is passing Details	2021-06-06 16:14:50 +02:00
tastytea	a49c500d0f	Fix <style> and <script> erasure. I didn't take into account that <script […]/> is possible.	2021-06-06 16:06:14 +02:00
tastytea	262aab6671	Add debug log for replacements.	2021-06-06 15:52:09 +02:00
tastytea	9067b387ef	Fix pagebreak-iterators. Oopsie! 😄	2021-06-06 15:50:13 +02:00
tastytea	99e1cd8e98	Re-enabled address sanitizer. continuous-integration/drone/push Build is passing Details Found out what was wrong: I fed boost::regex_search() the pointer to a substring that was created in-place. match[2] was a pointer to a substring inside that. The problem was, that match was declared outside of the if-block. So after the if-block match[2] would point to a now freed memory address. It didn't have any effects because I didn't use match afterwards. I rewrote the whole thing with iterators. Slightly less readable, slightly better performance (probably).	2021-06-05 17:45:07 +02:00
tastytea	bdf9a86651	Fix pagebreak-regex and range in which pagebreaks are searched.	2021-06-05 17:18:35 +02:00
tastytea	f1a0015f28	Disable address sanitizer. It complains about boost/regex/v5/sub_match.hpp:57:30 and I can't figure out what's wrong or how to ignore it.	2021-06-05 14:24:53 +02:00
tastytea	fbf86f51d1	CMake: Add presets for GCC and clang. # Previous commits: # d630fe9 Make text formatting more readable. # `4026937` Don't return pointer to freed memory address. # `cb2aee8` Add address sanitizer to debug flags. # `4b09158` Add automatic package generation for Debian bullseye (11). # Previous commits: # `12e1c64` Make text formatting more readable. # `4026937` Don't return pointer to freed memory address. # `cb2aee8` Add address sanitizer to debug flags. # `4b09158` Add automatic package generation for Debian bullseye (11).	2021-06-05 13:34:52 +02:00
tastytea	12e1c64fc0	Make text formatting more readable.	2021-06-05 13:34:48 +02:00
tastytea	4026937f08	Don't return pointer to freed memory address.	2021-06-04 23:14:36 +02:00
tastytea	cb2aee847f	Add address sanitizer to debug flags.	2021-06-04 23:08:50 +02:00
tastytea	4b09158037	Add automatic package generation for Debian bullseye (11).	2021-06-04 15:18:32 +02:00
tastytea	226b66b77b	Update maximum supported CMake version. continuous-integration/drone/push Build is passing Details	2021-06-04 14:39:53 +02:00
tastytea	92b8281242	Fix CI recipe. continuous-integration/drone/push Build is passing Details	2021-06-03 16:18:01 +02:00
tastytea	2b51229518	Change ctest verbosity to default. continuous-integration/drone/push Build is failing Details	2021-06-03 15:57:56 +02:00
tastytea	bc34a3a515	Make ctest work from build directory.	2021-06-03 15:56:08 +02:00
tastytea	85a00bb23c	Add CMake presets.	2021-06-03 15:44:31 +02:00
tastytea	d83f485fb4	Ignore CMake user presets. <https://cmake.org/cmake/help/latest/manual/cmake-presets.7.html>	2021-06-03 15:05:42 +02:00
tastytea	7252463fbb	Update rebuild-commands in translator guide. continuous-integration/drone/push Build is passing Details	2021-06-02 16:00:33 +02:00
tastytea	7d8cf7de91	Cosmetic fixes.	2021-06-02 14:49:59 +02:00
tastytea	5af10f6767	Fix configuration file example. continuous-integration/drone/push Build is passing Details	2021-06-02 11:21:27 +02:00
tastytea	d1c74d244d	Update screenshot. continuous-integration/drone/push Build is passing Details	2021-06-02 10:04:27 +02:00