add performance section to readme

disable cmake-format for now
clang-tidy: change MinimumVariableNameLength to 2
2022-10-01 20:41:23 +02:00 · 2022-08-30 23:04:22 +02:00 · 2022-08-19 01:41:34 +02:00 · 2022-08-16 21:42:13 +02:00 · 2022-08-16 19:14:19 +02:00 · 2022-08-16 19:03:08 +02:00
43 changed files with 2998 additions and 445 deletions
--- a/.clang-tidy
+++ b/.clang-tidy
@ -1,5 +1,4 @@
-# -*- mode: conf; fill-column: 100; -*-
-# Written for clang-tidy 11.
+# Written for clang-tidy 14.

 ---
 Checks:         '*,
@ -29,7 +28,9 @@ Checks:         '*,
                -fuchsia-multiple-inheritance,
                -llvmlibc*,
                -cppcoreguidelines-avoid-non-const-global-variables,
-                -cert-*-c'
+                -cert-*-c,
+                -abseil-string-find-*,
+                -altera-*'
 FormatStyle:    file            # Use .clang-format.
 CheckOptions:   # ↓ Clashes with static private member prefix. (static int _var;) ↓
                - { key: readability-identifier-naming.VariableCase,          value: lower_case }
@ -39,9 +40,15 @@ CheckOptions:   # ↓ Clashes with static private member prefix. (static int _va
                - { key: readability-identifier-naming.ProtectedMemberCase,   value: lower_case }
                - { key: readability-identifier-naming.ProtectedMemberPrefix, value: _          }

-                - { key: readability-identifier-naming.ClassCase,             value: lower_case  }
+                - { key: readability-identifier-naming.ClassCase,             value: lower_case }
                - { key: readability-identifier-naming.StructCase,            value: lower_case }
                - { key: readability-identifier-naming.EnumCase,              value: lower_case }
                - { key: readability-identifier-naming.FunctionCase,          value: lower_case }
                - { key: readability-identifier-naming.ParameterCase,         value: lower_case }
+
+                - { key: readability-function-cognitive-complexity.Threshold, value: 30         }
+                - { key: readability-identifier-length.MinimumVariableNameLength, value: 2      }
 ...
+
+# -*- mode: yaml; fill-column: 100; -*-
+# vim: set fenc=utf-8 tw=100 et ft=yaml:
--- a/.cmake-format.json
+++ b/.cmake-format.json
@ -0,0 +1,5 @@
+{
+    "format": {
+        "disable": true
+    }
+}
--- a/.drone.yml
+++ b/.drone.yml
@ -1,11 +1,15 @@
 # -*- fill-column: 1000 -*-
-kind: pipeline
 name: Build x86_64
+kind: pipeline
+type: docker

 volumes:
- name: debian-package-cache
+- name: deb-package-cache
  host:
-    path: /var/cache/debian-package-cache
+    path: /var/cache/deb-package-cache
+- name: rpm-package-cache
+  host:
+    path: /var/cache/rpm-package-cache

 trigger:
  event:
@ -13,7 +17,7 @@ trigger:
    - tag

 steps:
- name: GCC 10 / clang 11
+- name: GCC 10 / clang 11 (debug)
  image: debian:bullseye-slim
  pull: always
  environment:
@ -26,20 +30,20 @@ steps:
  - alias apt-get='rm -f /var/cache/apt/archives/lock && apt-get'
  - apt-get update -q
  - apt-get install -qq build-essential cmake clang locales
-  - apt-get install -qq catch libboost-program-options-dev libboost-locale-dev libboost-regex-dev gettext libarchive-dev libfmt-dev asciidoc
-  - rm -rf build && mkdir -p build && cd build
-  - cmake -G "Unix Makefiles" -DWITH_TESTS=YES ..
+  - apt-get install -qq catch libboost-program-options-dev libboost-locale-dev libboost-regex-dev libboost-log-dev gettext libarchive-dev libfmt-dev asciidoc libpugixml-dev nlohmann-json3-dev
+  - rm -rf build_deb && mkdir -p build_deb && cd build_deb
+  - cmake -DCMAKE_BUILD_TYPE=Debug -G "Unix Makefiles" -DWITH_TESTS=YES -DWITH_SANITIZERS=YES ..
  - make VERBOSE=1
  - make install DESTDIR=install
-  - cd tests && ctest -V
-  - cd ../../
-  - rm -rf build && mkdir -p build && cd build
-  - CXX="clang++" cmake -G "Unix Makefiles" -DWITH_TESTS=YES ..
+  - ctest -V
+  - cd ../
+  - rm -rf build_deb && mkdir -p build_deb && cd build_deb
+  - CXX="clang++" cmake -DCMAKE_BUILD_TYPE=Debug -G "Unix Makefiles" -DWITH_TESTS=YES -DWITH_SANITIZERS=YES ..
  - make VERBOSE=1
  - make install DESTDIR=install
-  - cd tests && ctest -V
+  - ctest -V
  volumes:
-  - name: debian-package-cache
+  - name: deb-package-cache
    path: /var/cache/apt/archives

 - name: Download CMake 3.12 installer
@ -47,36 +51,64 @@ steps:
  settings:
    source: https://cmake.org/files/v3.12/cmake-3.12.0-Linux-x86_64.sh
    destination: cmake_installer.sh
+    skip_verify: true
+    sha256: 052b7daa2adab40211c6644da200ef95096d2adbcebd4cc5e60230d9023168bd

- name: GCC 7 / clang 6
+- name: GCC 8 / clang 6
  image: ubuntu:bionic
  pull: always
  environment:
-    CXX: g++-7
+    CXX: g++-8
    CXXFLAGS: -pipe -O2
    DEBIAN_FRONTEND: noninteractive
    LANG: C.UTF-8
+    pugixml_DIR: "/usr/share/libpugixml-dev/cmake"
+    nlohmann_json_DIR: "/usr/lib/cmake"
  commands:
  - rm /etc/apt/apt.conf.d/docker-clean
  - alias apt-get='rm -f /var/cache/apt/archives/lock && apt-get'
  - apt-get update -q
-  - apt-get install -qq build-essential clang locales
-  - apt-get install -qq catch libboost-program-options-dev libboost-locale-dev libboost-regex-dev gettext libarchive-dev libfmt-dev asciidoc
+  - apt-get install -qq g++-8 build-essential clang locales
+  - apt-get install -qq catch libboost-program-options-dev libboost-locale-dev libboost-regex-dev libboost-log-dev gettext libarchive-dev libfmt-dev asciidoc libpugixml-dev  nlohmann-json-dev
  - sh cmake_installer.sh --skip-license --exclude-subdir --prefix=/usr/local
-  - rm -rf build && mkdir -p build && cd build
+  - cp /usr/lib/x86_64-linux-gnu/libpugixml* /lib/x86_64-linux-gnu/
+  - rm -rf build_deb && mkdir -p build_deb && cd build_deb
  - cmake -G "Unix Makefiles" -DWITH_TESTS=YES ..
  - make VERBOSE=1
  - make install DESTDIR=install
-  - cd tests && ctest -V
-  - cd ../../
-  - rm -rf build && mkdir -p build && cd build
+  - ctest -V
+  - cd ../
+  - rm -rf build_deb && mkdir -p build_deb && cd build_deb
  - CXX="clang++" cmake -G "Unix Makefiles" -DWITH_TESTS=YES ..
  - make VERBOSE=1
  - make install DESTDIR=install
-  - cd tests && ctest -V
+  - ctest -V
  volumes:
-  - name: debian-package-cache
+  - name: deb-package-cache
    path: /var/cache/apt/archives
+  depends_on:
+    - GCC 10 / clang 11 (debug)
+    - Download CMake 3.12 installer
+
+- name: GCC 9
+  image: opensuse/leap:15
+  pull: always
+  environment:
+    CXX: g++-9
+    CXXFLAGS: -pipe -O2
+    LANG: C.UTF-8
+  commands:
+  - zypper --non-interactive modifyrepo --all --keep-packages
+  - zypper --non-interactive install cmake gcc9-c++ rpm-build
+  - zypper --non-interactive install Catch2-devel libboost_program_options1_75_0-devel libboost_locale1_75_0-devel libboost_log1_75_0-devel fmt-devel libarchive-devel pugixml-devel nlohmann_json-devel asciidoc
+  - rm -rf build_rpm && mkdir -p build_rpm && cd build_rpm
+  - cmake -G "Unix Makefiles" -DWITH_TESTS=YES ..
+  - make VERBOSE=1
+  - make install DESTDIR=install
+  - ctest -V
+  volumes:
+  - name: rpm-package-cache
+    path: /var/cache/zypp/packages

 - name: notify
  image: drillster/drone-email
@ -90,3 +122,210 @@ steps:
      from_secret: email_password
  when:
    status: [ changed, failure ]
+  depends_on:
+    - GCC 10 / clang 11 (debug)
+    - Download CMake 3.12 installer
+    - GCC 9
+    - GCC 8 / clang 6
+
+---
+name: Packages x86_64
+kind: pipeline
+type: docker
+
+# Host-side package caches that survive between pipeline runs. Every volume
+# that a step mounts by name has to be declared here.
+volumes:
+- name: deb-package-cache
+  host:
+    path: /var/cache/deb-package-cache
+# Mounted by the "openSUSE Leap 15" step at /var/cache/zypp/packages.
+- name: rpm-package-cache
+  host:
+    path: /var/cache/rpm-package-cache
+
+trigger:
+  event:
+  - tag
+
+steps:
+- name: Debian bullseye
+  image: debian:bullseye-slim
+  pull: always
+  environment:
+    CXX: g++-10
+    CXXFLAGS: -pipe -O2
+    DEBIAN_FRONTEND: noninteractive
+    LANG: C.UTF-8
+  commands:
+  - rm /etc/apt/apt.conf.d/docker-clean
+  - alias apt-get='rm -f /var/cache/apt/archives/lock && apt-get'
+  - apt-get update -q
+  - apt-get install -qq build-essential cmake clang locales lsb-release file
+  - apt-get install -qq libboost-program-options-dev libboost-locale-dev libboost-regex-dev libboost-log-dev gettext libarchive-dev libfmt-dev libpugixml-dev nlohmann-json3-dev
+  - apt-get install -qq --no-install-recommends asciidoc xsltproc
+  - rm -rf build_deb && mkdir -p build_deb && cd build_deb
+  - cmake -G "Unix Makefiles" -DCMAKE_INSTALL_PREFIX=/usr ..
+  - make VERBOSE=1
+  - make install DESTDIR=install
+  - cpack -G DEB
+  - cp -v epubgrep_${DRONE_TAG}-0_amd64_bullseye.deb ..
+  volumes:
+  - name: deb-package-cache
+    path: /var/cache/apt/archives
+
+- name: Debian buster
+  image: debian:buster-slim
+  pull: always
+  environment:
+    CXX: g++-8
+    CXXFLAGS: -pipe -O2
+    DEBIAN_FRONTEND: noninteractive
+    LANG: C.UTF-8
+    nlohmann_json_DIR: "/usr/lib/cmake"
+  commands:
+  - rm /etc/apt/apt.conf.d/docker-clean
+  - alias apt-get='rm -f /var/cache/apt/archives/lock && apt-get'
+  - apt-get update -q
+  - apt-get install -qq build-essential cmake clang locales lsb-release file
+  - apt-get install -qq libboost-program-options-dev libboost-locale-dev libboost-regex-dev libboost-log-dev gettext libarchive-dev libfmt-dev libpugixml-dev nlohmann-json-dev
+  - apt-get install -qq --no-install-recommends asciidoc xsltproc
+  - rm -rf build_deb && mkdir -p build_deb && cd build_deb
+  - cmake -G "Unix Makefiles" -DCMAKE_INSTALL_PREFIX=/usr ..
+  - make VERBOSE=1
+  - make install DESTDIR=install
+  - cpack -G DEB
+  - cp -v epubgrep_${DRONE_TAG}-0_amd64_buster.deb ..
+  volumes:
+  - name: deb-package-cache
+    path: /var/cache/apt/archives
+  depends_on:
+    - Debian bullseye
+
+- name: Ubuntu focal
+  image: ubuntu:focal
+  pull: always
+  environment:
+    CXX: g++-9
+    CXXFLAGS: -pipe -O2
+    DEBIAN_FRONTEND: noninteractive
+    LANG: C.UTF-8
+  commands:
+  - rm /etc/apt/apt.conf.d/docker-clean
+  - alias apt-get='rm -f /var/cache/apt/archives/lock && apt-get'
+  - apt-get update -q
+  - apt-get install -qq build-essential cmake clang locales lsb-release file
+  - apt-get install -qq libboost-program-options-dev libboost-locale-dev libboost-regex-dev libboost-log-dev gettext libarchive-dev libfmt-dev libpugixml-dev nlohmann-json3-dev
+  - apt-get install -qq --no-install-recommends asciidoc xsltproc
+  - rm -rf build_deb && mkdir -p build_deb && cd build_deb
+  - cmake -G "Unix Makefiles" -DCMAKE_INSTALL_PREFIX=/usr ..
+  - make VERBOSE=1
+  - make install DESTDIR=install
+  - cpack -G DEB
+  - cp -v epubgrep_${DRONE_TAG}-0_amd64_focal.deb ..
+  volumes:
+  - name: deb-package-cache
+    path: /var/cache/apt/archives
+  depends_on:
+    - Debian bullseye
+    - Debian buster
+
+- name: Download CMake 3.12 installer
+  image: plugins/download
+  settings:
+    source: https://cmake.org/files/v3.12/cmake-3.12.0-Linux-x86_64.sh
+    destination: cmake_installer.sh
+    skip_verify: true
+    sha256: 052b7daa2adab40211c6644da200ef95096d2adbcebd4cc5e60230d9023168bd
+
+- name: Ubuntu bionic
+  image: ubuntu:bionic
+  pull: always
+  environment:
+    CXX: g++-8
+    CXXFLAGS: -pipe -O2
+    DEBIAN_FRONTEND: noninteractive
+    LANG: C.UTF-8
+    pugixml_DIR: "/usr/share/libpugixml-dev/cmake"
+    nlohmann_json_DIR: "/usr/lib/cmake"
+  commands:
+  - rm /etc/apt/apt.conf.d/docker-clean
+  - alias apt-get='rm -f /var/cache/apt/archives/lock && apt-get'
+  - apt-get update -q
+  - apt-get install -qq g++-8 build-essential clang locales lsb-release file
+  - apt-get install -qq libboost-program-options-dev libboost-locale-dev libboost-regex-dev libboost-log-dev gettext libarchive-dev libfmt-dev libpugixml-dev nlohmann-json-dev
+  - apt-get install -qq --no-install-recommends asciidoc xsltproc
+  - sh cmake_installer.sh --skip-license --exclude-subdir --prefix=/usr/local
+  - cp /usr/lib/x86_64-linux-gnu/libpugixml* /lib/x86_64-linux-gnu/
+  - rm -rf build_deb && mkdir -p build_deb && cd build_deb
+  - cmake -G "Unix Makefiles" -DCMAKE_INSTALL_PREFIX=/usr ..
+  - make VERBOSE=1
+  - make install DESTDIR=install
+  - cpack -G DEB
+  - cp -v epubgrep_${DRONE_TAG}-0_amd64_bionic.deb ..
+  volumes:
+  - name: deb-package-cache
+    path: /var/cache/apt/archives
+  depends_on:
+    - Debian bullseye
+    - Debian buster
+    - Ubuntu focal
+    - Download CMake 3.12 installer
+
+- name: openSUSE Leap 15
+  image: opensuse/leap:15
+  pull: always
+  environment:
+    CXX: g++-9
+    CXXFLAGS: -pipe -O2
+    LANG: C.UTF-8
+  commands:
+  - zypper --non-interactive modifyrepo --all --keep-packages
+  - zypper --non-interactive install cmake gcc9-c++ rpm-build lsb-release
+  - zypper --non-interactive install libboost_program_options1_75_0-devel libboost_locale1_75_0-devel libboost_log1_75_0-devel fmt-devel libarchive-devel pugixml-devel nlohmann_json-devel asciidoc
+  - rm -rf build_rpm && mkdir -p build_rpm && cd build_rpm
+  - cmake -G "Unix Makefiles" -DCMAKE_INSTALL_PREFIX=/usr ..
+  - make VERBOSE=1
+  - make install DESTDIR=install
+  - cpack -G RPM
+  - cp -v epubgrep-${DRONE_TAG}-0.x86_64.opensuse-$(lsb_release --release --short).rpm ..
+  volumes:
+  - name: rpm-package-cache
+    path: /var/cache/zypp/packages
+
+# Upload the packages that the build steps copied into the workspace root as a
+# pre-release for the pushed tag, together with sha512 checksums.
+- name: gitea_release
+  image: plugins/gitea-release
+  pull: always
+  settings:
+    base_url: https://schlomp.space
+    api_key:
+      from_secret: gitea_token
+    title: ${DRONE_TAG}
+    prerelease: true
+    files:
+      - epubgrep_${DRONE_TAG}-0_amd64_buster.deb
+      - epubgrep_${DRONE_TAG}-0_amd64_bullseye.deb
+      - epubgrep_${DRONE_TAG}-0_amd64_focal.deb
+      - epubgrep_${DRONE_TAG}-0_amd64_bionic.deb
+      # Plugin settings are not passed through a shell, so a $(...) command
+      # substitution would be taken literally. Match the openSUSE release
+      # suffix written by the "openSUSE Leap 15" step with a glob instead.
+      - epubgrep-${DRONE_TAG}-0.x86_64.opensuse-*.rpm
+    checksum:
+      - sha512
+  depends_on:
+    - Debian bullseye
+    - Debian buster
+    - Ubuntu focal
+    - Ubuntu bionic
+    - openSUSE Leap 15
+
+# Send an e-mail when the pipeline fails or its status changes.
+- name: notification
+  image: drillster/drone-email
+  pull: always
+  settings:
+    host: mail.tzend.de
+    from: drone@tzend.de
+    username:
+      from_secret: email_username
+    password:
+      from_secret: email_password
+  # `when` is a step-level key. Nested under `settings` it is handed to the
+  # plugin as a setting and the status filter never takes effect.
+  when:
+    status: [ changed, failure ]
+  # Wait for every build and release step so that a failure in any of them is
+  # reported.
+  depends_on:
+    - Debian bullseye
+    - Debian buster
+    - Ubuntu focal
+    - Download CMake 3.12 installer
+    - Ubuntu bionic
+    - openSUSE Leap 15
+    - gitea_release
--- a/.editorconfig
+++ b/.editorconfig
@ -18,7 +18,3 @@ tab_width = 4
 [{CMakeLists.txt,*.cmake}]
 indent_size = 2
 tab_width = 2
-
-[{*.qml,*.qrc}]
-indent_size = 4
-tab_width = 4
--- a/.gitignore
+++ b/.gitignore
@ -3,3 +3,5 @@
 /examples/example99*
 /translations/*.pot
 /translations/de
+/CMakeUserPresets.json
+/launch.json
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -1,12 +1,13 @@
-cmake_minimum_required(VERSION 3.12...3.18)
+cmake_minimum_required(VERSION 3.12...3.20)

 # Global build options.
 set(CMAKE_BUILD_TYPE "Release" CACHE STRING "The type of build.")
 set(XGETTEXT_CMD "xgettext" CACHE STRING "The command for xgettext.")

 project(epubgrep
-  VERSION 0.2.0
+  VERSION 0.6.2
  DESCRIPTION "Search tool for EPUB e-books"
+  HOMEPAGE_URL "https://schlomp.space/tastytea/epubgrep"
  LANGUAGES CXX)

 list(APPEND CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/cmake")
@ -14,6 +15,7 @@ list(APPEND CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/cmake")
 # Project build options.
 option(WITH_TESTS "Compile tests." NO)
 option(FALLBACK_BUNDLED "Fall back to bundled libs." YES)
+option(WITH_SANITIZERS "Use sanitizers in debug builds." NO)

 set(CMAKE_CXX_STANDARD 17)
 set(CMAKE_CXX_STANDARD_REQUIRED ON)
@ -22,7 +24,9 @@ set(CMAKE_CXX_EXTENSIONS OFF)
 include(cmake/debug_flags.cmake)

 # All dependencies except test dependencies.
-find_package(Boost 1.65.0 REQUIRED COMPONENTS locale program_options regex)
+set(CMAKE_FIND_PACKAGE_PREFER_CONFIG TRUE)
+find_package(Boost 1.65.0 REQUIRED
+  COMPONENTS locale log_setup log program_options regex)
 find_package(Gettext REQUIRED)
 find_package(Filesystem REQUIRED COMPONENTS Final Experimental)
 find_package(LibArchive 3.2 REQUIRED)
@ -30,18 +34,25 @@ find_package(fmt 4 REQUIRED CONFIG)
 find_package(termcolor CONFIG)
 if(NOT termcolor_FOUND)
  if(FALLBACK_BUNDLED)
+    message(STATUS "Using bundled version of Termcolor.")
    add_subdirectory(dist/termcolor EXCLUDE_FROM_ALL)
  else()
    message(FATAL_ERROR "Termcolor was not found.")
  endif()
 endif()
+find_package(Threads REQUIRED)
+find_package(pugixml REQUIRED CONFIG)
+find_package(nlohmann_json REQUIRED CONFIG)

 add_subdirectory(src)

 if(WITH_TESTS)
+  include(CTest)
  add_subdirectory(tests)
 endif()

 add_subdirectory(translations)

 add_subdirectory(man)
+
+include(cmake/packages.cmake)
--- a/CMakePresets.json
+++ b/CMakePresets.json
@ -0,0 +1,62 @@
+{
+    "version": 2,
+    "cmakeMinimumRequired": {
+        "major": 3,
+        "minor": 20,
+        "patch": 0
+    },
+    "configurePresets": [
+        {
+            "name": "common",
+            "hidden": true,
+            "generator": "Unix Makefiles",
+            "binaryDir": "build",
+            "cacheVariables": {
+                "CMAKE_EXPORT_COMPILE_COMMANDS": true
+            }
+        },
+        {
+            "name": "dev",
+            "displayName": "Developer config",
+            "description": "Build with debug symbols and tests enabled",
+            "inherits": "common",
+            "cacheVariables": {
+                "CMAKE_BUILD_TYPE": "Debug",
+                "WITH_TESTS": true,
+                "WITH_SANITIZERS": false
+            }
+        },
+        {
+            "name": "dev_san",
+            "displayName": "Developer config, with sanitizers",
+            "description": "Build with debug symbols, tests enabled and sanitizers enabled",
+            "inherits": "dev",
+            "cacheVariables": {
+                "WITH_SANITIZERS": true
+            }
+        },
+        {
+            "name": "release",
+            "displayName": "Release config",
+            "description": "Build without debug symbols or tests",
+            "inherits": "common",
+            "cacheVariables": {
+                "CMAKE_BUILD_TYPE": "Release",
+                "WITH_TESTS": false
+            }
+        }
+    ],
+    "testPresets": [
+        {
+            "name": "default",
+            "configurePreset": "dev",
+            "output": {
+                "outputOnFailure": true
+            },
+            "execution": {
+                "noTestsAction": "error",
+                "stopOnFailure": true
+            }
+        }
+    ]
+}
--- a/CONTRIBUTING.adoc
+++ b/CONTRIBUTING.adoc
@ -18,7 +18,10 @@ Read the link:{uri-coc}[Code of Conduct].
 Before reporting a bug, please
 https://schlomp.space/tastytea/{project}/issues[perform a search] to see if the
 problem has already been reported. If it has, add a comment to the existing
-issue instead of opening a new one. Same for enhancements.
+issue instead of opening a new one. Same for enhancements. It is helpful to
+re-run the command that produces the bug with `--debug` and attach the log
+file to the bug report. The log file of the last run is usually in
+`~/.local/state/epubgrep/epubgrep.log`.

 You can also contact me via mailto:{contact-email}[E-Mail],
 link:xmpp:{contact-xmpp}[XMPP] or the {contact-fediverse}[Fediverse] if you
@ -42,9 +45,10 @@ using link:{uri-git-send-email}[git send-email].
 ==== Creating a new translation

 To create a new translation, copy `translations/{project}.pot` to
-`translations/[LANGUAGE ABBREVIATION].po`, (replace [LANGUAGE ABBREVIATION] with
+`translations/[LANGUAGE ABBREVIATION].po` (replace [LANGUAGE ABBREVIATION] with
 the correct abbreviation for your language, e.g. “es” if you are translating
-into Spanish or “pt_BR” if you are translating into Brazilian Portuguese).
+into Spanish or “pt_BR” if you are translating into Brazilian Portuguese). The
+pot-file is generated during the build process.

 Open your new po file with your favourite text editor, (or you may prefer to use
 a translation catalog editor like link:{uri-poedit}[Poedit]), and first edit the
@ -59,9 +63,8 @@ directory. Then do the following:

 [source,shell]
 --------------------------------------------------------------------------------
-cd build
-cmake --build .
-cd ../translations
+cmake --build build
+cd translations
 msgmerge --update es.po epubgrep.pot
 --------------------------------------------------------------------------------

--- a/10
+++ b/10
@ -39,3 +39,13 @@ epubgrep makes direct use of the following libraries and programs:
    From: Ihor Kalnytskyi
          https://termcolor.readthedocs.io/
    License: BSD-3-Clause
+
+  pugixml
+    From: pugixml community
+          https://pugixml.org/
+    License: MIT
+
+  nlohmann-json
+    From: Niels Lohmann and community
+          https://github.com/nlohmann/json
+    License: MIT
--- a/README.adoc
+++ b/README.adoc
@ -15,6 +15,8 @@
 :uri-fmt: https://github.com/fmtlib/fmt
 :uri-asciidoc: http://asciidoc.org/
 :uri-termcolor: https://termcolor.readthedocs.io/
+:uri-pugixml: https://pugixml.org/
+:uri-json: https://nlohmann.github.io/json/

 :license: https://schlomp.space/tastytea/{project}/src/branch/main/LICENSE
 :license-termcolor: https://schlomp.space/tastytea/{project}/src/branch/main/dist/termcolor/LICENSE
@ -24,14 +26,13 @@ on whole files. All newlines will be replaced by spaces and HTML will be
 stripped. This means you can search for text spanning multiple lines and don't
 have to worry about HTML tags in the text.

-{project} is licensed under the
-link:{license}[AGPL-3.0-only]
-license. The bundled link:{uri-termcolor}[Termcolor] is licensed under the
+{project} is licensed under the link:{license}[AGPL-3.0-only]. The bundled
+link:{uri-termcolor}[Termcolor] is licensed under the
 link:{license-termcolor}[BSD-3-Clause] license.

 == Usage

-[alt="Screenshot of epupgrep, showing the output of 2 book searches."]
+[alt="Screenshot of epubgrep, showing the output of 2 book searches."]
 image::{uri-base}/raw/branch/main/screenshot.png[]

 See
@ -47,30 +48,78 @@ image::https://repology.org/badge/vertical-allrepos/epubgrep.svg[]

 [source,shell]
 --------------------------------------------------------------------------------
-eselect repository enable guru
-echo 'app-text/epubgrep' >> /etc/portage/package.accept_keywords/epubgrep
-emaint sync -r guru
-emerge -a app-text/epubgrep
+sudo eselect repository enable guru
+echo 'app-text/epubgrep' | sudo tee -a /etc/portage/package.accept_keywords/epubgrep
+sudo emaint sync -r guru
+sudo emerge -a app-text/epubgrep
 --------------------------------------------------------------------------------

+=== Debian and Ubuntu
+
+[source,shell]
+--------------------------------------------------------------------------------
+wget -O - https://tastytea.de/tastytea.asc | sudo apt-key add -
+sudo add-apt-repository 'deb https://apt.schlomp.space/[code name] [code name] main'
+sudo apt install epubgrep
+--------------------------------------------------------------------------------
+
+Replace _[code name]_ with the code name of your installation. Packages are
+available for *bullseye* (Debian 11), *buster* (Debian 10), *focal* (Ubuntu
+20.04) and *bionic* (Ubuntu 18.04).
+
+[TIP]
+If you get the error message that `add-apt-repository` was not found, install
+`software-properties-common`.
+
 === From source

 ==== Dependencies

 * Tested OS: Linux
-* C\++ compiler with C++17 support (tested: link:{uri-gcc}[GCC] 7/10,
+* C\++ compiler with C++17 support (tested: link:{uri-gcc}[GCC] 8/9/10,
  link:{uri-clang}[clang] 6/11)
 * link:{uri-cmake}[CMake] (at least: 3.12)
 * link:{uri-boost}[Boost] (tested: 1.75.0 / 1.65.0)
 * link:{uri-gettext}[gettext] (tested: 0.21 / 0.19)
 * link:{uri-libarchive}[libarchive] (tested: 3.5 / 3.2)
 * link:{uri-fmt}[fmt] (tested: 7.0 / 4.0)
-* link:{uri-asciidoc}[AsciiDoc] (tested: 8.6 / 9.0)
+* link:{uri-asciidoc}[AsciiDoc] (tested: 9.0 / 8.6)
 * link:{uri-termcolor}[Termcolor] (tested: 2.0) (If not found, the bundled
  version is used.)
+* link:{uri-pugixml}[pugixml] (tested: 1.11 / 1.8)
+* link:{uri-json}[nlohmann_json] (tested: 3.9 / 2.1)
 * Optional
  ** Tests: link:{uri-catch}[Catch] (tested: 2.13 / 1.10)

+===== Install dependencies in Debian or Ubuntu
+
+Or distributions that are derived from Debian or Ubuntu. You will need at least
+Debian buster (10) or Ubuntu focal (20.04).
+
+[source,shell]
+--------------------------------------------------------------------------------
+sudo apt install build-essential cmake libboost-program-options-dev \
+                 libboost-locale-dev libboost-regex-dev libboost-log-dev \
+                 gettext libarchive-dev libfmt-dev asciidoc libpugixml-dev \
+                 nlohmann-json-dev
+--------------------------------------------------------------------------------
+
+[TIP]
+If `nlohmann-json-dev` can not be found, try `nlohmann-json3-dev`.
+
+===== Install dependencies in openSUSE
+
+Tested on openSUSE Leap 15.3.
+
+[source,shell]
+--------------------------------------------------------------------------------
+sudo zypper install cmake gcc10-c++ rpm-build \
+                    libboost_program_options1_75_0-devel \
+                    libboost_locale1_75_0-devel libboost_log1_75_0-devel \
+                    fmt-devel libarchive-devel pugixml-devel \
+                    nlohmann_json-devel asciidoc
+--------------------------------------------------------------------------------
+
 ==== Get sourcecode

 ===== Release
@ -86,18 +135,59 @@ git clone https://schlomp.space/tastytea/epubgrep.git

 ==== Compile

+In a terminal, go to the directory where you unpacked / cloned the source code
+and then:
+
 [source,shell]
 --------------------------------------------------------------------------------
-mkdir -p build && cd build
-cmake ..
-cmake --build . -- --jobs=$(nproc --ignore=1)
-# cd tests && ctest
+cmake -S . -B build
+cmake --build build --parallel $(nproc --ignore=1)
 --------------------------------------------------------------------------------

+To install, run `sudo cmake --install build`. To run the tests, run `ctest
+--test-dir build`.
+
+[TIP]
+If you are using Debian or Ubuntu, or a distribution that is derived from these,
+you can run `cpack -G DEB` in the build directory to generate a .deb-file. You
+can then install it with `+++apt install ./epubgrep_*.deb+++`.
+If you are using a distribution that uses RPM packages, like openSUSE or Fedora,
+you can generate a package with `cpack -G RPM` and install it with `+++zypper
+install ./epubgrep-*.rpm+++` or `+++dnf install ./epubgrep-*.rpm+++`.
+
 .CMake options:
 * `-DCMAKE_BUILD_TYPE=Debug` for a debug build.
 * `-DWITH_TESTS=YES` if you want to compile the tests.
 * `-DXGETTEXT_CMD=String` The program to use instead of `xgettext`.
 * `-DFALLBACK_BUNDLED=NO` if you don't want to fall back on bundled libraries.
+* `-DWITH_SANITIZERS=YES` to use sanitizers in debug builds.
+
+== Similar projects
+
+* link:https://github.com/phiresky/ripgrep-all[ripgrep-all] can search EPUB
+  files and strips HTML, but does not display page numbers or headings.
+* zipgrep from link:http://infozip.sourceforge.net/[unzip] can search EPUB files
+  but does not strip HTML and does not display page numbers or headings.
+
+== Performance
+
+A test with a directory containing 3333 EPUBs and 6269 files in total showed
+this difference between epubgrep-0.6.2 and ripgrep-all-0.9.6:
+
+[source,shellsession]
+--------------------------------------------------------------------------------
+% hyperfine "epubgrep 'floor' ~/Books" "rga 'floor' ~/Books"
+Benchmark #1: epubgrep 'floor' ~/Books
+  Time (mean ± σ):     167.246 s ±  3.848 s    [User: 176.251 s, System: 79.107 s]
+  Range (min … max):   161.533 s … 173.647 s    10 runs
+
+Benchmark #2: rga 'floor' ~/Books
+  Time (mean ± σ):      9.219 s ±  0.506 s    [User: 17.540 s, System: 12.773 s]
+  Range (min … max):    8.571 s …  9.923 s    10 runs
+
+Summary
+  'rga 'floor' ~/Books' ran
+   18.14 ± 1.08 times faster than 'epubgrep 'floor' ~/Books'
+--------------------------------------------------------------------------------

 include::{uri-base}/raw/branch/main/CONTRIBUTING.adoc[]
--- a/cmake/debug_flags.cmake
+++ b/cmake/debug_flags.cmake
@ -24,9 +24,13 @@ if(CMAKE_CXX_COMPILER_ID MATCHES "GNU" OR CMAKE_CXX_COMPILER_ID MATCHES "Clang"
    "-Wdouble-promotion"
    "-Wformat=2"
    "-ftrapv"
-    "-fsanitize=undefined"
    "-Og"
    "-fno-omit-frame-pointer")
+  if(WITH_SANITIZERS)
+    list(APPEND tmp_CXXFLAGS
+      "-fsanitize=undefined"
+      "-fsanitize=address")
+  endif()
  if(CMAKE_CXX_COMPILER_ID MATCHES "GNU")
    list(APPEND tmp_CXXFLAGS
      "-Wlogical-op"
@ -44,8 +48,11 @@ if(CMAKE_CXX_COMPILER_ID MATCHES "GNU" OR CMAKE_CXX_COMPILER_ID MATCHES "Clang"
  endif()
  add_compile_options("$<$<CONFIG:Debug>:${tmp_CXXFLAGS}>")

-  list(APPEND tmp_LDFLAGS
-    "-fsanitize=undefined")
+  if(WITH_SANITIZERS)
+    list(APPEND tmp_LDFLAGS
+      "-fsanitize=undefined"
+      "-fsanitize=address")
+  endif()
  # add_link_options was introduced in version 3.13.
  if(${CMAKE_VERSION} VERSION_LESS 3.13)
    set(CMAKE_SHARED_LINKER_FLAGS_DEBUG "${tmp_LDFLAGS}")
--- a/cmake/packages.cmake
+++ b/cmake/packages.cmake
@ -0,0 +1,57 @@
+include(GNUInstallDirs)
+
+set(CPACK_RESOURCE_FILE_LICENSE "${PROJECT_SOURCE_DIR}/LICENSE")
+set(CPACK_PACKAGE_CONTACT "tastytea <tastytea@tastytea.de>")
+
+# Should be set automatically, but they are not.
+set(CPACK_PACKAGE_NAME "${PROJECT_NAME}")
+set(CPACK_PACKAGE_VERSION "${PROJECT_VERSION}")
+set(CPACK_PACKAGE_DESCRIPTION_SUMMARY "${CMAKE_PROJECT_DESCRIPTION}")
+
+# DEB
+# Figure out dependencies automatically.
+set(CPACK_DEBIAN_PACKAGE_SHLIBDEPS ON)
+
+# Should be set automatically, but it is not.
+execute_process(COMMAND dpkg --print-architecture
+  OUTPUT_VARIABLE CPACK_DEBIAN_PACKAGE_ARCHITECTURE
+  OUTPUT_STRIP_TRAILING_WHITESPACE)
+
+# Ask lsb_release for the distribution code name; it is used as the last
+# component of the .deb file name below.
+execute_process(COMMAND lsb_release --codename --short
+  OUTPUT_VARIABLE DEBIAN_CODENAME
+  OUTPUT_STRIP_TRAILING_WHITESPACE)
+# "n/a" means no code name is available. The variable stays empty when
+# lsb_release is missing or fails. Use a placeholder in both cases so the
+# file name never ends in "_.deb".
+if("${DEBIAN_CODENAME}" STREQUAL "n/a" OR "${DEBIAN_CODENAME}" STREQUAL "")
+  set(DEBIAN_CODENAME "unknown")
+endif()
+
+# The default does not produce valid Debian package names.
+set(CPACK_DEBIAN_FILE_NAME
+  "${CPACK_PACKAGE_NAME}_${CPACK_PACKAGE_VERSION}-0_${CPACK_DEBIAN_PACKAGE_ARCHITECTURE}_${DEBIAN_CODENAME}.deb")
+
+# RPM
+set(CPACK_RPM_PACKAGE_LICENSE "AGPL-3")
+
+# Figure out dependencies automatically.
+set(CPACK_RPM_PACKAGE_AUTOREQ ON)
+
+# Should be set automatically, but it is not.
+execute_process(COMMAND uname -m
+  OUTPUT_VARIABLE CPACK_RPM_PACKAGE_ARCHITECTURE
+  OUTPUT_STRIP_TRAILING_WHITESPACE)
+
+set(CPACK_PACKAGE_FILE_NAME
+  "${CPACK_PACKAGE_NAME}-${CPACK_PACKAGE_VERSION}-0.${CPACK_RPM_PACKAGE_ARCHITECTURE}")
+
+execute_process(COMMAND lsb_release --id --short
+  OUTPUT_VARIABLE OS
+  OUTPUT_STRIP_TRAILING_WHITESPACE)
+
+if("${OS}" STREQUAL "openSUSE")
+  execute_process(COMMAND lsb_release --release --short
+    OUTPUT_VARIABLE OS_RELEASE
+    OUTPUT_STRIP_TRAILING_WHITESPACE)
+  set(CPACK_PACKAGE_FILE_NAME
+    "${CPACK_PACKAGE_NAME}-${CPACK_PACKAGE_VERSION}-0.${CPACK_RPM_PACKAGE_ARCHITECTURE}.opensuse-${OS_RELEASE}")
+endif()
+
+include(CPack)
--- a/man/epubgrep.1.adoc
+++ b/man/epubgrep.1.adoc
@ -2,18 +2,18 @@
 :doctype:       manpage
 :Author:        tastytea
 :Email:         tastytea@tastytea.de
-:Date:          2021-05-25
+:Date:          2021-07-02
 :Revision:      0.0.0
 :man source:    epubgrep
 :man manual:    General Commands Manual

 == NAME

-epubgrep - Search tool for EPUB ebooks.
+epubgrep - Search tool for EPUB e-books.

 == SYNOPSIS

-*epubgrep* [_OPTION_]… _PATTERN_ [_FILE_]…
+*epubgrep* [_OPTION_]… _PATTERN_ _FILE_…

 == DESCRIPTION

@ -21,14 +21,44 @@ epubgrep - Search tool for EPUB ebooks.
 for command line switches where possible. However, not all grep switches are
 implemented and some additional switches are added.

+This manual is also available at
+<https://man.schlomp.space/tastytea/?program=epubgrep>.
+
+== EXAMPLES
+
+.Search for Apple(s) or Orange(s) with 2 words of context around the matches, case insensitively
+[source,shell]
+--------------------------------------------------------------------------------
+epubgrep -PiC2 '(Apple|Orange)s?' file.epub
+--------------------------------------------------------------------------------
+
+.Extract external hyperlinks
+[source,shell]
+--------------------------------------------------------------------------------
+epubgrep -PC0 --raw --no-filename=all '"http[^"]+"' file.epub | tr -d '"'
+--------------------------------------------------------------------------------
+
+.Save the search results to an HTML file and output a status message every 20 seconds
+[source,shell]
+--------------------------------------------------------------------------------
+epubgrep -C2 --status --status-interval=20 --html 'Apples' file.epub > result.html
+--------------------------------------------------------------------------------
+
 == OPTIONS

+=== General options
+
 *-h*, *--help*::
 Display a short help message and exit.

 *-V*, *--version*::
 Show version, copyright and license.

+*--debug*::
+Write debug output to the terminal and log file.
+
+=== Search options
+
 *-G*, *--basic-regexp*::
 _PATTERN_ is a POSIX basic regular expression. This is the default.

@ -46,17 +76,56 @@ _PATTERN_ is a Perl regular expression.
 *-i*, *--ignore-case*::
 Ignore case distinctions in pattern and data.

+*-a*, *--raw*::
+Do not clean up text before searching. No HTML stripping, no newline removal,
+all files will be read (not just the text documents listed in the spine).
+
+*-r*, *--recursive*::
+Read all files under each directory, recursively, following symbolic links only
+if they are on the command line. Silently skips directories that are not
+readable by the user.
+
+*-R*, *--dereference-recursive*::
+Read all files under each directory, recursively. Follow all symbolic
+links. Silently skips directories that are not readable by the user.
+
 *-e* _PATTERN_, *--regexp* _PATTERN_::
 Use additional _PATTERN_ for matching. Can be used more than once.

-*-a*, *--raw*::
-Do not clean up text before searching. No HTML stripping, no newline removal.
+=== Output options

 *-C* _NUMBER_, *--context* _NUMBER_::
 Print _NUMBER_ words of context around matches.

 *--nocolor*::
-Do not color matches.
+Turn off colors and other decorations.
+
+*--no-filename* _WHICH_::
+Suppress the mentioning of file names on output. _WHICH_ is ‘filesystem’ for the
+file names on your file system, ‘in-epub’ for the file names inside the EPUB or
+‘all’ for both. Chapters and page numbers will still be output.
+
+*--ignore-archive-errors*::
+Ignore errors about wrong file formats. When you search directories recursively,
+it is likely that there are files which are not EPUB files. This setting
+suppresses errors related to them.
+
+*--json*::
+Output JSON instead of plain text. JSON will only be output at the end of the
+program. There will be an object named `generator` with the property
+`epubgrep`. The value is the version of the program, as a string. The matches are
+in an array named `matches`. I will try not to break the API. 😊
+
+*--html*::
+Output HTML instead of plain text. HTML will only be output at the end of the
+program.
+
+*--status*::
+Output a status message to standard error every *--status-interval*
+seconds. The default interval is 30 seconds.
+
+*--status-interval* _NUMBER_::
+Set status message interval to _NUMBER_ seconds.

 == USAGE

@ -89,14 +158,19 @@ occur more than once are merged.

 ==== Example configuration file

-This example makes epubgrep always use Perl regular expressions and search for
+This example makes epubgrep always search directories recursively, ignore files
+which are not EPUB, not print the file names inside the EPUB, print 2 words of
+context around matches (unless overridden on the command line) and search for
 mentions of the words thyme and oregano in every book.

 [source,cfg]
 --------------------------------------------------------------------------------
-perl-regexp = 1
-regexp = \b[Tt]hyme\b
-regexp = \b[Oo]regano\b
+recursive = 1
+ignore-archive-errors = 1
+no-filename = in-epub
+context = 2
+regexp = [Tt]hyme
+regexp = [Oo]regano
 --------------------------------------------------------------------------------

 // == EXAMPLES
@ -105,10 +179,15 @@ regexp = \b[Oo]regano\b
 == FILES

 *Configuration file*::
-* If `XDG_CONFIG_HOME` is defined: `${XDG_CONFIG_HOME}/epubgrep.conf`
-* If `HOME` is defined: `${HOME}/.config/epubgrep.conf`
+* If `XDG_CONFIG_HOME` is defined: `${XDG_CONFIG_HOME}/epubgrep/epubgrep.conf`
+* If `HOME` is defined: `${HOME}/.config/epubgrep/epubgrep.conf`
 * Otherwise: `epubgrep.conf`

+*Log file*::
+* If `XDG_STATE_HOME` is defined: `${XDG_STATE_HOME}/epubgrep/epubgrep.log`
+* If `HOME` is defined: `${HOME}/.local/state/epubgrep/epubgrep.log`
+* Otherwise: `epubgrep.log`
+

 == KNOWN BUGS

--- a/screenshot.png
+++ b/screenshot.png
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@ -6,23 +6,34 @@ configure_file("fs-compat.hpp.in" "fs-compat.hpp" @ONLY)
 # The library is only here for the tests.
 add_library(${PROJECT_NAME}_lib STATIC)

-file(GLOB_RECURSE sources_src *.cpp)
-file(GLOB_RECURSE headers_src *.hpp)
+file(GLOB sources_src CONFIGURE_DEPENDS *.cpp)
+file(GLOB headers_src CONFIGURE_DEPENDS *.hpp)
 list(REMOVE_ITEM sources_src "${CMAKE_CURRENT_SOURCE_DIR}/main.cpp")

 target_sources(${PROJECT_NAME}_lib
-  PUBLIC "${sources_src}" "${headers_src}")
+  PRIVATE "${sources_src}" "${headers_src}")
 unset(sources_src)
 unset(headers_src)

+# Older CMake versions apparently need this, but I don't know in which version
+# it changed. Theoretically Boost::dynamic_linking should take care of it.
+add_compile_definitions("BOOST_LOG_DYN_LINK")
+
 target_link_libraries(${PROJECT_NAME}_lib
  PUBLIC
+  Boost::dynamic_linking
  Boost::locale
+  Boost::log_setup
+  Boost::log
  Boost::program_options
  Boost::regex
  std::filesystem
  fmt::fmt
-  termcolor::termcolor)
+  termcolor::termcolor
+  Threads::Threads
+  m
+  pugixml
+  nlohmann_json)

 if(${CMAKE_VERSION} VERSION_LESS 3.17)
  target_link_libraries(${PROJECT_NAME}_lib
--- a/src/book.cpp
+++ b/src/book.cpp
@ -0,0 +1,306 @@
+/*  This file is part of epubgrep.
+ *  Copyright © 2021 tastytea <tastytea@tastytea.de>
+ *
+ *  This program is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU Affero General Public License as published by
+ *  the Free Software Foundation, version 3.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU Affero General Public License for more details.
+ *
+ *  You should have received a copy of the GNU Affero General Public License
+ *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "book.hpp"
+
+#include "fs-compat.hpp"
+#include "helpers.hpp"
+#include "log.hpp"
+#include "zip.hpp"
+
+#include <boost/locale/message.hpp>
+#include <boost/regex.hpp>
+#include <fmt/format.h>
+#include <fmt/ostream.h> // For compatibility with fmt 4.
+#include <pugixml.hpp>
+
+#include <algorithm>
+#include <memory>
+#include <string>
+#include <string_view>
+#include <vector>
+
+namespace epubgrep::book
+{
+
+using boost::locale::translate;
+using fmt::format;
+using std::string;
+
+// Reads the EPUB at filepath and returns its documents together with the book
+// language. In raw mode the files are kept as they are, otherwise each spine
+// document is un-escaped and cleaned up with process_page().
+book read(const fs::path filepath, const bool raw)
+{
+    using helpers::unescape_html;
+
+    DEBUGLOG << "Processing book " << filepath;
+
+    // Raw mode looks at every file in the archive, otherwise only the spine.
+    const std::vector<string> epub_filepaths{raw ? zip::list(filepath)
+                                                 : list_spine(filepath)};
+
+    // Looks up <dc:language> in the OPF file, with or without the "opf:"
+    // prefix on the surrounding elements. Yields "" if it can not be read.
+    const auto detect_language{
+        [&filepath]() -> string
+        {
+            try
+            {
+                pugi::xml_document xml;
+                auto opf_file_path{get_opf_file_path(filepath)};
+                const std::string opf_file{
+                    zip::read_file(filepath, opf_file_path.string())};
+
+                const auto parsed{
+                    xml.load_buffer(opf_file.data(), opf_file.size())};
+                if (!parsed)
+                {
+                    return "";
+                }
+
+                auto lang{xml.child("package")
+                              .child("metadata")
+                              .child("dc:language")};
+                if (lang == nullptr)
+                {
+                    lang = xml.child("opf:package")
+                               .child("opf:metadata")
+                               .child("dc:language");
+                }
+                return lang.text().as_string();
+            }
+            catch (epubgrep::zip::exception &e)
+            {
+                if (e.code != 1) // 1 == container.xml not found.
+                {
+                    LOG(log::sev::error) << e.what();
+                }
+            }
+            return "";
+        }};
+
+    book current_book;
+    current_book.language = detect_language();
+    DEBUGLOG << "Book language detected: " << current_book.language;
+
+    for (const auto &entry : epub_filepaths)
+    {
+        DEBUGLOG << "Processing document " << entry;
+        document doc;
+        if (raw)
+        {
+            // Keep the file as it is and search in a copy of it.
+            doc.text_raw = zip::read_file(filepath, entry);
+            doc.text = std::make_unique<std::string>(doc.text_raw);
+        }
+        else
+        {
+            doc = process_page(unescape_html(zip::read_file(filepath, entry)));
+        }
+        doc.language = current_book.language; // FIXME: Get language of doc.
+        current_book.files.emplace_back(entry, std::move(doc));
+    }
+
+    return current_book;
+}
+
+// Turns an HTML page into plain text. Newlines and tabs become spaces,
+// carriage returns are removed, runs of spaces are collapsed and all tags are
+// stripped. While stripping, the positions (in the cleaned text) of headlines
+// (<h1> to <h6>) and of page break markers (<span> with "pagebreak" and a
+// title or aria-label attribute) are recorded in the returned document.
+document process_page(const std::string_view text)
+{
+    string output{text};
+    static const boost::regex re_header_start{"<[hH][1-6]"};
+    static const boost::regex re_header_end{"</[hH][1-6]"};
+    static const boost::regex re_pagebreak{"[^>]+pagebreak[^>]+"
+                                           "(title|aria-label)"
+                                           "=\"([[:alnum:]]+)\""};
+
+    { // Replace newlines and tabs with spaces, drop carriage returns.
+        size_t pos{0};
+        while ((pos = output.find_first_of("\n\t\r", pos)) != string::npos)
+        {
+            if (output[pos] == '\r')
+            {
+                output.erase(pos, 1);
+            }
+            else
+            {
+                output.replace(pos, 1, " ");
+            }
+        }
+    }
+    { // Collapse consecutive spaces into one.
+        size_t pos{0};
+        while ((pos = output.find("  ", pos)) != string::npos)
+        {
+            output.replace(pos, 2, " ");
+        }
+    }
+
+    size_t pos{0};
+    document doc;
+    size_t headline_start{string::npos};
+    while ((pos = output.find('<', pos)) != string::npos)
+    {
+        // One past the '>' that closes the current tag. An unterminated tag
+        // extends to the end of the text. Without this check, npos + 1 would
+        // wrap around to 0 and the <span> branch below would search in an
+        // iterator range that ends before it begins.
+        const auto tag_close{output.find('>', pos)};
+        auto endpos{tag_close == string::npos ? output.size()
+                                              : tag_close + 1};
+
+        if (boost::regex_match(output.substr(pos, 3), re_header_start))
+        {
+            // The tag is erased below, so pos will be where the text starts.
+            headline_start = pos;
+        }
+        else if (boost::regex_match(output.substr(pos, 4), re_header_end))
+        {
+            if (headline_start != string::npos)
+            {
+                doc.headlines.insert(
+                    {headline_start,
+                     output.substr(headline_start, pos - headline_start)});
+                headline_start = string::npos;
+            }
+        }
+        else if (output.substr(pos, 6) == "<span ")
+        {
+            boost::match_results<string::const_iterator> match;
+            using it_size_t = string::const_iterator::difference_type;
+            string::const_iterator begin{output.begin()
+                                         + static_cast<it_size_t>(pos)};
+            string::const_iterator end{output.begin()
+                                       + static_cast<it_size_t>(endpos)};
+
+            if (boost::regex_search(begin, end, match, re_pagebreak))
+            {
+                doc.pages.insert({pos, match[2].str()});
+            }
+        }
+        else if (output.substr(pos, 7) == "<style "
+                 || output.substr(pos, 8) == "<script ")
+        {
+            // Not self-closing: also remove the content, up to and including
+            // the next '>' (or the end of the text if there is none).
+            if (output.find("/>", pos) > endpos)
+            {
+                const auto element_close{output.find('>', endpos)};
+                endpos = (element_close == string::npos) ? output.size()
+                                                         : element_close + 1;
+            }
+        }
+
+        output.erase(pos, endpos - pos);
+    }
+
+    doc.text_cleaned = output;
+    doc.text = std::make_unique<string>(doc.text_cleaned);
+
+    return doc;
+}
+
+// Returns the last headline that starts at or before pos, or an empty string
+// if there is none.
+std::string headline(const document &doc, const size_t pos)
+{
+    // First headline that starts after pos; its predecessor is the answer.
+    auto candidate{doc.headlines.upper_bound(pos)};
+    if (candidate == doc.headlines.begin())
+    {
+        return string{};
+    }
+
+    --candidate;
+    return candidate->second;
+}
+
+// Returns the last page marker at or before pos, or an empty string if there
+// is none.
+string page(const document &doc, const size_t pos)
+{
+    // First page marker after pos; its predecessor is the current page.
+    auto candidate{doc.pages.upper_bound(pos)};
+    if (candidate == doc.pages.begin())
+    {
+        return string{};
+    }
+
+    --candidate;
+    return candidate->second;
+}
+
+// Reads META-INF/container.xml from the archive and returns the "full-path"
+// attribute of the first child of <rootfiles>. Returns an empty path (and logs
+// the parser message) if the XML can not be parsed.
+fs::path get_opf_file_path(const fs::path &zipfile)
+{
+    const std::string container_xml{
+        zip::read_file(zipfile, "META-INF/container.xml")};
+
+    pugi::xml_document xml;
+    const auto parsed{
+        xml.load_buffer(container_xml.data(), container_xml.size())};
+    if (!parsed)
+    {
+        LOG(log::sev::error) << parsed.description() << '\n';
+        return fs::path{};
+    }
+
+    const auto rootfile{
+        xml.child("container").child("rootfiles").first_child()};
+    return fs::path{rootfile.attribute("full-path").value()};
+}
+
+// Returns the paths (inside the archive) of all documents listed in the spine
+// of the OPF file, in spine order. The hrefs are URL-decoded and, unless they
+// start with '/', made relative to the directory of the OPF file. Returns an
+// empty vector and logs an error if no spine entry could be read.
+std::vector<string> list_spine(const fs::path &filepath)
+{
+    auto opf_file_path{get_opf_file_path(filepath)};
+    std::vector<std::string> spine_filepaths;
+    if (!opf_file_path.empty())
+    {
+        DEBUGLOG << "Parsing " << opf_file_path;
+        pugi::xml_document xml;
+        const std::string opf_file{
+            zip::read_file(filepath, opf_file_path.string())};
+        const auto result{xml.load_buffer(&opf_file[0], opf_file.size())};
+        if (result)
+        {
+            // The elements may or may not carry the "opf:" prefix.
+            auto manifest{xml.child("package").child("manifest")};
+            if (manifest == nullptr)
+            {
+                manifest = xml.child("opf:package").child("opf:manifest");
+            }
+            auto spine{xml.child("package").child("spine")};
+            if (spine == nullptr)
+            {
+                spine = xml.child("opf:package").child("opf:spine");
+            }
+
+            for (const auto &itemref : spine)
+            {
+                const auto &idref{itemref.attribute("idref").value()};
+                const auto &item{manifest.find_child_by_attribute("id", idref)};
+                auto href{helpers::urldecode(item.attribute("href").value())};
+                if (href.empty())
+                {
+                    // No manifest item with that id, or it has no href. There
+                    // is no file to read, and joining an empty href would only
+                    // produce the directory of the OPF file.
+                    DEBUGLOG << "Skipping spine entry without href: " << idref;
+                    continue;
+                }
+                if (href[0] != '/')
+                {
+                    href = (opf_file_path.parent_path() / href).string();
+                }
+                DEBUGLOG << "Found in spine: " << href;
+                spine_filepaths.emplace_back(href);
+            }
+        }
+        else
+        {
+            LOG(log::sev::error) << "XML: " << result.description() << '\n';
+        }
+    }
+
+    if (opf_file_path.empty() || spine_filepaths.empty())
+    {
+        LOG(log::sev::error)
+            << format(translate("{0:s} is damaged. Could not read spine. "
+                                "Skipping file.\n")
+                          .str()
+                          .c_str(),
+                      filepath.c_str());
+        return {};
+    }
+
+    return spine_filepaths;
+}
+
+} // namespace epubgrep::book
--- a/src/book.hpp
+++ b/src/book.hpp
@ -0,0 +1,73 @@
+/*  This file is part of epubgrep.
+ *  Copyright © 2021 tastytea <tastytea@tastytea.de>
+ *
+ *  This program is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU Affero General Public License as published by
+ *  the Free Software Foundation, version 3.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU Affero General Public License for more details.
+ *
+ *  You should have received a copy of the GNU Affero General Public License
+ *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef EPUBGREP_BOOK_HPP
+#define EPUBGREP_BOOK_HPP
+
+#include "fs-compat.hpp"
+
+#include <map>
+#include <memory>
+#include <string>
+#include <string_view>
+#include <utility>
+#include <vector>
+
+namespace epubgrep::book
+{
+
+using std::string;
+
+//! Document inside EPUB.
+//!
+//! NOTE(review): the 128 byte alignment is presumably there to satisfy a
+//! struct alignment lint; confirm before relying on it.
+struct document
+{
+    string text_raw;                    //!< File as read (set in raw mode)
+    string text_cleaned;                //!< Plain text from process_page()
+    std::unique_ptr<string> text;       //!< Copy of the preferred text version
+    std::map<size_t, string> headlines; //!< pos in cleaned text, title
+    std::map<size_t, string> pages;     //!< pos in cleaned text, page
+    string language;                    //!< Page language
+} __attribute__((aligned(128)));
+
+//! EPUB file.
+//!
+//! NOTE(review): alignment presumably for the same reason as in document.
+struct book
+{
+    std::vector<std::pair<string, document>> files; //!< filename, file
+    std::vector<std::pair<string, string>> toc;     //!< title, href
+    string language;                                //!< Book language
+} __attribute__((aligned(128)));
+
+//! Read and process book.
+//!
+//! If raw is false, only the documents listed in the spine are read and each
+//! one is un-escaped and cleaned up with process_page(). If raw is true, the
+//! files returned by zip::list() are read and kept unmodified. Every document
+//! gets the language of the book.
+[[nodiscard]] book read(fs::path filepath, bool raw);
+
+//! Clean up page and record headlines and page numbers.
+//!
+//! Newlines and tabs become spaces, carriage returns are removed, repeated
+//! spaces are collapsed and tags are stripped. The recorded positions refer to
+//! the cleaned text.
+[[nodiscard]] document process_page(std::string_view text);
+
+//! Return last headline if possible.
+//!
+//! That is the headline with the greatest position that is not after pos. An
+//! empty string is returned if there is none.
+[[nodiscard]] string headline(const document &doc, size_t pos);
+
+//! Return current page if possible.
+//!
+//! That is the page with the greatest position that is not after pos. An empty
+//! string is returned if there is none.
+[[nodiscard]] string page(const document &doc, size_t pos);
+
+//! Returns the file path of the OPF file in the EPUB.
+//!
+//! The path is taken from META-INF/container.xml. An empty path is returned if
+//! that file can not be parsed.
+[[nodiscard]] fs::path get_opf_file_path(const fs::path &zipfile);
+
+//! Returns the files in the EPUB “spine” (all pages that are actually text).
+//!
+//! An empty vector is returned if the spine could not be read.
+[[nodiscard]] std::vector<string> list_spine(const fs::path &filepath);
+
+} // namespace epubgrep::book
+
+#endif // EPUBGREP_BOOK_HPP
--- a/src/files.cpp
+++ b/src/files.cpp
@ -0,0 +1,53 @@
+/*  This file is part of epubgrep.
+ *  Copyright © 2021 tastytea <tastytea@tastytea.de>
+ *
+ *  This program is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU Affero General Public License as published by
+ *  the Free Software Foundation, version 3.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU Affero General Public License for more details.
+ *
+ *  You should have received a copy of the GNU Affero General Public License
+ *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "files.hpp"
+
+#include "fs-compat.hpp"
+#include "log.hpp"
+
+#include <exception>
+#include <string_view>
+#include <vector>
+
+namespace epubgrep::files
+{
+
+// Walks directory recursively and returns every entry that is not a
+// directory. Directories that may not be read are skipped. Symbolic links to
+// directories are only followed if follow_symlinks is true.
+std::vector<fs::path> list_recursive(const fs::path &directory,
+                                     const bool follow_symlinks)
+{
+    auto options{fs::directory_options::skip_permission_denied};
+    if (follow_symlinks)
+    {
+        options |= fs::directory_options::follow_directory_symlink;
+    }
+
+    std::vector<fs::path> files;
+    fs::recursive_directory_iterator walker{directory, options};
+    for (const auto &entry : walker)
+    {
+        if (entry.is_directory())
+        {
+            continue;
+        }
+        files.emplace_back(entry);
+        DEBUGLOG << "Added file: " << entry;
+    }
+
+    return files;
+}
+
+} // namespace epubgrep::files
--- a/src/files.hpp
+++ b/src/files.hpp
@ -0,0 +1,34 @@
+/*  This file is part of epubgrep.
+ *  Copyright © 2021 tastytea <tastytea@tastytea.de>
+ *
+ *  This program is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU Affero General Public License as published by
+ *  the Free Software Foundation, version 3.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU Affero General Public License for more details.
+ *
+ *  You should have received a copy of the GNU Affero General Public License
+ *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef EPUBGREP_FILES_HPP
+#define EPUBGREP_FILES_HPP
+
+#include "fs-compat.hpp"
+
+#include <string_view>
+#include <vector>
+
+namespace epubgrep::files
+{
+
+//! List files in directory recursively.
+//!
+//! Returns every entry below directory that is not itself a directory.
+//! Directories that can not be read due to missing permissions are skipped.
+//! Symbolic links to directories are only followed if follow_symlinks is true.
+[[nodiscard]] std::vector<fs::path> list_recursive(const fs::path &directory,
+                                                   bool follow_symlinks);
+
+} // namespace epubgrep::files
+
+#endif // EPUBGREP_FILES_HPP
--- a/src/helpers.cpp
+++ b/src/helpers.cpp
@ -0,0 +1,196 @@
+#include "helpers.hpp"
+
+#include <boost/regex.hpp>
+
+#include <algorithm>
+#include <array>
+#include <codecvt>
+#include <cstdlib>
+#include <locale>
+#include <map>
+#include <stdexcept>
+#include <string>
+#include <string_view>
+
+namespace epubgrep::helpers
+{
+
+// Returns true if check is a space, a newline, a carriage return or a tab.
+bool is_whitespace(const char check)
+{
+    switch (check)
+    {
+    case ' ':
+    case '\n':
+    case '\r':
+    case '\t':
+        return true;
+    default:
+        return false;
+    }
+}
+
+// Decodes percent-encoded octets ("%20" becomes ' '). A '%' that is not
+// followed by two hexadecimal digits is not a valid escape and is copied to
+// the output unchanged instead of raising an exception.
+std::string urldecode(const std::string_view url)
+{ // RFC 3986, section 2.1.
+    // Value of a hexadecimal digit, or -1 if digit is not one.
+    const auto hex_value{[](const char digit) -> int
+                         {
+                             if (digit >= '0' && digit <= '9')
+                             {
+                                 return digit - '0';
+                             }
+                             if (digit >= 'a' && digit <= 'f')
+                             {
+                                 return digit - 'a' + 10;
+                             }
+                             if (digit >= 'A' && digit <= 'F')
+                             {
+                                 return digit - 'A' + 10;
+                             }
+                             return -1;
+                         }};
+
+    size_t pos{0};
+    size_t lastpos{0};
+    std::string decoded;
+    decoded.reserve(url.size());
+    while ((pos = url.find('%', pos)) != std::string_view::npos)
+    {
+        const int high{pos + 1 < url.size() ? hex_value(url[pos + 1]) : -1};
+        const int low{pos + 2 < url.size() ? hex_value(url[pos + 2]) : -1};
+        if (high < 0 || low < 0)
+        {
+            // Malformed or truncated escape: keep the '%' literally. It is
+            // copied together with the following text because lastpos stays.
+            ++pos;
+            continue;
+        }
+
+        decoded += url.substr(lastpos, pos - lastpos);
+        decoded += static_cast<char>(high * 16 + low);
+        pos += 3;
+        lastpos = pos;
+    }
+    decoded += url.substr(lastpos);
+
+    return decoded;
+}
+
+// Replaces named (&amp;), decimal (&#32;) and hexadecimal (&#x20;) character
+// references with the UTF-8 encoding of the character they stand for.
+// References that can not be resolved (unknown name, malformed number, or a
+// number that is not an encodable code point) are copied to the output
+// unchanged.
+std::string unescape_html(const std::string_view html)
+{
+    std::string output;
+    output.reserve(html.size());
+
+    // Source: https://en.wikipedia.org/wiki/List_of_XML_and_HTML_character_
+    //         entity_references#Character_entity_references_in_HTML
+    static const std::map<std::string_view, char32_t>
+        names{{"exclamation", 0x0021}, {"quot", 0x0022},    {"percent", 0x0025},
+              {"amp", 0x0026},         {"apos", 0x0027},    {"add", 0x002B},
+              {"lt", 0x003C},          {"equal", 0x003D},   {"gt", 0x003E},
+              {"nbsp", 0x00A0},        {"iexcl", 0x00A1},   {"cent", 0x00A2},
+              {"pound", 0x00A3},       {"curren", 0x00A4},  {"yen", 0x00A5},
+              {"brvbar", 0x00A6},      {"sect", 0x00A7},    {"uml", 0x00A8},
+              {"copy", 0x00A9},        {"ordf", 0x00AA},    {"laquo", 0x00AB},
+              {"not", 0x00AC},         {"shy", 0x00AD},     {"reg", 0x00AE},
+              {"macr", 0x00AF},        {"deg", 0x00B0},     {"plusmn", 0x00B1},
+              {"sup2", 0x00B2},        {"sup3", 0x00B3},    {"acute", 0x00B4},
+              {"micro", 0x00B5},       {"para", 0x00B6},    {"middot", 0x00B7},
+              {"cedil", 0x00B8},       {"sup1", 0x00B9},    {"ordm", 0x00BA},
+              {"raquo", 0x00BB},       {"frac14", 0x00BC},  {"frac12", 0x00BD},
+              {"frac34", 0x00BE},      {"iquest", 0x00BF},  {"Agrave", 0x00C0},
+              {"Aacute", 0x00C1},      {"Acirc", 0x00C2},   {"Atilde", 0x00C3},
+              {"Auml", 0x00C4},        {"Aring", 0x00C5},   {"AElig", 0x00C6},
+              {"Ccedil", 0x00C7},      {"Egrave", 0x00C8},  {"Eacute", 0x00C9},
+              {"Ecirc", 0x00CA},       {"Euml", 0x00CB},    {"Igrave", 0x00CC},
+              {"Iacute", 0x00CD},      {"Icirc", 0x00CE},   {"Iuml", 0x00CF},
+              {"ETH", 0x00D0},         {"Ntilde", 0x00D1},  {"Ograve", 0x00D2},
+              {"Oacute", 0x00D3},      {"Ocirc", 0x00D4},   {"Otilde", 0x00D5},
+              {"Ouml", 0x00D6},        {"times", 0x00D7},   {"Oslash", 0x00D8},
+              {"Ugrave", 0x00D9},      {"Uacute", 0x00DA},  {"Ucirc", 0x00DB},
+              {"Uuml", 0x00DC},        {"Yacute", 0x00DD},  {"THORN", 0x00DE},
+              {"szlig", 0x00DF},       {"agrave", 0x00E0},  {"aacute", 0x00E1},
+              {"acirc", 0x00E2},       {"atilde", 0x00E3},  {"auml", 0x00E4},
+              {"aring", 0x00E5},       {"aelig", 0x00E6},   {"ccedil", 0x00E7},
+              {"egrave", 0x00E8},      {"eacute", 0x00E9},  {"ecirc", 0x00EA},
+              {"euml", 0x00EB},        {"igrave", 0x00EC},  {"iacute", 0x00ED},
+              {"icirc", 0x00EE},       {"iuml", 0x00EF},    {"eth", 0x00F0},
+              {"ntilde", 0x00F1},      {"ograve", 0x00F2},  {"oacute", 0x00F3},
+              {"ocirc", 0x00F4},       {"otilde", 0x00F5},  {"ouml", 0x00F6},
+              {"divide", 0x00F7},      {"oslash", 0x00F8},  {"ugrave", 0x00F9},
+              {"uacute", 0x00FA},      {"ucirc", 0x00FB},   {"uuml", 0x00FC},
+              {"yacute", 0x00FD},      {"thorn", 0x00FE},   {"yuml", 0x00FF},
+              {"OElig", 0x0152},       {"oelig", 0x0153},   {"Scaron", 0x0160},
+              {"scaron", 0x0161},      {"Yuml", 0x0178},    {"fnof", 0x0192},
+              {"circ", 0x02C6},        {"tilde", 0x02DC},   {"Alpha", 0x0391},
+              {"Beta", 0x0392},        {"Gamma", 0x0393},   {"Delta", 0x0394},
+              {"Epsilon", 0x0395},     {"Zeta", 0x0396},    {"Eta", 0x0397},
+              {"Theta", 0x0398},       {"Iota", 0x0399},    {"Kappa", 0x039A},
+              {"Lambda", 0x039B},      {"Mu", 0x039C},      {"Nu", 0x039D},
+              {"Xi", 0x039E},          {"Omicron", 0x039F}, {"Pi", 0x03A0},
+              {"Rho", 0x03A1},         {"Sigma", 0x03A3},   {"Tau", 0x03A4},
+              {"Upsilon", 0x03A5},     {"Phi", 0x03A6},     {"Chi", 0x03A7},
+              {"Psi", 0x03A8},         {"Omega", 0x03A9},   {"alpha", 0x03B1},
+              {"beta", 0x03B2},        {"gamma", 0x03B3},   {"delta", 0x03B4},
+              {"epsilon", 0x03B5},     {"zeta", 0x03B6},    {"eta", 0x03B7},
+              {"theta", 0x03B8},       {"iota", 0x03B9},    {"kappa", 0x03BA},
+              {"lambda", 0x03BB},      {"mu", 0x03BC},      {"nu", 0x03BD},
+              {"xi", 0x03BE},          {"omicron", 0x03BF}, {"pi", 0x03C0},
+              {"rho", 0x03C1},         {"sigmaf", 0x03C2},  {"sigma", 0x03C3},
+              {"tau", 0x03C4},         {"upsilon", 0x03C5}, {"phi", 0x03C6},
+              {"chi", 0x03C7},         {"psi", 0x03C8},     {"omega", 0x03C9},
+              {"thetasym", 0x03D1},    {"upsih", 0x03D2},   {"piv", 0x03D6},
+              {"ensp", 0x2002},        {"emsp", 0x2003},    {"thinsp", 0x2009},
+              {"zwnj", 0x200C},        {"zwj", 0x200D},     {"lrm", 0x200E},
+              {"rlm", 0x200F},         {"ndash", 0x2013},   {"mdash", 0x2014},
+              {"horbar", 0x2015},      {"lsquo", 0x2018},   {"rsquo", 0x2019},
+              {"sbquo", 0x201A},       {"ldquo", 0x201C},   {"rdquo", 0x201D},
+              {"bdquo", 0x201E},       {"dagger", 0x2020},  {"Dagger", 0x2021},
+              {"bull", 0x2022},        {"hellip", 0x2026},  {"permil", 0x2030},
+              {"prime", 0x2032},       {"Prime", 0x2033},   {"lsaquo", 0x2039},
+              {"rsaquo", 0x203A},      {"oline", 0x203E},   {"frasl", 0x2044},
+              {"euro", 0x20AC},        {"image", 0x2111},   {"weierp", 0x2118},
+              {"real", 0x211C},        {"trade", 0x2122},   {"alefsym", 0x2135},
+              {"larr", 0x2190},        {"uarr", 0x2191},    {"rarr", 0x2192},
+              {"darr", 0x2193},        {"harr", 0x2194},    {"crarr", 0x21B5},
+              {"lArr", 0x21D0},        {"uArr", 0x21D1},    {"rArr", 0x21D2},
+              {"dArr", 0x21D3},        {"hArr", 0x21D4},    {"forall", 0x2200},
+              {"part", 0x2202},        {"exist", 0x2203},   {"empty", 0x2205},
+              {"nabla", 0x2207},       {"isin", 0x2208},    {"notin", 0x2209},
+              {"ni", 0x220B},          {"prod", 0x220F},    {"sum", 0x2211},
+              {"minus", 0x2212},       {"lowast", 0x2217},  {"radic", 0x221A},
+              {"prop", 0x221D},        {"infin", 0x221E},   {"ang", 0x2220},
+              {"and", 0x2227},         {"or", 0x2228},      {"cap", 0x2229},
+              {"cup", 0x222A},         {"int", 0x222B},     {"there4", 0x2234},
+              {"sim", 0x223C},         {"cong", 0x2245},    {"asymp", 0x2248},
+              {"ne", 0x2260},          {"equiv", 0x2261},   {"le", 0x2264},
+              {"ge", 0x2265},          {"sub", 0x2282},     {"sup", 0x2283},
+              {"nsub", 0x2284},        {"sube", 0x2286},    {"supe", 0x2287},
+              {"oplus", 0x2295},       {"otimes", 0x2297},  {"perp", 0x22A5},
+              {"sdot", 0x22C5},        {"lceil", 0x2308},   {"rceil", 0x2309},
+              {"lfloor", 0x230A},      {"rfloor", 0x230B},  {"lang", 0x2329},
+              {"rang", 0x232A},        {"loz", 0x25CA},     {"spades", 0x2660},
+              {"clubs", 0x2663},       {"hearts", 0x2665},  {"diams", 0x2666}};
+
+    // Used to convert number to utf-8 char.
+    std::wstring_convert<std::codecvt_utf8<char32_t>, char32_t> u8c;
+    // Matches numbered entities between 1 and 8 digits, decimal or hexadecimal,
+    // or named entities.
+    static const boost::regex re_entity{"&(#(x)?([[:alnum:]]{1,8})"
+                                        "|[^;[:space:][:punct:]]+);"};
+    // Iterate with the iterator type of the string_view itself. It is not
+    // guaranteed to be convertible to std::string::const_iterator.
+    using view_iterator = std::string_view::const_iterator;
+    view_iterator begin{html.begin()};
+    const view_iterator end{html.end()};
+    boost::match_results<view_iterator> match;
+
+    // Used for appending the rest of the text, after the last replacement.
+    view_iterator end_last_match{begin};
+
+    while (boost::regex_search(begin, end, match, re_entity))
+    {
+        // Text between the previous match and this one.
+        output.append(match.prefix().first, match.prefix().second);
+        try
+        {
+            const char32_t codepoint{
+                [&match]
+                {
+                    // If it doesn't start with a '#' it is a named entity.
+                    if (match[1].str()[0] != '#')
+                    {
+                        return names.at(match[1].str());
+                    }
+                    // 'x' after '#' means the number is hexadecimal.
+                    if (match[2].length() == 1)
+                    {
+                        return static_cast<char32_t>(
+                            std::stoul(match[3].str(), nullptr, 16));
+                    }
+                    // '#' without 'x' means the number is decimal.
+                    return static_cast<char32_t>(
+                        std::stoul(match[3].str(), nullptr, 10));
+                }()};
+            output += u8c.to_bytes(codepoint);
+        }
+        catch (const std::logic_error &)
+        {
+            // std::out_of_range: named entity could not be found or the
+            // number is too big. std::invalid_argument: the text after '#' is
+            // not a number. Keep the reference as it is.
+            output += match.str();
+        }
+        catch (const std::range_error &)
+        {
+            // The number can not be encoded as UTF-8. Keep the reference.
+            output += match.str();
+        }
+        begin = match[0].second;
+        end_last_match = begin;
+    }
+
+    output += std::string(end_last_match, end);
+
+    return output;
+}
+
+// Returns the value of the environment variable name, or an empty string_view
+// if it is not set. The returned view refers to the storage of getenv().
+std::string_view get_env(const std::string_view name)
+{
+    // A string_view is not guaranteed to be null-terminated, but getenv()
+    // expects a C string. Copy the name to get a terminated buffer.
+    const std::string terminated_name{name};
+    // NOLINTNEXTLINE(concurrency-mt-unsafe)
+    const char *env = std::getenv(terminated_name.c_str());
+    if (env != nullptr)
+    {
+        return env;
+    }
+
+    return {};
+}
+
+} // namespace epubgrep::helpers
--- a/src/helpers.hpp
+++ b/src/helpers.hpp
@ -0,0 +1,23 @@
+#ifndef EPUBGREP_HELPERS_HPP
+#define EPUBGREP_HELPERS_HPP
+
+#include <string>
+#include <string_view>
+namespace epubgrep::helpers
+{
+
+//! Return true if check is whitespace.
+[[nodiscard]] bool is_whitespace(char check);
+
+//! Decode percent-encoded text. Used for restricted characters in URLs.
+[[nodiscard]] std::string urldecode(std::string_view url);
+
+//! Un-escape named (&amp;), decimal (&#32;) and hexadecimal (&#x20;) HTML
+//! entities. Named entities that are not known are left untouched.
+[[nodiscard]] std::string unescape_html(std::string_view html);
+
+//! Returns the value of the environment variable name, or an empty
+//! string_view if the variable is not set.
+[[nodiscard]] std::string_view get_env(std::string_view name);
+
+} // namespace epubgrep::helpers
+
+#endif // EPUBGREP_HELPERS_HPP
--- a/src/log.cpp
+++ b/src/log.cpp
@ -0,0 +1,95 @@
+/*  This file is part of epubgrep.
+ *  Copyright © 2021 tastytea <tastytea@tastytea.de>
+ *
+ *  This program is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU Affero General Public License as published by
+ *  the Free Software Foundation, version 3.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU Affero General Public License for more details.
+ *
+ *  You should have received a copy of the GNU Affero General Public License
+ *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "log.hpp"
+
+#include "fs-compat.hpp"
+#include "helpers.hpp"
+
+#include <boost/locale/message.hpp>
+#include <boost/log/core.hpp>
+#include <boost/log/sources/logger.hpp>
+#include <boost/log/trivial.hpp>
+#include <boost/log/utility/setup/common_attributes.hpp>
+#include <boost/log/utility/setup/console.hpp>
+#include <boost/log/utility/setup/file.hpp>
+
+#include <iostream>
+
+namespace epubgrep::log
+{
+
+namespace blog = boost::log;
+namespace keywords = boost::log::keywords;
+using boost::locale::translate;
+using sev = boost::log::trivial::severity_level;
+
+inline static global_variables global;
+
+/*!
+ *  @brief Set up the log file sink and the console sinks.
+ *
+ *  The log file is epubgrep/epubgrep.log below $XDG_STATE_HOME or, if that
+ *  variable is not set, below $HOME/.local/state. If neither variable is set,
+ *  the relative path epubgrep.log is used.
+ */
+void init()
+{
+    // Determine the path of the log file, see the function comment.
+    const auto log_path{[]
+                        {
+                            fs::path path{helpers::get_env("XDG_STATE_HOME")};
+                            if (path.empty())
+                            {
+                                // Fall back to the XDG default below $HOME.
+                                path = helpers::get_env("HOME");
+                                if (!path.empty())
+                                {
+                                    path /= ".local";
+                                    path /= "state";
+                                }
+                            }
+                            // Only use our own subdirectory if a base
+                            // directory was found.
+                            if (!path.empty())
+                            {
+                                path /= "epubgrep";
+                            }
+                            return path / "epubgrep.log";
+                        }()};
+
+    // File sink. Kept in a global so that enable_debug() can change its
+    // filter later. Records everything from severity info upwards.
+    global.textlog = blog::add_file_log(
+        keywords::file_name = log_path.c_str(),
+        keywords::format = "%LineID% [%TimeStamp%] "
+                           "[%ThreadID%]: [%Severity%] %Message%");
+    global.textlog->set_filter(blog::trivial::severity >= sev::info);
+
+    // One console sink on stderr per severity, so that each severity gets its
+    // own translated prefix. Each sink only accepts exactly one severity.
+    blog::add_console_log(std::cerr,
+                          keywords::format = translate("WARNING").str()
+                                             + ": %Message%")
+        ->set_filter(blog::trivial::severity == sev::warning);
+    blog::add_console_log(std::cerr, keywords::format = translate("ERROR").str()
+                                                        + ": %Message%")
+        ->set_filter(blog::trivial::severity == sev::error);
+    blog::add_console_log(std::cerr,
+                          keywords::format = translate("FATAL ERROR").str()
+                                             + ": %Message%")
+        ->set_filter(blog::trivial::severity == sev::fatal);
+
+    // Presumably provides the %LineID%, %TimeStamp% and %ThreadID% attributes
+    // used in the file format above; confirm against the Boost.Log docs.
+    blog::add_common_attributes();
+}
+
+/*!
+ *  @brief Enable debug logging to the log file and to stderr.
+ *
+ *  Dereferences the file sink that init() stores in the global variables, so
+ *  init() has to be called first.
+ */
+void enable_debug()
+{
+    // Let the log file record debug messages too (init() set it to info).
+    global.textlog->set_filter(blog::trivial::severity >= sev::debug);
+
+    // Additional console sink for everything up to and including info. The
+    // higher severities already have their own console sinks from init().
+    blog::add_console_log(std::cerr,
+                          keywords::format = "[%Severity%] %Message%")
+        ->set_filter(blog::trivial::severity <= sev::info);
+
+    LOG(sev::info) << "Debug logging enabled.";
+}
+
+} // namespace epubgrep::log
--- a/src/log.hpp
+++ b/src/log.hpp
@ -0,0 +1,57 @@
+/*  This file is part of epubgrep.
+ *  Copyright © 2021 tastytea <tastytea@tastytea.de>
+ *
+ *  This program is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU Affero General Public License as published by
+ *  the Free Software Foundation, version 3.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU Affero General Public License for more details.
+ *
+ *  You should have received a copy of the GNU Affero General Public License
+ *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef EPUBGREP_LOG_HPP
+#define EPUBGREP_LOG_HPP
+
+#include "fs-compat.hpp"
+
+#include <boost/log/sinks/sync_frontend.hpp>
+#include <boost/log/sinks/text_file_backend.hpp>
+#include <boost/log/sources/global_logger_storage.hpp>
+#include <boost/log/sources/logger.hpp>
+#include <boost/log/trivial.hpp>
+#include <boost/smart_ptr/shared_ptr.hpp>
+
+//! Start a log record with the given severity on the global epubgrep logger.
+//! Use like a stream: LOG(sev::info) << "message";
+#define LOG(severity) BOOST_LOG_SEV(epubgrep::log::logger::get(), severity)
+//! Start a debug log record that is prefixed with the name of the calling
+//! function. Use like a stream: DEBUGLOG << "message";
+#define DEBUGLOG LOG(epubgrep::log::sev::debug) << __func__ << "(): "
+
+namespace epubgrep::log
+{
+
+namespace blog = boost::log;
+//! Short name for the severity levels used with LOG().
+using sev = boost::log::trivial::severity_level;
+
+//! State of the logging module that has to outlive init().
+struct global_variables
+{
+
+    //! The sink that writes to the log file. Set by init(), its filter is
+    //! changed by enable_debug().
+    boost::shared_ptr<
+        blog::sinks::synchronous_sink<blog::sinks::text_file_backend>>
+        textlog;
+};
+
+// Declares the global logger that the LOG() macro writes to.
+BOOST_LOG_INLINE_GLOBAL_LOGGER_DEFAULT(
+    logger, boost::log::sources::severity_logger_mt<sev>)
+
+//! Call this before doing any logging.
+void init();
+
+//! Enable debug logging. Call after init().
+void enable_debug();
+
+} // namespace epubgrep::log
+
+#endif // EPUBGREP_LOG_HPP
--- a/src/main.cpp
+++ b/src/main.cpp
@ -14,138 +14,289 @@
 *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

+#include "files.hpp"
+#include "fs-compat.hpp"
+#include "log.hpp"
 #include "options.hpp"
+#include "output.hpp"
 #include "search.hpp"
+#include "version.hpp"
+#include "zip.hpp"

 #include <boost/locale/generator.hpp>
 #include <boost/locale/message.hpp>
-#include <boost/program_options/errors.hpp>
-#include <boost/program_options/variables_map.hpp>
-#include <termcolor/termcolor.hpp>
+#include <fmt/format.h>
+#include <fmt/ostream.h> // For compatibility with fmt 4.

+#include <chrono>
 #include <clocale>
+#include <cmath>
 #include <cstdint>
 #include <cstdlib>
+#include <cstring>
 #include <exception>
+#include <fstream>
+#include <future>
 #include <iostream>
 #include <locale>
+#include <mutex>
 #include <string>
-#include <typeinfo>
+#include <string_view>
+#include <system_error>
+#include <thread>
 #include <vector>

+constexpr int EXIT_FATAL{2}; // NOLINT(readability-identifier-naming)
+
+// NOLINTNEXTLINE(readability-function-cognitive-complexity)
 int main(int argc, char *argv[])
 {
-    namespace po = boost::program_options;
    using namespace epubgrep;

    using boost::locale::translate;
-    using std::cerr;
-    using std::cout;
+    using fmt::format;
+    using std::string;
+    using std::vector;

    // locale_generator("").name.c_str() returns "*" instead of "". That's why
    // the global C locale isn't changed. So we have to set it additionally.
-    std::setlocale(LC_ALL, "");
+    std::setlocale(LC_ALL, ""); // NOLINT(concurrency-mt-unsafe)
    boost::locale::generator locale_generator;
    locale_generator.add_messages_path("translations");
    locale_generator.add_messages_path("/usr/share/locale");
    locale_generator.add_messages_domain("epubgrep");
    std::locale::global(locale_generator(""));
-    cout.imbue(std::locale());
-    cerr.imbue(std::locale());
+    std::cout.imbue(std::locale());
+    std::cerr.imbue(std::locale());

-    po::variables_map vm;
+    log::init();
+    LOG(log::sev::info) << "epubgrep " << version << " started.";
+
+    options::options opts;
    try
    {
-        vm = options::parse_options(argc, argv);
+        opts = options::parse_options(argc, argv);
    }
    catch (std::exception &e)
    { // Exceptions we can't recover from or ones we don't know.
-        cerr << '\n' << translate("ERROR: ") << e.what() << '\n';
-        cerr << translate("Error while parsing options.") << '\n';
-        return EXIT_FAILURE;
+        LOG(log::sev::fatal)
+            << e.what() << translate(" (while parsing options)");
+        return EXIT_FATAL;
    }

-    if (vm.count("help") + vm.count("version") > 0)
+    if (opts.debug)
+    {
+        log::enable_debug();
+    }
+    DEBUGLOG << "Options: " << opts;
+
+    if (opts.help || opts.version)
    {
        return EXIT_SUCCESS;
    }

-    if (vm.count("input-file") == 0)
-    {
-        cout << "NO INPUT FILE\n";
-        // TODO: Read data from stdin.
-    }
-    else
-    {
-        search::options opts;
-        if (vm.count("basic-regexp") > 0)
-        {
-            opts.regex = search::regex_kind::basic;
-        }
-        if (vm.count("extended-regexp") > 0)
-        {
-            opts.regex = search::regex_kind::extended;
-        }
-        if (vm.count("perl-regexp") > 0)
-        {
-            opts.regex = search::regex_kind::perl;
-        }
-        if (vm.count("grep") > 0)
-        {
-            opts.grep_like = true;
-        }
-        if (vm.count("ignore-case") > 0)
-        {
-            opts.ignore_case = true;
-        }
-        if (vm.count("raw") > 0)
-        {
-            opts.raw = true;
-        }
-        opts.context = vm["context"].as<std::uint64_t>();
+    int return_code{EXIT_SUCCESS};

-        for (const auto &filepath :
-             vm["input-file"].as<std::vector<std::string>>())
+    // Collect all files to search. Without -r / -R every argument is taken as
+    // a file. With -r / -R every argument is first treated as a directory and
+    // listed recursively; if that fails because it is not a directory, the
+    // argument is taken as a single file instead.
+    vector<fs::path> input_files;
+    for (const auto &filepath : opts.input_file)
+    {
+        if (!opts.recursive && !opts.dereference_recursive)
        {
-            for (const auto &regex :
-                 vm["regexp"].as<std::vector<std::string>>())
+            input_files.emplace_back(filepath);
+            DEBUGLOG << "Added to input_files: " << filepath;
+        }
+        else
+        {
+            try
            {
-                try
-                {
-                    for (const auto &match :
-                         search::search(filepath, regex, opts))
-                    {
-                        cout << match.filepath;
-                        if (!match.headline.empty())
-                        {
+                auto files_in_dir{
+                    files::list_recursive(filepath,
+                                          opts.dereference_recursive)};
+                input_files.insert(input_files.end(), files_in_dir.begin(),
+                                   files_in_dir.end());
+                DEBUGLOG << "Added directory to input_files.";
+            }
+            catch (const fs::filesystem_error &e)
+            {
+                // Compare against the portable error condition instead of
+                // the raw number 20, which is only ENOTDIR on some platforms
+                // and ignores the error category.
+                if (e.code() == std::errc::not_a_directory)
+                { // Is not a directory.
+                    input_files.emplace_back(filepath);
+                    DEBUGLOG << "Added to input_files: " << filepath;
+                    continue;
+                }

-                            cout << ", " << match.headline;
-                        }
-                        if (!match.page.empty())
-                        {
-                            cout << ", page " << match.page;
-                        }
-                        cout << ": " << match.context.first;
-                        if (vm.count("nocolor") == 0)
-                        {
-                            cout << termcolor::bright_magenta << match.text
-                                 << termcolor::reset;
-                        }
-                        else
-                        {
-                            cout << match.text;
-                        }
-                        cout << match.context.second << '\n';
-                    }
-                }
-                catch (const std::exception &e)
-                { // Unknown errors.
-                    cerr << '\n' << translate("ERROR: ") << e.what() << '\n';
-                    cerr << translate("Error while searching.") << '\n';
-                    // NOTE: Maybe we should continue with the next regex/file?
-                    return EXIT_FAILURE;
-                }
+                // Any other error: report it, remember the failure and go on
+                // with the next argument.
+                LOG(log::sev::error)
+                    << format(translate("Could not open {0:s}: {1:s}")
+                                  .str()
+                                  .c_str(),
+                              e.path1().c_str(), e.what());
+                return_code = EXIT_FAILURE;
            }
        }
    }
+
+    search::settings search_settings;
+    search_settings.regex = opts.regex;
+    search_settings.grep_like = opts.grep;
+    search_settings.ignore_case = opts.ignore_case;
+    search_settings.raw = opts.raw;
+    search_settings.context = opts.context;
+
+    vector<vector<search::match>> matches_all;
+    std::mutex mutex_matches_all;
+    vector<std::future<int>> futurepool;
+    std::atomic<size_t> books_searched{0};
+
+    // Searches one file with every pattern in opts.regexp and appends each
+    // non-empty result to matches_all. Launched via std::async further down,
+    // so matches_all is only touched while mutex_matches_all is held.
+    // Returns EXIT_SUCCESS, EXIT_FAILURE or, for an invalid regex, EXIT_FATAL.
+    auto search_file{
+        [&opts, &matches_all, &mutex_matches_all,
+         &search_settings](const fs::path &filepath)
+        {
+            for (const auto &pattern : opts.regexp)
+            {
+                try
+                {
+                    auto found{
+                        search::search(filepath, pattern, search_settings)};
+                    if (found.empty())
+                    {
+                        continue;
+                    }
+
+                    const std::lock_guard<std::mutex> lock{mutex_matches_all};
+                    matches_all.emplace_back(found);
+                }
+                catch (const zip::exception &e)
+                {
+                    // Code 1 with --ignore-archive-errors: the file is
+                    // probably not an EPUB, which is not treated as an error.
+                    const bool ignorable{opts.ignore_archive_errors
+                                         && e.code == 1};
+                    if (!ignorable)
+                    {
+                        LOG(log::sev::error) << e.what();
+                        return EXIT_FAILURE;
+                    }
+
+                    LOG(log::sev::info) << e.what();
+                    return EXIT_SUCCESS;
+                }
+                catch (const std::ifstream::failure &e)
+                {
+                    LOG(log::sev::error)
+                        << std::strerror(errno) // FIXME: Not thread safe.
+                        << format(translate(" (while opening {0:s})")
+                                      .str()
+                                      .c_str(),
+                                  filepath.c_str());
+                    return EXIT_FAILURE;
+                }
+                catch (const boost::regex_error &e)
+                {
+                    // A broken pattern fails for every file, so it is fatal.
+                    LOG(log::sev::fatal) << e.what();
+                    return EXIT_FATAL;
+                }
+            }
+
+            return EXIT_SUCCESS;
+        }};
+
+    // Collects the results of finished searches and removes their futures
+    // from futurepool. With wait == false, each future is given up to 100 ms
+    // to become ready and is skipped otherwise. With wait == true, this blocks
+    // until every future has delivered its result.
+    // return_code is updated with the first failure; EXIT_FATAL always wins.
+    auto futures_cleanup{
+        [&futurepool, &return_code, &books_searched](const bool wait = false)
+        {
+            using namespace std::chrono_literals;
+
+            for (auto it{futurepool.begin()}; it != futurepool.end();)
+            {
+                if (!wait && it->wait_for(100ms) != std::future_status::ready)
+                {
+                    ++it;
+                    continue;
+                }
+
+                const int ret{it->get()};
+                // Keep the first failure, but never let an earlier
+                // EXIT_FAILURE hide a later EXIT_FATAL: the caller stops
+                // launching searches only when it sees EXIT_FATAL.
+                if (ret == EXIT_FATAL
+                    || (ret != EXIT_SUCCESS && return_code == EXIT_SUCCESS))
+                {
+                    return_code = ret;
+                }
+
+                // erase() invalidates it; continue with the iterator to the
+                // following element that erase() returns.
+                it = futurepool.erase(it);
+                ++books_searched;
+            }
+        }};
+
+    // Maximum number of concurrent searches: three quarters of the hardware
+    // threads, rounded up, but at least 1.
+    const auto max_threads{
+        []
+        {
+            auto n{static_cast<double>(std::thread::hardware_concurrency())};
+            const auto threads{
+                static_cast<std::uint32_t>(std::ceil(n / 2 + n / 4))};
+            // hardware_concurrency() returns 0 if the value is not
+            // computable. A limit of 0 would make the "pool is full" check
+            // true forever, so always allow at least one thread.
+            return threads > 0 ? threads : std::uint32_t{1};
+        }()};
+    DEBUGLOG << "max_threads = " << max_threads;
+
+    // Prints a progress message to stderr every opts.status_interval seconds
+    // until the cancel future becomes ready, then a final message. Returns
+    // immediately if --status was not given.
+    const auto print_status{
+        [&opts, &books_searched, &input_files](std::future<bool> cancel)
+        {
+            if (!opts.status)
+            {
+                return;
+            }
+            while (cancel.wait_for(std::chrono::seconds(opts.status_interval))
+                   != std::future_status::ready)
+            {
+                // Read the counter explicitly instead of relying on fmt
+                // being able to format a std::atomic; the {0:d} spec needs a
+                // plain integer.
+                const size_t searched{books_searched.load()};
+                std::cerr << format(translate("{0:d} of {1:d} books searched.")
+                                        .str()
+                                        .c_str(),
+                                    searched, input_files.size())
+                          << '\n';
+            }
+            std::cerr << translate("All books searched.") << '\n';
+        }};
+    std::promise<bool> promise_status;
+    std::thread thread_status{print_status, promise_status.get_future()};
+
+    // Launch one asynchronous search per input file, with at most max_threads
+    // running at once. In plain text mode, results that are already available
+    // are printed while the remaining searches are still running.
+    for (const auto &filepath : input_files)
+    {
+        while (futurepool.size() >= max_threads)
+        {
+            DEBUGLOG << "Attempting to clean up threads";
+            futures_cleanup();
+        }
+        if (return_code == EXIT_FATAL)
+        {
+            break;
+        }
+        futurepool.emplace_back(
+            std::async(std::launch::async, search_file, filepath));
+        DEBUGLOG << "Launched new thread";
+
+        if (!opts.json && !opts.html)
+        {
+            // The search threads append to matches_all concurrently, so every
+            // access, including empty() and front(), must hold the mutex.
+            // Take the oldest result out while locked and print it after the
+            // lock is released, so that searches are not blocked by output.
+            vector<search::match> matches;
+            {
+                std::lock_guard<std::mutex> guard(mutex_matches_all);
+                if (!matches_all.empty())
+                {
+                    matches.swap(matches_all.front());
+                    matches_all.erase(matches_all.begin());
+                }
+            }
+            // Only non-empty results are ever stored in matches_all, so an
+            // empty vector means that nothing was available.
+            if (!matches.empty())
+            {
+                output::print_matches(matches, opts, input_files.size() == 1);
+            }
+        }
+    }
+    DEBUGLOG << "Waiting for remaining threads to finish";
+    futures_cleanup(true);
+    promise_status.set_value(true);
+    thread_status.join();
+    if (return_code == EXIT_FATAL)
+    {
+        return EXIT_FATAL;
+    }
+
+    if (opts.json)
+    {
+        output::json_all(matches_all);
+    }
+    else if (opts.html)
+    {
+        output::html_all(matches_all, opts);
+    }
+    else
+    {
+        for (const auto &matches : matches_all)
+        {
+            output::print_matches(matches, opts, input_files.size() == 1);
+        }
+    }
+
+    LOG(log::sev::info) << "Exiting program with return code " << return_code;
+
+    return return_code;
 }
--- a/src/options.cpp
+++ b/src/options.cpp
@ -17,6 +17,7 @@
 #include "options.hpp"

 #include "fs-compat.hpp"
+#include "helpers.hpp"
 #include "version.hpp"

 #include <boost/locale/message.hpp>
@ -25,13 +26,17 @@
 #include <boost/program_options/positional_options.hpp>
 #include <boost/program_options/value_semantic.hpp>
 #include <boost/program_options/variables_map.hpp>
+#include <fmt/format.h>
+#include <fmt/ostream.h> // For compatibility with fmt 4.

 #include <cstdint>
 #include <cstdlib>
 #include <exception>
 #include <fstream>
 #include <iostream>
+#include <stdexcept>
 #include <string>
+#include <vector>

 namespace epubgrep::options
 {
@ -39,17 +44,24 @@ namespace epubgrep::options
 namespace po = boost::program_options;

 using boost::locale::translate;
+using fmt::format;
 using std::cout;

-po::variables_map parse_options(int argc, char *argv[])
+options parse_options(int argc, char *argv[])
 {
-    po::options_description options_visible(translate("Available options"));
    // clang-format off
-    options_visible.add_options()
+    po::options_description options_general(translate("General options"));
+    options_general.add_options()
        ("help,h",
         translate("Display this help and exit.").str().data())
        ("version,V",
         translate("Display version information and exit.").str().data())
+        ("debug",
+         translate("Enable debug output.").str().data())
+    ;
+
+    po::options_description options_search(translate("Search options"));
+    options_search.add_options()
        ("basic-regexp,G",
         translate("PATTERN is a basic regular expression (default).")
         .str().data())
@ -60,27 +72,61 @@ po::variables_map parse_options(int argc, char *argv[])
         .str().data())
        ("perl-regexp,P",
         translate("PATTERN is a Perl regular expression.").str().data())
+
        ("ignore-case,i",
         translate("Ignore case distinctions in pattern and data.")
         .str().data())
+        ("raw,a",
+         translate("Do not clean up text before searching.").str().data())
+        ("recursive,r",
+         translate("Read all files under each directory, recursively.")
+         .str().data())
+        ("dereference-recursive,R",
+         translate("Read all files under each directory, recursively, "
+                   "following symlinks.").str().data())
        ("regexp,e", po::value<std::vector<std::string>>()
         ->value_name(translate("PATTERN"))->composing()->required(),
         translate("Use additional PATTERN for matching.").str().data())
-        ("raw,a",
-         translate("Do not clean up text before searching.").str().data())
+    ;
+
+    po::options_description options_output(translate("Output options"));
+    options_output.add_options()
        ("context,C", po::value<std::uint64_t>()
         ->value_name(translate("NUMBER"))->default_value(0),
         translate("Print NUMBER words of context around matches.")
         .str().data())
-        ("nocolor", translate("Do not color matches.") .str().data())
+        ("nocolor", translate("Turn off colors and other decorations.")
+         .str().data())
+        ("no-filename",po::value<std::string>()->value_name(translate("WHICH")),
+         translate("Suppress the mentioning of file names on output. "
+                   "WHICH is ‘filesystem’, ‘in-epub’ or ‘all’.").str().data())
+        ("ignore-archive-errors",
+         translate("Ignore errors about wrong file formats.").str().data())
+        ("json",
+         translate("Output JSON instead of plain text.").str().data())
+        ("html",
+         translate("Output HTML instead of plain text.").str().data())
+        ("status",
+         translate("Output status message every STATUS-INTERVAL seconds.")
+         .str().data())
+        ("status-interval", po::value<std::uint64_t>()
+         ->value_name(translate("NUMBER"))->default_value(30),
+         translate("Set status message interval to NUMBER seconds.")
+         .str().data())
    ;

    po::options_description options_hidden("Hidden options");
    options_hidden.add_options()
-        ("input-file", po::value<std::vector<std::string>>()
+        ("input-file", po::value<std::vector<std::string>>()->required()
         ->value_name("FILE"), "Input file to search.")
        ;
    // clang-format on
+
+    po::options_description options_visible;
+    options_visible.add(options_general)
+        .add(options_search)
+        .add(options_output);
+
    po::options_description options_all("Allowed options");
    options_all.add(options_visible).add(options_hidden);

@ -113,7 +159,7 @@ po::variables_map parse_options(int argc, char *argv[])

    if (vm.count("help") != 0)
    {
-        cout << translate("Usage: epubgrep [OPTION]… PATTERN [FILE]…\n");
+        cout << translate("Usage: epubgrep [OPTION]… PATTERN FILE…\n");
        cout << options_visible;
        cout << translate("\nYou can access the full manual "
                          "with `man epubgrep`.\n");
@ -130,26 +176,15 @@ po::variables_map parse_options(int argc, char *argv[])
            "conditions.\n");
    }

-    return vm;
+    return parse_again(vm);
 }

 fs::path get_config_path()
 {
-    const auto get_env{[](const std::string &name)
-                       {
-                           const char *env = std::getenv(name.c_str());
-                           if (env != nullptr)
-                           {
-                               return env;
-                           }
-
-                           return "";
-                       }};
-
-    fs::path path{get_env("XDG_CONFIG_HOME")};
+    fs::path path{helpers::get_env("XDG_CONFIG_HOME")};
    if (path.empty())
    {
-        path = get_env("HOME");
+        path = helpers::get_env("HOME");
        if (!path.empty())
        {
            path /= ".config";
@ -157,10 +192,150 @@ fs::path get_config_path()
    }
    if (!path.empty())
    {
-        return path /= "epubgrep.conf";
+        const auto old_path{path / "epubgrep.conf"};
+        auto new_path{path / "epubgrep" / "epubgrep.conf"};
+
+        if (fs::exists(old_path))
+        {
+            fs::create_directory(path /= "epubgrep");
+            fs::rename(old_path, new_path);
+        }
+
+        return new_path;
    }

    return "epubgrep.conf";
 }

+/*!
+ *  @brief Convert the variables map into an options struct.
+ *
+ *  @param vm The parsed options.
+ *
+ *  @return The options. Values that are not in vm keep their defaults.
+ *
+ *  @throws std::runtime_error if the value of --no-filename is not one of
+ *          the accepted keywords.
+ */
+options parse_again(const po::variables_map &vm)
+{
+    // True if vm holds at least one value for the option.
+    const auto is_set{[&vm](const char *name) { return vm.count(name) > 0; }};
+
+    options opts;
+
+    // Simple switches.
+    opts.help = is_set("help");
+    opts.version = is_set("version");
+    opts.grep = is_set("grep");
+    opts.ignore_case = is_set("ignore-case");
+    opts.raw = is_set("raw");
+    opts.nocolor = is_set("nocolor");
+    opts.recursive = is_set("recursive");
+    opts.dereference_recursive = is_set("dereference-recursive");
+    opts.ignore_archive_errors = is_set("ignore-archive-errors");
+    opts.debug = is_set("debug");
+    opts.json = is_set("json");
+    opts.html = is_set("html");
+    opts.status = is_set("status");
+
+    // If several regex kinds are given, perl beats extended beats basic.
+    if (is_set("perl-regexp"))
+    {
+        opts.regex = regex_kind::perl;
+    }
+    else if (is_set("extended-regexp"))
+    {
+        opts.regex = regex_kind::extended;
+    }
+    else if (is_set("basic-regexp"))
+    {
+        opts.regex = regex_kind::basic;
+    }
+
+    // Options with values.
+    if (is_set("context"))
+    {
+        opts.context = vm["context"].as<std::uint64_t>();
+    }
+    opts.status_interval = vm["status-interval"].as<std::uint64_t>();
+    if (is_set("regexp"))
+    {
+        opts.regexp = vm["regexp"].as<std::vector<std::string>>();
+    }
+    if (is_set("input-file"))
+    {
+        opts.input_file = vm["input-file"].as<std::vector<std::string>>();
+    }
+
+    if (is_set("no-filename"))
+    {
+        const auto &which{vm["no-filename"].as<std::string>()};
+        const bool all{which == "all"};
+        opts.no_fn_fs = all || which == "filesystem";
+        opts.no_fn_epub = all || which == "in-epub";
+        // Neither flag set means that the keyword was not recognised.
+        if (!opts.no_fn_fs && !opts.no_fn_epub)
+        {
+            throw std::runtime_error{"'--no-filename' must be either "
+                                     "‘filesystem’, ‘in-epub’ or ‘all’."};
+        }
+    }
+
+    return opts;
+}
+
+/*!
+ *  @brief Write a one-line, human readable dump of opts to out.
+ *
+ *  Used for the debug log. Not every member of options is printed.
+ *
+ *  @return out, to allow chaining.
+ */
+std::ostream &operator<<(std::ostream &out, const options &opts)
+{
+    // Named regex_name, not regex_kind, to not shadow the enum.
+    const std::string regex_name{[&opts]
+                                 {
+                                     switch (opts.regex)
+                                     {
+                                     case regex_kind::basic:
+                                     {
+                                         return "basic";
+                                     }
+                                     case regex_kind::extended:
+                                     {
+                                         return "extended";
+                                     }
+                                     case regex_kind::perl:
+                                     {
+                                         return "perl";
+                                     }
+                                     }
+                                     // Value outside of the enumerators.
+                                     return "error";
+                                 }()};
+
+    // Prints name={"a", "b"} followed by a space. The separator is decided by
+    // position, not by comparing values with the first element, so that a
+    // later duplicate of the first element is still preceded by ", ".
+    const auto print_list{
+        [&out](const char *name, const std::vector<std::string> &list)
+        {
+            out << name << "={";
+            bool first{true};
+            for (const auto &entry : list)
+            {
+                if (!first)
+                {
+                    out << ", ";
+                }
+                first = false;
+                out << '"' << entry << '"';
+            }
+            out << "} ";
+        }};
+
+    out << format("help={0:} version={1:} regex={2:s} grep={3:} "
+                  "ignore_case={4:} ",
+                  opts.help, opts.version, regex_name, opts.grep,
+                  opts.ignore_case);
+
+    print_list("regexp", opts.regexp);
+
+    out << format("raw={0:} context={1:d} nocolor={2:} no_fn_fs={3:} "
+                  "no_fn_epub={4:} recursive={5:} dereference_recursive={6:} ",
+                  opts.raw, opts.context, opts.nocolor, opts.no_fn_fs,
+                  opts.no_fn_epub, opts.recursive, opts.dereference_recursive);
+
+    print_list("input_file", opts.input_file);
+
+    out << format("ignore_archive={0:} debug={1:}", opts.ignore_archive_errors,
+                  opts.debug);
+
+    return out;
+}
+
 } // namespace epubgrep::options
--- a/src/options.hpp
+++ b/src/options.hpp
@ -21,13 +21,53 @@

 #include <boost/program_options/variables_map.hpp>

+#include <cstddef>
+#include <cstdint>
+#include <ostream>
+#include <string>
+#include <vector>
+
 namespace epubgrep::options
 {

 namespace po = boost::program_options;

+//! The kind of regular expression that the patterns are interpreted as.
+enum class regex_kind
+{
+    basic,    //!< Basic regular expression (--basic-regexp), the default.
+    extended, //!< Extended regular expression (--extended-regexp).
+    perl      //!< Perl regular expression (--perl-regexp).
+};
+
+//! All command line options in an easily accessible form.
+struct options
+{
+    bool help{false};    //!< --help: Display help and exit.
+    bool version{false}; //!< --version: Display version and exit.
+    regex_kind regex{regex_kind::basic}; //!< How patterns are interpreted.
+    bool grep{false};                    //!< --grep was given.
+    bool ignore_case{false}; //!< --ignore-case: Ignore case distinctions.
+    std::vector<std::string> regexp; //!< --regexp: The patterns to search for.
+    bool raw{false}; //!< --raw: Do not clean up text before searching.
+    std::uint64_t context{0}; //!< --context: Words of context around matches.
+    bool nocolor{false};    //!< --nocolor: No colors and other decorations.
+    bool no_fn_fs{false};   //!< --no-filename is ‘filesystem’ or ‘all’.
+    bool no_fn_epub{false}; //!< --no-filename is ‘in-epub’ or ‘all’.
+    bool recursive{false};  //!< --recursive: Read directories recursively.
+    //! --dereference-recursive: Like recursive, but following symlinks.
+    bool dereference_recursive{false};
+    std::vector<std::string> input_file; //!< The files to search.
+    //! --ignore-archive-errors: Ignore errors about wrong file formats.
+    bool ignore_archive_errors{false};
+    bool debug{false};  //!< --debug: Enable debug output.
+    bool json{false};   //!< --json: Output JSON instead of plain text.
+    bool html{false};   //!< --html: Output HTML instead of plain text.
+    bool status{false}; //!< --status: Output periodic status messages.
+    //! --status-interval: Seconds between status messages. Qualified with
+    //! std:: like context; <cstdint> only guarantees the name in namespace
+    //! std.
+    std::uint64_t status_interval{0};
+
+    //! For the debug output.
+    friend std::ostream &operator<<(std::ostream &out, const options &opts);
+};
+
 //! Parse options and return them.
-[[nodiscard]] po::variables_map parse_options(int argc, char *argv[]);
+[[nodiscard]] options parse_options(int argc, char *argv[]);

 /*!
 *  @brief Returns the path of the config file.
@ -38,6 +78,9 @@ namespace po = boost::program_options;
 */
 [[nodiscard]] fs::path get_config_path();

+//! Parse variables map and return nice options struct.
+[[nodiscard]] options parse_again(const po::variables_map &vm);
+
 } // namespace epubgrep::options

 #endif // EPUBGREP_OPTIONS_HPP
--- a/src/output.cpp
+++ b/src/output.cpp
@ -0,0 +1,226 @@
+/*  This file is part of epubgrep.
+ *  Copyright © 2021 tastytea <tastytea@tastytea.de>
+ *
+ *  This program is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU Affero General Public License as published by
+ *  the Free Software Foundation, version 3.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU Affero General Public License for more details.
+ *
+ *  You should have received a copy of the GNU Affero General Public License
+ *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "output.hpp"
+
+#include "version.hpp"
+
+#include <boost/locale/message.hpp>
+#include <fmt/format.h>
+#include <fmt/ostream.h> // For compatibility with fmt 4.
+#include <nlohmann/json.hpp>
+#include <termcolor/termcolor.hpp>
+
+#include <cstdint>
+#include <iostream>
+#include <sstream>
+
+namespace epubgrep::output
+{
+
+using boost::locale::translate;
+using fmt::format;
+using std::cout;
+
+/*!
+ *  @brief Print the matches of one EPUB to standard output.
+ *
+ *  Unless @p single_file or opts.no_fn_fs is set, a header line with the
+ *  relative path of the EPUB is printed first. Each match is printed on its
+ *  own line as “metadata: context-before match context-after”, where metadata
+ *  is the in-EPUB file path (unless opts.no_fn_epub is set), the headline and
+ *  the page, as far as they are available. Colors and text styles are used
+ *  unless opts.nocolor is set.
+ *
+ *  @param matches     Matches of a single EPUB. Nothing is printed if empty.
+ *  @param opts        Program options.
+ *  @param single_file Suppresses the header line with the EPUB path if true.
+ */
+void print_matches(const std::vector<search::match> &matches,
+                   const options::options &opts, bool single_file)
+{
+    if (matches.empty())
+    { // Nothing to print, and matches[0] below would be out of bounds.
+        return;
+    }
+
+    if (!single_file && !opts.no_fn_fs)
+    {
+        if (!opts.nocolor)
+        {
+            cout << termcolor::yellow;
+        }
+        cout << format(translate("  In {0:s}: \n").str().c_str(),
+                       fs::relative(matches[0].filepath_epub).c_str());
+        if (!opts.nocolor)
+        {
+            cout << termcolor::reset;
+        }
+    }
+
+    for (const auto &match : matches)
+    {
+        std::vector<std::string> metadata;
+        if (!opts.no_fn_epub)
+        {
+            metadata.emplace_back(match.filepath_inside);
+        }
+        if (!match.headline.empty())
+        {
+            // <https://github.com/ikalnytskyi/termcolor/issues/45>
+            if (!opts.nocolor && termcolor::_internal::is_colorized(cout))
+            {
+                // The reset ends the underline, so italic has to be switched
+                // on again for the rest of the metadata.
+                std::stringstream ss;
+                ss << termcolor::colorize << termcolor::underline
+                   << match.headline << termcolor::reset << termcolor::italic;
+                metadata.emplace_back(ss.str());
+            }
+            else
+            {
+                metadata.emplace_back(match.headline);
+            }
+        }
+        if (!match.page.empty())
+        {
+            metadata.emplace_back("page " + match.page);
+        }
+        if (!metadata.empty())
+        {
+            if (!opts.nocolor)
+            {
+                cout << termcolor::italic;
+            }
+            // Separate by position, not by value: a part that happens to have
+            // the same text as the last part must still get its separator.
+            bool first_part{true};
+            for (const auto &part : metadata)
+            {
+                if (!first_part)
+                {
+                    cout << ", ";
+                }
+                cout << part;
+                first_part = false;
+            }
+            cout << ": ";
+            if (!opts.nocolor)
+            {
+                cout << termcolor::reset;
+            }
+        }
+        cout << match.context.first;
+        if (!opts.nocolor)
+        {
+            cout << termcolor::bright_magenta;
+        }
+        cout << match.text;
+        if (!opts.nocolor)
+        {
+            cout << termcolor::reset;
+        }
+        cout << match.context.second << '\n';
+    }
+}
+
+/*!
+ *  @brief Print the matches of all EPUBs as one JSON document on one line.
+ *
+ *  The document holds a “generator” object with the epubgrep version and, if
+ *  there is at least one match, a “matches” array with one object per match.
+ */
+void json_all(const std::vector<std::vector<search::match>> &matches_all)
+{
+    nlohmann::json document;
+
+    document["generator"] = {{"epubgrep", std::string(version)}};
+
+    for (const auto &per_file : matches_all)
+    {
+        for (const auto &hit : per_file)
+        {
+            nlohmann::json entry;
+            entry["filepath_epub"] = hit.filepath_epub.string();
+            entry["filepath_inside"] = hit.filepath_inside;
+            entry["match"] = hit.text;
+            // Context is a two-element array: text before, text after.
+            entry["context"] = nlohmann::json::array(
+                {hit.context.first, hit.context.second});
+            entry["headline"] = hit.headline;
+            entry["page"] = hit.page;
+
+            // The “matches” key only comes into existence with the first hit.
+            document["matches"].push_back(entry);
+        }
+    }
+
+    cout << document.dump() << '\n';
+}
+
+namespace
+{
+
+//! Escape the characters that are special in HTML text and attribute values.
+std::string escape_html(const std::string &text)
+{
+    std::string escaped;
+    escaped.reserve(text.size());
+    for (const char chr : text)
+    {
+        switch (chr)
+        {
+        case '&':
+        {
+            escaped += "&amp;";
+            break;
+        }
+        case '<':
+        {
+            escaped += "&lt;";
+            break;
+        }
+        case '>':
+        {
+            escaped += "&gt;";
+            break;
+        }
+        case '"':
+        {
+            escaped += "&quot;";
+            break;
+        }
+        case '\'':
+        {
+            escaped += "&#39;";
+            break;
+        }
+        default:
+        {
+            escaped += chr;
+            break;
+        }
+        }
+    }
+
+    return escaped;
+}
+
+} // namespace
+
+/*!
+ *  @brief Print the matches of all EPUBs as one HTML document.
+ *
+ *  Each element of @p matches_all becomes an <article> with a table. The
+ *  caption is the relative path of the EPUB, or a numbered “File N” label if
+ *  opts.no_fn_fs is set or the element has no matches. The column with the
+ *  in-EPUB file path is omitted if opts.no_fn_epub is set.
+ *
+ *  All strings that come from the EPUB or the file system are HTML-escaped
+ *  before they are written, so that they can not break or inject markup.
+ *
+ *  @param matches_all One vector of matches per EPUB.
+ *  @param opts        Program options.
+ */
+void html_all(const std::vector<std::vector<search::match>> &matches_all,
+              const options::options &opts)
+{
+    std::uint64_t count{1};
+
+    cout << "<!DOCTYPE html>\n";
+    // Translators: Replace “en” with your language code here.
+    cout << format(R"(<html lang="{0:s}">)", translate("en").str());
+    cout << "<head><title>epubgrep output</title>"
+            "<style>article { margin: 1em; }</style>"
+            "</head><body>\n\n";
+
+    for (const auto &matches : matches_all)
+    {
+        const auto identifier{
+            [&opts, count, &matches]
+            {
+                // Without matches there is no file path to show; fall back to
+                // the generic label instead of reading matches[0].
+                if (opts.no_fn_fs || matches.empty())
+                {
+                    return format(translate("File {0:d}").str(), count);
+                }
+                return fs::relative(matches[0].filepath_epub).string();
+            }()};
+
+        // Start article, table and print table header.
+        cout << format(R"(<article aria-labelledby="file_{0:d}">)", count)
+             << "\n  <table>\n"
+             << format(R"(    <caption id="file_{0:d}">{1:s}</caption>)", count,
+                       escape_html(identifier))
+             << '\n'
+             << "    <tr>\n";
+        if (!opts.no_fn_epub)
+        {
+            cout << format(R"(      <th id="file_path_{0:d}">{1:s}</th>)",
+                           count,
+                           translate("File path (in EPUB file)").str().c_str())
+                 << '\n';
+        }
+        cout << format(R"(      <th id="headline_{0:d}">{1:s}</th>)", count,
+                       translate("Last headline").str().c_str())
+             << '\n'
+             << format(R"(      <th id="page_{0:d}">{1:s}</th>)", count,
+                       translate("Page number").str().c_str())
+             << '\n'
+             << format(R"(      <th id="match_{0:d}">{1:s}</th>)", count,
+                       translate("Match").str().c_str())
+             << "\n    </tr>\n";
+
+        for (const auto &match : matches)
+        {
+            // lang attribute for the cells with text from the book, or an
+            // empty string if the language is not known.
+            const auto lang{[&match]
+                            {
+                                if (!match.language.empty())
+                                {
+                                    return format(R"( lang="{0:s}")",
+                                                  escape_html(match.language));
+                                }
+                                return std::string{};
+                            }()};
+            cout << "    <tr>\n";
+            if (!opts.no_fn_epub)
+            {
+                cout << format(
+                    R"(      <td headers="file_path_{0:d}">{1:s}</td>)", count,
+                    escape_html(match.filepath_inside))
+                     << '\n';
+            }
+            cout << format(
+                R"(      <td headers="headline_{0:d}"{1:s}>{2:s}</td>)", count,
+                lang, escape_html(match.headline))
+                 << '\n';
+            cout << format(R"(      <td headers="page_{0:d}">{1:s}</td>)",
+                           count, escape_html(match.page))
+                 << '\n';
+            cout << format(R"(      <td headers="match_{0:d}"{1:s}>{2:s})"
+                           R"(<strong>{3:s}</strong>{4:s}</td>)",
+                           count, lang, escape_html(match.context.first),
+                           escape_html(match.text),
+                           escape_html(match.context.second))
+                 << '\n';
+            cout << "    </tr>\n";
+        }
+        cout << "  </table>\n</article>\n\n";
+        ++count;
+    }
+
+    cout << "</body></html>\n";
+}
+
+} // namespace epubgrep::output
--- a/src/output.hpp
+++ b/src/output.hpp
@ -0,0 +1,41 @@
+/*  This file is part of epubgrep.
+ *  Copyright © 2021 tastytea <tastytea@tastytea.de>
+ *
+ *  This program is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU Affero General Public License as published by
+ *  the Free Software Foundation, version 3.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU Affero General Public License for more details.
+ *
+ *  You should have received a copy of the GNU Affero General Public License
+ *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef EPUBGREP_OUTPUT_HPP
+#define EPUBGREP_OUTPUT_HPP
+
+#include "options.hpp"
+#include "search.hpp"
+
+#include <vector>
+
+namespace epubgrep::output
+{
+
+//! Print the matches of an EPUB. The path of the EPUB is printed first, unless
+//! single_file or opts.no_fn_fs is set.
+void print_matches(const std::vector<search::match> &matches,
+                   const options::options &opts, bool single_file);
+
+//! Print all matches as JSON.
+void json_all(const std::vector<std::vector<search::match>> &matches_all);
+
+//! Print all matches as HTML.
+void html_all(const std::vector<std::vector<search::match>> &matches_all,
+              const options::options &opts);
+
+} // namespace epubgrep::output
+
+#endif // EPUBGREP_OUTPUT_HPP
--- a/src/search.cpp
+++ b/src/search.cpp
@ -16,12 +16,20 @@

 #include "search.hpp"

+#include "book.hpp"
 #include "fs-compat.hpp"
+#include "helpers.hpp"
+#include "log.hpp"
 #include "zip.hpp"

 #include <boost/regex.hpp>
+#include <fmt/format.h>
+#include <fmt/ostream.h> // For compatibility with fmt 4.

 #include <algorithm>
+#include <array>
+#include <iterator>
+#include <memory>
 #include <string>
 #include <string_view>
 #include <vector>
@ -29,24 +37,30 @@
 namespace epubgrep::search
 {

-std::vector<match> search(const fs::path &filepath, std::string_view regex,
-                          const options &opts)
+using fmt::format;
+using std::string;
+
+std::vector<match> search(const fs::path &filepath,
+                          const std::string_view regex, const settings &opts)
 {
+    LOG(log::sev::info)
+        << format(R"(Starting search in {0:s} using regex "{1:s}")",
+                  filepath.c_str(), regex);
    boost::regex::flag_type flags{};

    switch (opts.regex)
    {
-    case regex_kind::basic:
+    case options::regex_kind::basic:
    {
        flags = opts.grep_like ? boost::regex::grep : boost::regex::basic;
        break;
    }
-    case regex_kind::extended:
+    case options::regex_kind::extended:
    {
        flags = opts.grep_like ? boost::regex::egrep : boost::regex::extended;
        break;
    }
-    case regex_kind::perl:
+    case options::regex_kind::perl:
    {
        flags = boost::regex::perl;
        break;
@ -58,136 +72,80 @@ std::vector<match> search(const fs::path &filepath, std::string_view regex,
        flags |= boost::regex::icase;
    }

-    boost::regex re(regex.data(), flags);
+    const boost::regex re(regex.data(), flags);
    std::vector<match> matches;
-    for (const auto &entry : zip::list(filepath))
+    auto book{book::read(filepath, opts.raw)};
+    for (const auto &file : book.files)
    {
-        auto document{zip::read_file(filepath, entry)};
-        if (!opts.raw)
-        {
-            cleanup_text(document);
-        }
-
-        std::string::const_iterator begin{document.begin()};
-        std::string::const_iterator end{document.end()};
-        boost::match_results<std::string::const_iterator> match_result;
-        std::string last_headline;
-        std::string last_page;
+        const auto &doc{file.second};
+        string::const_iterator begin{doc.text->begin()};
+        string::const_iterator end{doc.text->end()};
+        auto begin_text{begin};
+        boost::match_results<string::const_iterator> match_result;

        while (boost::regex_search(begin, end, match_result, re,
                                   boost::match_default))
        {
            match match; // FIXME: Rename variable or struct.
-            match.filepath = entry;
+            match.filepath_epub = filepath;
+            match.filepath_inside = file.first;
            match.text = match_result[0];
            match.context = context(match_result, opts.context);
-            const auto current_headline{headline(match_result.prefix().str())};
-            if (!current_headline.empty())
-            {
-                last_headline = current_headline;
-            }
-            match.headline = last_headline;
-            const auto current_page{page(match_result.prefix().str())};
-            if (!current_page.empty())
-            {
-                last_page = current_page;
-            }
-            match.page = last_page;
+            const auto pos = static_cast<size_t>(
+                std::distance(begin_text, match_result[0].begin()));
+            match.headline = headline(doc, pos);
+            match.page = page(doc, pos);
+            match.language = doc.language; // FIXME: Get language of match.

            matches.emplace_back(match);
-            begin = match_result[0].second;
+            begin = match_result[0].end();
        }
    }

    return matches;
 }

-void cleanup_text(std::string &text)
-{
-    size_t pos{};
-    while ((pos = text.find('<', pos)) != std::string::npos)
-    {
-        // Mark headlines. We need them later on.
-        std::string replacement;
-        if (boost::regex_match(text.substr(pos, 3), boost::regex{"<[hH][1-6]"}))
-        {
-            replacement = "<H>";
-        }
-        else if (boost::regex_match(text.substr(pos, 3),
-                                    boost::regex{"</[hH]"}))
-        {
-            replacement = "</H>";
-        }
-        else if (text.substr(pos, 5) == "<span")
-        {
-            auto endpos{text.find('>')};
-            boost::match_results<const char *> match;
-            const boost::regex re_pagebreak{".+pagebreak.+(title|aria-label)"
-                                            "=\"([[:alnum:]]+)\".*"};
-            if (boost::regex_search(text.substr(pos, endpos).data(), match,
-                                    re_pagebreak))
-            {
-                replacement = "<PAGE " + match[2] + ">";
-            }
-        }
-        text.replace(pos, text.find('>', pos) + 1 - pos, replacement);
-        pos += replacement.length();
-    }
-
-    pos = 0;
-    while ((pos = text.find('\r', pos)) != std::string::npos)
-    {
-        text.erase(pos, 1);
-    }
-
-    pos = 0;
-    while ((pos = text.find('\n', pos)) != std::string::npos)
-    {
-        text.replace(pos, 1, " ");
-    }
-
-    pos = 0;
-    while ((pos = text.find("  ", pos)) != std::string::npos)
-    {
-        text.replace(pos, 2, " ");
-    }
-}
-
-match_context
-context(const boost::match_results<std::string::const_iterator> &match,
-        std::uint64_t words)
+match_context context(const boost::match_results<string::const_iterator> &match,
+                      std::uint64_t words)
 {
    if (words == 0)
    {
        return {};
    }

-    const auto &prefix{match.prefix().str()};
-    const auto &suffix{match.suffix().str()};
-    size_t pos_before{prefix.length()};
-    size_t pos_after{};
-
    ++words;

+    const auto &rbegin_before{std::reverse_iterator(match.prefix().end())};
+    const auto &rend_before{std::reverse_iterator(match.prefix().begin())};
+
+    const auto &begin_after{match.suffix().begin()};
+    const auto &end_after{match.suffix().end()};
+
+    auto pos_before{rbegin_before};
+    auto pos_after{begin_after};
+
+    const std::array<char, 4> whitespace{' ', '\n', '\r', '\t'};
+
    while (words != 0)
    {
-        if (pos_before != 0)
+        if (pos_before != rend_before)
        {
-            pos_before = prefix.rfind(' ', pos_before);
-            if (pos_before != std::string::npos)
+            pos_before = std::find_first_of(pos_before, rend_before,
+                                            whitespace.begin(),
+                                            whitespace.end());
+            while (pos_before != rend_before
+                   && helpers::is_whitespace(*pos_before))
            {
-                --pos_before;
-            }
-            else
-            {
-                pos_before = 0;
+                ++pos_before;
            }
        }

-        if (pos_after != std::string::npos)
+        if (pos_after != end_after)
        {
-            pos_after = suffix.find(' ', pos_after);
-            if (pos_after != std::string::npos)
+            pos_after = std::find_first_of(pos_after, end_after,
+                                           whitespace.begin(),
+                                           whitespace.end());
+            while (pos_after != end_after && helpers::is_whitespace(*pos_after))
            {
                ++pos_after;
            }
@ -195,40 +153,19 @@ context(const boost::match_results<std::string::const_iterator> &match,
        words -= 1;
    }

-    if (pos_before != 0)
+    const string before_reversed(rbegin_before, pos_before);
+    string before(before_reversed.rbegin(), before_reversed.rend());
+    string after(begin_after, pos_after);
+    while (helpers::is_whitespace(*before.begin()))
    {
-        pos_before += 2;
+        before.erase(0, 1);
    }
-    if (pos_after != std::string::npos)
+    while (helpers::is_whitespace(*after.rbegin()))
    {
-        pos_after -= 1;
+        after.erase(after.size() - 1);
    }

-    return {prefix.substr(pos_before), suffix.substr(0, pos_after)};
-}
-
-std::string headline(const std::string_view prefix)
-{
-    size_t pos{prefix.length()};
-    while ((pos = prefix.rfind("<H>", pos)) != std::string_view::npos)
-    {
-        pos += 3;
-        return std::string{prefix.substr(pos, prefix.find('<', pos) - pos)};
-    }
-
-    return {};
-}
-
-std::string page(const std::string_view prefix)
-{
-    size_t pos{prefix.length()};
-    while ((pos = prefix.rfind("<PAGE ", pos)) != std::string_view::npos)
-    {
-        pos += 6;
-        return std::string{prefix.substr(pos, prefix.find('>', pos) - pos)};
-    }
-
-    return {};
+    return {before, after};
 }

 } // namespace epubgrep::search
--- a/src/search.hpp
+++ b/src/search.hpp
@ -18,10 +18,13 @@
 #define EPUBGREP_SEARCH_HPP

 #include "fs-compat.hpp"
+#include "options.hpp"

 #include <boost/regex.hpp>

+#include <cstddef>
 #include <cstdint>
+#include <map>
 #include <string>
 #include <string_view>
 #include <utility>
@ -34,48 +37,41 @@ using match_context = std::pair<std::string, std::string>;

 struct match
 {
-    std::string text;      //!< Matched string.
-    match_context context; //!< The context around the match.
-    std::string filepath;  //!< The file path of the matched line.
-    std::string headline;  //!< The last headline, if available.
-    std::string page;      //!< The page number, if available.
-};
+    fs::path filepath_epub;      //!< File path of the EPUB.
+    std::string text;            //!< Matched string.
+    match_context context;       //!< The context around the match.
+    std::string filepath_inside; //!< The file path of the matched line.
+    std::string headline;        //!< The last headline, if available.
+    std::string page;            //!< The page number, if available.
+    std::string language;        //!< Match language.
+} __attribute__((aligned(128)));

-enum class regex_kind
+struct settings
 {
-    basic,
-    extended,
-    perl
-};
-
-struct options
-{
-    regex_kind regex{regex_kind::basic};
+    options::regex_kind regex{options::regex_kind::basic};
    bool grep_like{false};
    bool ignore_case{false};
    bool raw{false};
    std::uint64_t context{0};
-};
+} __attribute__((aligned(16)));
+
+//! Text of one file, together with its headlines and pages.
+//! NOTE(review): the size_t keys are presumably offsets into text; confirm
+//! against the code that fills the maps.
+struct file_in_epub
+{
+    std::string text;
+    std::map<size_t, std::string> headlines;
+    std::map<size_t, std::string> pages;
+} __attribute__((aligned(128)));

 //! Search file, return matches.
 [[nodiscard]] std::vector<match> search(const fs::path &filepath,
                                        std::string_view regex,
-                                        const options &opts);
-
-//! Strip HTML, remove newlines, condense spaces.
-void cleanup_text(std::string &text);
+                                        const settings &opts);

 //! Return words before and after the match.
 [[nodiscard]] match_context
 context(const boost::match_results<std::string::const_iterator> &match,
        std::uint64_t words);

-//! Return last headline if possible.
-[[nodiscard]] std::string headline(std::string_view prefix);
-
-//! Return current page if possible.
-[[nodiscard]] std::string page(std::string_view prefix);
-
 } // namespace epubgrep::search

 #endif // EPUBGREP_SEARCH_HPP
--- a/src/zip.cpp
+++ b/src/zip.cpp
@ -17,6 +17,8 @@
 #include "zip.hpp"

 #include "fs-compat.hpp"
+#include "helpers.hpp"
+#include "log.hpp"

 #include <archive.h>
 #include <archive_entry.h>
@ -26,7 +28,10 @@

 #include <cstdlib>
 #include <cstring>
+#include <fstream>
+#include <stdexcept>
 #include <string>
+#include <string_view>
 #include <vector>

 namespace epubgrep::zip
@ -43,9 +48,22 @@ std::vector<std::string> list(const fs::path &filepath)
    std::vector<std::string> toc;
    while (archive_read_next_header(zipfile, &entry) == ARCHIVE_OK)
    {
-        toc.emplace_back(archive_entry_pathname_utf8(entry));
+        const auto *in_epub_filepath{archive_entry_pathname_utf8(entry)};
+        if (in_epub_filepath == nullptr)
+        { // If the encoding is broken, we skip the file.
+            LOG(log::sev::warning)
+                << format(translate("File in {0:s} is damaged. "
+                                    "Skipping in-EPUB file.\n")
+                              .str()
+                              .c_str(),
+                          filepath.c_str());
+            continue;
+        }
+        toc.emplace_back(in_epub_filepath);
+        DEBUGLOG << "Found in file: " << in_epub_filepath;
        archive_read_data_skip(zipfile);
    }
+
    close_file(zipfile, filepath);

    return toc;
@ -59,6 +77,16 @@ std::string read_file(const fs::path &filepath, std::string_view entry_path)
    while (archive_read_next_header(zipfile, &entry) == ARCHIVE_OK)
    {
        const auto *path{archive_entry_pathname_utf8(entry)};
+        if (path == nullptr)
+        { // If the encoding is broken, we skip the file.
+            LOG(log::sev::warning)
+                << format(translate("File in {0:s} is damaged. "
+                                    "Skipping in-EPUB file.\n")
+                              .str()
+                              .data(),
+                          filepath.c_str());
+            continue;
+        }
        if (std::strcmp(path, entry_path.data()) == 0)
        {
            const auto length{static_cast<size_t>(archive_entry_size(entry))};
@ -69,11 +97,15 @@ std::string read_file(const fs::path &filepath, std::string_view entry_path)

            if (result_length != length)
            {
-                throw exception{
-                    format(translate("Could not read {0:s} in {1:s}.").str(),
-                           entry_path, filepath.string())};
+                close_file(zipfile, filepath);
+
+                throw exception{format(
+                    translate("Could not read {0:s} in {1:s}.").str().c_str(),
+                    entry_path, filepath.string())};
            }

+            close_file(zipfile, filepath);
+
            return filecontents;
        }
        archive_read_data_skip(zipfile);
@ -81,12 +113,29 @@ std::string read_file(const fs::path &filepath, std::string_view entry_path)

    close_file(zipfile, filepath);

-    throw exception{format(translate("{0:s} not found in {1:s}.").str(),
+    if (entry_path == "META-INF/container.xml")
+    { // File is probably not an EPUB.
+        exception e{format(translate("{0:s} not found in {1:s}.").str().c_str(),
                           entry_path, filepath.string())};
+        e.code = 1;
+        throw exception{e};
+    }
+
+    LOG(log::sev::warning)
+        << format(translate("{0:s} not found in {1:s}.").str(), entry_path,
+                  filepath.string())
+        << '\n';
+    return {};
 }

 struct archive *open_file(const fs::path &filepath)
 {
+    // Throw exception if we can't open the file.
+    std::ifstream file;
+    file.exceptions(std::ios::failbit);
+    file.open(filepath);
+    file.close();
+
    auto *zipfile{archive_read_new()};
    archive_read_support_filter_all(zipfile);
    archive_read_support_format_zip(zipfile);
@ -94,8 +143,12 @@ struct archive *open_file(const fs::path &filepath)
    auto result{archive_read_open_filename(zipfile, filepath.c_str(), 10240)};
    if (result != ARCHIVE_OK)
    {
-        throw exception{format(translate("Could not open {0:s}.").str(),
-                               filepath.string())};
+        close_file(zipfile, filepath);
+
+        exception e{format(translate("Could not open {0:s}.").str().c_str(),
+                           filepath.string())};
+        e.code = 1;
+        throw exception{e};
    }

    return zipfile;
@ -106,8 +159,9 @@ void close_file(struct archive *zipfile, const fs::path &filepath)
    auto result{archive_read_free(zipfile)};
    if (result != ARCHIVE_OK)
    {
-        throw exception{format(translate("Could not close {0:s}.").str(),
-                               filepath.string())};
+        throw exception{
+            format(translate("Could not close {0:s}.").str().c_str(),
+                   filepath.string())};
    }
 }

--- a/src/zip.hpp
+++ b/src/zip.hpp
@ -21,6 +21,7 @@

 #include <archive.h>

+#include <cstdint>
 #include <stdexcept>
 #include <string>
 #include <string_view>
@ -47,6 +48,8 @@ class exception : public std::runtime_error
 {
 public:
    using std::runtime_error::runtime_error;
+
+    std::uint8_t code{0};
 };

 } // namespace epubgrep::zip
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@ -1,15 +1,20 @@
-include(CTest)
-
-file(GLOB sources_tests test_*.cpp)
+file(GLOB sources_tests CONFIGURE_DEPENDS test_*.cpp)
 file(COPY "test.zip" DESTINATION ${CMAKE_CURRENT_BINARY_DIR})
+file(COPY "test.epub2" DESTINATION ${CMAKE_CURRENT_BINARY_DIR})
+file(COPY "test.epub3" DESTINATION ${CMAKE_CURRENT_BINARY_DIR})

 find_package(Catch2 CONFIG)

-if(Catch2_FOUND)                # Catch 2.x
+if(Catch2_FOUND)                # Catch 2.x / 3.x
  include(Catch)
  add_executable(all_tests main.cpp ${sources_tests})
-  target_link_libraries(all_tests
-    PRIVATE Catch2::Catch2 ${PROJECT_NAME}_lib)
+  if(TARGET Catch2::Catch2WithMain) # Catch 3.x
+    target_link_libraries(all_tests
+      PRIVATE Catch2::Catch2WithMain ${PROJECT_NAME}_lib)
+  else()                        # Catch 2.x
+    target_link_libraries(all_tests
+      PRIVATE Catch2::Catch2 ${PROJECT_NAME}_lib)
+  endif()
  target_include_directories(all_tests PRIVATE "/usr/include/catch2")
  catch_discover_tests(all_tests EXTRA_ARGS "${EXTRA_TEST_ARGS}")
 else()                          # Catch 1.x
--- a/tests/main.cpp
+++ b/tests/main.cpp
@ -1,3 +1,8 @@
 #define CATCH_CONFIG_MAIN

-#include <catch.hpp>
+// catch 3 does not have catch.hpp anymore
+#if __has_include(<catch.hpp>)
+#    include <catch.hpp>
+#else
+#    include <catch_all.hpp>
+#endif
--- a/tests/test.epub2
+++ b/tests/test.epub2
--- a/tests/test.epub3
+++ b/tests/test.epub3
--- a/tests/test_helpers.cpp
+++ b/tests/test_helpers.cpp
@ -0,0 +1,116 @@
+#include "fs-compat.hpp"
+#include "helpers.hpp"
+
+// catch 3 does not have catch.hpp anymore
+#if __has_include(<catch.hpp>)
+#    include <catch.hpp>
+#else
+#    include <catch_all.hpp>
+#endif
+
+#include <array>
+#include <exception>
+#include <string>
+
+// Tests for is_whitespace(), urldecode() and unescape_html() from helpers.hpp.
+// Each call is wrapped in try/catch; a caught std::exception is recorded in
+// `exception` and then checked with REQUIRE_FALSE.
+SCENARIO("Helpers work as intended")
+{
+    bool exception{false};
+    bool result{false};
+
+    SECTION("is_whitespace() does what it should do")
+    {
+        // One WHEN per character: space, newline, carriage return and tab.
+        for (const auto c : std::array{' ', '\n', '\r', '\t'})
+        {
+            WHEN(std::string("char is ") + c)
+            {
+                try
+                {
+                    result = epubgrep::helpers::is_whitespace(c);
+                }
+                catch (const std::exception &)
+                {
+                    exception = true;
+                }
+
+                THEN("No exception is thrown")
+                AND_THEN("Whitespace is detected")
+                {
+                    REQUIRE_FALSE(exception);
+                    REQUIRE(result);
+                }
+            }
+        }
+    }
+
+    SECTION("urldecode() doesn't fail and returns the decoded string")
+    {
+        GIVEN("The string test%20folder/%2Afile%5Btest%5D%2A")
+        {
+            // Percent-encoded space, asterisks and square brackets.
+            std::string encoded_text{"test%20folder/%2Afile%5Btest%5D%2A"};
+            std::string decoded_text{};
+
+            try
+            {
+                decoded_text = epubgrep::helpers::urldecode(encoded_text);
+            }
+            catch (const std::exception &)
+            {
+                exception = true;
+            }
+
+            THEN("No exception is thrown")
+            AND_THEN("It returns the decoded text")
+            {
+                REQUIRE_FALSE(exception);
+                REQUIRE(decoded_text == "test folder/*file[test]*");
+            }
+        }
+    }
+
+    SECTION("unescape_html() doesn't fail and returns the decoded text")
+    {
+        GIVEN("A text with a named entity in it.")
+        {
+            std::string encoded_text{"Sleepy &amp; ready for bed"};
+            std::string decoded_text{};
+
+            try
+            {
+                decoded_text = epubgrep::helpers::unescape_html(encoded_text);
+            }
+            catch (const std::exception &)
+            {
+                exception = true;
+            }
+
+            THEN("No exception is thrown")
+            AND_THEN("It returns the unescaped text")
+            {
+                REQUIRE_FALSE(exception);
+                REQUIRE(decoded_text == "Sleepy & ready for bed");
+            }
+        }
+
+        GIVEN("A text with numbered entities in it.")
+        {
+            // One hexadecimal (&#x26;) and one decimal (&#32;) entity.
+            std::string encoded_text{"Sleepy &#x26; ready for&#32;bed"};
+            std::string decoded_text{};
+
+            try
+            {
+                decoded_text = epubgrep::helpers::unescape_html(encoded_text);
+            }
+            catch (const std::exception &)
+            {
+                exception = true;
+            }
+
+            THEN("No exception is thrown")
+            AND_THEN("It returns the unescaped text")
+            {
+                REQUIRE_FALSE(exception);
+                REQUIRE(decoded_text == "Sleepy & ready for bed");
+            }
+        }
+    }
+}
--- a/tests/test_search_epub.cpp
+++ b/tests/test_search_epub.cpp
@ -0,0 +1,196 @@
+#include "fs-compat.hpp"
+#include "options.hpp"
+#include "search.hpp"
+
+// catch 3 does not have catch.hpp anymore
+#if __has_include(<catch.hpp>)
+#    include <catch.hpp>
+#else
+#    include <catch_all.hpp>
+#endif
+
+#include <clocale>
+#include <exception>
+#include <string>
+#include <vector>
+
+SCENARIO("Searching EPUB files works")
+{
+    GIVEN("Our test EPUB2 file")
+    {
+        fs::path epubfile{"test.epub2"};
+        std::setlocale(LC_CTYPE,
+                       ""); // Needed for utf-8 support in libarchive.
+        bool exception{false};
+
+        REQUIRE(fs::exists(epubfile));
+
+        SECTION("search() doesn't fail and returns the right lines")
+        {
+            std::vector<epubgrep::search::match> matches;
+            epubgrep::search::settings opts;
+
+            WHEN(R"(We search for ‘test-\w+’ using perl regular expressions)")
+            {
+                try
+                {
+                    opts.regex = epubgrep::options::regex_kind::perl;
+                    matches = epubgrep::search::search(epubfile, R"(test-\w+)",
+                                                       opts);
+                }
+                catch (const std::exception &)
+                {
+                    exception = true;
+                }
+
+                THEN("No exception is thrown")
+                AND_THEN("It returns the match correctly")
+                {
+                    REQUIRE_FALSE(exception);
+                    REQUIRE(matches.at(0).filepath_inside == "start.xhtml");
+                    REQUIRE(matches.at(0).text == "test-file");
+                    REQUIRE(matches.at(1).text == "test-suite");
+                    REQUIRE(matches.at(1).headline == "Test for epubgrep");
+                }
+            }
+
+            WHEN("We search for ‘href’ with raw = 1 and context = 1.")
+            {
+                try
+                {
+                    opts.raw = true;
+                    opts.context = 1;
+                    matches = epubgrep::search::search(epubfile, "href", opts);
+                }
+                catch (const std::exception &)
+                {
+                    exception = true;
+                }
+
+                THEN("No exception is thrown")
+                AND_THEN("It returns the match correctly")
+                {
+                    REQUIRE_FALSE(exception);
+                    REQUIRE(matches.at(0).filepath_inside == "start.xhtml");
+                    REQUIRE(matches.at(0).context.first == "<a ");
+                    REQUIRE(matches.at(0).context.second
+                            == R"(="https://schlomp.space/tastytea/)"
+                               R"(epubgrep">epubgrep</a>. Just)");
+                    REQUIRE(matches.at(1).filepath_inside == "metadata.opf");
+                    REQUIRE(matches.at(1).context.first == "<item ");
+                    REQUIRE(matches.at(1).context.second
+                            == R"(="start.xhtml" id="start")");
+                    REQUIRE(matches.at(2).filepath_inside == "metadata.opf");
+                    REQUIRE(matches.at(2).context.first == "<item ");
+                    REQUIRE(matches.at(2).context.second
+                            == R"(="toc.ncx" id="ncx")");
+                }
+            }
+        }
+    }
+
+    // TODO: Figure out how to do this better (share code with EPUB2 tests?).
+    GIVEN("Our test EPUB3 file")
+    {
+        fs::path epubfile{"test.epub3"};
+        std::setlocale(LC_CTYPE, ""); // Needed for utf-8 support in libarchive.
+        bool exception{false};
+
+        REQUIRE(fs::exists(epubfile));
+
+        SECTION("search() doesn't fail and returns the right lines")
+        {
+            std::vector<epubgrep::search::match> matches;
+            epubgrep::search::settings opts;
+
+            WHEN(R"(We search for ‘test-\w+’ using perl regular expressions)")
+            {
+                try
+                {
+                    opts.regex = epubgrep::options::regex_kind::perl;
+                    matches = epubgrep::search::search(epubfile, R"(test-\w+)",
+                                                       opts);
+                }
+                catch (const std::exception &)
+                {
+                    exception = true;
+                }
+
+                THEN("No exception is thrown")
+                AND_THEN("It returns the match correctly")
+                {
+                    REQUIRE_FALSE(exception);
+                    REQUIRE(matches.at(0).filepath_inside == "start.xhtml");
+                    REQUIRE(matches.at(0).text == "test-file");
+                    REQUIRE(matches.at(1).text == "test-suite");
+                    REQUIRE(matches.at(1).headline == "Test for epubgrep");
+                }
+            }
+
+            WHEN("We search for ‘href’ with raw = 1 and context = 1.")
+            {
+                try
+                {
+                    opts.raw = true;
+                    opts.context = 1;
+                    matches = epubgrep::search::search(epubfile, "href", opts);
+                }
+                catch (const std::exception &)
+                {
+                    exception = true;
+                }
+
+                THEN("No exception is thrown")
+                AND_THEN("It returns the match correctly")
+                {
+                    REQUIRE_FALSE(exception);
+                    REQUIRE(matches.at(0).filepath_inside == "start.xhtml");
+                    REQUIRE(matches.at(0).context.first == "<a ");
+                    REQUIRE(matches.at(0).context.second
+                            == R"(="https://schlomp.space/tastytea/)"
+                               R"(epubgrep">epubgrep</a>. Just)");
+                    REQUIRE(matches.at(1).filepath_inside == "nav.xhtml");
+                    REQUIRE(matches.at(1).context.first == "<li><a ");
+                    REQUIRE(matches.at(1).context.second
+                            == std::string(R"(="start.xhtml">Start</a></li>)")
+                                   + "\n  </ol>");
+                    REQUIRE(matches.at(2).filepath_inside == "metadata.opf");
+                    REQUIRE(matches.at(2).context.first == "<item ");
+                    REQUIRE(matches.at(2).context.second
+                            == R"(="start.xhtml" id="start")");
+                    REQUIRE(matches.at(3).filepath_inside == "metadata.opf");
+                    REQUIRE(matches.at(3).context.first == R"(id="nav" )");
+                    REQUIRE(matches.at(3).context.second
+                            == R"(="nav.xhtml" )"
+                               R"(media-type="application/xhtml+xml")");
+                }
+            }
+
+            WHEN("We search for a phrase at the beginning of the file "
+                 "and specify a very high context")
+            {
+                try
+                {
+                    opts.context = 69069;
+                    matches = epubgrep::search::search(epubfile, "Test for",
+                                                       opts);
+                }
+                catch (const std::exception &)
+                {
+                    exception = true;
+                }
+
+                THEN("No exception is thrown")
+                AND_THEN("It returns the match correctly")
+                {
+                    REQUIRE_FALSE(exception);
+                    REQUIRE(matches.at(0).filepath_inside == "start.xhtml");
+                    REQUIRE(matches.at(0).text == "Test for");
+                    REQUIRE(matches.at(0).headline.empty());
+                    REQUIRE(matches.at(0).context.first.empty());
+                    REQUIRE(*matches.at(0).context.second.rbegin() == '.');
+                }
+            }
+        }
+    }
+}
--- a/tests/test_search_helpers.cpp
+++ b/tests/test_search_helpers.cpp
@ -1,7 +1,13 @@
+#include "book.hpp"
 #include "fs-compat.hpp"
 #include "search.hpp"

-#include <catch.hpp>
+// catch 3 does not have catch.hpp anymore
+#if __has_include(<catch.hpp>)
+#    include <catch.hpp>
+#else
+#    include <catch_all.hpp>
+#endif

 #include <clocale>
 #include <exception>
@ -17,7 +23,7 @@ SCENARIO("Searching helpers work as intended")

        REQUIRE(fs::exists(zipfile));

-        SECTION("cleanup_texts() does what it should do")
+        SECTION("cleanup_text() does what it should do")
        {
            std::string text;

@ -26,7 +32,7 @@ SCENARIO("Searching helpers work as intended")
                text = "Moss";
                try
                {
-                    epubgrep::search::cleanup_text(text);
+                    text = epubgrep::book::process_page(text).text_cleaned;
                }
                catch (const std::exception &)
                {
@ -46,7 +52,7 @@ SCENARIO("Searching helpers work as intended")
                text = "💖\r\r🦝";
                try
                {
-                    epubgrep::search::cleanup_text(text);
+                    text = epubgrep::book::process_page(text).text_cleaned;
                }
                catch (const std::exception &)
                {
@ -54,7 +60,7 @@ SCENARIO("Searching helpers work as intended")
                }

                THEN("No exception is thrown")
-                AND_THEN("The \\r are removed unchanged")
+                AND_THEN("The \\r are removed")
                {
                    REQUIRE_FALSE(exception);
                    REQUIRE(text == "💖🦝");
@ -66,7 +72,7 @@ SCENARIO("Searching helpers work as intended")
                text = "Moss\n\n\n\n\n\nis good.";
                try
                {
-                    epubgrep::search::cleanup_text(text);
+                    text = epubgrep::book::process_page(text).text_cleaned;
                }
                catch (const std::exception &)
                {
@ -91,8 +97,8 @@ SCENARIO("Searching helpers work as intended")
                text = "… <h3>Soup</h3> …";
                try
                {
-                    epubgrep::search::cleanup_text(text);
-                    text = epubgrep::search::headline(text);
+                    auto file{epubgrep::book::process_page(text)};
+                    text = epubgrep::book::headline(file, text.size());
                }
                catch (const std::exception &)
                {
@ -106,6 +112,49 @@ SCENARIO("Searching helpers work as intended")
                    REQUIRE(text == "Soup");
                }
            }
+
+            WHEN("There is a <span> in the h2 headline")
+            {
+                text = "… <h2>The <span class=\"long\">long</span> "
+                       "road to nowhere</h2> …";
+                try
+                {
+                    auto file{epubgrep::book::process_page(text)};
+                    text = epubgrep::book::headline(file, text.size());
+                }
+                catch (const std::exception &)
+                {
+                    exception = true;
+                }
+
+                THEN("No exception is thrown")
+                AND_THEN("The headline is correctly extracted")
+                {
+                    REQUIRE_FALSE(exception);
+                    REQUIRE(text == "The long road to nowhere");
+                }
+            }
+
+            WHEN("There are tags that start with h but are not headlines")
+            {
+                text = "<html><hr>The long<section>road to nowhere</section>";
+                try
+                {
+                    auto file{epubgrep::book::process_page(text)};
+                    text = epubgrep::book::headline(file, text.size());
+                }
+                catch (const std::exception &)
+                {
+                    exception = true;
+                }
+
+                THEN("No exception is thrown")
+                AND_THEN("No headline is extracted")
+                {
+                    REQUIRE_FALSE(exception);
+                    REQUIRE(text.empty());
+                }
+            }
        }

        SECTION("page() does what it should do")
@ -117,8 +166,8 @@ SCENARIO("Searching helpers work as intended")
                text = R"(… <span epub:type="pagebreak" … title="69"/> …)";
                try
                {
-                    epubgrep::search::cleanup_text(text);
-                    text = epubgrep::search::page(text);
+                    auto file{epubgrep::book::process_page(text)};
+                    text = epubgrep::book::page(file, text.size());
                }
                catch (const std::exception &)
                {
@ -138,8 +187,8 @@ SCENARIO("Searching helpers work as intended")
                text = R"(… <span role="doc-pagebreak" … aria-label="69"/> …)";
                try
                {
-                    epubgrep::search::cleanup_text(text);
-                    text = epubgrep::search::page(text);
+                    auto file{epubgrep::book::process_page(text)};
+                    text = epubgrep::book::page(file, text.size());
                }
                catch (const std::exception &)
                {
--- a/tests/test_search_zip.cpp
+++ b/tests/test_search_zip.cpp
@ -1,14 +1,21 @@
 #include "fs-compat.hpp"
+#include "options.hpp"
 #include "search.hpp"

-#include <catch.hpp>
+// catch 3 does not have catch.hpp anymore
+#if __has_include(<catch.hpp>)
+#    include <catch.hpp>
+#else
+#    include <catch_all.hpp>
+#endif

 #include <clocale>
 #include <exception>
+#include <iostream>
 #include <string>
 #include <vector>

-SCENARIO("Searching works")
+SCENARIO("Searching ZIP files works")
 {
    GIVEN("Our test zip file")
    {
@ -21,17 +28,19 @@ SCENARIO("Searching works")
        SECTION("search() doesn't fail and returns the right lines")
        {
            std::vector<epubgrep::search::match> matches;
-            epubgrep::search::options opts;
+            epubgrep::search::settings opts;
+            opts.raw = true;

            WHEN("We search for ‘📙+\\w?’ using extended regular expressions")
            {
                try
                {
-                    opts.regex = epubgrep::search::regex_kind::extended;
+                    opts.regex = epubgrep::options::regex_kind::extended;
                    matches = epubgrep::search::search(zipfile, "📙+\\w?", opts);
                }
-                catch (const std::exception &)
+                catch (const std::exception &e)
                {
+                    std::cerr << "EXCEPTION: " << e.what() << '\n';
                    exception = true;
                }

@ -39,7 +48,7 @@ SCENARIO("Searching works")
                AND_THEN("It returns the match correctly")
                {
                    REQUIRE_FALSE(exception);
-                    REQUIRE(matches.at(0).filepath == "test folder/😊");
+                    REQUIRE(matches.at(0).filepath_inside == "test folder/😊");
                    REQUIRE(matches.at(0).text == "📙");
                }
            }
@ -51,8 +60,9 @@ SCENARIO("Searching works")
                    opts.context = 1;
                    matches = epubgrep::search::search(zipfile, "📗", opts);
                }
-                catch (const std::exception &)
+                catch (const std::exception &e)
                {
+                    std::cerr << "EXCEPTION: " << e.what() << '\n';
                    exception = true;
                }

@ -60,41 +70,50 @@ SCENARIO("Searching works")
                AND_THEN("It returns the match correctly")
                {
                    REQUIRE_FALSE(exception);
-                    REQUIRE(matches.at(0).filepath == "test folder/😊");
+                    REQUIRE(matches.at(0).filepath_inside == "test folder/😊");
                    REQUIRE(matches.at(0).text == "📗");
-                    REQUIRE(matches.at(0).context.first == "📖 📘");
-                    REQUIRE(matches.at(0).context.second == "📙 ");
+                    REQUIRE(matches.at(0).context.first == "📖\n\n📘");
+                    REQUIRE(matches.at(0).context.second == "📙");
                }
            }

-            WHEN("We search for ‘ ’ (space) with context = 1.")
+            WHEN("We search for ‘[ \\n]’ with context = 1.")
            {
                try
                {
                    opts.context = 1;
-                    matches = epubgrep::search::search(zipfile, " ", opts);
+                    opts.regex = epubgrep::options::regex_kind::perl;
+                    matches = epubgrep::search::search(zipfile, R"([ \n])",
+                                                       opts);
                }
-                catch (const std::exception &)
+                catch (const std::exception &e)
                {
+                    std::cerr << "EXCEPTION: " << e.what() << '\n';
                    exception = true;
                }

                THEN("No exception is thrown")
                AND_THEN("It returns the match correctly")
                {
+                    // I looked at this a week or so after I had written it,
+                    // and I have come to the realization that this is a tiny
+                    // bit more complicated than strictly required. 😄
+                    // TODO: Rewrite test.zip and tests to be easier to
+                    // understand.
                    REQUIRE_FALSE(exception);
-                    REQUIRE(matches.at(1).filepath == "test folder/test file");
+                    REQUIRE(matches.at(1).filepath_inside
+                            == "test folder/test file");
                    REQUIRE(matches.at(1).text == " ");
                    REQUIRE(matches.at(1).context.first == "don't");
                    REQUIRE(matches.at(1).context.second == "want to");
-                    REQUIRE(matches.at(10).filepath == "test folder/😊");
-                    REQUIRE(matches.at(10).text == " ");
+                    REQUIRE(matches.at(10).filepath_inside == "test folder/😊");
+                    REQUIRE(matches.at(10).text == "\n");
                    REQUIRE(matches.at(10).context.first == "📖");
-                    REQUIRE(matches.at(10).context.second == "📘📗📙 ");
-                    REQUIRE(matches.at(11).filepath == "test folder/😊");
-                    REQUIRE(matches.at(11).text == " ");
-                    REQUIRE(matches.at(11).context.first == "📘📗📙");
-                    REQUIRE(matches.at(11).context.second == "");
+                    REQUIRE(matches.at(10).context.second == "\n📘📗📙");
+                    REQUIRE(matches.at(12).filepath_inside == "test folder/😊");
+                    REQUIRE(matches.at(12).text == "\n");
+                    REQUIRE(matches.at(12).context.first == "📘📗📙");
+                    REQUIRE(matches.at(12).context.second.empty());
                }
            }

@ -104,12 +123,13 @@ SCENARIO("Searching works")
                try
                {
                    opts.context = 1;
-                    opts.regex = epubgrep::search::regex_kind::extended;
+                    opts.regex = epubgrep::options::regex_kind::perl;
                    matches = epubgrep::search::search(
                        zipfile, R"(work\s[\w]+\.\W[\w']+\Wstay)", opts);
                }
-                catch (const std::exception &)
+                catch (const std::exception &e)
                {
+                    std::cerr << "EXCEPTION: " << e.what() << '\n';
                    exception = true;
                }

@ -117,8 +137,9 @@ SCENARIO("Searching works")
                AND_THEN("It returns the match correctly")
                {
                    REQUIRE_FALSE(exception);
-                    REQUIRE(matches.at(0).filepath == "test folder/test file");
-                    REQUIRE(matches.at(0).text == "work today. I'm stay");
+                    REQUIRE(matches.at(0).filepath_inside
+                            == "test folder/test file");
+                    REQUIRE(matches.at(0).text == "work today.\nI'm stay");
                    REQUIRE(matches.at(0).context.first == "to ");
                    REQUIRE(matches.at(0).context.second == "ing in");
                }
--- a/tests/test_zip.cpp
+++ b/tests/test_zip.cpp
@ -1,7 +1,12 @@
 #include "fs-compat.hpp"
 #include "zip.hpp"

-#include <catch.hpp>
+// catch 3 does not have catch.hpp anymore
+#if __has_include(<catch.hpp>)
+#    include <catch.hpp>
+#else
+#    include <catch_all.hpp>
+#endif

 #include <clocale>
 #include <exception>
--- a/translations/CMakeLists.txt
+++ b/translations/CMakeLists.txt
@ -1,6 +1,7 @@
 set(potfile "${PROJECT_SOURCE_DIR}/translations/${PROJECT_NAME}.pot")
-file(GLOB po_src_files "../src/*pp")
-file(GLOB po_src_files_relative RELATIVE "${PROJECT_SOURCE_DIR}" "../src/*pp")
+file(GLOB po_src_files CONFIGURE_DEPENDS "../src/*pp")
+file(GLOB po_src_files_relative CONFIGURE_DEPENDS
+  RELATIVE "${PROJECT_SOURCE_DIR}" "../src/*pp")

 add_custom_command(OUTPUT ${potfile}
  COMMAND "${XGETTEXT_CMD}"
@ -24,11 +25,11 @@ add_custom_target(${PROJECT_NAME}_pot
 unset(po_src_files)
 unset(po_src_files_relative)

-file(GLOB po_files "*.po")
+file(GLOB po_files CONFIGURE_DEPENDS "*.po")

 # Hack to prevent GETTEXT_CREATE_TRANSLATIONS from updating the .po files.
 set(backup_GETTEXT_MSGMERGE_EXECUTABLE ${GETTEXT_MSGMERGE_EXECUTABLE})
-set(GETTEXT_MSGMERGE_EXECUTABLE "echo")
+set(GETTEXT_MSGMERGE_EXECUTABLE "true")
 # Creates install target automatically.
 GETTEXT_CREATE_TRANSLATIONS(${potfile} ALL ${po_files})
 set(GETTEXT_MSGMERGE_EXECUTABLE ${backup_GETTEXT_MSGMERGE_EXECUTABLE})
--- a/translations/de.po
+++ b/translations/de.po
@ -1,95 +1,175 @@
 msgid ""
 msgstr ""
-"Project-Id-Version: epubgrep 0.2.0\n"
+"Project-Id-Version: epubgrep 0.6.0\n"
 "Report-Msgid-Bugs-To: \n"
-"POT-Creation-Date: 2021-05-25 12:32+0200\n"
-"PO-Revision-Date: 2021-05-25 12:32+0200\n"
+"POT-Creation-Date: 2021-08-20 17:06+0200\n"
+"PO-Revision-Date: 2021-08-20 17:07+0200\n"
 "Last-Translator: tastytea <tastytea@tastytea.de>\n"
 "Language-Team: tastytea <https://schlomp.space/tastytea/epubgrep>\n"
 "Language: de\n"
 "MIME-Version: 1.0\n"
 "Content-Type: text/plain; charset=UTF-8\n"
 "Content-Transfer-Encoding: 8bit\n"
-"X-Generator: Poedit 2.4.3\n"
+"X-Generator: Poedit 3.0\n"
 "X-Poedit-Basepath: ..\n"
 "Plural-Forms: nplurals=2; plural=(n != 1);\n"
 "X-Poedit-SourceCharset: UTF-8\n"
 "X-Poedit-KeywordsList: translate\n"
 "X-Poedit-SearchPath-0: .\n"

-#: src/main.cpp:63 src/main.cpp:143
-msgid "ERROR: "
-msgstr "FEHLER: "
+# „Spine“ ist ein Fachbegriff, daher habe ich ihn nicht übersetzt.
+#: src/book.cpp:284
+msgid "{0:s} is damaged. Could not read spine. Skipping file.\n"
+msgstr ""
+"{0:s} ist beschädigt. Konnte „Spine“ nicht lesen. Überspringe Datei.\n"

-#: src/main.cpp:64
-msgid "Error while parsing options."
-msgstr "Fehler während Optionen interpretiert wurden."
+#: src/log.cpp:70
+msgid "WARNING"
+msgstr "WARNUNG"

-#: src/main.cpp:144
-msgid "Error while searching."
-msgstr "Fehler während der Suche."
+#: src/log.cpp:73
+msgid "ERROR"
+msgstr "FEHLER"

-#: src/options.cpp:46
-msgid "Available options"
-msgstr "Verfügbare Optionen"
+#: src/log.cpp:77
+msgid "FATAL ERROR"
+msgstr "SCHWERER FEHLER"

-#: src/options.cpp:50
+#: src/main.cpp:83
+msgid " (while parsing options)"
+msgstr " (während Optionen interpretiert wurden)"
+
+#: src/main.cpp:129
+msgid "Could not open {0:s}: {1:s}"
+msgstr "Konnte {0:s} nicht öffnen: {1:s}"
+
+#: src/main.cpp:179
+msgid " (while opening {0:s})"
+msgstr " (während {0:s} geöffnet wurde)"
+
+#: src/main.cpp:237
+msgid "{0:d} of {1:d} books searched."
+msgstr "{0:d} von {1:d} Büchern durchsucht."
+
+#: src/main.cpp:241
+msgid "All books searched."
+msgstr "Alle Bücher durchsucht."
+
+#: src/options.cpp:53
+msgid "General options"
+msgstr "Allgemeine Optionen"
+
+#: src/options.cpp:56
 msgid "Display this help and exit."
 msgstr "Diese Hilfe ausgeben und beenden."

-#: src/options.cpp:52
+#: src/options.cpp:58
 msgid "Display version information and exit."
 msgstr "Versionsinformationen ausgeben und beenden."

-#: src/options.cpp:54
+#: src/options.cpp:60
+msgid "Enable debug output."
+msgstr "Debug-Ausgabe einschalten."
+
+#: src/options.cpp:63
+msgid "Search options"
+msgstr "Suchoptionen"
+
+#: src/options.cpp:66
 msgid "PATTERN is a basic regular expression (default)."
 msgstr "MUSTER ist eine „basic regular expression“ (standard)."

-#: src/options.cpp:57
+#: src/options.cpp:69
 msgid "PATTERN is an extended regular expression."
 msgstr "MUSTER ist eine „extended regular expression“."

-#: src/options.cpp:59
+#: src/options.cpp:71
 msgid "Use grep-variation of regular expressions with -G and -E."
 msgstr "Benutze grep-Variante von regulären ausdrücken mit -G und -E."

-#: src/options.cpp:62
+#: src/options.cpp:74
 msgid "PATTERN is a Perl regular expression."
 msgstr "MUSTER ist ein regulärer Ausdruck, wie Perl ihn akzeptiert."

-#: src/options.cpp:64
+#: src/options.cpp:77
 msgid "Ignore case distinctions in pattern and data."
 msgstr "Unterschied zwischen Groß- und Kleinschreibung ignorieren."

-#: src/options.cpp:67
-msgid "PATTERN"
-msgstr "MUSTER"
-
-#: src/options.cpp:68
-msgid "Use additional PATTERN for matching."
-msgstr "Benutze zusätzliches MUSTER zum Abgleich."
-
-#: src/options.cpp:70
+#: src/options.cpp:80
 msgid "Do not clean up text before searching."
 msgstr "Nicht den Text vor dem suchen säubern."

-#: src/options.cpp:72
+#: src/options.cpp:82
+msgid "Read all files under each directory, recursively."
+msgstr "Lies rekursiv alle Dateien unter jedem Verzeichnis."
+
+#: src/options.cpp:85
+msgid "Read all files under each directory, recursively, following symlinks."
+msgstr ""
+"Lies rekursiv alle Dateien unter jedem Verzeichnis und folge dabei symlinks."
+
+#: src/options.cpp:88
+msgid "PATTERN"
+msgstr "MUSTER"
+
+#: src/options.cpp:89
+msgid "Use additional PATTERN for matching."
+msgstr "Benutze zusätzliches MUSTER zum Abgleich."
+
+#: src/options.cpp:92
+msgid "Output options"
+msgstr "Ausgabeoptionen"
+
+#: src/options.cpp:95 src/options.cpp:113
 msgid "NUMBER"
 msgstr "ANZAHL"

-#: src/options.cpp:73
+#: src/options.cpp:96
 msgid "Print NUMBER words of context around matches."
 msgstr "ANZAHL Wörter an Kontext um die Treffer herum ausgeben."

-#: src/options.cpp:75
-msgid "Do not color matches."
-msgstr "Färbe die Treffer nicht ein."
+#: src/options.cpp:98
+msgid "Turn off colors and other decorations."
+msgstr "Schalte Farben und andere Dekorationen aus."

-#: src/options.cpp:116
-msgid "Usage: epubgrep [OPTION]… PATTERN [FILE]…\n"
-msgstr "Aufruf: epubgrep [OPTION]… MUSTER [DATEI]…\n"
+# Bezieht sich auf --no-filename.
+#: src/options.cpp:100
+msgid "WHICH"
+msgstr "WELCHE"

-#: src/options.cpp:118
+#: src/options.cpp:101
+msgid ""
+"Suppress the mentioning of file names on output. WHICH is ‘filesystem’, ‘in-"
+"epub’ or ‘all’."
+msgstr ""
+"Unterdrücke die Erwähnung der Dateinamen in der Ausgabe. WELCHE kann "
+"‚filesystem‘, ‚in-epub‘ oder ‚all‘ sein."
+
+#: src/options.cpp:104
+msgid "Ignore errors about wrong file formats."
+msgstr "Ignoriere Fehlermeldungen wegen des falschen Dateiformats."
+
+#: src/options.cpp:106
+msgid "Output JSON instead of plain text."
+msgstr "Gib JSON statt Klartext aus."
+
+#: src/options.cpp:108
+msgid "Output HTML instead of plain text."
+msgstr "Gib HTML statt Klartext aus."
+
+#: src/options.cpp:110
+msgid "Output status message every STATUS-INTERVAL seconds."
+msgstr "Gib alle STATUS-INTERVAL Sekunden eine Statusmeldung aus."
+
+#: src/options.cpp:114
+msgid "Set status message interval to NUMBER seconds."
+msgstr "Setze Intervall für Statusmeldungen auf ANZAHL Sekunden."
+
+#: src/options.cpp:162
+msgid "Usage: epubgrep [OPTION]… PATTERN FILE…\n"
+msgstr "Aufruf: epubgrep [OPTION]… MUSTER DATEI…\n"
+
+#: src/options.cpp:164
 msgid ""
 "\n"
 "You can access the full manual with `man epubgrep`.\n"
@ -97,7 +177,7 @@ msgstr ""
 "\n"
 "Du kannst mit `man epubgrep` auf das vollständige Handbuch zugreifen.\n"

-#: src/options.cpp:125
+#: src/options.cpp:171
 msgid ""
 "Copyright © 2021 tastytea <tastytea@tastytea.de>\n"
 "License AGPL-3.0-only <https://gnu.org/licenses/agpl.html>.\n"
@ -109,18 +189,51 @@ msgstr ""
 "Für dieses Programm besteht KEINERLEI GARANTIE. Dies ist freie Software,\n"
 "die Sie unter bestimmten Bedingungen weitergeben dürfen.\n"

-#: src/zip.cpp:73
+#: src/output.cpp:47
+msgid "  In {0:s}: \n"
+msgstr "  In {0:s}:\n"
+
+# Sprache der Benutzeroberfläche.
+#: src/output.cpp:145
+msgid "en"
+msgstr "de"
+
+#: src/output.cpp:157
+msgid "File {0:d}"
+msgstr "Datei {0:d}"
+
+#: src/output.cpp:172
+msgid "File path (in EPUB file)"
+msgstr "Dateipfad (innerhalb der EPUB-Datei)"
+
+#: src/output.cpp:176
+msgid "Last headline"
+msgstr "Letzte Überschrift"
+
+#: src/output.cpp:179
+msgid "Page number"
+msgstr "Seitennummer"
+
+#: src/output.cpp:182
+msgid "Match"
+msgstr "Treffer"
+
+#: src/zip.cpp:55 src/zip.cpp:83
+msgid "File in {0:s} is damaged. Skipping in-EPUB file.\n"
+msgstr "Datei in {0:s} ist beschädigt. Überspringe Datei in der EPUB.\n"
+
+#: src/zip.cpp:103
 msgid "Could not read {0:s} in {1:s}."
 msgstr "Konnte {0:s} in {1:s} nicht lesen."

-#: src/zip.cpp:84
+#: src/zip.cpp:118 src/zip.cpp:125
 msgid "{0:s} not found in {1:s}."
 msgstr "{0:s} nicht gefunden in {1:s}."

-#: src/zip.cpp:97
+#: src/zip.cpp:148
 msgid "Could not open {0:s}."
 msgstr "Konnte {0:s} nicht öffnen."

-#: src/zip.cpp:109
+#: src/zip.cpp:162
 msgid "Could not close {0:s}."
 msgstr "Konnte {0:s} nicht schließen."