/*  This file is part of epubgrep.
 *  Copyright © 2021 tastytea <tastytea@tastytea.de>
 *
 *  This program is free software: you can redistribute it and/or modify
 *  it under the terms of the GNU Affero General Public License as published by
 *  the Free Software Foundation, version 3.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 *  GNU Affero General Public License for more details.
 *
 *  You should have received a copy of the GNU Affero General Public License
 *  along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
|
|
|
|
|
2021-05-27 14:44:56 +02:00
|
|
|
#include "files.hpp"
#include "fs-compat.hpp"
#include "log.hpp"
#include "options.hpp"
#include "output.hpp"
#include "search.hpp"
#include "version.hpp"
#include "zip.hpp"

#include <boost/locale/generator.hpp>
#include <boost/locale/message.hpp>
#include <fmt/format.h>
#include <fmt/ostream.h> // For compatibility with fmt 4.

#include <atomic>
#include <cerrno>
#include <chrono>
#include <clocale>
#include <cmath>
#include <cstdint>
#include <cstdlib>
#include <cstring>
#include <exception>
#include <fstream>
#include <future>
#include <iostream>
#include <locale>
#include <mutex>
#include <string>
#include <string_view>
#include <system_error>
#include <thread>
#include <utility>
#include <vector>
|
2021-05-20 04:30:31 +02:00
|
|
|
|
2021-06-24 13:16:18 +02:00
|
|
|
// Exit status for errors that abort the whole run: main() returns it when
// option parsing throws or when a regular expression is rejected.
constexpr int EXIT_FATAL{2}; // NOLINT(readability-identifier-naming)
|
2021-06-24 13:13:49 +02:00
|
|
|
|
2021-05-20 04:30:31 +02:00
|
|
|
int main(int argc, char *argv[])
|
|
|
|
{
|
2021-05-24 08:15:04 +02:00
|
|
|
using namespace epubgrep;
|
2021-05-20 09:05:52 +02:00
|
|
|
|
2021-05-21 01:48:55 +02:00
|
|
|
using boost::locale::translate;
|
2021-05-26 17:23:53 +02:00
|
|
|
using fmt::format;
|
|
|
|
using std::string;
|
|
|
|
using std::vector;
|
2021-05-20 04:30:31 +02:00
|
|
|
|
2021-05-23 06:32:56 +02:00
|
|
|
// locale_generator("").name.c_str() returns "*" instead of "". That's why
|
|
|
|
// the global C locale isn't changed. So we have to set it additionally.
|
|
|
|
std::setlocale(LC_ALL, "");
|
2021-05-20 07:07:47 +02:00
|
|
|
boost::locale::generator locale_generator;
|
|
|
|
locale_generator.add_messages_path("translations");
|
|
|
|
locale_generator.add_messages_path("/usr/share/locale");
|
|
|
|
locale_generator.add_messages_domain("epubgrep");
|
|
|
|
std::locale::global(locale_generator(""));
|
2021-05-31 19:10:54 +02:00
|
|
|
std::cout.imbue(std::locale());
|
|
|
|
std::cerr.imbue(std::locale());
|
|
|
|
|
|
|
|
log::init();
|
2021-05-31 22:43:30 +02:00
|
|
|
LOG(log::sev::info) << "epubgrep " << version << " started.";
|
2021-05-20 07:07:47 +02:00
|
|
|
|
2021-05-27 17:20:00 +02:00
|
|
|
options::options opts;
|
2021-05-20 09:05:52 +02:00
|
|
|
try
|
|
|
|
{
|
2021-05-27 17:20:00 +02:00
|
|
|
opts = options::parse_options(argc, argv);
|
2021-05-20 09:05:52 +02:00
|
|
|
}
|
|
|
|
catch (std::exception &e)
|
|
|
|
{ // Exceptions we can't recover from or ones we don't know.
|
2021-05-31 22:43:30 +02:00
|
|
|
LOG(log::sev::fatal)
|
2021-05-31 19:10:54 +02:00
|
|
|
<< e.what() << translate(" (while parsing options)");
|
2021-06-24 13:13:49 +02:00
|
|
|
return EXIT_FATAL;
|
2021-05-20 09:05:52 +02:00
|
|
|
}
|
2021-05-20 04:30:31 +02:00
|
|
|
|
2021-06-01 13:41:54 +02:00
|
|
|
if (opts.debug)
|
|
|
|
{
|
|
|
|
log::enable_debug();
|
|
|
|
}
|
2021-06-01 15:32:10 +02:00
|
|
|
DEBUGLOG << "Options: " << opts;
|
2021-06-01 13:41:54 +02:00
|
|
|
|
2021-05-27 17:20:00 +02:00
|
|
|
if (opts.help || opts.version)
|
2021-05-20 04:30:31 +02:00
|
|
|
{
|
|
|
|
return EXIT_SUCCESS;
|
|
|
|
}
|
|
|
|
|
2021-05-27 14:44:56 +02:00
|
|
|
int return_code{EXIT_SUCCESS};
|
|
|
|
|
2021-05-27 10:14:56 +02:00
|
|
|
vector<fs::path> input_files;
|
2021-05-27 17:20:00 +02:00
|
|
|
for (const auto &filepath : opts.input_file)
|
2021-05-27 10:14:56 +02:00
|
|
|
{
|
2021-05-27 17:20:00 +02:00
|
|
|
if (!opts.recursive && !opts.dereference_recursive)
|
2021-05-27 14:44:56 +02:00
|
|
|
{
|
|
|
|
input_files.emplace_back(filepath);
|
2021-06-01 15:32:10 +02:00
|
|
|
DEBUGLOG << "Added to input_files: " << filepath;
|
2021-05-27 14:44:56 +02:00
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
try
|
|
|
|
{
|
|
|
|
auto files_in_dir{
|
2021-05-27 17:20:00 +02:00
|
|
|
files::list_recursive(filepath,
|
|
|
|
opts.dereference_recursive)};
|
2021-05-27 14:44:56 +02:00
|
|
|
input_files.insert(input_files.end(), files_in_dir.begin(),
|
|
|
|
files_in_dir.end());
|
2021-06-01 15:32:10 +02:00
|
|
|
DEBUGLOG << "Added directory to input_files.";
|
2021-05-27 14:44:56 +02:00
|
|
|
}
|
|
|
|
catch (const fs::filesystem_error &e)
|
|
|
|
{
|
|
|
|
if (e.code().value() == 20)
|
|
|
|
{ // Is not a directory.
|
|
|
|
input_files.emplace_back(filepath);
|
2021-06-01 15:32:10 +02:00
|
|
|
DEBUGLOG << "Added to input_files: " << filepath;
|
2021-05-27 14:44:56 +02:00
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
2021-05-31 22:43:30 +02:00
|
|
|
LOG(log::sev::error)
|
2021-05-31 19:10:54 +02:00
|
|
|
<< format(translate("Could not open {0:s}: {1:s}").str(),
|
|
|
|
e.path1(), e.what());
|
2021-05-27 14:44:56 +02:00
|
|
|
return_code = EXIT_FAILURE;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2021-05-26 17:23:53 +02:00
|
|
|
|
2021-05-27 17:20:00 +02:00
|
|
|
search::settings search_settings;
|
|
|
|
search_settings.regex = opts.regex;
|
|
|
|
search_settings.grep_like = opts.grep;
|
|
|
|
search_settings.ignore_case = opts.ignore_case;
|
|
|
|
search_settings.raw = opts.raw;
|
|
|
|
search_settings.context = opts.context;
|
2021-05-25 10:02:34 +02:00
|
|
|
|
2021-05-26 17:23:53 +02:00
|
|
|
vector<vector<search::match>> matches_all;
|
2021-05-27 20:40:47 +02:00
|
|
|
std::mutex mutex_matches_all;
|
2021-05-26 17:23:53 +02:00
|
|
|
vector<std::future<int>> futurepool;
|
2021-06-24 18:06:11 +02:00
|
|
|
std::atomic<size_t> books_searched{0};
|
2021-05-26 17:23:53 +02:00
|
|
|
|
|
|
|
auto search_file{
|
2021-05-31 22:43:30 +02:00
|
|
|
[&opts, &matches_all, &mutex_matches_all,
|
|
|
|
&search_settings](const fs::path &filepath)
|
2021-05-23 16:23:07 +02:00
|
|
|
{
|
2021-05-27 17:20:00 +02:00
|
|
|
for (const auto ®ex : opts.regexp)
|
2021-05-23 16:52:32 +02:00
|
|
|
{
|
2021-05-24 19:10:00 +02:00
|
|
|
try
|
2021-05-24 08:15:04 +02:00
|
|
|
{
|
2021-05-27 20:40:47 +02:00
|
|
|
auto matches{
|
|
|
|
search::search(filepath, regex, search_settings)};
|
2021-06-01 20:15:05 +02:00
|
|
|
if (!matches.empty())
|
|
|
|
{
|
|
|
|
std::lock_guard<std::mutex> guard(mutex_matches_all);
|
|
|
|
matches_all.emplace_back(matches);
|
|
|
|
}
|
2021-05-24 19:10:00 +02:00
|
|
|
}
|
2021-05-27 21:39:01 +02:00
|
|
|
catch (const zip::exception &e)
|
2021-05-26 20:20:21 +02:00
|
|
|
{
|
2021-05-27 21:48:35 +02:00
|
|
|
if (opts.ignore_archive_errors && e.code == 1)
|
2021-05-29 12:42:29 +02:00
|
|
|
{ // File is probably not an EPUB.
|
2021-05-31 22:43:30 +02:00
|
|
|
LOG(log::sev::info) << e.what();
|
2021-05-27 21:48:35 +02:00
|
|
|
return EXIT_SUCCESS;
|
|
|
|
}
|
|
|
|
|
2021-05-31 22:43:30 +02:00
|
|
|
LOG(log::sev::error) << e.what();
|
2021-05-24 19:10:00 +02:00
|
|
|
return EXIT_FAILURE;
|
2021-05-24 08:15:04 +02:00
|
|
|
}
|
2021-05-29 12:42:29 +02:00
|
|
|
catch (const std::ifstream::failure &e)
|
|
|
|
{
|
2021-05-31 22:43:30 +02:00
|
|
|
LOG(log::sev::error)
|
2021-05-31 22:19:55 +02:00
|
|
|
<< std::strerror(errno)
|
2021-05-31 19:10:54 +02:00
|
|
|
<< format(translate(" (while opening {0:s})").str(),
|
|
|
|
filepath);
|
2021-05-31 22:19:55 +02:00
|
|
|
return EXIT_FAILURE;
|
2021-05-29 12:42:29 +02:00
|
|
|
}
|
2021-06-24 13:13:49 +02:00
|
|
|
catch (const boost::regex_error &e)
|
|
|
|
{
|
|
|
|
LOG(log::sev::fatal) << e.what();
|
|
|
|
return EXIT_FATAL;
|
|
|
|
}
|
2021-05-23 16:52:32 +02:00
|
|
|
}
|
2021-05-26 17:23:53 +02:00
|
|
|
|
|
|
|
return EXIT_SUCCESS;
|
|
|
|
}};
|
|
|
|
|
|
|
|
auto futures_cleanup{
|
2021-06-24 18:06:11 +02:00
|
|
|
[&futurepool, &return_code, &books_searched](const bool wait = false)
|
2021-05-26 17:23:53 +02:00
|
|
|
{
|
|
|
|
using namespace std::chrono_literals;
|
|
|
|
|
|
|
|
for (auto it{futurepool.begin()}; it != futurepool.end();)
|
|
|
|
{
|
|
|
|
if (!wait && it->wait_for(100ms) != std::future_status::ready)
|
|
|
|
{
|
|
|
|
++it;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (int ret{}; (ret = it->get()) != EXIT_SUCCESS)
|
|
|
|
{
|
2021-06-01 17:06:25 +02:00
|
|
|
if (return_code == EXIT_SUCCESS)
|
|
|
|
{
|
|
|
|
return_code = ret;
|
|
|
|
}
|
2021-05-26 17:23:53 +02:00
|
|
|
}
|
|
|
|
futurepool.erase(it);
|
2021-06-24 18:06:11 +02:00
|
|
|
++books_searched;
|
2021-05-26 17:23:53 +02:00
|
|
|
}
|
|
|
|
}};
|
|
|
|
|
|
|
|
const auto max_threads{
|
|
|
|
[]
|
|
|
|
{
|
|
|
|
auto n{static_cast<double>(std::thread::hardware_concurrency())};
|
|
|
|
return static_cast<std::uint32_t>(std::ceil(n / 2 + n / 4));
|
|
|
|
}()};
|
2021-06-01 15:32:10 +02:00
|
|
|
DEBUGLOG << "max_threads = " << max_threads;
|
2021-05-26 17:23:53 +02:00
|
|
|
|
2021-06-24 18:06:11 +02:00
|
|
|
const auto print_status{
|
|
|
|
[&opts, &books_searched, &input_files](std::future<bool> cancel)
|
|
|
|
{
|
|
|
|
if (!opts.status)
|
|
|
|
{
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
while (cancel.wait_for(std::chrono::seconds(opts.status_interval))
|
|
|
|
!= std::future_status::ready)
|
|
|
|
{
|
|
|
|
std::cerr
|
|
|
|
<< format(translate("{0:d} of {1:d} books searched.").str(),
|
|
|
|
books_searched, input_files.size())
|
|
|
|
<< '\n';
|
|
|
|
}
|
|
|
|
std::cerr << translate("All books searched.") << '\n';
|
|
|
|
}};
|
|
|
|
std::promise<bool> promise_status;
|
|
|
|
std::thread thread_status{print_status, promise_status.get_future()};
|
|
|
|
|
2021-05-27 10:14:56 +02:00
|
|
|
for (const auto &filepath : input_files)
|
2021-05-26 17:23:53 +02:00
|
|
|
{
|
2021-05-28 11:48:38 +02:00
|
|
|
while (futurepool.size() >= max_threads)
|
2021-05-26 17:23:53 +02:00
|
|
|
{
|
2021-06-01 15:32:10 +02:00
|
|
|
DEBUGLOG << "Attempting to clean up threads";
|
2021-05-26 17:23:53 +02:00
|
|
|
futures_cleanup();
|
2021-05-23 16:23:07 +02:00
|
|
|
}
|
2021-06-24 13:13:49 +02:00
|
|
|
if (return_code == EXIT_FATAL)
|
|
|
|
{
|
|
|
|
break;
|
|
|
|
}
|
2021-05-26 17:23:53 +02:00
|
|
|
futurepool.emplace_back(
|
|
|
|
std::async(std::launch::async, search_file, filepath));
|
2021-06-01 15:32:10 +02:00
|
|
|
DEBUGLOG << "Launched new thread";
|
2021-05-28 17:18:34 +02:00
|
|
|
|
2021-06-08 16:55:35 +02:00
|
|
|
if (!matches_all.empty() && !opts.json && !opts.html)
|
2021-05-28 17:18:34 +02:00
|
|
|
{
|
|
|
|
output::print_matches(matches_all[0], opts,
|
|
|
|
input_files.size() == 1);
|
|
|
|
std::lock_guard<std::mutex> guard(mutex_matches_all);
|
|
|
|
matches_all.erase(matches_all.begin());
|
|
|
|
}
|
2021-05-23 16:23:07 +02:00
|
|
|
}
|
2021-06-01 15:32:10 +02:00
|
|
|
DEBUGLOG << "Waiting for remaining threads to finish";
|
2021-05-26 17:23:53 +02:00
|
|
|
futures_cleanup(true);
|
2021-06-24 18:06:11 +02:00
|
|
|
promise_status.set_value(true);
|
|
|
|
thread_status.join();
|
2021-06-24 13:13:49 +02:00
|
|
|
if (return_code == EXIT_FATAL)
|
|
|
|
{
|
|
|
|
return EXIT_FATAL;
|
|
|
|
}
|
2021-05-26 17:23:53 +02:00
|
|
|
|
2021-06-01 20:14:36 +02:00
|
|
|
if (opts.json)
|
2021-05-26 17:23:53 +02:00
|
|
|
{
|
2021-06-01 20:14:36 +02:00
|
|
|
output::json_all(matches_all);
|
|
|
|
}
|
2021-06-08 16:55:35 +02:00
|
|
|
else if (opts.html)
|
|
|
|
{
|
|
|
|
output::html_all(matches_all, opts);
|
|
|
|
}
|
2021-06-01 20:14:36 +02:00
|
|
|
else
|
|
|
|
{
|
|
|
|
for (const auto &matches : matches_all)
|
2021-06-01 19:17:44 +02:00
|
|
|
{
|
|
|
|
output::print_matches(matches, opts, input_files.size() == 1);
|
|
|
|
}
|
2021-05-26 17:23:53 +02:00
|
|
|
}
|
|
|
|
|
2021-06-01 18:22:15 +02:00
|
|
|
LOG(log::sev::info) << "Exiting program with return code " << return_code;
|
|
|
|
|
2021-05-26 17:23:53 +02:00
|
|
|
return return_code;
|
2021-05-20 04:30:31 +02:00
|
|
|
}
|