epubgrep/src/main.cpp

257 lines
7.7 KiB
C++

/* This file is part of epubgrep.
* Copyright © 2021 tastytea <tastytea@tastytea.de>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, version 3.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "files.hpp"
#include "fs-compat.hpp"
#include "log.hpp"
#include "options.hpp"
#include "output.hpp"
#include "search.hpp"
#include "version.hpp"
#include "zip.hpp"
#include <boost/locale/generator.hpp>
#include <boost/locale/message.hpp>
#include <fmt/format.h>
#include <fmt/ostream.h> // For compatibility with fmt 4.
#include <cerrno>
#include <chrono>
#include <clocale>
#include <cmath>
#include <cstdint>
#include <cstdlib>
#include <cstring>
#include <exception>
#include <fstream>
#include <future>
#include <iostream>
#include <locale>
#include <mutex>
#include <string>
#include <string_view>
#include <system_error>
#include <thread>
#include <utility>
#include <vector>
// Program entry point.
//
// Flow:
//   1. Set up the locale and logging.
//   2. Parse the command line; exit early on --help / --version.
//   3. Build the list of input files, expanding directories if a recursive
//      option is set.
//   4. Search every input file in its own std::async task, keeping at most
//      max_threads tasks in flight.
//   5. Print the collected matches (plain text is also streamed while the
//      searches are still running; JSON and HTML are emitted at the end).
//
// Returns EXIT_SUCCESS, or EXIT_FAILURE if option parsing failed, an input
// path could not be opened, or a search task reported a failure.
int main(int argc, char *argv[])
{
    using namespace epubgrep;

    using boost::locale::translate;
    using fmt::format;
    using std::string;
    using std::vector;

    // locale_generator("").name.c_str() returns "*" instead of "". That's why
    // the global C locale isn't changed. So we have to set it additionally.
    std::setlocale(LC_ALL, "");
    boost::locale::generator locale_generator;
    locale_generator.add_messages_path("translations");
    locale_generator.add_messages_path("/usr/share/locale");
    locale_generator.add_messages_domain("epubgrep");
    std::locale::global(locale_generator(""));
    std::cout.imbue(std::locale());
    std::cerr.imbue(std::locale());

    log::init();
    LOG(log::sev::info) << "epubgrep " << version << " started.";

    options::options opts;
    try
    {
        opts = options::parse_options(argc, argv);
    }
    catch (const std::exception &e)
    { // Exceptions we can't recover from or ones we don't know.
        LOG(log::sev::fatal)
            << e.what() << translate(" (while parsing options)");
        return EXIT_FAILURE;
    }

    if (opts.debug)
    {
        log::enable_debug();
    }
    DEBUGLOG << "Options: " << opts;

    if (opts.help || opts.version)
    {
        return EXIT_SUCCESS;
    }

    int return_code{EXIT_SUCCESS};

    // Collect all files to search. Without a recursive option every argument
    // is taken as-is; otherwise arguments are expanded as directories.
    vector<fs::path> input_files;
    for (const auto &filepath : opts.input_file)
    {
        if (!opts.recursive && !opts.dereference_recursive)
        {
            input_files.emplace_back(filepath);
            DEBUGLOG << "Added to input_files: " << filepath;
            continue;
        }

        try
        {
            auto files_in_dir{
                files::list_recursive(filepath, opts.dereference_recursive)};
            input_files.insert(input_files.end(), files_in_dir.begin(),
                               files_in_dir.end());
            DEBUGLOG << "Added directory to input_files.";
        }
        catch (const fs::filesystem_error &e)
        {
            // Compare against the portable error condition instead of the
            // platform-specific errno value of ENOTDIR.
            if (e.code() == std::errc::not_a_directory)
            { // Is not a directory.
                input_files.emplace_back(filepath);
                DEBUGLOG << "Added to input_files: " << filepath;
                continue;
            }
            LOG(log::sev::error)
                << format(translate("Could not open {0:s}: {1:s}").str(),
                          e.path1(), e.what());
            return_code = EXIT_FAILURE;
        }
    }

    search::settings search_settings;
    search_settings.regex = opts.regex;
    search_settings.grep_like = opts.grep;
    search_settings.ignore_case = opts.ignore_case;
    search_settings.raw = opts.raw;
    search_settings.context = opts.context;

    // Results of all finished searches. Written by the worker tasks and read
    // by this thread, so every access must hold mutex_matches_all.
    vector<vector<search::match>> matches_all;
    std::mutex mutex_matches_all;
    vector<std::future<int>> futurepool;

    // Worker: search one file for every requested regex and append non-empty
    // results to matches_all. Returns EXIT_SUCCESS or EXIT_FAILURE.
    auto search_file{
        [&opts, &matches_all, &mutex_matches_all,
         &search_settings](const fs::path &filepath)
        {
            for (const auto &regex : opts.regexp)
            {
                try
                {
                    auto matches{
                        search::search(filepath, regex, search_settings)};
                    if (!matches.empty())
                    {
                        std::lock_guard<std::mutex> guard(mutex_matches_all);
                        matches_all.emplace_back(std::move(matches));
                    }
                }
                catch (const zip::exception &e)
                {
                    if (opts.ignore_archive_errors && e.code == 1)
                    { // File is probably not an EPUB.
                        LOG(log::sev::info) << e.what();
                        return EXIT_SUCCESS;
                    }
                    LOG(log::sev::error) << e.what();
                    return EXIT_FAILURE;
                }
                catch (const std::ifstream::failure &e)
                {
                    LOG(log::sev::error)
                        << std::strerror(errno)
                        << format(translate(" (while opening {0:s})").str(),
                                  filepath);
                    return EXIT_FAILURE;
                }
            }
            return EXIT_SUCCESS;
        }};

    // Remove finished tasks from futurepool and record the first non-success
    // result in return_code. With wait == false, tasks that are not ready
    // within 100 ms are skipped; with wait == true, every task is waited for.
    auto futures_cleanup{
        [&futurepool, &return_code](const bool wait = false)
        {
            using namespace std::chrono_literals;

            for (auto it{futurepool.begin()}; it != futurepool.end();)
            {
                if (!wait && it->wait_for(100ms) != std::future_status::ready)
                {
                    ++it;
                    continue;
                }

                if (int ret{}; (ret = it->get()) != EXIT_SUCCESS)
                {
                    if (return_code == EXIT_SUCCESS)
                    {
                        return_code = ret;
                    }
                }
                // erase() invalidates `it`; continue with the iterator it
                // returns, which refers to the next element.
                it = futurepool.erase(it);
            }
        }};

    // Use three quarters of the reported hardware threads, rounded up.
    const auto max_threads{
        []
        {
            const auto n{
                static_cast<double>(std::thread::hardware_concurrency())};
            const auto threads{
                static_cast<std::uint32_t>(std::ceil(n / 2 + n / 4))};
            // hardware_concurrency() may return 0 if the value is not
            // computable. A limit of 0 would make the scheduling loop below
            // spin forever, so always allow at least one task.
            return threads == 0 ? std::uint32_t{1} : threads;
        }()};
    DEBUGLOG << "max_threads = " << max_threads;

    for (const auto &filepath : input_files)
    {
        while (futurepool.size() >= max_threads)
        {
            DEBUGLOG << "Attempting to clean up threads";
            futures_cleanup();
        }

        futurepool.emplace_back(
            std::async(std::launch::async, search_file, filepath));
        DEBUGLOG << "Launched new thread";

        // Stream plain-text results while searches are still running.
        if (!opts.json && !opts.html)
        {
            // Take the oldest result out of the shared container while
            // holding the lock, then print without the lock so workers are
            // not blocked during output.
            vector<search::match> ready;
            bool have_ready{false};
            {
                std::lock_guard<std::mutex> guard(mutex_matches_all);
                if (!matches_all.empty())
                {
                    ready = std::move(matches_all.front());
                    matches_all.erase(matches_all.begin());
                    have_ready = true;
                }
            }
            if (have_ready)
            {
                output::print_matches(ready, opts, input_files.size() == 1);
            }
        }
    }

    DEBUGLOG << "Waiting for remaining threads to finish";
    futures_cleanup(true);

    // All workers have finished, so matches_all can be read without locking.
    if (opts.json)
    {
        output::json_all(matches_all);
    }
    else if (opts.html)
    {
        output::html_all(matches_all, opts);
    }
    else
    {
        for (const auto &matches : matches_all)
        {
            output::print_matches(matches, opts, input_files.size() == 1);
        }
    }

    LOG(log::sev::info) << "Exiting program with return code " << return_code;
    return return_code;
}