2021-05-20 04:30:31 +02:00
|
|
|
/* This file is part of epubgrep.
|
|
|
|
* Copyright © 2021 tastytea <tastytea@tastytea.de>
|
|
|
|
*
|
|
|
|
* This program is free software: you can redistribute it and/or modify
|
|
|
|
* it under the terms of the GNU Affero General Public License as published by
|
|
|
|
* the Free Software Foundation, version 3.
|
|
|
|
*
|
|
|
|
* This program is distributed in the hope that it will be useful,
|
|
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
* GNU Affero General Public License for more details.
|
|
|
|
*
|
|
|
|
* You should have received a copy of the GNU Affero General Public License
|
|
|
|
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
*/
|
|
|
|
|
2021-05-27 14:44:56 +02:00
|
|
|
#include "files.hpp"
|
|
|
|
#include "fs-compat.hpp"
|
2021-05-20 04:30:31 +02:00
|
|
|
#include "options.hpp"
|
2021-05-28 17:07:11 +02:00
|
|
|
#include "output.hpp"
|
2021-05-24 08:15:04 +02:00
|
|
|
#include "search.hpp"
|
2021-05-27 21:39:01 +02:00
|
|
|
#include "zip.hpp"
|
2021-05-20 04:30:31 +02:00
|
|
|
|
2021-05-20 07:07:47 +02:00
|
|
|
#include <boost/locale/generator.hpp>
|
2021-05-21 01:48:55 +02:00
|
|
|
#include <boost/locale/message.hpp>
|
2021-05-26 17:23:53 +02:00
|
|
|
#include <fmt/format.h>
|
|
|
|
#include <fmt/ostream.h> // For compatibility with fmt 4.
|
2021-05-25 11:00:05 +02:00
|
|
|
#include <termcolor/termcolor.hpp>
|
2021-05-20 07:07:47 +02:00
|
|
|
|
2021-05-23 06:32:56 +02:00
|
|
|
#include <clocale>
|
2021-05-26 17:23:53 +02:00
|
|
|
#include <cmath>
|
2021-05-24 08:15:04 +02:00
|
|
|
#include <cstdint>
|
2021-05-20 04:30:31 +02:00
|
|
|
#include <cstdlib>
|
2021-05-20 09:05:52 +02:00
|
|
|
#include <exception>
|
2021-05-29 12:42:29 +02:00
|
|
|
#include <fstream>
|
2021-05-26 17:23:53 +02:00
|
|
|
#include <future>
|
2021-05-20 04:30:31 +02:00
|
|
|
#include <iostream>
|
2021-05-20 07:07:47 +02:00
|
|
|
#include <locale>
|
2021-05-27 20:40:47 +02:00
|
|
|
#include <mutex>
|
2021-05-20 11:25:56 +02:00
|
|
|
#include <string>
|
2021-05-26 17:23:53 +02:00
|
|
|
#include <string_view>
|
2021-05-29 12:42:29 +02:00
|
|
|
#include <system_error>
|
2021-05-26 17:23:53 +02:00
|
|
|
#include <thread>
|
2021-05-20 11:25:56 +02:00
|
|
|
#include <vector>
|
2021-05-20 04:30:31 +02:00
|
|
|
|
|
|
|
int main(int argc, char *argv[])
|
|
|
|
{
|
2021-05-24 08:15:04 +02:00
|
|
|
using namespace epubgrep;
|
2021-05-20 09:05:52 +02:00
|
|
|
|
2021-05-21 01:48:55 +02:00
|
|
|
using boost::locale::translate;
|
2021-05-26 17:23:53 +02:00
|
|
|
using fmt::format;
|
2021-05-20 09:05:52 +02:00
|
|
|
using std::cerr;
|
2021-05-20 04:30:31 +02:00
|
|
|
using std::cout;
|
2021-05-26 17:23:53 +02:00
|
|
|
using std::string;
|
|
|
|
using std::vector;
|
2021-05-20 04:30:31 +02:00
|
|
|
|
2021-05-23 06:32:56 +02:00
|
|
|
// locale_generator("").name.c_str() returns "*" instead of "". That's why
|
|
|
|
// the global C locale isn't changed. So we have to set it additionally.
|
|
|
|
std::setlocale(LC_ALL, "");
|
2021-05-20 07:07:47 +02:00
|
|
|
boost::locale::generator locale_generator;
|
|
|
|
locale_generator.add_messages_path("translations");
|
|
|
|
locale_generator.add_messages_path("/usr/share/locale");
|
|
|
|
locale_generator.add_messages_domain("epubgrep");
|
|
|
|
std::locale::global(locale_generator(""));
|
|
|
|
cout.imbue(std::locale());
|
2021-05-21 04:10:11 +02:00
|
|
|
cerr.imbue(std::locale());
|
2021-05-20 07:07:47 +02:00
|
|
|
|
2021-05-27 17:20:00 +02:00
|
|
|
options::options opts;
|
2021-05-20 09:05:52 +02:00
|
|
|
try
|
|
|
|
{
|
2021-05-27 17:20:00 +02:00
|
|
|
opts = options::parse_options(argc, argv);
|
2021-05-20 09:05:52 +02:00
|
|
|
}
|
|
|
|
catch (std::exception &e)
|
|
|
|
{ // Exceptions we can't recover from or ones we don't know.
|
2021-05-29 12:42:29 +02:00
|
|
|
cerr << translate("ERROR: ") << e.what();
|
|
|
|
cerr << translate(" (while parsing options)") << '\n';
|
2021-05-20 09:05:52 +02:00
|
|
|
return EXIT_FAILURE;
|
|
|
|
}
|
2021-05-20 04:30:31 +02:00
|
|
|
|
2021-05-27 17:20:00 +02:00
|
|
|
if (opts.help || opts.version)
|
2021-05-20 04:30:31 +02:00
|
|
|
{
|
|
|
|
return EXIT_SUCCESS;
|
|
|
|
}
|
|
|
|
|
2021-05-27 14:44:56 +02:00
|
|
|
int return_code{EXIT_SUCCESS};
|
|
|
|
|
2021-05-27 10:14:56 +02:00
|
|
|
vector<fs::path> input_files;
|
2021-05-27 17:20:00 +02:00
|
|
|
if (opts.input_file.empty())
|
2021-05-23 16:23:07 +02:00
|
|
|
{
|
2021-05-24 08:15:04 +02:00
|
|
|
cout << "NO INPUT FILE\n";
|
|
|
|
// TODO: Read data from stdin.
|
2021-05-26 17:23:53 +02:00
|
|
|
return EXIT_FAILURE;
|
2021-05-24 08:15:04 +02:00
|
|
|
}
|
2021-05-27 17:20:00 +02:00
|
|
|
for (const auto &filepath : opts.input_file)
|
2021-05-27 10:14:56 +02:00
|
|
|
{
|
2021-05-27 17:20:00 +02:00
|
|
|
if (!opts.recursive && !opts.dereference_recursive)
|
2021-05-27 14:44:56 +02:00
|
|
|
{
|
2021-05-26 17:23:53 +02:00
|
|
|
|
2021-05-27 14:44:56 +02:00
|
|
|
input_files.emplace_back(filepath);
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
try
|
|
|
|
{
|
|
|
|
auto files_in_dir{
|
2021-05-27 17:20:00 +02:00
|
|
|
files::list_recursive(filepath,
|
|
|
|
opts.dereference_recursive)};
|
2021-05-27 14:44:56 +02:00
|
|
|
input_files.insert(input_files.end(), files_in_dir.begin(),
|
|
|
|
files_in_dir.end());
|
|
|
|
}
|
|
|
|
catch (const fs::filesystem_error &e)
|
|
|
|
{
|
|
|
|
if (e.code().value() == 20)
|
|
|
|
{ // Is not a directory.
|
|
|
|
input_files.emplace_back(filepath);
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
2021-05-27 21:03:42 +02:00
|
|
|
cerr << translate("ERROR: ")
|
2021-05-27 19:07:21 +02:00
|
|
|
<< format(translate("Could not open {0:s}: {1:s}").str(),
|
2021-05-27 14:44:56 +02:00
|
|
|
e.path1(), e.what())
|
|
|
|
<< '\n';
|
|
|
|
return_code = EXIT_FAILURE;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2021-05-26 17:23:53 +02:00
|
|
|
|
2021-05-27 17:20:00 +02:00
|
|
|
search::settings search_settings;
|
|
|
|
search_settings.regex = opts.regex;
|
|
|
|
search_settings.grep_like = opts.grep;
|
|
|
|
search_settings.ignore_case = opts.ignore_case;
|
|
|
|
search_settings.raw = opts.raw;
|
|
|
|
search_settings.context = opts.context;
|
2021-05-25 10:02:34 +02:00
|
|
|
|
2021-05-26 17:23:53 +02:00
|
|
|
vector<vector<search::match>> matches_all;
|
2021-05-27 20:40:47 +02:00
|
|
|
std::mutex mutex_matches_all;
|
2021-05-26 17:23:53 +02:00
|
|
|
vector<std::future<int>> futurepool;
|
|
|
|
|
|
|
|
auto search_file{
|
2021-05-27 20:40:47 +02:00
|
|
|
[&opts, &matches_all, &mutex_matches_all,
|
|
|
|
&search_settings](const fs::path &filepath)
|
2021-05-23 16:23:07 +02:00
|
|
|
{
|
2021-05-27 17:20:00 +02:00
|
|
|
for (const auto ®ex : opts.regexp)
|
2021-05-23 16:52:32 +02:00
|
|
|
{
|
2021-05-24 19:10:00 +02:00
|
|
|
try
|
2021-05-24 08:15:04 +02:00
|
|
|
{
|
2021-05-27 20:40:47 +02:00
|
|
|
auto matches{
|
|
|
|
search::search(filepath, regex, search_settings)};
|
|
|
|
std::lock_guard<std::mutex> guard(mutex_matches_all);
|
|
|
|
matches_all.emplace_back(matches);
|
2021-05-24 19:10:00 +02:00
|
|
|
}
|
2021-05-27 21:39:01 +02:00
|
|
|
catch (const zip::exception &e)
|
2021-05-26 20:20:21 +02:00
|
|
|
{
|
2021-05-27 21:48:35 +02:00
|
|
|
if (opts.ignore_archive_errors && e.code == 1)
|
2021-05-29 12:42:29 +02:00
|
|
|
{ // File is probably not an EPUB.
|
2021-05-27 21:48:35 +02:00
|
|
|
return EXIT_SUCCESS;
|
|
|
|
}
|
|
|
|
|
2021-05-29 17:37:41 +02:00
|
|
|
cerr << translate("ERROR: ") << e.what() << '\n';
|
2021-05-24 19:10:00 +02:00
|
|
|
return EXIT_FAILURE;
|
2021-05-24 08:15:04 +02:00
|
|
|
}
|
2021-05-29 12:42:29 +02:00
|
|
|
catch (const std::ifstream::failure &e)
|
|
|
|
{
|
|
|
|
cerr << translate("ERROR: ");
|
|
|
|
if (e.code() == std::errc::permission_denied)
|
|
|
|
{
|
|
|
|
cerr << translate("Permission denied.");
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{ // std::ifstream seems to always return a generic error?
|
|
|
|
cerr << translate("Probably permission denied.") << " ("
|
|
|
|
<< e.what() << ')';
|
|
|
|
}
|
|
|
|
|
|
|
|
cerr << format(translate(" (while opening {0:s})").str(),
|
|
|
|
filepath)
|
|
|
|
<< '\n';
|
|
|
|
}
|
2021-05-23 16:52:32 +02:00
|
|
|
}
|
2021-05-26 17:23:53 +02:00
|
|
|
|
|
|
|
return EXIT_SUCCESS;
|
|
|
|
}};
|
|
|
|
|
|
|
|
auto futures_cleanup{
|
|
|
|
[&futurepool, &return_code](const bool wait = false)
|
|
|
|
{
|
|
|
|
using namespace std::chrono_literals;
|
|
|
|
|
|
|
|
for (auto it{futurepool.begin()}; it != futurepool.end();)
|
|
|
|
{
|
|
|
|
if (!wait && it->wait_for(100ms) != std::future_status::ready)
|
|
|
|
{
|
|
|
|
++it;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (int ret{}; (ret = it->get()) != EXIT_SUCCESS)
|
|
|
|
{
|
|
|
|
return_code = ret;
|
|
|
|
}
|
|
|
|
futurepool.erase(it);
|
|
|
|
}
|
|
|
|
|
|
|
|
return EXIT_SUCCESS;
|
|
|
|
}};
|
|
|
|
|
|
|
|
const auto max_threads{
|
|
|
|
[]
|
|
|
|
{
|
|
|
|
auto n{static_cast<double>(std::thread::hardware_concurrency())};
|
|
|
|
return static_cast<std::uint32_t>(std::ceil(n / 2 + n / 4));
|
|
|
|
}()};
|
|
|
|
|
2021-05-27 10:14:56 +02:00
|
|
|
for (const auto &filepath : input_files)
|
2021-05-26 17:23:53 +02:00
|
|
|
{
|
2021-05-28 11:48:38 +02:00
|
|
|
while (futurepool.size() >= max_threads)
|
2021-05-26 17:23:53 +02:00
|
|
|
{
|
|
|
|
futures_cleanup();
|
2021-05-23 16:23:07 +02:00
|
|
|
}
|
2021-05-26 17:23:53 +02:00
|
|
|
futurepool.emplace_back(
|
|
|
|
std::async(std::launch::async, search_file, filepath));
|
2021-05-28 17:18:34 +02:00
|
|
|
|
|
|
|
if (!matches_all.empty())
|
|
|
|
{
|
|
|
|
output::print_matches(matches_all[0], opts,
|
|
|
|
input_files.size() == 1);
|
|
|
|
std::lock_guard<std::mutex> guard(mutex_matches_all);
|
|
|
|
matches_all.erase(matches_all.begin());
|
|
|
|
}
|
2021-05-23 16:23:07 +02:00
|
|
|
}
|
2021-05-26 17:23:53 +02:00
|
|
|
futures_cleanup(true);
|
|
|
|
|
2021-05-28 17:07:11 +02:00
|
|
|
for (const auto &matches : matches_all)
|
2021-05-26 17:23:53 +02:00
|
|
|
{
|
2021-05-28 17:07:11 +02:00
|
|
|
output::print_matches(matches, opts, input_files.size() == 1);
|
2021-05-26 17:23:53 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
return return_code;
|
2021-05-20 04:30:31 +02:00
|
|
|
}
|