2021-05-20 04:30:31 +02:00
|
|
|
/* This file is part of epubgrep.
|
|
|
|
* Copyright © 2021 tastytea <tastytea@tastytea.de>
|
|
|
|
*
|
|
|
|
* This program is free software: you can redistribute it and/or modify
|
|
|
|
* it under the terms of the GNU Affero General Public License as published by
|
|
|
|
* the Free Software Foundation, version 3.
|
|
|
|
*
|
|
|
|
* This program is distributed in the hope that it will be useful,
|
|
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
* GNU Affero General Public License for more details.
|
|
|
|
*
|
|
|
|
* You should have received a copy of the GNU Affero General Public License
|
|
|
|
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
*/
|
|
|
|
|
2021-05-27 14:44:56 +02:00
|
|
|
#include "files.hpp"
#include "fs-compat.hpp"
#include "options.hpp"
#include "search.hpp"

#include <boost/locale/generator.hpp>
#include <boost/locale/message.hpp>
#include <boost/program_options/errors.hpp>
#include <boost/program_options/variables_map.hpp>
#include <fmt/format.h>
#include <fmt/ostream.h> // For compatibility with fmt 4.
#include <termcolor/termcolor.hpp>

#include <chrono>
#include <clocale>
#include <cmath>
#include <cstdint>
#include <cstdlib>
#include <exception>
#include <future>
#include <iostream>
#include <locale>
#include <mutex>
#include <string>
#include <string_view>
#include <system_error>
#include <thread>
#include <utility>
#include <vector>
|
2021-05-20 04:30:31 +02:00
|
|
|
|
|
|
|
int main(int argc, char *argv[])
|
|
|
|
{
|
2021-05-20 09:05:52 +02:00
|
|
|
namespace po = boost::program_options;
|
2021-05-24 08:15:04 +02:00
|
|
|
using namespace epubgrep;
|
2021-05-20 09:05:52 +02:00
|
|
|
|
2021-05-21 01:48:55 +02:00
|
|
|
using boost::locale::translate;
|
2021-05-26 17:23:53 +02:00
|
|
|
using fmt::format;
|
2021-05-20 09:05:52 +02:00
|
|
|
using std::cerr;
|
2021-05-20 04:30:31 +02:00
|
|
|
using std::cout;
|
2021-05-26 17:23:53 +02:00
|
|
|
using std::string;
|
|
|
|
using std::vector;
|
2021-05-20 04:30:31 +02:00
|
|
|
|
2021-05-23 06:32:56 +02:00
|
|
|
// locale_generator("").name.c_str() returns "*" instead of "". That's why
|
|
|
|
// the global C locale isn't changed. So we have to set it additionally.
|
|
|
|
std::setlocale(LC_ALL, "");
|
2021-05-20 07:07:47 +02:00
|
|
|
boost::locale::generator locale_generator;
|
|
|
|
locale_generator.add_messages_path("translations");
|
|
|
|
locale_generator.add_messages_path("/usr/share/locale");
|
|
|
|
locale_generator.add_messages_domain("epubgrep");
|
|
|
|
std::locale::global(locale_generator(""));
|
|
|
|
cout.imbue(std::locale());
|
2021-05-21 04:10:11 +02:00
|
|
|
cerr.imbue(std::locale());
|
2021-05-20 07:07:47 +02:00
|
|
|
|
2021-05-20 09:05:52 +02:00
|
|
|
po::variables_map vm;
|
|
|
|
try
|
|
|
|
{
|
2021-05-24 08:15:04 +02:00
|
|
|
vm = options::parse_options(argc, argv);
|
2021-05-20 09:05:52 +02:00
|
|
|
}
|
|
|
|
catch (std::exception &e)
|
|
|
|
{ // Exceptions we can't recover from or ones we don't know.
|
2021-05-23 08:55:15 +02:00
|
|
|
cerr << '\n' << translate("ERROR: ") << e.what() << '\n';
|
2021-05-21 01:48:55 +02:00
|
|
|
cerr << translate("Error while parsing options.") << '\n';
|
2021-05-20 09:05:52 +02:00
|
|
|
return EXIT_FAILURE;
|
|
|
|
}
|
2021-05-20 04:30:31 +02:00
|
|
|
|
|
|
|
if (vm.count("help") + vm.count("version") > 0)
|
|
|
|
{
|
|
|
|
return EXIT_SUCCESS;
|
|
|
|
}
|
|
|
|
|
2021-05-27 14:44:56 +02:00
|
|
|
int return_code{EXIT_SUCCESS};
|
|
|
|
|
2021-05-27 10:14:56 +02:00
|
|
|
vector<fs::path> input_files;
|
2021-05-24 08:15:04 +02:00
|
|
|
if (vm.count("input-file") == 0)
|
2021-05-23 16:23:07 +02:00
|
|
|
{
|
2021-05-24 08:15:04 +02:00
|
|
|
cout << "NO INPUT FILE\n";
|
|
|
|
// TODO: Read data from stdin.
|
2021-05-26 17:23:53 +02:00
|
|
|
return EXIT_FAILURE;
|
2021-05-24 08:15:04 +02:00
|
|
|
}
|
2021-05-27 10:14:56 +02:00
|
|
|
for (const auto &filepath : vm["input-file"].as<vector<string>>())
|
|
|
|
{
|
2021-05-27 14:44:56 +02:00
|
|
|
if (vm.count("recursive") + vm.count("dereference-recursive") == 0)
|
|
|
|
{
|
2021-05-26 17:23:53 +02:00
|
|
|
|
2021-05-27 14:44:56 +02:00
|
|
|
input_files.emplace_back(filepath);
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
bool follow_symlinks{false};
|
|
|
|
if (vm.count("dereference-recursive") > 0)
|
|
|
|
{
|
|
|
|
follow_symlinks = true;
|
|
|
|
}
|
|
|
|
|
|
|
|
try
|
|
|
|
{
|
|
|
|
auto files_in_dir{
|
|
|
|
files::list_recursive(filepath, follow_symlinks)};
|
|
|
|
input_files.insert(input_files.end(), files_in_dir.begin(),
|
|
|
|
files_in_dir.end());
|
|
|
|
}
|
|
|
|
catch (const fs::filesystem_error &e)
|
|
|
|
{
|
|
|
|
if (e.code().value() == 20)
|
|
|
|
{ // Is not a directory.
|
|
|
|
input_files.emplace_back(filepath);
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
cerr << '\n'
|
|
|
|
<< format(translate("ERROR: Could not open {0:s}: {1:s}")
|
|
|
|
.str()
|
|
|
|
.data(),
|
|
|
|
e.path1(), e.what())
|
|
|
|
<< '\n';
|
|
|
|
return_code = EXIT_FAILURE;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2021-05-26 17:23:53 +02:00
|
|
|
|
|
|
|
search::options opts;
|
|
|
|
if (vm.count("basic-regexp") > 0)
|
2021-05-24 08:15:04 +02:00
|
|
|
{
|
2021-05-26 17:23:53 +02:00
|
|
|
opts.regex = search::regex_kind::basic;
|
|
|
|
}
|
|
|
|
if (vm.count("extended-regexp") > 0)
|
|
|
|
{
|
|
|
|
opts.regex = search::regex_kind::extended;
|
|
|
|
}
|
|
|
|
if (vm.count("perl-regexp") > 0)
|
|
|
|
{
|
|
|
|
opts.regex = search::regex_kind::perl;
|
|
|
|
}
|
|
|
|
if (vm.count("grep") > 0)
|
|
|
|
{
|
|
|
|
opts.grep_like = true;
|
|
|
|
}
|
|
|
|
if (vm.count("ignore-case") > 0)
|
|
|
|
{
|
|
|
|
opts.ignore_case = true;
|
|
|
|
}
|
|
|
|
if (vm.count("raw") > 0)
|
|
|
|
{
|
|
|
|
opts.raw = true;
|
|
|
|
}
|
|
|
|
opts.context = vm["context"].as<std::uint64_t>();
|
2021-05-25 10:02:34 +02:00
|
|
|
|
2021-05-26 17:23:53 +02:00
|
|
|
vector<vector<search::match>> matches_all;
|
|
|
|
vector<std::future<int>> futurepool;
|
|
|
|
|
|
|
|
auto search_file{
|
2021-05-27 10:14:56 +02:00
|
|
|
[&vm, &matches_all, &opts](fs::path filepath)
|
2021-05-23 16:23:07 +02:00
|
|
|
{
|
2021-05-26 17:23:53 +02:00
|
|
|
for (const auto ®ex : vm["regexp"].as<vector<string>>())
|
2021-05-23 16:52:32 +02:00
|
|
|
{
|
2021-05-24 19:10:00 +02:00
|
|
|
try
|
2021-05-24 08:15:04 +02:00
|
|
|
{
|
2021-05-26 17:23:53 +02:00
|
|
|
matches_all.emplace_back(
|
|
|
|
search::search(filepath, regex, opts));
|
2021-05-24 19:10:00 +02:00
|
|
|
}
|
|
|
|
catch (const std::exception &e)
|
2021-05-26 20:20:21 +02:00
|
|
|
{
|
2021-05-24 19:10:00 +02:00
|
|
|
cerr << '\n' << translate("ERROR: ") << e.what() << '\n';
|
2021-05-26 17:23:53 +02:00
|
|
|
cerr << format(translate("Error while searching {0:s}.")
|
|
|
|
.str()
|
|
|
|
.data(),
|
|
|
|
filepath)
|
|
|
|
<< '\n';
|
2021-05-24 19:10:00 +02:00
|
|
|
return EXIT_FAILURE;
|
2021-05-24 08:15:04 +02:00
|
|
|
}
|
2021-05-23 16:52:32 +02:00
|
|
|
}
|
2021-05-26 17:23:53 +02:00
|
|
|
|
|
|
|
return EXIT_SUCCESS;
|
|
|
|
}};
|
|
|
|
|
|
|
|
auto futures_cleanup{
|
|
|
|
[&futurepool, &return_code](const bool wait = false)
|
|
|
|
{
|
|
|
|
using namespace std::chrono_literals;
|
|
|
|
|
|
|
|
for (auto it{futurepool.begin()}; it != futurepool.end();)
|
|
|
|
{
|
|
|
|
if (!wait && it->wait_for(100ms) != std::future_status::ready)
|
|
|
|
{
|
|
|
|
++it;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (int ret{}; (ret = it->get()) != EXIT_SUCCESS)
|
|
|
|
{
|
|
|
|
return_code = ret;
|
|
|
|
}
|
|
|
|
futurepool.erase(it);
|
|
|
|
}
|
|
|
|
|
|
|
|
return EXIT_SUCCESS;
|
|
|
|
}};
|
|
|
|
|
|
|
|
const auto max_threads{
|
|
|
|
[]
|
|
|
|
{
|
|
|
|
auto n{static_cast<double>(std::thread::hardware_concurrency())};
|
|
|
|
return static_cast<std::uint32_t>(std::ceil(n / 2 + n / 4));
|
|
|
|
}()};
|
|
|
|
|
2021-05-27 10:14:56 +02:00
|
|
|
for (const auto &filepath : input_files)
|
2021-05-26 17:23:53 +02:00
|
|
|
{
|
|
|
|
if (futurepool.size() >= max_threads)
|
|
|
|
{
|
|
|
|
futures_cleanup();
|
2021-05-23 16:23:07 +02:00
|
|
|
}
|
2021-05-26 17:23:53 +02:00
|
|
|
futurepool.emplace_back(
|
|
|
|
std::async(std::launch::async, search_file, filepath));
|
2021-05-23 16:23:07 +02:00
|
|
|
}
|
2021-05-26 17:23:53 +02:00
|
|
|
futures_cleanup(true);
|
|
|
|
|
|
|
|
for (const auto &matches_file : matches_all)
|
|
|
|
{
|
2021-05-27 14:46:23 +02:00
|
|
|
fs::path last_epub;
|
2021-05-26 17:23:53 +02:00
|
|
|
for (const auto &match : matches_file)
|
|
|
|
{
|
2021-05-27 14:46:23 +02:00
|
|
|
if (input_files.size() <= 1 || vm.count("no-filename") == 0)
|
2021-05-26 17:23:53 +02:00
|
|
|
{
|
2021-05-27 14:46:23 +02:00
|
|
|
if (match.epub_filepath != last_epub)
|
|
|
|
{
|
|
|
|
if (vm.count("nocolor") == 0)
|
|
|
|
{
|
|
|
|
cout << termcolor::yellow;
|
|
|
|
}
|
|
|
|
|
|
|
|
cout << format(translate(" In {0:s}: \n").str().data(),
|
|
|
|
fs::relative(match.epub_filepath));
|
|
|
|
last_epub = match.epub_filepath;
|
|
|
|
|
|
|
|
if (vm.count("nocolor") == 0)
|
|
|
|
{
|
|
|
|
cout << termcolor::reset;
|
|
|
|
}
|
|
|
|
}
|
2021-05-26 17:23:53 +02:00
|
|
|
}
|
2021-05-27 14:46:23 +02:00
|
|
|
cout << match.filepath;
|
2021-05-26 17:23:53 +02:00
|
|
|
if (!match.headline.empty())
|
|
|
|
{
|
2021-05-27 14:46:23 +02:00
|
|
|
cout << ", " << match.headline;
|
2021-05-26 17:23:53 +02:00
|
|
|
}
|
|
|
|
if (!match.page.empty())
|
|
|
|
{
|
|
|
|
cout << ", page " << match.page;
|
|
|
|
}
|
|
|
|
cout << ": " << match.context.first;
|
|
|
|
if (vm.count("nocolor") == 0)
|
|
|
|
{
|
|
|
|
cout << termcolor::bright_magenta << match.text
|
|
|
|
<< termcolor::reset;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
cout << match.text;
|
|
|
|
}
|
|
|
|
cout << match.context.second << '\n';
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return return_code;
|
2021-05-20 04:30:31 +02:00
|
|
|
}
|