diff --git a/.drone.yml b/.drone.yml index 69301bc..a5a623c 100644 --- a/.drone.yml +++ b/.drone.yml @@ -17,7 +17,7 @@ steps: image: plugins/download settings: source: https://raw.githubusercontent.com/badaix/popl/v1.2.0/include/popl.hpp - destination: src/popl.hpp + destination: src/cli/popl.hpp - name: gcc6 image: debian:stretch-slim @@ -186,7 +186,7 @@ steps: image: plugins/download settings: source: https://raw.githubusercontent.com/badaix/popl/v1.2.0/include/popl.hpp - destination: src/popl.hpp + destination: src/cli/popl.hpp - name: deb image: debian:stretch-slim diff --git a/.gitignore b/.gitignore index 676ecee..dc92476 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,4 @@ /build/ +/doc/ +/update_doc.sh *.xpi diff --git a/CMakeLists.txt b/CMakeLists.txt index 203171d..aceea60 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -9,15 +9,6 @@ set(WITH_TESTS "NO" CACHE STRING "WITH_TESTS defaults to \"NO\"") set(WITH_MOZILLA "NO" CACHE STRING "WITH_MOZILLA defaults to \"NO\"") include(GNUInstallDirs) -find_package(PkgConfig REQUIRED) -pkg_check_modules(LIBXDG_BASEDIR REQUIRED libxdg-basedir) -# sqlite3 is not a direct dependency, but vsqlite++ has no cmake- or pkg-config -# module. Since it installs in the same directories as sqlite3, I am adding the -# module here to add the include- and link directories below. It is not REQUIRED -# because the sqlite3 in Debian jessie doesn't come with a pkg-config module. 
-pkg_check_modules(SQLITE3 sqlite3) -pkg_check_modules(CURLPP REQUIRED curlpp) -find_package(ICU COMPONENTS uc i18n REQUIRED) set(CMAKE_CXX_STANDARD 14) set(CMAKE_CXX_STANDARD_REQUIRED ON) @@ -27,33 +18,17 @@ set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -Wall -Wextra -Wpedantic -ftrapv \ -fsanitize=undefined -g -Og -fno-omit-frame-pointer") -include_directories(${PROJECT_SOURCE_DIR}/src) +include_directories(${PROJECT_SOURCE_DIR}/src/lib) Include_directories(${PROJECT_BINARY_DIR}) -include_directories(${LIBXDG_BASEDIR_INCLUDE_DIRS}) -include_directories(${SQLITE3_INCLUDE_DIRS}) -include_directories(${CURLPP_INCLUDE_DIRS}) -include_directories(${ICU_INCLUDE_DIRS}) - -link_directories(${LIBXDG_BASEDIR_LIBRARY_DIRS}) -link_directories(${SQLITE3_LIBRARY_DIRS}) -link_directories(${CURLPP_LIBRARY_DIRS}) -link_directories(${ICU_LIBRARY_DIRS}) - -set(COMMON_LIBRARIES - ${LIBXDG_BASEDIR_LIBRARIES} vsqlitepp stdc++fs ${CURLPP_LIBRARIES} - ${ICU_LIBRARIES}) - # Write version in header configure_file( "${PROJECT_SOURCE_DIR}/src/version.hpp.in" "${PROJECT_BINARY_DIR}/version.hpp" ) -file(GLOB sources src/*.cpp) -add_executable(${PROJECT_NAME} "${sources}") -target_link_libraries(${PROJECT_NAME} ${COMMON_LIBRARIES}) -install(TARGETS ${PROJECT_NAME} DESTINATION ${CMAKE_INSTALL_BINDIR}) +add_subdirectory(src/lib) +add_subdirectory(src/cli) if (WITH_MAN) add_custom_command( @@ -63,6 +38,7 @@ if (WITH_MAN) COMMAND ${CMAKE_SOURCE_DIR}/build_manpage.sh ARGS ${PROJECT_VERSION}) add_custom_target(man ALL DEPENDS "${PROJECT_BINARY_DIR}/${PROJECT_NAME}.1") + install( FILES ${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}.1 DESTINATION ${CMAKE_INSTALL_MANDIR}/man1) @@ -73,8 +49,8 @@ if (WITH_MOZILLA) endif() if(WITH_TESTS) - add_library(${PROJECT_NAME}_testlib SHARED ${sources}) - target_link_libraries(${PROJECT_NAME}_testlib ${COMMON_LIBRARIES}) + include_directories(${PROJECT_SOURCE_DIR}/src/cli) + add_library(${PROJECT_NAME}_testlib SHARED src/cli/parse_options.cpp) 
add_subdirectory(tests) endif() diff --git a/Doxyfile b/Doxyfile new file mode 100644 index 0000000..d4ad01f --- /dev/null +++ b/Doxyfile @@ -0,0 +1,27 @@ +# -*- mode: conf-unix -*- +PROJECT_NAME = "remwharead" +PROJECT_NUMBER = 0.0.0 +INPUT = src/lib/ src/lib/export/ +# EXAMPLE_PATH = examples/ +EXAMPLE_RECURSIVE = YES +GENERATE_HTML = YES +HTML_OUTPUT = doc/html +GENERATE_LATEX = NO +ALLOW_UNICODE_NAMES = YES +BRIEF_MEMBER_DESC = YES +REPEAT_BRIEF = YES +ALWAYS_DETAILED_SEC = YES +INLINE_INHERITED_MEMB = NO +INHERIT_DOCS = YES +SEPARATE_MEMBER_PAGES = NO +TAB_SIZE = 4 +MARKDOWN_SUPPORT = YES +AUTOLINK_SUPPORT = YES +INLINE_SIMPLE_STRUCTS = NO +QUIET = NO +WARNINGS = YES +BUILTIN_STL_SUPPORT = YES +VERBATIM_HEADERS = YES +INLINE_SOURCES = YES +SEARCHENGINE = YES +SHOW_FILES = YES diff --git a/README.adoc b/README.adoc index 5161d10..e228d3a 100644 --- a/README.adoc +++ b/README.adoc @@ -1,4 +1,5 @@ = remwharead +:toc: preamble *remwharead* saves URIs of things you want to remember in a database along with an URI to the archived version, the current date and time, title, description, @@ -18,7 +19,13 @@ image::https://doc.schlomp.space/.remwharead/example_tags.png[Tags view, width=4 == Usage -See https://schlomp.space/tastytea/remwharead/src/branch/main/remwharead.1.adoc[manpage]. +See +https://schlomp.space/tastytea/remwharead/src/branch/main/remwharead.1.adoc[manpage]. + +=== In your programs + +The complete functionality is implemented in a C++ library, libremwharead. Take +a look at the https://doc.schlomp.space/remwharead/[reference] for more info. 
== Install @@ -73,7 +80,7 @@ apt-get install g++-6 cmake pkg-config libcurl4-openssl-dev libxdg-basedir-dev \ libvsqlitepp-dev libboost-system-dev libboost-filesystem-dev libicu-dev asciidoc apt-get install -t sid libcurlpp-dev # Inside the source directory: -wget -O src/popl.hpp https://raw.githubusercontent.com/badaix/popl/v1.2.0/include/popl.hpp +wget -O src/cli/popl.hpp https://raw.githubusercontent.com/badaix/popl/v1.2.0/include/popl.hpp export CXX="g++-6" ---- ==== diff --git a/build_doc.sh b/build_doc.sh new file mode 100755 index 0000000..fb51945 --- /dev/null +++ b/build_doc.sh @@ -0,0 +1,7 @@ +#!/bin/bash + +if [[ -f Doxyfile ]]; then + mkdir -p doc + (doxygen -s -g - && cat Doxyfile && echo -n "PROJECT_NUMBER = " && + grep -Eo '[0-9]+\.[0-9]+\.[0-9]+$' CMakeLists.txt) | doxygen - +fi diff --git a/src/adoc.cpp b/src/adoc.cpp deleted file mode 100644 index dc79fbb..0000000 --- a/src/adoc.cpp +++ /dev/null @@ -1,251 +0,0 @@ -/* This file is part of remwharead. - * Copyright © 2019 tastytea - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, version 3. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see .
- */ - -#include -#include -#include -#include -#include -#include -#include -#include "version.hpp" -#include "time.hpp" -#include "adoc.hpp" - -using std::string; -using std::cerr; -using std::endl; -using std::regex; -using std::regex_replace; -using tagpair = std::pair>; - -void Export::AsciiDoc::print() const -{ - try - { - _out << "= Visited things\n" - << ":Author: remwharead " << global::version << endl - << ":Date: " - << timepoint_to_string(system_clock::now()) << endl - << ":TOC: right\n" - << ":TOCLevels: 2\n" - << ":!webfonts:\n\n"; - - tagmap alltags; - string day; - for (const Database::entry &entry : _entries) - { - const string newday = get_day(entry); - - if (newday != day) - { - day = newday; - _out << "== " << day << endl << endl; - } - - _out << "[[dt_" << timepoint_to_string(entry.datetime) << "]]\n"; - _out << "* link:" << replace_in_uri(entry.uri); - if (!entry.title.empty()) - { - _out << '[' << replace_in_title(entry.title) << ']'; - } - else - { - _out << "[]"; - } - _out << " +" << endl; - - _out << '_' << get_time(entry).substr(0, 5) << '_'; - if (!entry.archive_uri.empty()) - { - _out << " (link:" << replace_in_uri(entry.archive_uri) - << "[archived version])"; - } - - bool separator = false; - for (const string &tag : entry.tags) - { - if (tag.empty()) - { - continue; - } - if (!separator) - { - _out << "\n| "; - separator = true; - } - - auto globaltag = alltags.find(tag); - if (globaltag != alltags.end()) - { - globaltag->second.push_back(entry); - } - else - { - alltags.insert({ tag, { entry } }); - } - - _out << "xref:t_" << replace_in_tag(tag) << "[" << tag << ']'; - if (tag != *(entry.tags.rbegin())) - { - _out << ", "; - } - } - - if (!entry.description.empty()) - { - _out << " +" << endl << entry.description; - } - _out << endl << endl; - } - - if (!alltags.empty()) - { - print_tags(alltags); - } - } - catch (std::exception &e) - { - cerr << "Error in " << __func__ << ": " << e.what() << endl; - } -} - -const string 
Export::AsciiDoc::replace(string text, - const replacemap &replacements) const -{ - for (const std::pair &sr : replacements) - { - size_t pos = 0; - while ((pos = text.find(sr.first, pos)) != std::string::npos) - { - text.replace(pos, sr.first.length(), sr.second); - pos += sr.second.length(); - } - } - return text; -} -const string Export::AsciiDoc::replace_in_tag(const string &text) const -{ - // TODO: Find a better solution. - const replacemap replacements = - { - { " ", "-" }, { "§", "-" }, - { "$", "-" }, { "%", "-" }, - { "&", "-" }, { "/", "-" }, - { "=", "-" }, { "^", "-" }, - { "!", "-" }, { "?", "-" }, - { "'", "-" }, { "\"", "-" }, - { "´", "-" }, { "`", "-" }, - { "’", "-" }, { "#", "-" }, - { "₀", "0" }, { "⁰", "0" }, - { "₁", "1" }, { "¹", "1" }, - { "₂", "2" }, { "²", "2" }, - { "₃", "3" }, { "³", "3" }, - { "₄", "4" }, { "⁴", "4" }, - { "₅", "5" }, { "⁵", "5" }, - { "₆", "6" }, { "⁶", "6" }, - { "₇", "7" }, { "⁷", "7" }, - { "₈", "8" }, { "⁸", "8" }, - { "₉", "9" }, { "⁹", "9" } - }; - - return replace(text, replacements); -} - -const string Export::AsciiDoc::replace_in_title(const string &text) const -{ - // [ is implicitly escaped if the corresponding ] is. - return replace(text, {{ "]", "\\]" }}); -} - -const string Export::AsciiDoc::replace_in_uri(const string &text) const -{ - return replace(text, - { - { "[", "%5B" }, { "]", "%5D" } - }); -} - -void Export::AsciiDoc::print_tags(const tagmap &tags) const -{ - _out << "== Tags\n\n"; - vector sortedtags(tags.size()); - std::move(tags.begin(), tags.end(), sortedtags.begin()); - std::sort(sortedtags.begin(), sortedtags.end(), - [](const tagpair &a, tagpair &b) - { - if (a.second.size() != b.second.size()) - { // Sort by number of occurrences if they are different. - return a.second.size() > b.second.size(); - } - else - { // Sort by tag names otherwise. 
- std::locale loc; - const std::collate &coll = - std::use_facet>(loc); - return (coll.compare( - a.first.data(), a.first.data() - + a.first.length(), - b.first.data(), b.first.data() - + b.first.length()) == -1); - } - }); - - bool othertags = false; // Have we printed “Less used tags” already? - for (const auto &tag : sortedtags) - { - // If we have more than 20 tags, group all tags that occur only 1 time - // under the section “Less used tags”. - if (sortedtags.size() > 20 && tag.second.size() == 1) - { - if (!othertags) - { - _out << "=== Less used tags\n\n"; - othertags = true; - } - _out << "="; - } - - _out << "=== [[t_" << replace_in_tag(tag.first) << "]]" - << tag.first << endl; - for (const Database::entry &entry : tag.second) - { - const string datetime = timepoint_to_string(entry.datetime); - const string date = datetime.substr(0, datetime.find('T')); - string title = replace_in_title(entry.title); - if (title.empty()) - { - title = "++" + entry.uri + "++"; - } - _out << endl << "* xref:dt_" << datetime - << '[' << title << "] _(" << date << ")_" << endl; - } - _out << endl; - } - _out << endl; -} - -const string Export::AsciiDoc::get_day(const Database::entry &entry) const -{ - const string datetime = timepoint_to_string(entry.datetime); - return datetime.substr(0, datetime.find('T')); -} - -const string Export::AsciiDoc::get_time(const Database::entry &entry) const -{ - const string datetime = timepoint_to_string(entry.datetime); - return datetime.substr(datetime.find('T') + 1); -} diff --git a/src/bookmarks.cpp b/src/bookmarks.cpp deleted file mode 100644 index 15885a5..0000000 --- a/src/bookmarks.cpp +++ /dev/null @@ -1,56 +0,0 @@ -/* This file is part of remwharead. - * Copyright © 2019 tastytea - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, version 3. 
- * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#include -#include -#include "sqlite.hpp" -#include "bookmarks.hpp" - -using std::chrono::system_clock; -using std::chrono::duration_cast; -using std::chrono::seconds; -using std::string; - -void Export::Bookmarks::print() const -{ - _out << "\n" - "\n" - "Bookmarks from remwharead\n" - "

Bookmarks from remwharead

\n\n" - "

\n" - "

remwharead

\n" - "

\n"; - - for (const Database::entry & entry : _entries) - { - string title = entry.title; - if (title.empty()) - { - title = entry.uri; - } - system_clock::time_point tp = entry.datetime; - system_clock::duration duration = tp.time_since_epoch(); - string time_seconds = - std::to_string(duration_cast(duration).count()); - - _out << "

" - << title << "\n"; - } - _out << "

\n" - << "

\n"; -} diff --git a/src/cli/CMakeLists.txt b/src/cli/CMakeLists.txt new file mode 100644 index 0000000..78d78c6 --- /dev/null +++ b/src/cli/CMakeLists.txt @@ -0,0 +1,9 @@ +include_directories(${PROJECT_SOURCE_DIR}/src/cli) + +file(GLOB sources_cli *.cpp) + +add_executable(${PROJECT_NAME}-cli ${sources_cli}) +target_link_libraries(${PROJECT_NAME}-cli PRIVATE ${PROJECT_NAME}) +set_target_properties(${PROJECT_NAME}-cli PROPERTIES OUTPUT_NAME ${PROJECT_NAME}) + +install(TARGETS ${PROJECT_NAME}-cli DESTINATION ${CMAKE_INSTALL_BINDIR}) diff --git a/src/main.cpp b/src/cli/main.cpp similarity index 96% rename from src/main.cpp rename to src/cli/main.cpp index 0ac6782..d69b398 100644 --- a/src/main.cpp +++ b/src/cli/main.cpp @@ -24,12 +24,13 @@ #include "parse_options.hpp" #include "uri.hpp" #include "types.hpp" -#include "csv.hpp" -#include "adoc.hpp" -#include "bookmarks.hpp" -#include "simple.hpp" +#include "export/csv.hpp" +#include "export/adoc.hpp" +#include "export/bookmarks.hpp" +#include "export/simple.hpp" #include "search.hpp" +using namespace remwharead; using std::cout; using std::cerr; using std::endl; diff --git a/src/parse_options.cpp b/src/cli/parse_options.cpp similarity index 100% rename from src/parse_options.cpp rename to src/cli/parse_options.cpp diff --git a/src/parse_options.hpp b/src/cli/parse_options.hpp similarity index 98% rename from src/parse_options.hpp rename to src/cli/parse_options.hpp index 2438d3e..d69e6f7 100644 --- a/src/parse_options.hpp +++ b/src/cli/parse_options.hpp @@ -25,6 +25,7 @@ #include "types.hpp" #include "time.hpp" +using namespace remwharead; using std::string; using std::vector; using std::array; diff --git a/src/lib/CMakeLists.txt b/src/lib/CMakeLists.txt new file mode 100644 index 0000000..38f8eaa --- /dev/null +++ b/src/lib/CMakeLists.txt @@ -0,0 +1,37 @@ +find_package(PkgConfig REQUIRED) +pkg_check_modules(LIBXDG_BASEDIR REQUIRED libxdg-basedir) +# sqlite3 is not a direct dependency, but vsqlite++ has no cmake- or pkg-config
+# module. Since it installs in the same directories as sqlite3, I am adding the +# module here to add the include- and link directories below. It is not REQUIRED +# because the sqlite3 in Debian jessie doesn't come with a pkg-config module. +pkg_check_modules(SQLITE3 sqlite3) +pkg_check_modules(CURLPP REQUIRED curlpp) +find_package(ICU COMPONENTS uc i18n REQUIRED) + +include_directories(${LIBXDG_BASEDIR_INCLUDE_DIRS}) +include_directories(${SQLITE3_INCLUDE_DIRS}) +include_directories(${CURLPP_INCLUDE_DIRS}) +include_directories(${ICU_INCLUDE_DIRS}) + +link_directories(${LIBXDG_BASEDIR_LIBRARY_DIRS}) +link_directories(${SQLITE3_LIBRARY_DIRS}) +link_directories(${CURLPP_LIBRARY_DIRS}) +link_directories(${ICU_LIBRARY_DIRS}) + +file(GLOB sources_lib *.cpp export/*.cpp) +file(GLOB headers_lib *.hpp) +file(GLOB headers_lib_export export/*.hpp) + +add_library(${PROJECT_NAME} SHARED ${sources_lib}) +target_link_libraries(${PROJECT_NAME} ${LIBXDG_BASEDIR_LIBRARIES} + vsqlitepp stdc++fs ${CURLPP_LIBRARIES} ${ICU_LIBRARIES}) +set_target_properties(${PROJECT_NAME} PROPERTIES + VERSION ${PROJECT_VERSION} + SOVERSION ${${PROJECT_NAME}_VERSION_MAJOR} + ) + +install(TARGETS ${PROJECT_NAME} LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}) +install(FILES ${headers_lib} + DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/${PROJECT_NAME}) +install(FILES ${headers_lib_export} + DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/${PROJECT_NAME}/export) diff --git a/src/lib/export/adoc.cpp b/src/lib/export/adoc.cpp new file mode 100644 index 0000000..2703ebf --- /dev/null +++ b/src/lib/export/adoc.cpp @@ -0,0 +1,255 @@ +/* This file is part of remwharead. + * Copyright © 2019 tastytea + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, version 3. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include +#include +#include +#include +#include +#include +#include +#include "version.hpp" +#include "time.hpp" +#include "adoc.hpp" + +namespace remwharead +{ + using std::string; + using std::cerr; + using std::endl; + using std::regex; + using std::regex_replace; + using tagpair = std::pair>; + + void Export::AsciiDoc::print() const + { + try + { + _out << "= Visited things\n" + << ":Author: remwharead " << global::version << endl + << ":Date: " + << timepoint_to_string(system_clock::now()) << endl + << ":TOC: right\n" + << ":TOCLevels: 2\n" + << ":!webfonts:\n\n"; + + tagmap alltags; + string day; + for (const Database::entry &entry : _entries) + { + const string newday = get_day(entry); + + if (newday != day) + { + day = newday; + _out << "== " << day << endl << endl; + } + + _out << "[[dt_" << timepoint_to_string(entry.datetime) + << "]]\n" << "* link:" << replace_in_uri(entry.uri); + if (!entry.title.empty()) + { + _out << '[' << replace_in_title(entry.title) << ']'; + } + else + { + _out << "[]"; + } + _out << " +" << endl; + + _out << '_' << get_time(entry).substr(0, 5) << '_'; + if (!entry.archive_uri.empty()) + { + _out << " (link:" << replace_in_uri(entry.archive_uri) + << "[archived version])"; + } + + bool separator = false; + for (const string &tag : entry.tags) + { + if (tag.empty()) + { + continue; + } + if (!separator) + { + _out << "\n| "; + separator = true; + } + + auto globaltag = alltags.find(tag); + if (globaltag != alltags.end()) + { + globaltag->second.push_back(entry); + } + else + { + alltags.insert({ tag, { entry } }); + } + + _out << "xref:t_" << 
replace_in_tag(tag) + << "[" << tag << ']'; + if (tag != *(entry.tags.rbegin())) + { + _out << ", "; + } + } + + if (!entry.description.empty()) + { + _out << " +" << endl << entry.description; + } + _out << endl << endl; + } + + if (!alltags.empty()) + { + print_tags(alltags); + } + } + catch (std::exception &e) + { + cerr << "Error in " << __func__ << ": " << e.what() << endl; + } + } + + const string Export::AsciiDoc::replace(string text, + const replacemap &replacements) const + { + for (const std::pair &sr : replacements) + { + size_t pos = 0; + while ((pos = text.find(sr.first, pos)) != std::string::npos) + { + text.replace(pos, sr.first.length(), sr.second); + pos += sr.second.length(); + } + } + return text; + } + const string Export::AsciiDoc::replace_in_tag(const string &text) const + { + // TODO: Find a better solution. + const replacemap replacements = + { + { " ", "-" }, { "§", "-" }, + { "$", "-" }, { "%", "-" }, + { "&", "-" }, { "/", "-" }, + { "=", "-" }, { "^", "-" }, + { "!", "-" }, { "?", "-" }, + { "'", "-" }, { "\"", "-" }, + { "´", "-" }, { "`", "-" }, + { "’", "-" }, { "#", "-" }, + { "₀", "0" }, { "⁰", "0" }, + { "₁", "1" }, { "¹", "1" }, + { "₂", "2" }, { "²", "2" }, + { "₃", "3" }, { "³", "3" }, + { "₄", "4" }, { "⁴", "4" }, + { "₅", "5" }, { "⁵", "5" }, + { "₆", "6" }, { "⁶", "6" }, + { "₇", "7" }, { "⁷", "7" }, + { "₈", "8" }, { "⁸", "8" }, + { "₉", "9" }, { "⁹", "9" } + }; + + return replace(text, replacements); + } + + const string Export::AsciiDoc::replace_in_title(const string &text) const + { + // [ is implicitly escaped if the corresponding ] is. 
+ return replace(text, {{ "]", "\\]" }}); + } + + const string Export::AsciiDoc::replace_in_uri(const string &text) const + { + return replace(text, + { + { "[", "%5B" }, { "]", "%5D" } + }); + } + + void Export::AsciiDoc::print_tags(const tagmap &tags) const + { + _out << "== Tags\n\n"; + vector sortedtags(tags.size()); + std::move(tags.begin(), tags.end(), sortedtags.begin()); + std::sort(sortedtags.begin(), sortedtags.end(), + [](const tagpair &a, const tagpair &b) + { + if (a.second.size() != b.second.size()) + { // Sort by number of occurrences if they are different. + return a.second.size() > b.second.size(); + } + else + { // Sort by tag names otherwise. + std::locale loc; + const std::collate &coll = + std::use_facet>(loc); + return (coll.compare( + a.first.data(), a.first.data() + + a.first.length(), + b.first.data(), b.first.data() + + b.first.length()) == -1); + } + }); + + bool othertags = false; // Have we printed “Less used tags” already? + for (const auto &tag : sortedtags) + { + // If we have more than 20 tags, group all tags that occur only 1 + // time under the section “Less used tags”.
+ if (sortedtags.size() > 20 && tag.second.size() == 1) + { + if (!othertags) + { + _out << "=== Less used tags\n\n"; + othertags = true; + } + _out << "="; + } + + _out << "=== [[t_" << replace_in_tag(tag.first) << "]]" + << tag.first << endl; + for (const Database::entry &entry : tag.second) + { + const string datetime = timepoint_to_string(entry.datetime); + const string date = datetime.substr(0, datetime.find('T')); + string title = replace_in_title(entry.title); + if (title.empty()) + { + title = "++" + entry.uri + "++"; + } + _out << endl << "* xref:dt_" << datetime + << '[' << title << "] _(" << date << ")_" << endl; + } + _out << endl; + } + _out << endl; + } + + const string Export::AsciiDoc::get_day(const Database::entry &entry) const + { + const string datetime = timepoint_to_string(entry.datetime); + return datetime.substr(0, datetime.find('T')); + } + + const string Export::AsciiDoc::get_time(const Database::entry &entry) const + { + const string datetime = timepoint_to_string(entry.datetime); + return datetime.substr(datetime.find('T') + 1); + } +} diff --git a/src/adoc.hpp b/src/lib/export/adoc.hpp similarity index 87% rename from src/adoc.hpp rename to src/lib/export/adoc.hpp index f94dff8..ad1c7ab 100644 --- a/src/adoc.hpp +++ b/src/lib/export/adoc.hpp @@ -23,10 +23,13 @@ #include "sqlite.hpp" #include "export.hpp" -using std::string; - +namespace remwharead +{ namespace Export { + using std::string; + + //! Export as %AsciiDoc document. class AsciiDoc : protected ExportBase { public: @@ -38,17 +41,28 @@ namespace Export using tagmap = std::map>; using replacemap = const std::map; + //! Replace strings in text. const string replace(string text, const replacemap &replacements) const; + //! Replaces characters in tags that asciidoctor doesn't like. const string replace_in_tag(const string &text) const; + //! Replaces characters in title that asciidoctor doesn't like. const string replace_in_title(const string &text) const; + //! 
Replaces characters in URI that asciidoctor doesn't like. const string replace_in_uri(const string &text) const; + + //! Print things sorted by tag. void print_tags(const tagmap &tags) const; + + //! Get ISO-8601 day from Database::entry. const string get_day(const Database::entry &entry) const; + + //! Get ISO-8601 time from Database::entry. const string get_time(const Database::entry &entry) const; }; } +} #endif // REMWHAREAD_ADOC_HPP diff --git a/src/lib/export/bookmarks.cpp b/src/lib/export/bookmarks.cpp new file mode 100644 index 0000000..ab6c373 --- /dev/null +++ b/src/lib/export/bookmarks.cpp @@ -0,0 +1,59 @@ +/* This file is part of remwharead. + * Copyright © 2019 tastytea + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, version 3. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include +#include +#include "sqlite.hpp" +#include "bookmarks.hpp" + +namespace remwharead +{ + using std::chrono::system_clock; + using std::chrono::duration_cast; + using std::chrono::seconds; + using std::string; + + void Export::Bookmarks::print() const + { + _out << "\n" + "\n" + "Bookmarks from remwharead\n" + "

Bookmarks from remwharead

\n\n" + "

\n" + "

remwharead

\n" + "

\n"; + + for (const Database::entry & entry : _entries) + { + string title = entry.title; + if (title.empty()) + { + title = entry.uri; + } + system_clock::time_point tp = entry.datetime; + system_clock::duration duration = tp.time_since_epoch(); + string time_seconds = + std::to_string(duration_cast(duration).count()); + + _out << "

" + << title << "\n"; + } + _out << "

\n" + << "

\n"; + } +} diff --git a/src/bookmarks.hpp b/src/lib/export/bookmarks.hpp similarity index 97% rename from src/bookmarks.hpp rename to src/lib/export/bookmarks.hpp index 9a15c3f..94e7963 100644 --- a/src/bookmarks.hpp +++ b/src/lib/export/bookmarks.hpp @@ -19,6 +19,8 @@ #include "export.hpp" +namespace remwharead +{ namespace Export { //! Export as Netscape bookmark file. @@ -29,5 +31,6 @@ namespace Export virtual void print() const override; }; } +} #endif // REMWHAREAD_BOOKMARKS_HPP diff --git a/src/csv.cpp b/src/lib/export/csv.cpp similarity index 92% rename from src/csv.cpp rename to src/lib/export/csv.cpp index e5852c7..d9ad06f 100644 --- a/src/csv.cpp +++ b/src/lib/export/csv.cpp @@ -17,12 +17,12 @@ #include "time.hpp" #include "csv.hpp" -using std::cerr; -using std::endl; - -namespace Export +namespace remwharead { - void CSV::print() const + using std::cerr; + using std::endl; + + void Export::CSV::print() const { try { @@ -54,7 +54,7 @@ namespace Export } } - const string CSV::quote(string field) const + const string Export::CSV::quote(string field) const { size_t pos = 0; while ((pos = field.find('"', pos)) != std::string::npos) diff --git a/src/csv.hpp b/src/lib/export/csv.hpp similarity index 92% rename from src/csv.hpp rename to src/lib/export/csv.hpp index 93c2e09..a27a614 100644 --- a/src/csv.hpp +++ b/src/lib/export/csv.hpp @@ -20,10 +20,13 @@ #include #include "export.hpp" -using std::string; - +namespace remwharead +{ namespace Export { + using std::string; + + //! Export as Comma Separated Values. 
class CSV : protected ExportBase { public: @@ -36,5 +39,6 @@ namespace Export const string quote(string field) const; }; } +} #endif // REMWHAREAD_CSV_HPP diff --git a/src/export.cpp b/src/lib/export/export.cpp similarity index 98% rename from src/export.cpp rename to src/lib/export/export.cpp index 22e4312..9aa0a48 100644 --- a/src/export.cpp +++ b/src/lib/export/export.cpp @@ -17,6 +17,8 @@ #include #include "export.hpp" +namespace remwharead +{ namespace Export { ExportBase::ExportBase(const vector &entries, ostream &out) @@ -35,3 +37,4 @@ namespace Export return entries; } } +} diff --git a/src/export.hpp b/src/lib/export/export.hpp similarity index 70% rename from src/export.hpp rename to src/lib/export/export.hpp index 4c1f3d7..fb6d00c 100644 --- a/src/export.hpp +++ b/src/lib/export/export.hpp @@ -25,24 +25,43 @@ using std::vector; using std::ostream; using std::cout; +namespace remwharead +{ namespace Export { + //! Base class for exports. class ExportBase { public: + /*! + * @brief Export vector of Database::entry. + * + * @param entries Vector of Database::entry to export. + * @param out Output stream. + */ explicit ExportBase(const vector &entries, ostream &out = cout); + /*! + * @brief Print output to std::ostream. + */ virtual void print() const = 0; protected: const vector _entries; ostream &_out; - //! Sort entries from newest to oldest. + /*! + * @brief Sort entries from newest to oldest. + * + * @param entries Vector of Database::entry to sort. + * + * @return Sorted vector of Database::entry. 
+ */ const vector sort_entries(vector entries) const; }; } +} #endif // REMWHAREAD_EXPORT_HPP diff --git a/src/simple.cpp b/src/lib/export/simple.cpp similarity index 62% rename from src/simple.cpp rename to src/lib/export/simple.cpp index 5a7c9ae..d7b1a4f 100644 --- a/src/simple.cpp +++ b/src/lib/export/simple.cpp @@ -19,20 +19,23 @@ #include "time.hpp" #include "simple.hpp" -using std::string; - -void Export::Simple::print() const +namespace remwharead { - for (const Database::entry & entry : _entries) - { - const string timestring = timepoint_to_string(entry.datetime); - _out << timestring.substr(0, timestring.find('T')) << ": "; - if (!entry.title.empty()) - { - _out << entry.title << '\n'; - _out << " "; - } + using std::string; - _out << "<" << entry.uri << ">\n"; + void Export::Simple::print() const + { + for (const Database::entry & entry : _entries) + { + const string timestring = timepoint_to_string(entry.datetime); + _out << timestring.substr(0, timestring.find('T')) << ": "; + if (!entry.title.empty()) + { + _out << entry.title << '\n'; + _out << " "; + } + + _out << "<" << entry.uri << ">\n"; + } } } diff --git a/src/simple.hpp b/src/lib/export/simple.hpp similarity index 97% rename from src/simple.hpp rename to src/lib/export/simple.hpp index 1638d9a..39bd625 100644 --- a/src/simple.hpp +++ b/src/lib/export/simple.hpp @@ -19,6 +19,8 @@ #include "export.hpp" +namespace remwharead +{ namespace Export { //! Export as simple list. @@ -29,5 +31,6 @@ namespace Export virtual void print() const override; }; } +} #endif // REMWHAREAD_SIMPLE_HPP diff --git a/src/time.hpp b/src/lib/remwharead.hpp similarity index 54% rename from src/time.hpp rename to src/lib/remwharead.hpp index c4d3565..16a77f3 100644 --- a/src/time.hpp +++ b/src/lib/remwharead.hpp @@ -14,21 +14,33 @@ * along with this program. If not, see . */ -#ifndef REMWHAREAD_TIME_HPP -#define REMWHAREAD_TIME_HPP +#ifndef REMWHAREAD_HPP +#define REMWHAREAD_HPP -#include -#include +/*! 
+ * @mainpage remwharead Reference + * + * @section using Using the library + * + * The easiest way is to include remwharead.hpp, which then includes all other + * headers. + * + * @code + * #include + * @endcode + * + * Compile your code with `g++ -lremwharead`. + */ -using std::string; -using std::chrono::system_clock; -using time_point = system_clock::time_point; +#include "export/adoc.hpp" +#include "export/bookmarks.hpp" +#include "export/csv.hpp" +#include "export/export.hpp" +#include "export/simple.hpp" +#include "search.hpp" +#include "sqlite.hpp" +#include "time.hpp" +#include "types.hpp" +#include "uri.hpp" -// Convert ISO 8601 time-string or SQLite time-string to time_point. -const time_point string_to_timepoint(const string &strtime, - bool sqlite = false); - -// Convert time_point to USO 8601 time-string or SQLite time-string. -const string timepoint_to_string(const time_point &tp, bool sqlite = false); - -#endif // REMWHAREAD_TIME_HPP +#endif // REMWHAREAD_HPP diff --git a/src/lib/search.cpp b/src/lib/search.cpp new file mode 100644 index 0000000..771d61a --- /dev/null +++ b/src/lib/search.cpp @@ -0,0 +1,194 @@ +/* This file is part of remwharead. + * Copyright © 2019 tastytea + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, version 3. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */ + +#include +#include +#include +#include +#include "search.hpp" + +namespace remwharead +{ + using std::regex; + using std::regex_search; + using std::smatch; + using std::find; + using std::find_if; + + const vector> parse_expression(string expression) + { + vector> searchlist; + const regex re_or("(.+?) (OR|\\|\\|) "); + const regex re_and("(.+?) (AND|&&) "); + smatch match; + + vector subexpressions; + { // Split expression at OR. + while (regex_search(expression, match, re_or)) + { + subexpressions.push_back(match[1].str()); + expression = match.suffix().str(); + } + subexpressions.push_back(expression); + } + + { + for (string sub : subexpressions) + { // Split each OR-slice at AND. + vector terms; + while (regex_search(sub, match, re_and)) + { + terms.push_back(to_lowercase(match[1].str())); + sub = match.suffix().str(); + } + terms.push_back(to_lowercase(sub)); + searchlist.push_back(terms); + } + } + + return searchlist; + } + + const string to_lowercase(const string &str) + { + icu::UnicodeString uni(str.c_str()); + string out; + uni.toLower().toUTF8String(out); + return out; + } + + const vector search_tags(const vector &entries, + string expression, const bool is_re) + { + vector> searchlist = parse_expression(expression); + vector result; + + for (const vector &tags_or : searchlist) + { + for (const DB::entry &entry : entries) + { // Add entry to result if all tags in an OR-slice match. 
+ bool matched = true; + + for (const string &tag : tags_or) + { + const auto it = find_if( + entry.tags.begin(), entry.tags.end(), + [&tag, is_re](string s) + { + s = to_lowercase(s); + if (is_re) + { + const regex re("^" + tag + "$"); + return regex_search(s, re); + } + else + { + return (s == tag); + } + }); + if (it == entry.tags.end()) + { + matched = false; + } + } + if (matched == true) + { + result.push_back(entry); + } + } + } + + return result; + } + + const vector search_all(const vector &entries, + string expression, const bool is_re) + { + vector> searchlist = parse_expression(expression); + vector result = search_tags(entries, expression, is_re); + + for (const vector &terms_or : searchlist) + { + for (const DB::entry &entry : entries) + { + // Add entry to result if all terms in an OR-slice match title, + // description or full text. + bool matched_title = true; + bool matched_description = true; + bool matched_fulltext = true; + + const auto it = find(result.begin(), result.end(), entry); + if (it != result.end()) + { // Skip if already in result list. + continue; + } + + for (const string &term : terms_or) + { + const string title = to_lowercase(entry.title); + const string description = to_lowercase(entry.description); + const string fulltext = to_lowercase(entry.fulltext); + + // Set matched_* to false if term is not found. 
+ if (is_re) + { + const regex re(term); + + if(!regex_search(title, re)) + { + matched_title = false; + } + + if(!regex_search(description, re)) + { + matched_description = false; + } + + if(!regex_search(fulltext, re)) + { + matched_fulltext = false; + } + } + else + { + if (title.find(term) == string::npos) + { + matched_title = false; + } + + if (description.find(term) == string::npos) + { + matched_description = false; + } + + if (fulltext.find(term) == string::npos) + { + matched_fulltext = false; + } + } + } + if (matched_title == true + || matched_description == true + || matched_fulltext == true) + { + result.push_back(entry); + } + } + } + + return result; + } +} diff --git a/src/lib/search.hpp b/src/lib/search.hpp new file mode 100644 index 0000000..f8d66e5 --- /dev/null +++ b/src/lib/search.hpp @@ -0,0 +1,75 @@ +/* This file is part of remwharead. + * Copyright © 2019 tastytea + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, version 3. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef REMWHAREAD_SEARCH_HPP +#define REMWHAREAD_SEARCH_HPP + +#include +#include +#include "sqlite.hpp" + +//! @file + +namespace remwharead +{ + using std::vector; + using std::string; + + /*! + * @brief Split expression in subexpressions. + * + * First it splits at `OR` or `||`, then it splits the subexpressions at + * `AND` or `&&`. The first vector contains all tags before the first `OR`. + * + * @return Vector of `OR`-vectors of `AND`-tags. + */ + const vector> parse_expression(string expression); + + //! 
Convert str to lowercase. Works with unicode. + const string to_lowercase(const string &str); + + /*! + * @brief Search in tags of database entries. + * + * Only matches whole tags, *Pill* does not match *Pillow*. + * + * @param entries Vector of Database::entry to search. + * @param expression Search expression. + * @param is_re Is it a regular expression? + * + * @return Vector of matching Database::entry. + */ + const vector + search_tags(const vector &entries, string expression, + const bool is_re); + + /*! + * @brief Search in full text of database entries. + * + * Searches in tags, title, description and full text. + * + * @param entries Vector of Database::entry to search. + * @param expression Search expression. + * @param is_re Is it a regular expression? + * + * @return Vector of matching Database::entry. + */ + const vector + search_all(const vector &entries, string expression, + const bool is_re); +} + +#endif // REMWHAREAD_SEARCH_HPP diff --git a/src/lib/sqlite.cpp b/src/lib/sqlite.cpp new file mode 100644 index 0000000..4f67741 --- /dev/null +++ b/src/lib/sqlite.cpp @@ -0,0 +1,164 @@ +/* This file is part of remwharead. + * Copyright © 2019 tastytea + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, version 3. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */ + +#include +#include +#include +#include +#include +#include +#include "time.hpp" +#include "sqlite.hpp" + +namespace remwharead +{ + using std::cerr; + using std::endl; + + Database::Database() + : _connected(false) + { + try + { + xdgHandle xdg; + xdgInitHandle(&xdg); + _dbpath = xdgDataHome(&xdg) / fs::path("remwharead"); + xdgWipeHandle(&xdg); + + if (!fs::exists(_dbpath)) + { + fs::create_directories(_dbpath); + } + _dbpath /= "database.sqlite"; + + _con = std::make_unique(_dbpath); + sqlite::execute(*_con, "CREATE TABLE IF NOT EXISTS remwharead(" + "uri TEXT, archive_uri TEXT, datetime TEXT, " + "tags TEXT, title TEXT, description TEXT, " + "fulltext TEXT);", true); + + _connected = true; + } + catch (std::exception &e) + { + cerr << "Error in " << __func__ << ": " << e.what() << endl; + } + } + + Database::operator bool() const + { + return _connected; + } + + bool operator ==(const Database::entry &a, const Database::entry &b) + { + if (a.datetime == b.datetime) + { + return true; + } + + return false; + } + + const string Database::entry::fulltext_oneline() const + { + string oneline = fulltext; + size_t pos = 0; + while ((pos = oneline.find('\n', pos)) != std::string::npos) + { + oneline.replace(pos, 1, "\\n"); + } + return oneline; + } + + void Database::store(const Database::entry &data) const + { + try + { + const string strdatetime = timepoint_to_string(data.datetime, true); + string strtags; + for (const string &tag : data.tags) + { + strtags += tag; + if (tag != *(data.tags.rbegin())) + { + strtags += ","; + } + } + + sqlite::execute ins(*_con, "INSERT INTO remwharead " + "VALUES(?, ?, ?, ?, ?, ?, ?);"); + ins % data.uri % data.archive_uri % strdatetime % strtags + % data.title % data.description % data.fulltext; + ins(); + } + catch (std::exception &e) + { + cerr << "Error in " << __func__ << ": " << e.what() << endl; + } + } + + const vector Database::retrieve( + const time_point &start, const time_point &end) const + { + try + { + const 
string query = "SELECT * FROM remwharead WHERE datetime " + "BETWEEN '" + timepoint_to_string(start, true) + + "' AND '" + timepoint_to_string(end, true) + + "' ORDER BY datetime DESC;"; + + sqlite::query q(*_con, query); + sqlite::result_type res = q.get_result(); + vector entries; + + while(res->next_row()) + { + vector tags; + const string strtags = res->get_string(3); + size_t pos = 0; + while (pos != std::string::npos) + { + const size_t newpos = strtags.find(',', pos); + tags.push_back(strtags.substr(pos, newpos - pos)); + pos = newpos; + if (pos != std::string::npos) + { + ++pos; + } + } + entries.push_back + ({ + res->get_string(0), + res->get_string(1), + string_to_timepoint(res->get_string(2), true), + tags, + res->get_string(4), + res->get_string(5), + res->get_string(6) + }); + } + + return entries; + } + catch (std::exception &e) + { + cerr << "Error in " << __func__ << ": " << e.what() << endl; + } + + return {}; + } +} diff --git a/src/lib/sqlite.hpp b/src/lib/sqlite.hpp new file mode 100644 index 0000000..f30b9cd --- /dev/null +++ b/src/lib/sqlite.hpp @@ -0,0 +1,80 @@ +/* This file is part of remwharead. + * Copyright © 2019 tastytea + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, version 3. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */ + +#ifndef REMWHAREAD_SQLITE_HPP +#define REMWHAREAD_SQLITE_HPP + +#include +#include +#include +#include +#include +#include + +namespace remwharead +{ + namespace fs = std::experimental::filesystem; + using std::string; + using std::vector; + using std::chrono::system_clock; + using time_point = system_clock::time_point; + + //! Store and retrieve files from/to SQLite. + class Database + { + public: + //! Describes a database entry. + typedef struct entry + { + string uri; + string archive_uri; + time_point datetime; + vector tags; + string title; + string description; + string fulltext; + + //! Returns true if date and time are equal. + friend bool operator ==(const Database::entry &a, + const Database::entry &b); + //! The full text in one line. + const string fulltext_oneline() const; + } entry; + + //! Connects to the database and creates it if necessary. + Database(); + + //! Returns true if connected to the database. + operator bool() const; + + //! Store a Database::entry in the database. + void store(const entry &data) const; + + //! Retrieve a vector of Database::entry from the database. + const vector retrieve( + const time_point &start = time_point(), + const time_point &end = system_clock::now()) const; + + private: + fs::path _dbpath; + std::unique_ptr _con; + bool _connected; + }; + + using DB = Database; +} + +#endif // REMWHAREAD_SQLITE_HPP diff --git a/src/lib/time.cpp b/src/lib/time.cpp new file mode 100644 index 0000000..5ede440 --- /dev/null +++ b/src/lib/time.cpp @@ -0,0 +1,61 @@ +/* This file is part of remwharead. + * Copyright © 2019 tastytea + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, version 3. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include +#include +#include +#include +#include "time.hpp" + +namespace remwharead +{ + const time_point string_to_timepoint(const string &strtime, bool sqlite) + { + std::stringstream sstime(strtime); + struct std::tm tm = {}; + tm.tm_isdst = -1; // Detect daylight saving time. + if (sqlite) + { + sstime >> std::get_time(&tm, "%Y-%m-%d %T"); + } + else + { + sstime >> std::get_time(&tm, "%Y-%m-%dT%T"); + } + std::time_t time = timelocal(&tm); // Assume time is local. + return system_clock::from_time_t(time); + } + + const string timepoint_to_string(const time_point &tp, bool sqlite) + { + constexpr std::uint16_t bufsize = 32; + std::time_t time = system_clock::to_time_t(tp); + std::tm *tm; + tm = std::localtime(&time); + + char buffer[bufsize]; + if (sqlite) + { + std::strftime(buffer, bufsize, "%F %T", tm); + } + else + { + std::strftime(buffer, bufsize, "%FT%T", tm); + } + + return static_cast(buffer); + } +} diff --git a/src/lib/time.hpp b/src/lib/time.hpp new file mode 100644 index 0000000..0df526e --- /dev/null +++ b/src/lib/time.hpp @@ -0,0 +1,53 @@ +/* This file is part of remwharead. + * Copyright © 2019 tastytea + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, version 3. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef REMWHAREAD_TIME_HPP +#define REMWHAREAD_TIME_HPP + +#include +#include + +//! 
@file + +namespace remwharead +{ + using std::string; + using std::chrono::system_clock; + using time_point = system_clock::time_point; + + /*! + * @brief Convert ISO 8601 or SQLite time-string to time_point. + * + * The SQLite format is *YY-MM-DD hh:mm:ss* instead of *YY-MM-DDThh:mm:ss*. + * + * @param strtime Time string in ISO 8601 or SQLite format. + * @param sqlite Is the string in SQLite format? + */ + const time_point string_to_timepoint(const string &strtime, + bool sqlite = false); + + /*! + * @brief Convert time_point to ISO 8601 or SQLite time-string. + * + * The SQLite format is *YY-MM-DD hh:mm:ss* instead of *YY-MM-DDThh:mm:ss*. + * + * @param time_point The std::chrono::system_clock::time_point. + * @param sqlite Is the string in SQLite format? + */ + const string timepoint_to_string(const time_point &tp, bool sqlite = false); +} + +#endif // REMWHAREAD_TIME_HPP diff --git a/src/types.hpp b/src/lib/types.hpp similarity index 80% rename from src/types.hpp rename to src/lib/types.hpp index f1940e6..41c1563 100644 --- a/src/types.hpp +++ b/src/lib/types.hpp @@ -17,13 +17,19 @@ #ifndef REMWHAREAD_TYPES_HPP #define REMWHAREAD_TYPES_HPP -enum class export_format +//! @file + +namespace remwharead { - undefined, - csv, - asciidoc, - bookmarks, - simple -}; + //! Format of the export. + enum class export_format + { + undefined, + csv, + asciidoc, + bookmarks, + simple + }; +} #endif // REMWHAREAD_TYPES_HPP diff --git a/src/lib/uri.cpp b/src/lib/uri.cpp new file mode 100644 index 0000000..50de1f8 --- /dev/null +++ b/src/lib/uri.cpp @@ -0,0 +1,531 @@ +/* This file is part of remwharead. + * Copyright © 2019 tastytea + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, version 3. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "uri.hpp" + +namespace remwharead +{ + namespace curlopts = curlpp::options; + using std::uint64_t; + using std::cerr; + using std::endl; + using std::regex; + using std::regex_replace; + using std::regex_search; + using std::smatch; + using std::regex_constants::icase; + using std::array; + + URI::URI(const string &uri) + :_uri(uri) + { + } + + const html_extract URI::get() + { + try + { + std::ostringstream oss; + curlpp::Easy request; + set_curlpp_options(request); + request.setOpt(_uri); + request.setOpt(&oss); + request.perform(); + + const string answer = oss.str(); + if (answer.empty()) + { + cerr << "Error: Could not download page. 
Response code: " + << curlpp::infos::ResponseCode::get(request) << endl; + } + else + { + return + { + extract_title(answer), + extract_description(answer), + strip_html(answer) + }; + } + } + catch (const std::exception &e) + { + cerr << "Error in " << __func__ << ": " << e.what() << endl; + } + + return { "", "", "" }; + } + + void URI::set_curlpp_options(curlpp::Easy &request) + { + request.setOpt(string("remwharead/") + + global::version); + request.setOpt({ "Connection: close" }); + request.setOpt(true); + } + + const string URI::extract_title(const string &html) + { + const regex re_htmlfile("\\.(.?html?|xml|rss)$"); + if (_uri.substr(0, 4) == "http" || regex_search(_uri, re_htmlfile)) + { + smatch match; + regex_search(html, match, regex("([^<]+)", icase)); + return remove_newlines(unescape_html(match[1].str())); + } + + return ""; + } + + const string URI::extract_description(const string &html) + { + const regex re_htmlfile("\\.(.?html?|xml|rss)$"); + if (_uri.substr(0, 4) == "http" || regex_search(_uri, re_htmlfile)) + { + smatch match; + const regex re("description\"[^>]+content=\"([^\"]+)", icase); + regex_search(html, match, re); + return remove_newlines(strip_html(match[1].str())); + } + + return ""; + } + + const string URI::strip_html(const string &html) + { + string out; + + out = remove_html_tags(html, "script"); // Remove JavaScript. + out = remove_html_tags(out, "style"); // Remove CSS. + out = remove_html_tags(out); // Remove tags. + + size_t pos = 0; + while ((pos = out.find("\r", pos)) != std::string::npos) // Remove CR. + { + out.replace(pos, 1, ""); + } + + out = regex_replace(out, regex("\\s+\n"), "\n"); // Remove space at eol. + out = regex_replace(out, regex("\n{2,}"), "\n"); // Reduce newlines. + + return unescape_html(out); + } + const string URI::remove_html_tags(const string &html, const string &tag) + { + // NOTE: I did this with regex_replace before, but libstdc++ segfaulted. 
+ string out; + if (tag.empty()) + { + size_t pos = 0; + while (pos != std::string::npos) + { + size_t startpos = html.find('<', pos); + size_t endpos = html.find('>', startpos); + out += html.substr(pos, startpos - pos); + pos = endpos; + if (pos != std::string::npos) + { + ++pos; + } + } + } + else + { + size_t pos = 0; + out = html; + while ((pos = out.find("<" + tag)) != std::string::npos) + { + size_t endpos = out.find("</" + tag, pos); + if (endpos == std::string::npos) + { + break; + } + endpos += 3 + tag.length(); // tag + </ + > + out.replace(pos, endpos - pos, ""); + } + } + + return out; + } + + const string URI::unescape_html(const string &html) + { + string buffer = html; + string output; + + // Used to convert int to utf-8 char. + std::wstring_convert<std::codecvt_utf8<char32_t>, char32_t> u8c; + regex re_entity("&#(x)?([[:alnum:]]{1,8});"); + smatch match; + + while (regex_search(buffer, match, re_entity)) + { + char32_t codepoint = 0; + // 'x' in front of the number means it's hexadecimal, else decimal. 
+ if (match[1].length() == 1) + { + codepoint = std::stoi(match[2].str(), nullptr, 16); + } + else + { + codepoint = std::stoi(match[2].str(), nullptr, 10); + } + output += match.prefix().str() + u8c.to_bytes(codepoint); + buffer = match.suffix().str(); + } + output += buffer; + + // Source: https://en.wikipedia.org/wiki/List_of_XML_and_HTML_character_ + // entity_references#Character_entity_references_in_HTML + const array<const std::pair<const string, const char32_t>, 258> names = + {{ + { "exclamation", 0x0021 }, + { "quot", 0x0022 }, + { "percent", 0x0025 }, + { "amp", 0x0026 }, + { "apos", 0x0027 }, + { "add", 0x002B }, + { "lt", 0x003C }, + { "equal", 0x003D }, + { "gt", 0x003E }, + { "nbsp", 0x00A0 }, + { "iexcl", 0x00A1 }, + { "cent", 0x00A2 }, + { "pound", 0x00A3 }, + { "curren", 0x00A4 }, + { "yen", 0x00A5 }, + { "brvbar", 0x00A6 }, + { "sect", 0x00A7 }, + { "uml", 0x00A8 }, + { "copy", 0x00A9 }, + { "ordf", 0x00AA }, + { "laquo", 0x00AB }, + { "not", 0x00AC }, + { "shy", 0x00AD }, + { "reg", 0x00AE }, + { "macr", 0x00AF }, + { "deg", 0x00B0 }, + { "plusmn", 0x00B1 }, + { "sup2", 0x00B2 }, + { "sup3", 0x00B3 }, + { "acute", 0x00B4 }, + { "micro", 0x00B5 }, + { "para", 0x00B6 }, + { "middot", 0x00B7 }, + { "cedil", 0x00B8 }, + { "sup1", 0x00B9 }, + { "ordm", 0x00BA }, + { "raquo", 0x00BB }, + { "frac14", 0x00BC }, + { "frac12", 0x00BD }, + { "frac34", 0x00BE }, + { "iquest", 0x00BF }, + { "Agrave", 0x00C0 }, + { "Aacute", 0x00C1 }, + { "Acirc", 0x00C2 }, + { "Atilde", 0x00C3 }, + { "Auml", 0x00C4 }, + { "Aring", 0x00C5 }, + { "AElig", 0x00C6 }, + { "Ccedil", 0x00C7 }, + { "Egrave", 0x00C8 }, + { "Eacute", 0x00C9 }, + { "Ecirc", 0x00CA }, + { "Euml", 0x00CB }, + { "Igrave", 0x00CC }, + { "Iacute", 0x00CD }, + { "Icirc", 0x00CE }, + { "Iuml", 0x00CF }, + { "ETH", 0x00D0 }, + { "Ntilde", 0x00D1 }, + { "Ograve", 0x00D2 }, + { "Oacute", 0x00D3 }, + { "Ocirc", 0x00D4 }, + { "Otilde", 0x00D5 }, + { "Ouml", 0x00D6 }, + { "times", 0x00D7 }, + { "Oslash", 0x00D8 }, 
+ { "Ugrave", 0x00D9 }, + { "Uacute", 0x00DA }, + { "Ucirc", 0x00DB }, + { "Uuml", 0x00DC }, + { "Yacute", 0x00DD }, + { "THORN", 0x00DE }, + { "szlig", 0x00DF }, + { "agrave", 0x00E0 }, + { "aacute", 0x00E1 }, + { "acirc", 0x00E2 }, + { "atilde", 0x00E3 }, + { "auml", 0x00E4 }, + { "aring", 0x00E5 }, + { "aelig", 0x00E6 }, + { "ccedil", 0x00E7 }, + { "egrave", 0x00E8 }, + { "eacute", 0x00E9 }, + { "ecirc", 0x00EA }, + { "euml", 0x00EB }, + { "igrave", 0x00EC }, + { "iacute", 0x00ED }, + { "icirc", 0x00EE }, + { "iuml", 0x00EF }, + { "eth", 0x00F0 }, + { "ntilde", 0x00F1 }, + { "ograve", 0x00F2 }, + { "oacute", 0x00F3 }, + { "ocirc", 0x00F4 }, + { "otilde", 0x00F5 }, + { "ouml", 0x00F6 }, + { "divide", 0x00F7 }, + { "oslash", 0x00F8 }, + { "ugrave", 0x00F9 }, + { "uacute", 0x00FA }, + { "ucirc", 0x00FB }, + { "uuml", 0x00FC }, + { "yacute", 0x00FD }, + { "thorn", 0x00FE }, + { "yuml", 0x00FF }, + { "OElig", 0x0152 }, + { "oelig", 0x0153 }, + { "Scaron", 0x0160 }, + { "scaron", 0x0161 }, + { "Yuml", 0x0178 }, + { "fnof", 0x0192 }, + { "circ", 0x02C6 }, + { "tilde", 0x02DC }, + { "Alpha", 0x0391 }, + { "Beta", 0x0392 }, + { "Gamma", 0x0393 }, + { "Delta", 0x0394 }, + { "Epsilon", 0x0395 }, + { "Zeta", 0x0396 }, + { "Eta", 0x0397 }, + { "Theta", 0x0398 }, + { "Iota", 0x0399 }, + { "Kappa", 0x039A }, + { "Lambda", 0x039B }, + { "Mu", 0x039C }, + { "Nu", 0x039D }, + { "Xi", 0x039E }, + { "Omicron", 0x039F }, + { "Pi", 0x03A0 }, + { "Rho", 0x03A1 }, + { "Sigma", 0x03A3 }, + { "Tau", 0x03A4 }, + { "Upsilon", 0x03A5 }, + { "Phi", 0x03A6 }, + { "Chi", 0x03A7 }, + { "Psi", 0x03A8 }, + { "Omega", 0x03A9 }, + { "alpha", 0x03B1 }, + { "beta", 0x03B2 }, + { "gamma", 0x03B3 }, + { "delta", 0x03B4 }, + { "epsilon", 0x03B5 }, + { "zeta", 0x03B6 }, + { "eta", 0x03B7 }, + { "theta", 0x03B8 }, + { "iota", 0x03B9 }, + { "kappa", 0x03BA }, + { "lambda", 0x03BB }, + { "mu", 0x03BC }, + { "nu", 0x03BD }, + { "xi", 0x03BE }, + { "omicron", 0x03BF }, + { "pi", 0x03C0 }, + { "rho", 0x03C1 }, 
+ { "sigmaf", 0x03C2 }, + { "sigma", 0x03C3 }, + { "tau", 0x03C4 }, + { "upsilon", 0x03C5 }, + { "phi", 0x03C6 }, + { "chi", 0x03C7 }, + { "psi", 0x03C8 }, + { "omega", 0x03C9 }, + { "thetasym", 0x03D1 }, + { "upsih", 0x03D2 }, + { "piv", 0x03D6 }, + { "ensp", 0x2002 }, + { "emsp", 0x2003 }, + { "thinsp", 0x2009 }, + { "zwnj", 0x200C }, + { "zwj", 0x200D }, + { "lrm", 0x200E }, + { "rlm", 0x200F }, + { "ndash", 0x2013 }, + { "mdash", 0x2014 }, + { "horbar", 0x2015 }, + { "lsquo", 0x2018 }, + { "rsquo", 0x2019 }, + { "sbquo", 0x201A }, + { "ldquo", 0x201C }, + { "rdquo", 0x201D }, + { "bdquo", 0x201E }, + { "dagger", 0x2020 }, + { "Dagger", 0x2021 }, + { "bull", 0x2022 }, + { "hellip", 0x2026 }, + { "permil", 0x2030 }, + { "prime", 0x2032 }, + { "Prime", 0x2033 }, + { "lsaquo", 0x2039 }, + { "rsaquo", 0x203A }, + { "oline", 0x203E }, + { "frasl", 0x2044 }, + { "euro", 0x20AC }, + { "image", 0x2111 }, + { "weierp", 0x2118 }, + { "real", 0x211C }, + { "trade", 0x2122 }, + { "alefsym", 0x2135 }, + { "larr", 0x2190 }, + { "uarr", 0x2191 }, + { "rarr", 0x2192 }, + { "darr", 0x2193 }, + { "harr", 0x2194 }, + { "crarr", 0x21B5 }, + { "lArr", 0x21D0 }, + { "uArr", 0x21D1 }, + { "rArr", 0x21D2 }, + { "dArr", 0x21D3 }, + { "hArr", 0x21D4 }, + { "forall", 0x2200 }, + { "part", 0x2202 }, + { "exist", 0x2203 }, + { "empty", 0x2205 }, + { "nabla", 0x2207 }, + { "isin", 0x2208 }, + { "notin", 0x2209 }, + { "ni", 0x220B }, + { "prod", 0x220F }, + { "sum", 0x2211 }, + { "minus", 0x2212 }, + { "lowast", 0x2217 }, + { "radic", 0x221A }, + { "prop", 0x221D }, + { "infin", 0x221E }, + { "ang", 0x2220 }, + { "and", 0x2227 }, + { "or", 0x2228 }, + { "cap", 0x2229 }, + { "cup", 0x222A }, + { "int", 0x222B }, + { "there4", 0x2234 }, + { "sim", 0x223C }, + { "cong", 0x2245 }, + { "asymp", 0x2248 }, + { "ne", 0x2260 }, + { "equiv", 0x2261 }, + { "le", 0x2264 }, + { "ge", 0x2265 }, + { "sub", 0x2282 }, + { "sup", 0x2283 }, + { "nsub", 0x2284 }, + { "sube", 0x2286 }, + { "supe", 0x2287 }, + { 
"oplus", 0x2295 }, + { "otimes", 0x2297 }, + { "perp", 0x22A5 }, + { "sdot", 0x22C5 }, + { "lceil", 0x2308 }, + { "rceil", 0x2309 }, + { "lfloor", 0x230A }, + { "rfloor", 0x230B }, + { "lang", 0x2329 }, + { "rang", 0x232A }, + { "loz", 0x25CA }, + { "spades", 0x2660 }, + { "clubs", 0x2663 }, + { "hearts", 0x2665 }, + { "diams", 0x2666 } + }}; + + for (auto &pair : names) + { + const regex re('&' + pair.first + ';'); + output = regex_replace(output, re, u8c.to_bytes(pair.second)); + } + + return output; + } + + const string URI::archive() + { + if (_uri.substr(0, 4) != "http") + { + return ""; + } + + try + { + std::ostringstream oss; + curlpp::Easy request; + set_curlpp_options(request); + request.setOpt<curlopts::Url>("https://web.archive.org/save/" + + _uri); + request.setOpt<curlopts::WriteStream>(&oss); + request.setOpt<curlopts::NoBody>(true); // Make HEAD request. + request.setOpt<curlpp::options::Header>(true); // Keep headers. + request.perform(); + + smatch match; + const string answer = oss.str(); + if (regex_search(answer, match, regex("Content-Location: (.+)\r"))) + { + return "https://web.archive.org" + match[1].str(); + } + else + { + cerr << "Error: Could not archive page. HTTP status: " + << curlpp::infos::ResponseCode::get(request) << endl; + } + } + catch (const std::exception &e) + { + cerr << "Error in " << __func__ << ": " << e.what() << endl; + } + + return ""; + } + + const string URI::remove_newlines(string text) + { + size_t posn = 0; + while ((posn = text.find('\n', posn)) != std::string::npos) + { + text.replace(posn, 1, " "); + + size_t posr = posn - 1; + if (text[posr] == '\r') + { + text.replace(posr, 1, " "); + } + ++posn; + } + + return text; + } +} diff --git a/src/lib/uri.hpp b/src/lib/uri.hpp new file mode 100644 index 0000000..12fc51d --- /dev/null +++ b/src/lib/uri.hpp @@ -0,0 +1,80 @@ +/* This file is part of remwharead. 
+ * Copyright © 2019 tastytea <tastytea@tastytea.de> + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, version 3. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef REMWHAREAD_URI_HPP +#define REMWHAREAD_URI_HPP + +#include <string> +#include <curlpp/Easy.hpp> + +namespace remwharead +{ + using std::string; + + //! A processed HTML page. + typedef struct html_extract + { + string title; + string description; + string fulltext; + } html_extract; + + //! Download, archive and process an %URI. + class URI + { + public: + //! Construct object and set URL. + explicit URI(const string &uri); + + //! Download %URI and extract title, description and full text. + const html_extract get(); + + //! Save %URI in archive and return archive-URI. + const string archive(); + + protected: + string _uri; + + //! Sets common curlpp options. + void set_curlpp_options(curlpp::Easy &request); + + //! Extract the title from an HTML page. + const string extract_title(const string &html); + + //! Extract the description from an HTML page. + const string extract_description(const string &html); + + //! Removes HTML tags and superfluous spaces from an HTML page. + const string strip_html(const string &html); + + /*! + * @brief Remove HTML tags. + * + * @param html HTML page. + * @param tag If set, only remove this tag. + */ + const string remove_html_tags(const string &html, + const string &tag = ""); + + //! Convert HTML entities to UTF-8. + const string unescape_html(const string &html); + + //!
Replace newlines with spaces. + const string remove_newlines(string text); + }; +} + +#endif // REMWHAREAD_URI_HPP diff --git a/src/search.cpp b/src/search.cpp deleted file mode 100644 index e1256be..0000000 --- a/src/search.cpp +++ /dev/null @@ -1,192 +0,0 @@ -/* This file is part of remwharead. - * Copyright © 2019 tastytea <tastytea@tastytea.de> - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, version 3. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - */ - -#include <regex> -#include <algorithm> -#include <locale> -#include <unicode/unistr.h> -#include "search.hpp" - -using std::regex; -using std::regex_search; -using std::smatch; -using std::find; -using std::find_if; - -const vector<vector<string>> parse_expression(string expression) -{ - vector<vector<string>> searchlist; - const regex re_or("(.+?) (OR|\\|\\|) "); - const regex re_and("(.+?) (AND|&&) "); - smatch match; - - vector<string> subexpressions; - { // Split expression at OR. - while (regex_search(expression, match, re_or)) - { - subexpressions.push_back(match[1].str()); - expression = match.suffix().str(); - } - subexpressions.push_back(expression); - } - - { - for (string sub : subexpressions) - { // Split each OR-slice at AND. 
- vector<string> terms; - while (regex_search(sub, match, re_and)) - { - terms.push_back(to_lowercase(match[1].str())); - sub = match.suffix().str(); - } - terms.push_back(to_lowercase(sub)); - searchlist.push_back(terms); - } - } - - return searchlist; -} - -const string to_lowercase(const string &str) -{ - icu::UnicodeString uni(str.c_str()); - string out; - uni.toLower().toUTF8String(out); - return out; -} - -const vector<Database::entry> -search_tags(const vector<Database::entry> &entries, string expression, - const bool is_re) -{ - vector<vector<string>> searchlist = parse_expression(expression); - vector<Database::entry> result; - - for (const vector<string> &tags_or : searchlist) - { - for (const Database::entry &entry : entries) - { // Add entry to result if all tags in an OR-slice match. - bool matched = true; - - for (const string &tag : tags_or) - { - const auto it = find_if(entry.tags.begin(), entry.tags.end(), - [&tag, is_re](string s) - { - s = to_lowercase(s); - if (is_re) - { - const regex re("^" + tag + "$"); - return regex_search(s, re); - } - else - { - return (s == tag); - } - }); - if (it == entry.tags.end()) - { - matched = false; - } - } - if (matched == true) - { - result.push_back(entry); - } - } - } - - return result; -} - -const vector<Database::entry> -search_all(const vector<Database::entry> &entries, string expression, - const bool is_re) -{ - vector<vector<string>> searchlist = parse_expression(expression); - vector<Database::entry> result = search_tags(entries, expression, is_re); - - for (const vector<string> &terms_or : searchlist) - { - for (const Database::entry &entry : entries) - { - // Add entry to result if all terms in an OR-slice match title, - // description or full text. - bool matched_title = true; - bool matched_description = true; - bool matched_fulltext = true; - - const auto it = find(result.begin(), result.end(), entry); - if (it != result.end()) - { // Skip if already in result list. 
- continue; - } - - for (const string &term : terms_or) - { - const string title = to_lowercase(entry.title); - const string description = to_lowercase(entry.description); - const string fulltext = to_lowercase(entry.fulltext); - - // Set matched_* to false if term is not found. - if (is_re) - { - const regex re(term); - - if(!regex_search(title, re)) - { - matched_title = false; - } - - if(!regex_search(description, re)) - { - matched_description = false; - } - - if(!regex_search(fulltext, re)) - { - matched_fulltext = false; - } - } - else - { - if (title.find(term) == string::npos) - { - matched_title = false; - } - - if (description.find(term) == string::npos) - { - matched_description = false; - } - - if (fulltext.find(term) == string::npos) - { - matched_fulltext = false; - } - } - } - if (matched_title == true - || matched_description == true - || matched_fulltext == true) - { - result.push_back(entry); - } - } - } - - return result; -} diff --git a/src/search.hpp b/src/search.hpp deleted file mode 100644 index 620e94e..0000000 --- a/src/search.hpp +++ /dev/null @@ -1,40 +0,0 @@ -/* This file is part of remwharead. - * Copyright © 2019 tastytea <tastytea@tastytea.de> - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, version 3. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. 
- */ - -#ifndef REMWHAREAD_SEARCH_HPP -#define REMWHAREAD_SEARCH_HPP - -#include <vector> -#include <string> -#include "sqlite.hpp" - -using std::vector; -using std::string; - -const vector<vector<string>> parse_expression(string expression); -const string to_lowercase(const string &str); - -//! Seach database entries for tags. -const vector<Database::entry> -search_tags(const vector<Database::entry> &entries, string expression, - const bool is_re); - -//! Search tags, title, description and full text. -const vector<Database::entry> -search_all(const vector<Database::entry> &entries, string expression, - const bool is_re); - -#endif // REMWHAREAD_SEARCH_HPP diff --git a/src/sqlite.cpp b/src/sqlite.cpp deleted file mode 100644 index 38b9c9e..0000000 --- a/src/sqlite.cpp +++ /dev/null @@ -1,160 +0,0 @@ -/* This file is part of remwharead. - * Copyright © 2019 tastytea <tastytea@tastytea.de> - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, version 3. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. 
- */ - -#include <exception> -#include <iostream> -#include <algorithm> -#include <basedir.h> -#include <sqlite/execute.hpp> -#include <sqlite/query.hpp> -#include "time.hpp" -#include "sqlite.hpp" - -using std::cerr; -using std::endl; - -Database::Database() - : _connected(false) -{ - try - { - xdgHandle xdg; - xdgInitHandle(&xdg); - _dbpath = xdgDataHome(&xdg) / fs::path("remwharead"); - xdgWipeHandle(&xdg); - - if (!fs::exists(_dbpath)) - { - fs::create_directories(_dbpath); - } - _dbpath /= "database.sqlite"; - - _con = std::make_unique<sqlite::connection>(_dbpath); - sqlite::execute(*_con, "CREATE TABLE IF NOT EXISTS remwharead(" - "uri TEXT, archive_uri TEXT, datetime TEXT, tags TEXT, " - "title TEXT, description TEXT, fulltext TEXT);", true); - - _connected = true; - } - catch (std::exception &e) - { - cerr << "Error in " << __func__ << ": " << e.what() << endl; - } -} - -Database::operator bool() const -{ - return _connected; -} - -bool operator ==(const Database::entry &a, const Database::entry &b) -{ - if (a.datetime == b.datetime) - { - return true; - } - - return false; -} - -const string Database::entry::fulltext_oneline() const -{ - string oneline = fulltext; - size_t pos = 0; - while ((pos = oneline.find('\n', pos)) != std::string::npos) - { - oneline.replace(pos, 1, "\\n"); - } - return oneline; -} - -void Database::store(const Database::entry &data) const -{ - try - { - const string strdatetime = timepoint_to_string(data.datetime, true); - string strtags; - for (const string &tag : data.tags) - { - strtags += tag; - if (tag != *(data.tags.rbegin())) - { - strtags += ","; - } - } - - sqlite::execute ins(*_con, "INSERT INTO remwharead " - "VALUES(?, ?, ?, ?, ?, ?, ?);"); - ins % data.uri % data.archive_uri % strdatetime % strtags - % data.title % data.description % data.fulltext; - ins(); - } - catch (std::exception &e) - { - cerr << "Error in " << __func__ << ": " << e.what() << endl; - } -} - -const vector<Database::entry> Database::retrieve(const 
time_point &start, - const time_point &end) const -{ - try - { - const string query = "SELECT * FROM remwharead WHERE datetime " - "BETWEEN '" + timepoint_to_string(start, true) - + "' AND '" + timepoint_to_string(end, true) - + "' ORDER BY datetime DESC;"; - - sqlite::query q(*_con, query); - sqlite::result_type res = q.get_result(); - vector<entry> entries; - - while(res->next_row()) - { - vector<string> tags; - const string strtags = res->get_string(3); - size_t pos = 0; - while (pos != std::string::npos) - { - const size_t newpos = strtags.find(',', pos); - tags.push_back(strtags.substr(pos, newpos - pos)); - pos = newpos; - if (pos != std::string::npos) - { - ++pos; - } - } - entries.push_back - ({ - res->get_string(0), - res->get_string(1), - string_to_timepoint(res->get_string(2), true), - tags, - res->get_string(4), - res->get_string(5), - res->get_string(6) - }); - } - - return entries; - } - catch (std::exception &e) - { - cerr << "Error in " << __func__ << ": " << e.what() << endl; - } - - return {}; -} diff --git a/src/sqlite.hpp b/src/sqlite.hpp deleted file mode 100644 index 51da00c..0000000 --- a/src/sqlite.hpp +++ /dev/null @@ -1,71 +0,0 @@ -/* This file is part of remwharead. - * Copyright © 2019 tastytea <tastytea@tastytea.de> - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, version 3. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. 
- */ - -#ifndef REMWHAREAD_SQLITE_HPP -#define REMWHAREAD_SQLITE_HPP - -#include <experimental/filesystem> -#include <memory> -#include <string> -#include <vector> -#include <chrono> -#include <sqlite/connection.hpp> -#include "types.hpp" - -namespace fs = std::experimental::filesystem; -using std::string; -using std::vector; -using std::chrono::system_clock; -using time_point = system_clock::time_point; - -class Database -{ -public: - typedef struct entry - { - string uri; - string archive_uri; - time_point datetime; - vector<string> tags; - string title; - string description; - string fulltext; - - //! Returns true if date & time are equal. - friend bool operator ==(const Database::entry &a, - const Database::entry &b); - //! The full text in one line. - const string fulltext_oneline() const; - } entry; - - Database(); - operator bool() const; - - //! Store in database. - void store(const entry &data) const; - - //! retrieve from database. - const vector<entry> retrieve(const time_point &start = time_point(), - const time_point &end = system_clock::now()) - const; - -private: - fs::path _dbpath; - std::unique_ptr<sqlite::connection> _con; - bool _connected; -}; - -#endif // REMWHAREAD_SQLITE_HPP diff --git a/src/time.cpp b/src/time.cpp deleted file mode 100644 index a20ab8c..0000000 --- a/src/time.cpp +++ /dev/null @@ -1,58 +0,0 @@ -/* This file is part of remwharead. - * Copyright © 2019 tastytea <tastytea@tastytea.de> - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, version 3. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. 
If not, see <http://www.gnu.org/licenses/>. - */ - -#include <ctime> -#include <iomanip> -#include <sstream> -#include <cstdint> -#include "time.hpp" - -const time_point string_to_timepoint(const string &strtime, bool sqlite) -{ - std::stringstream sstime(strtime); - struct std::tm tm = {}; - tm.tm_isdst = -1; // Detect daylight saving time. - if (sqlite) - { - sstime >> std::get_time(&tm, "%Y-%m-%d %T"); - } - else - { - sstime >> std::get_time(&tm, "%Y-%m-%dT%T"); - } - std::time_t time = timelocal(&tm); // Assume time is local. - return system_clock::from_time_t(time); -} - -const string timepoint_to_string(const time_point &tp, bool sqlite) -{ - constexpr std::uint16_t bufsize = 32; - std::time_t time = system_clock::to_time_t(tp); - std::tm *tm; - tm = std::localtime(&time); - - char buffer[bufsize]; - if (sqlite) - { - std::strftime(buffer, bufsize, "%F %T", tm); - } - else - { - std::strftime(buffer, bufsize, "%FT%T", tm); - } - - return static_cast<const string>(buffer); -} diff --git a/src/uri.cpp b/src/uri.cpp deleted file mode 100644 index ee0b8fe..0000000 --- a/src/uri.cpp +++ /dev/null @@ -1,526 +0,0 @@ -/* This file is part of remwharead. - * Copyright © 2019 tastytea <tastytea@tastytea.de> - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, version 3. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. 
- */ - -#include <sstream> -#include <cstdint> -#include <iostream> -#include <regex> -#include <locale> -#include <codecvt> -#include <curlpp/cURLpp.hpp> -#include <curlpp/Options.hpp> -#include <curlpp/Exception.hpp> -#include <curlpp/Infos.hpp> -#include <version.hpp> -#include "uri.hpp" - -namespace curlopts = curlpp::options; -using std::uint64_t; -using std::cerr; -using std::endl; -using std::regex; -using std::regex_replace; -using std::regex_search; -using std::smatch; -using std::regex_constants::icase; - -URI::URI(const string &uri) - :_uri(uri) -{ -} - -const html_extract URI::get() -{ - try - { - std::ostringstream oss; - curlpp::Easy request; - set_curlpp_options(request); - request.setOpt<curlopts::Url>(_uri); - request.setOpt<curlopts::WriteStream>(&oss); - request.perform(); - - const string answer = oss.str(); - if (answer.empty()) - { - cerr << "Error: Could not download page. Response code: " - << curlpp::infos::ResponseCode::get(request) << endl; - } - else - { - return - { - extract_title(answer), - extract_description(answer), - strip_html(answer) - }; - } - } - catch (const std::exception &e) - { - cerr << "Error in " << __func__ << ": " << e.what() << endl; - } - - return { "", "", "" }; -} - -void URI::set_curlpp_options(curlpp::Easy &request) -{ - request.setOpt<curlopts::UserAgent>(string("remwharead/") - + global::version); - request.setOpt<curlopts::HttpHeader>({ "Connection: close" }); - request.setOpt<curlopts::FollowLocation>(true); -} - -const string URI::extract_title(const string &html) -{ - const regex re_htmlfile("\\.(.?html?|xml|rss)$"); - if (_uri.substr(0, 4) == "http" || regex_search(_uri, re_htmlfile)) - { - smatch match; - regex_search(html, match, regex("<title>([^<]+)", icase)); - return remove_newlines(unescape_html(match[1].str())); - } - - return ""; -} - -const string URI::extract_description(const string &html) -{ - const regex re_htmlfile("\\.(.?html?|xml|rss)$"); - if (_uri.substr(0, 4) == "http" || 
regex_search(_uri, re_htmlfile)) - { - smatch match; - const regex re("description\"[^>]+content=\"([^\"]+)", icase); - regex_search(html, match, re); - return remove_newlines(strip_html(match[1].str())); - } - - return ""; -} - -const string URI::strip_html(const string &html) -{ - string out; - - out = remove_html_tags(html, "script"); // Remove JavaScript. - out = remove_html_tags(out, "style"); // Remove CSS. - out = remove_html_tags(out); // Remove tags. - - size_t pos = 0; - while ((pos = out.find("\r", pos)) != std::string::npos) // Remove CR. - { - out.replace(pos, 1, ""); - } - - out = regex_replace(out, regex("\\s+\n"), "\n"); // Remove trailing space. - out = regex_replace(out, regex("\n{2,}"), "\n"); // Reduce newlines. - - return unescape_html(out); -} -const string URI::remove_html_tags(const string &html, const string &tag) -{ - // NOTE: I did this with regex_replace before, but libstdc++ segfaulted. - string out; - if (tag.empty()) - { - size_t pos = 0; - while (pos != std::string::npos) - { - size_t startpos = html.find('<', pos); - size_t endpos = html.find('>', startpos); - out += html.substr(pos, startpos - pos); - pos = endpos; - if (pos != std::string::npos) - { - ++pos; - } - } - } - else - { - size_t pos = 0; - out = html; - while ((pos = out.find("<" + tag)) != std::string::npos) - { - size_t endpos = out.find("</" + tag, pos); - if (endpos == std::string::npos) - { - break; - } - endpos += 3 + tag.length(); // tag + </ + > - out.replace(pos, endpos - pos, ""); - } - } - - return out; -} - -const string URI::unescape_html(const string &html) -{ - string buffer = html; - string output; - - // Used to convert int to utf-8 char. - std::wstring_convert<std::codecvt_utf8<char32_t>, char32_t> u8c; - regex re_entity("&#(x)?([[:alnum:]]{1,8});"); - smatch match; - - while (regex_search(buffer, match, re_entity)) - { - char32_t codepoint = 0; - // 'x' in front of the number means it's hexadecimal, else decimal. 
- if (match[1].length() == 1) - { - codepoint = std::stoi(match[2].str(), nullptr, 16); - } - else - { - codepoint = std::stoi(match[2].str(), nullptr, 10); - } - output += match.prefix().str() + u8c.to_bytes(codepoint); - buffer = match.suffix().str(); - } - output += buffer; - - // Source: https://en.wikipedia.org/wiki/List_of_XML_and_HTML_character_ - // entity_references#Character_entity_references_in_HTML - const std::array<const std::pair<const string, const char32_t>, 258> names = - {{ - { "exclamation", 0x0021 }, - { "quot", 0x0022 }, - { "percent", 0x0025 }, - { "amp", 0x0026 }, - { "apos", 0x0027 }, - { "add", 0x002B }, - { "lt", 0x003C }, - { "equal", 0x003D }, - { "gt", 0x003E }, - { "nbsp", 0x00A0 }, - { "iexcl", 0x00A1 }, - { "cent", 0x00A2 }, - { "pound", 0x00A3 }, - { "curren", 0x00A4 }, - { "yen", 0x00A5 }, - { "brvbar", 0x00A6 }, - { "sect", 0x00A7 }, - { "uml", 0x00A8 }, - { "copy", 0x00A9 }, - { "ordf", 0x00AA }, - { "laquo", 0x00AB }, - { "not", 0x00AC }, - { "shy", 0x00AD }, - { "reg", 0x00AE }, - { "macr", 0x00AF }, - { "deg", 0x00B0 }, - { "plusmn", 0x00B1 }, - { "sup2", 0x00B2 }, - { "sup3", 0x00B3 }, - { "acute", 0x00B4 }, - { "micro", 0x00B5 }, - { "para", 0x00B6 }, - { "middot", 0x00B7 }, - { "cedil", 0x00B8 }, - { "sup1", 0x00B9 }, - { "ordm", 0x00BA }, - { "raquo", 0x00BB }, - { "frac14", 0x00BC }, - { "frac12", 0x00BD }, - { "frac34", 0x00BE }, - { "iquest", 0x00BF }, - { "Agrave", 0x00C0 }, - { "Aacute", 0x00C1 }, - { "Acirc", 0x00C2 }, - { "Atilde", 0x00C3 }, - { "Auml", 0x00C4 }, - { "Aring", 0x00C5 }, - { "AElig", 0x00C6 }, - { "Ccedil", 0x00C7 }, - { "Egrave", 0x00C8 }, - { "Eacute", 0x00C9 }, - { "Ecirc", 0x00CA }, - { "Euml", 0x00CB }, - { "Igrave", 0x00CC }, - { "Iacute", 0x00CD }, - { "Icirc", 0x00CE }, - { "Iuml", 0x00CF }, - { "ETH", 0x00D0 }, - { "Ntilde", 0x00D1 }, - { "Ograve", 0x00D2 }, - { "Oacute", 0x00D3 }, - { "Ocirc", 0x00D4 }, - { "Otilde", 0x00D5 }, - { "Ouml", 0x00D6 }, - { "times", 0x00D7 }, - { "Oslash", 
0x00D8 }, - { "Ugrave", 0x00D9 }, - { "Uacute", 0x00DA }, - { "Ucirc", 0x00DB }, - { "Uuml", 0x00DC }, - { "Yacute", 0x00DD }, - { "THORN", 0x00DE }, - { "szlig", 0x00DF }, - { "agrave", 0x00E0 }, - { "aacute", 0x00E1 }, - { "acirc", 0x00E2 }, - { "atilde", 0x00E3 }, - { "auml", 0x00E4 }, - { "aring", 0x00E5 }, - { "aelig", 0x00E6 }, - { "ccedil", 0x00E7 }, - { "egrave", 0x00E8 }, - { "eacute", 0x00E9 }, - { "ecirc", 0x00EA }, - { "euml", 0x00EB }, - { "igrave", 0x00EC }, - { "iacute", 0x00ED }, - { "icirc", 0x00EE }, - { "iuml", 0x00EF }, - { "eth", 0x00F0 }, - { "ntilde", 0x00F1 }, - { "ograve", 0x00F2 }, - { "oacute", 0x00F3 }, - { "ocirc", 0x00F4 }, - { "otilde", 0x00F5 }, - { "ouml", 0x00F6 }, - { "divide", 0x00F7 }, - { "oslash", 0x00F8 }, - { "ugrave", 0x00F9 }, - { "uacute", 0x00FA }, - { "ucirc", 0x00FB }, - { "uuml", 0x00FC }, - { "yacute", 0x00FD }, - { "thorn", 0x00FE }, - { "yuml", 0x00FF }, - { "OElig", 0x0152 }, - { "oelig", 0x0153 }, - { "Scaron", 0x0160 }, - { "scaron", 0x0161 }, - { "Yuml", 0x0178 }, - { "fnof", 0x0192 }, - { "circ", 0x02C6 }, - { "tilde", 0x02DC }, - { "Alpha", 0x0391 }, - { "Beta", 0x0392 }, - { "Gamma", 0x0393 }, - { "Delta", 0x0394 }, - { "Epsilon", 0x0395 }, - { "Zeta", 0x0396 }, - { "Eta", 0x0397 }, - { "Theta", 0x0398 }, - { "Iota", 0x0399 }, - { "Kappa", 0x039A }, - { "Lambda", 0x039B }, - { "Mu", 0x039C }, - { "Nu", 0x039D }, - { "Xi", 0x039E }, - { "Omicron", 0x039F }, - { "Pi", 0x03A0 }, - { "Rho", 0x03A1 }, - { "Sigma", 0x03A3 }, - { "Tau", 0x03A4 }, - { "Upsilon", 0x03A5 }, - { "Phi", 0x03A6 }, - { "Chi", 0x03A7 }, - { "Psi", 0x03A8 }, - { "Omega", 0x03A9 }, - { "alpha", 0x03B1 }, - { "beta", 0x03B2 }, - { "gamma", 0x03B3 }, - { "delta", 0x03B4 }, - { "epsilon", 0x03B5 }, - { "zeta", 0x03B6 }, - { "eta", 0x03B7 }, - { "theta", 0x03B8 }, - { "iota", 0x03B9 }, - { "kappa", 0x03BA }, - { "lambda", 0x03BB }, - { "mu", 0x03BC }, - { "nu", 0x03BD }, - { "xi", 0x03BE }, - { "omicron", 0x03BF }, - { "pi", 0x03C0 }, - { "rho", 
0x03C1 }, - { "sigmaf", 0x03C2 }, - { "sigma", 0x03C3 }, - { "tau", 0x03C4 }, - { "upsilon", 0x03C5 }, - { "phi", 0x03C6 }, - { "chi", 0x03C7 }, - { "psi", 0x03C8 }, - { "omega", 0x03C9 }, - { "thetasym", 0x03D1 }, - { "upsih", 0x03D2 }, - { "piv", 0x03D6 }, - { "ensp", 0x2002 }, - { "emsp", 0x2003 }, - { "thinsp", 0x2009 }, - { "zwnj", 0x200C }, - { "zwj", 0x200D }, - { "lrm", 0x200E }, - { "rlm", 0x200F }, - { "ndash", 0x2013 }, - { "mdash", 0x2014 }, - { "horbar", 0x2015 }, - { "lsquo", 0x2018 }, - { "rsquo", 0x2019 }, - { "sbquo", 0x201A }, - { "ldquo", 0x201C }, - { "rdquo", 0x201D }, - { "bdquo", 0x201E }, - { "dagger", 0x2020 }, - { "Dagger", 0x2021 }, - { "bull", 0x2022 }, - { "hellip", 0x2026 }, - { "permil", 0x2030 }, - { "prime", 0x2032 }, - { "Prime", 0x2033 }, - { "lsaquo", 0x2039 }, - { "rsaquo", 0x203A }, - { "oline", 0x203E }, - { "frasl", 0x2044 }, - { "euro", 0x20AC }, - { "image", 0x2111 }, - { "weierp", 0x2118 }, - { "real", 0x211C }, - { "trade", 0x2122 }, - { "alefsym", 0x2135 }, - { "larr", 0x2190 }, - { "uarr", 0x2191 }, - { "rarr", 0x2192 }, - { "darr", 0x2193 }, - { "harr", 0x2194 }, - { "crarr", 0x21B5 }, - { "lArr", 0x21D0 }, - { "uArr", 0x21D1 }, - { "rArr", 0x21D2 }, - { "dArr", 0x21D3 }, - { "hArr", 0x21D4 }, - { "forall", 0x2200 }, - { "part", 0x2202 }, - { "exist", 0x2203 }, - { "empty", 0x2205 }, - { "nabla", 0x2207 }, - { "isin", 0x2208 }, - { "notin", 0x2209 }, - { "ni", 0x220B }, - { "prod", 0x220F }, - { "sum", 0x2211 }, - { "minus", 0x2212 }, - { "lowast", 0x2217 }, - { "radic", 0x221A }, - { "prop", 0x221D }, - { "infin", 0x221E }, - { "ang", 0x2220 }, - { "and", 0x2227 }, - { "or", 0x2228 }, - { "cap", 0x2229 }, - { "cup", 0x222A }, - { "int", 0x222B }, - { "there4", 0x2234 }, - { "sim", 0x223C }, - { "cong", 0x2245 }, - { "asymp", 0x2248 }, - { "ne", 0x2260 }, - { "equiv", 0x2261 }, - { "le", 0x2264 }, - { "ge", 0x2265 }, - { "sub", 0x2282 }, - { "sup", 0x2283 }, - { "nsub", 0x2284 }, - { "sube", 0x2286 }, - { "supe", 
0x2287 }, - { "oplus", 0x2295 }, - { "otimes", 0x2297 }, - { "perp", 0x22A5 }, - { "sdot", 0x22C5 }, - { "lceil", 0x2308 }, - { "rceil", 0x2309 }, - { "lfloor", 0x230A }, - { "rfloor", 0x230B }, - { "lang", 0x2329 }, - { "rang", 0x232A }, - { "loz", 0x25CA }, - { "spades", 0x2660 }, - { "clubs", 0x2663 }, - { "hearts", 0x2665 }, - { "diams", 0x2666 } - }}; - - for (auto &pair : names) - { - const regex re('&' + pair.first + ';'); - output = regex_replace(output, re, u8c.to_bytes(pair.second)); - } - - return output; -} - -const string URI::archive() -{ - if (_uri.substr(0, 4) != "http") - { - return ""; - } - - try - { - std::ostringstream oss; - curlpp::Easy request; - set_curlpp_options(request); - request.setOpt<curlopts::Url>("https://web.archive.org/save/" + _uri); - request.setOpt<curlopts::WriteStream>(&oss); - request.setOpt<curlopts::NoBody>(true); // Make a HEAD request. - request.setOpt<curlpp::options::Header>(true); // Save headers in oss. - request.perform(); - - smatch match; - const string answer = oss.str(); - if (regex_search(answer, match, regex("Content-Location: (.+)\r\n"))) - { - return "https://web.archive.org" + match[1].str(); - } - else - { - cerr << "Error: Could not archive page. HTTP status: " - << curlpp::infos::ResponseCode::get(request) << endl; - } - } - catch (const std::exception &e) - { - cerr << "Error in " << __func__ << ": " << e.what() << endl; - } - - return ""; -} - -const string URI::remove_newlines(string text) -{ - size_t posn = 0; - while ((posn = text.find('\n', posn)) != std::string::npos) - { - text.replace(posn, 1, " "); - - size_t posr = posn - 1; - if (text[posr] == '\r') - { - text.replace(posr, 1, " "); - } - ++posn; - } - - return text; -} diff --git a/src/uri.hpp b/src/uri.hpp deleted file mode 100644 index 993e0e0..0000000 --- a/src/uri.hpp +++ /dev/null @@ -1,56 +0,0 @@ -/* This file is part of remwharead. 
- * Copyright © 2019 tastytea <tastytea@tastytea.de> - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, version 3. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - */ - -#ifndef REMWHAREAD_URI_HPP -#define REMWHAREAD_URI_HPP - -#include <string> -#include <curlpp/Easy.hpp> - -using std::string; - -typedef struct html_extract -{ - string title; - string description; - string fulltext; -} html_extract; - -class URI -{ -public: - explicit URI(const string &uri); - - //! Download URI and extract title, description and full text. - const html_extract get(); - //! Save URI in archive and return URI. - const string archive(); - -protected: - string _uri; - - void set_curlpp_options(curlpp::Easy &request); - const string extract_title(const string &html); - const string extract_description(const string &html); - const string strip_html(const string &html); - //! Remove all HTML tags. If tag is not empty, remove tag and its content. - const string remove_html_tags(const string &html, const string &tag = ""); - const string unescape_html(const string &html); - //! Replace newlines with spaces. 
- const string remove_newlines(string text); -}; - -#endif // REMWHAREAD_URI_HPP diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index cd11df0..f730478 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -5,7 +5,8 @@ find_package(Catch2) if(Catch2_FOUND) # Catch 2.x include(Catch) add_executable(all_tests main.cpp ${sources_tests}) - target_link_libraries(all_tests Catch2::Catch2 ${PROJECT_NAME}_testlib) + target_link_libraries(all_tests + Catch2::Catch2 ${PROJECT_NAME} ${PROJECT_NAME}_testlib) target_include_directories(all_tests PRIVATE "/usr/include/catch2") catch_discover_tests(all_tests EXTRA_ARGS "${EXTRA_TEST_ARGS}") else() # Catch 1.x @@ -14,7 +15,7 @@ else() # Catch 1.x foreach(src ${sources_tests}) get_filename_component(bin ${src} NAME_WE) add_executable(${bin} main.cpp ${src}) - target_link_libraries(${bin} ${PROJECT_NAME}_testlib) + target_link_libraries(${bin} ${PROJECT_NAME} ${PROJECT_NAME}_testlib) add_test(${bin} ${bin} "${EXTRA_TEST_ARGS}") endforeach() else() diff --git a/tests/test_adoc.cpp b/tests/test_adoc.cpp index 586f0cc..e94683c 100644 --- a/tests/test_adoc.cpp +++ b/tests/test_adoc.cpp @@ -22,8 +22,9 @@ #include <catch.hpp> #include "time.hpp" #include "sqlite.hpp" -#include "adoc.hpp" +#include "export/adoc.hpp" +using namespace remwharead; using std::string; using std::chrono::system_clock; using std::regex; diff --git a/tests/test_bookmarks.cpp b/tests/test_bookmarks.cpp index b63c170..cb60178 100644 --- a/tests/test_bookmarks.cpp +++ b/tests/test_bookmarks.cpp @@ -21,8 +21,9 @@ #include <chrono> #include <catch.hpp> #include "sqlite.hpp" -#include "bookmarks.hpp" +#include "export/bookmarks.hpp" +using namespace remwharead; using std::string; using std::chrono::system_clock; using std::regex; diff --git a/tests/test_csv.cpp b/tests/test_csv.cpp index 499bdc1..47db8d7 100644 --- a/tests/test_csv.cpp +++ b/tests/test_csv.cpp @@ -22,8 +22,9 @@ #include <catch.hpp> #include "time.hpp" #include "sqlite.hpp" -#include 
"csv.hpp" +#include "export/csv.hpp" +using namespace remwharead; using std::string; using std::chrono::system_clock; using std::regex; diff --git a/tests/test_parse_options.cpp b/tests/test_parse_options.cpp index 4571b99..fef4d4e 100644 --- a/tests/test_parse_options.cpp +++ b/tests/test_parse_options.cpp @@ -20,6 +20,7 @@ #include <catch.hpp> #include "parse_options.hpp" +using namespace remwharead; using std::string; using std::vector; @@ -90,7 +91,7 @@ SCENARIO ("The option parser works correctly") THEN ("No exception is thrown") AND_THEN ("status code is 0") - AND_THEN ("options.file is empty") + AND_THEN ("Tag and URI are right") { REQUIRE_FALSE(exception); REQUIRE(opts.status_code == 0); @@ -126,7 +127,7 @@ SCENARIO ("The option parser works correctly") THEN ("No exception is thrown") AND_THEN ("status code is 0") - AND_THEN ("options.file is empty") + AND_THEN ("Tag and URI are right") { REQUIRE_FALSE(exception); REQUIRE(opts.status_code == 0); diff --git a/tests/test_search.cpp b/tests/test_search.cpp index 564ba7f..bda47d2 100644 --- a/tests/test_search.cpp +++ b/tests/test_search.cpp @@ -22,6 +22,7 @@ #include "sqlite.hpp" #include "search.hpp" +using namespace remwharead; using std::string; using std::chrono::system_clock; using std::vector; diff --git a/tests/test_simple.cpp b/tests/test_simple.cpp index 52b7ef8..47c3c69 100644 --- a/tests/test_simple.cpp +++ b/tests/test_simple.cpp @@ -21,8 +21,9 @@ #include <chrono> #include <catch.hpp> #include "sqlite.hpp" -#include "simple.hpp" +#include "export/simple.hpp" +using namespace remwharead; using std::string; using std::chrono::system_clock; using std::regex; diff --git a/tests/test_time.cpp b/tests/test_time.cpp index d871658..5329e95 100644 --- a/tests/test_time.cpp +++ b/tests/test_time.cpp @@ -20,6 +20,7 @@ #include <catch.hpp> #include "time.hpp" +using namespace remwharead; using std::string; using std::chrono::system_clock; using std::chrono::seconds; diff --git a/tests/test_unicode.cpp 
b/tests/test_unicode.cpp index a2bca6a..f9d7676 100644 --- a/tests/test_unicode.cpp +++ b/tests/test_unicode.cpp @@ -17,6 +17,7 @@ #include <catch.hpp> #include "search.hpp" +using namespace remwharead; SCENARIO ("Unicode is handled correctly") { diff --git a/tests/test_uri.cpp b/tests/test_uri.cpp index 7f1a9ab..34f403f 100644 --- a/tests/test_uri.cpp +++ b/tests/test_uri.cpp @@ -19,6 +19,7 @@ #include <catch.hpp> #include "uri.hpp" +using namespace remwharead; using std::string; SCENARIO ("URI works correctly")