Merge branch 'develop' into main
continuous-integration/drone/push Build is failing
Details
continuous-integration/drone/push Build is failing
Details
This commit is contained in:
commit
1e0a7446e0
|
@ -17,7 +17,7 @@ steps:
|
||||||
image: plugins/download
|
image: plugins/download
|
||||||
settings:
|
settings:
|
||||||
source: https://raw.githubusercontent.com/badaix/popl/v1.2.0/include/popl.hpp
|
source: https://raw.githubusercontent.com/badaix/popl/v1.2.0/include/popl.hpp
|
||||||
destination: src/popl.hpp
|
destination: src/cli/popl.hpp
|
||||||
|
|
||||||
- name: gcc6
|
- name: gcc6
|
||||||
image: debian:stretch-slim
|
image: debian:stretch-slim
|
||||||
|
@ -186,7 +186,7 @@ steps:
|
||||||
image: plugins/download
|
image: plugins/download
|
||||||
settings:
|
settings:
|
||||||
source: https://raw.githubusercontent.com/badaix/popl/v1.2.0/include/popl.hpp
|
source: https://raw.githubusercontent.com/badaix/popl/v1.2.0/include/popl.hpp
|
||||||
destination: src/popl.hpp
|
destination: src/cli/popl.hpp
|
||||||
|
|
||||||
- name: deb
|
- name: deb
|
||||||
image: debian:stretch-slim
|
image: debian:stretch-slim
|
||||||
|
|
|
@ -1,2 +1,4 @@
|
||||||
/build/
|
/build/
|
||||||
|
/doc/
|
||||||
|
/update_doc.sh
|
||||||
*.xpi
|
*.xpi
|
||||||
|
|
|
@ -9,15 +9,6 @@ set(WITH_TESTS "NO" CACHE STRING "WITH_TESTS defaults to \"NO\"")
|
||||||
set(WITH_MOZILLA "NO" CACHE STRING "WITH_MOZILLA defaults to \"NO\"")
|
set(WITH_MOZILLA "NO" CACHE STRING "WITH_MOZILLA defaults to \"NO\"")
|
||||||
|
|
||||||
include(GNUInstallDirs)
|
include(GNUInstallDirs)
|
||||||
find_package(PkgConfig REQUIRED)
|
|
||||||
pkg_check_modules(LIBXDG_BASEDIR REQUIRED libxdg-basedir)
|
|
||||||
# sqlite3 is not a direct dependency, but vsqlite++ has no cmake- or pkg-config
|
|
||||||
# module. Since it installs in the same directories as sqlite3, I am adding the
|
|
||||||
# module here to add the include- and link directories below. It is not REQUIRED
|
|
||||||
# because the sqlite3 in Debian jessie doesn't come with a pkg-config module.
|
|
||||||
pkg_check_modules(SQLITE3 sqlite3)
|
|
||||||
pkg_check_modules(CURLPP REQUIRED curlpp)
|
|
||||||
find_package(ICU COMPONENTS uc i18n REQUIRED)
|
|
||||||
|
|
||||||
set(CMAKE_CXX_STANDARD 14)
|
set(CMAKE_CXX_STANDARD 14)
|
||||||
set(CMAKE_CXX_STANDARD_REQUIRED ON)
|
set(CMAKE_CXX_STANDARD_REQUIRED ON)
|
||||||
|
@ -27,33 +18,17 @@ set(CMAKE_CXX_FLAGS_DEBUG
|
||||||
"${CMAKE_CXX_FLAGS_DEBUG} -Wall -Wextra -Wpedantic -ftrapv \
|
"${CMAKE_CXX_FLAGS_DEBUG} -Wall -Wextra -Wpedantic -ftrapv \
|
||||||
-fsanitize=undefined -g -Og -fno-omit-frame-pointer")
|
-fsanitize=undefined -g -Og -fno-omit-frame-pointer")
|
||||||
|
|
||||||
include_directories(${PROJECT_SOURCE_DIR}/src)
|
include_directories(${PROJECT_SOURCE_DIR}/src/lib)
|
||||||
Include_directories(${PROJECT_BINARY_DIR})
|
Include_directories(${PROJECT_BINARY_DIR})
|
||||||
|
|
||||||
include_directories(${LIBXDG_BASEDIR_INCLUDE_DIRS})
|
|
||||||
include_directories(${SQLITE3_INCLUDE_DIRS})
|
|
||||||
include_directories(${CURLPP_INCLUDE_DIRS})
|
|
||||||
include_directories(${ICU_INCLUDE_DIRS})
|
|
||||||
|
|
||||||
link_directories(${LIBXDG_BASEDIR_LIBRARY_DIRS})
|
|
||||||
link_directories(${SQLITE3_LIBRARY_DIRS})
|
|
||||||
link_directories(${CURLPP_LIBRARY_DIRS})
|
|
||||||
link_directories(${ICU_LIBRARY_DIRS})
|
|
||||||
|
|
||||||
set(COMMON_LIBRARIES
|
|
||||||
${LIBXDG_BASEDIR_LIBRARIES} vsqlitepp stdc++fs ${CURLPP_LIBRARIES}
|
|
||||||
${ICU_LIBRARIES})
|
|
||||||
|
|
||||||
# Write version in header
|
# Write version in header
|
||||||
configure_file(
|
configure_file(
|
||||||
"${PROJECT_SOURCE_DIR}/src/version.hpp.in"
|
"${PROJECT_SOURCE_DIR}/src/version.hpp.in"
|
||||||
"${PROJECT_BINARY_DIR}/version.hpp"
|
"${PROJECT_BINARY_DIR}/version.hpp"
|
||||||
)
|
)
|
||||||
|
|
||||||
file(GLOB sources src/*.cpp)
|
add_subdirectory(src/lib)
|
||||||
add_executable(${PROJECT_NAME} "${sources}")
|
add_subdirectory(src/cli)
|
||||||
target_link_libraries(${PROJECT_NAME} ${COMMON_LIBRARIES})
|
|
||||||
install(TARGETS ${PROJECT_NAME} DESTINATION ${CMAKE_INSTALL_BINDIR})
|
|
||||||
|
|
||||||
if (WITH_MAN)
|
if (WITH_MAN)
|
||||||
add_custom_command(
|
add_custom_command(
|
||||||
|
@ -63,6 +38,7 @@ if (WITH_MAN)
|
||||||
COMMAND ${CMAKE_SOURCE_DIR}/build_manpage.sh
|
COMMAND ${CMAKE_SOURCE_DIR}/build_manpage.sh
|
||||||
ARGS ${PROJECT_VERSION})
|
ARGS ${PROJECT_VERSION})
|
||||||
add_custom_target(man ALL DEPENDS "${PROJECT_BINARY_DIR}/${PROJECT_NAME}.1")
|
add_custom_target(man ALL DEPENDS "${PROJECT_BINARY_DIR}/${PROJECT_NAME}.1")
|
||||||
|
|
||||||
install(
|
install(
|
||||||
FILES ${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}.1
|
FILES ${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}.1
|
||||||
DESTINATION ${CMAKE_INSTALL_MANDIR}/man1)
|
DESTINATION ${CMAKE_INSTALL_MANDIR}/man1)
|
||||||
|
@ -73,8 +49,8 @@ if (WITH_MOZILLA)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
if(WITH_TESTS)
|
if(WITH_TESTS)
|
||||||
add_library(${PROJECT_NAME}_testlib SHARED ${sources})
|
include_directories(${PROJECT_SOURCE_DIR}/src/cli)
|
||||||
target_link_libraries(${PROJECT_NAME}_testlib ${COMMON_LIBRARIES})
|
add_library(${PROJECT_NAME}_testlib SHARED src/cli/parse_options.cpp)
|
||||||
add_subdirectory(tests)
|
add_subdirectory(tests)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,27 @@
|
||||||
|
# -*- mode: conf-unix -*-
|
||||||
|
PROJECT_NAME = "remwharead"
|
||||||
|
PROJECT_NUMBER = 0.0.0
|
||||||
|
INPUT = src/lib/ src/lib/export/
|
||||||
|
# EXAMPLE_PATH = examples/
|
||||||
|
EXAMPLE_RECURSIVE = YES
|
||||||
|
GENERATE_HTML = YES
|
||||||
|
HTML_OUTPUT = doc/html
|
||||||
|
GENERATE_LATEX = NO
|
||||||
|
ALLOW_UNICODE_NAMES = YES
|
||||||
|
BRIEF_MEMBER_DESC = YES
|
||||||
|
REPEAT_BRIEF = YES
|
||||||
|
ALWAYS_DETAILED_SEC = YES
|
||||||
|
INLINE_INHERITED_MEMB = NO
|
||||||
|
INHERIT_DOCS = YES
|
||||||
|
SEPARATE_MEMBER_PAGES = NO
|
||||||
|
TAB_SIZE = 4
|
||||||
|
MARKDOWN_SUPPORT = YES
|
||||||
|
AUTOLINK_SUPPORT = YES
|
||||||
|
INLINE_SIMPLE_STRUCTS = NO
|
||||||
|
QUIET = NO
|
||||||
|
WARNINGS = YES
|
||||||
|
BUILTIN_STL_SUPPORT = YES
|
||||||
|
VERBATIM_HEADERS = YES
|
||||||
|
INLINE_SOURCES = YES
|
||||||
|
SEARCHENGINE = YES
|
||||||
|
SHOW_FILES = YES
|
11
README.adoc
11
README.adoc
|
@ -1,4 +1,5 @@
|
||||||
= remwharead
|
= remwharead
|
||||||
|
:toc: preamble
|
||||||
|
|
||||||
*remwharead* saves URIs of things you want to remember in a database along with
|
*remwharead* saves URIs of things you want to remember in a database along with
|
||||||
an URI to the archived version, the current date and time, title, description,
|
an URI to the archived version, the current date and time, title, description,
|
||||||
|
@ -18,7 +19,13 @@ image::https://doc.schlomp.space/.remwharead/example_tags.png[Tags view, width=4
|
||||||
|
|
||||||
== Usage
|
== Usage
|
||||||
|
|
||||||
See https://schlomp.space/tastytea/remwharead/src/branch/main/remwharead.1.adoc[manpage].
|
See
|
||||||
|
https://schlomp.space/tastytea/remwharead/src/branch/main/remwharead.1.adoc[manpage].
|
||||||
|
|
||||||
|
=== In your programs
|
||||||
|
|
||||||
|
The complete functionality is implemented in a C++ library, libremwharead. Take
|
||||||
|
a look at the https://doc.schlomp.space/remwharead/[reference] for more info.
|
||||||
|
|
||||||
== Install
|
== Install
|
||||||
|
|
||||||
|
@ -73,7 +80,7 @@ apt-get install g++-6 cmake pkg-config libcurl4-openssl-dev libxdg-basedir-dev \
|
||||||
libvsqlitepp-dev libboost-system-dev libboost-filesystem-dev libicu-dev asciidoc
|
libvsqlitepp-dev libboost-system-dev libboost-filesystem-dev libicu-dev asciidoc
|
||||||
apt-get install -t sid libcurlpp-dev
|
apt-get install -t sid libcurlpp-dev
|
||||||
# Inside the source directory:
|
# Inside the source directory:
|
||||||
wget -O src/popl.hpp https://raw.githubusercontent.com/badaix/popl/v1.2.0/include/popl.hpp
|
wget -O src/cli/popl.hpp https://raw.githubusercontent.com/badaix/popl/v1.2.0/include/popl.hpp
|
||||||
export CXX="g++-6"
|
export CXX="g++-6"
|
||||||
----
|
----
|
||||||
====
|
====
|
||||||
|
|
|
@ -0,0 +1,7 @@
|
||||||
|
#!/bin/bash
|
||||||
|
|
||||||
|
if [[ -f Doxyfile ]]; then
|
||||||
|
mkdir -p doc
|
||||||
|
(doxygen -s -g - && cat Doxyfile && echo -n "PROJECT_NUMBER = " &&
|
||||||
|
grep -Eo '[0-9]+.[0-9]+.[0-9]+$' CMakeLists.txt) | doxygen -
|
||||||
|
fi
|
251
src/adoc.cpp
251
src/adoc.cpp
|
@ -1,251 +0,0 @@
|
||||||
/* This file is part of remwharead.
|
|
||||||
* Copyright © 2019 tastytea <tastytea@tastytea.de>
|
|
||||||
*
|
|
||||||
* This program is free software: you can redistribute it and/or modify
|
|
||||||
* it under the terms of the GNU General Public License as published by
|
|
||||||
* the Free Software Foundation, version 3.
|
|
||||||
*
|
|
||||||
* This program is distributed in the hope that it will be useful,
|
|
||||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
||||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
||||||
* GNU General Public License for more details.
|
|
||||||
*
|
|
||||||
* You should have received a copy of the GNU General Public License
|
|
||||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include <iostream>
|
|
||||||
#include <string>
|
|
||||||
#include <regex>
|
|
||||||
#include <algorithm>
|
|
||||||
#include <utility>
|
|
||||||
#include <locale>
|
|
||||||
#include <curlpp/cURLpp.hpp>
|
|
||||||
#include "version.hpp"
|
|
||||||
#include "time.hpp"
|
|
||||||
#include "adoc.hpp"
|
|
||||||
|
|
||||||
using std::string;
|
|
||||||
using std::cerr;
|
|
||||||
using std::endl;
|
|
||||||
using std::regex;
|
|
||||||
using std::regex_replace;
|
|
||||||
using tagpair = std::pair<string,vector<Database::entry>>;
|
|
||||||
|
|
||||||
void Export::AsciiDoc::print() const
|
|
||||||
{
|
|
||||||
try
|
|
||||||
{
|
|
||||||
_out << "= Visited things\n"
|
|
||||||
<< ":Author: remwharead " << global::version << endl
|
|
||||||
<< ":Date: "
|
|
||||||
<< timepoint_to_string(system_clock::now()) << endl
|
|
||||||
<< ":TOC: right\n"
|
|
||||||
<< ":TOCLevels: 2\n"
|
|
||||||
<< ":!webfonts:\n\n";
|
|
||||||
|
|
||||||
tagmap alltags;
|
|
||||||
string day;
|
|
||||||
for (const Database::entry &entry : _entries)
|
|
||||||
{
|
|
||||||
const string newday = get_day(entry);
|
|
||||||
|
|
||||||
if (newday != day)
|
|
||||||
{
|
|
||||||
day = newday;
|
|
||||||
_out << "== " << day << endl << endl;
|
|
||||||
}
|
|
||||||
|
|
||||||
_out << "[[dt_" << timepoint_to_string(entry.datetime) << "]]\n";
|
|
||||||
_out << "* link:" << replace_in_uri(entry.uri);
|
|
||||||
if (!entry.title.empty())
|
|
||||||
{
|
|
||||||
_out << '[' << replace_in_title(entry.title) << ']';
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
_out << "[]";
|
|
||||||
}
|
|
||||||
_out << " +" << endl;
|
|
||||||
|
|
||||||
_out << '_' << get_time(entry).substr(0, 5) << '_';
|
|
||||||
if (!entry.archive_uri.empty())
|
|
||||||
{
|
|
||||||
_out << " (link:" << replace_in_uri(entry.archive_uri)
|
|
||||||
<< "[archived version])";
|
|
||||||
}
|
|
||||||
|
|
||||||
bool separator = false;
|
|
||||||
for (const string &tag : entry.tags)
|
|
||||||
{
|
|
||||||
if (tag.empty())
|
|
||||||
{
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
if (!separator)
|
|
||||||
{
|
|
||||||
_out << "\n| ";
|
|
||||||
separator = true;
|
|
||||||
}
|
|
||||||
|
|
||||||
auto globaltag = alltags.find(tag);
|
|
||||||
if (globaltag != alltags.end())
|
|
||||||
{
|
|
||||||
globaltag->second.push_back(entry);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
alltags.insert({ tag, { entry } });
|
|
||||||
}
|
|
||||||
|
|
||||||
_out << "xref:t_" << replace_in_tag(tag) << "[" << tag << ']';
|
|
||||||
if (tag != *(entry.tags.rbegin()))
|
|
||||||
{
|
|
||||||
_out << ", ";
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!entry.description.empty())
|
|
||||||
{
|
|
||||||
_out << " +" << endl << entry.description;
|
|
||||||
}
|
|
||||||
_out << endl << endl;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!alltags.empty())
|
|
||||||
{
|
|
||||||
print_tags(alltags);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
catch (std::exception &e)
|
|
||||||
{
|
|
||||||
cerr << "Error in " << __func__ << ": " << e.what() << endl;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
const string Export::AsciiDoc::replace(string text,
|
|
||||||
const replacemap &replacements) const
|
|
||||||
{
|
|
||||||
for (const std::pair<const string, const string> &sr : replacements)
|
|
||||||
{
|
|
||||||
size_t pos = 0;
|
|
||||||
while ((pos = text.find(sr.first, pos)) != std::string::npos)
|
|
||||||
{
|
|
||||||
text.replace(pos, sr.first.length(), sr.second);
|
|
||||||
pos += sr.second.length();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return text;
|
|
||||||
}
|
|
||||||
const string Export::AsciiDoc::replace_in_tag(const string &text) const
|
|
||||||
{
|
|
||||||
// TODO: Find a better solution.
|
|
||||||
const replacemap replacements =
|
|
||||||
{
|
|
||||||
{ " ", "-" }, { "§", "-" },
|
|
||||||
{ "$", "-" }, { "%", "-" },
|
|
||||||
{ "&", "-" }, { "/", "-" },
|
|
||||||
{ "=", "-" }, { "^", "-" },
|
|
||||||
{ "!", "-" }, { "?", "-" },
|
|
||||||
{ "'", "-" }, { "\"", "-" },
|
|
||||||
{ "´", "-" }, { "`", "-" },
|
|
||||||
{ "’", "-" }, { "#", "-" },
|
|
||||||
{ "₀", "0" }, { "⁰", "0" },
|
|
||||||
{ "₁", "1" }, { "¹", "1" },
|
|
||||||
{ "₂", "2" }, { "²", "2" },
|
|
||||||
{ "₃", "3" }, { "³", "3" },
|
|
||||||
{ "₄", "4" }, { "⁴", "4" },
|
|
||||||
{ "₅", "5" }, { "⁵", "5" },
|
|
||||||
{ "₆", "6" }, { "⁶", "6" },
|
|
||||||
{ "₇", "7" }, { "⁷", "7" },
|
|
||||||
{ "₈", "8" }, { "⁸", "8" },
|
|
||||||
{ "₉", "9" }, { "⁹", "9" }
|
|
||||||
};
|
|
||||||
|
|
||||||
return replace(text, replacements);
|
|
||||||
}
|
|
||||||
|
|
||||||
const string Export::AsciiDoc::replace_in_title(const string &text) const
|
|
||||||
{
|
|
||||||
// [ is implicitly escaped if the corresponding ] is.
|
|
||||||
return replace(text, {{ "]", "\\]" }});
|
|
||||||
}
|
|
||||||
|
|
||||||
const string Export::AsciiDoc::replace_in_uri(const string &text) const
|
|
||||||
{
|
|
||||||
return replace(text,
|
|
||||||
{
|
|
||||||
{ "[", "%5B" }, { "]", "%5D" }
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
void Export::AsciiDoc::print_tags(const tagmap &tags) const
|
|
||||||
{
|
|
||||||
_out << "== Tags\n\n";
|
|
||||||
vector<tagpair> sortedtags(tags.size());
|
|
||||||
std::move(tags.begin(), tags.end(), sortedtags.begin());
|
|
||||||
std::sort(sortedtags.begin(), sortedtags.end(),
|
|
||||||
[](const tagpair &a, tagpair &b)
|
|
||||||
{
|
|
||||||
if (a.second.size() != b.second.size())
|
|
||||||
{ // Sort by number of occurrences if they are different.
|
|
||||||
return a.second.size() > b.second.size();
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{ // Sort by tag names otherwise.
|
|
||||||
std::locale loc;
|
|
||||||
const std::collate<char> &coll =
|
|
||||||
std::use_facet<std::collate<char>>(loc);
|
|
||||||
return (coll.compare(
|
|
||||||
a.first.data(), a.first.data()
|
|
||||||
+ a.first.length(),
|
|
||||||
b.first.data(), b.first.data()
|
|
||||||
+ b.first.length()) == -1);
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
bool othertags = false; // Have we printed “Less used tags” already?
|
|
||||||
for (const auto &tag : sortedtags)
|
|
||||||
{
|
|
||||||
// If we have more than 20 tags, group all tags that occur only 1 time
|
|
||||||
// under the section “Less used tags”.
|
|
||||||
if (sortedtags.size() > 20 && tag.second.size() == 1)
|
|
||||||
{
|
|
||||||
if (!othertags)
|
|
||||||
{
|
|
||||||
_out << "=== Less used tags\n\n";
|
|
||||||
othertags = true;
|
|
||||||
}
|
|
||||||
_out << "=";
|
|
||||||
}
|
|
||||||
|
|
||||||
_out << "=== [[t_" << replace_in_tag(tag.first) << "]]"
|
|
||||||
<< tag.first << endl;
|
|
||||||
for (const Database::entry &entry : tag.second)
|
|
||||||
{
|
|
||||||
const string datetime = timepoint_to_string(entry.datetime);
|
|
||||||
const string date = datetime.substr(0, datetime.find('T'));
|
|
||||||
string title = replace_in_title(entry.title);
|
|
||||||
if (title.empty())
|
|
||||||
{
|
|
||||||
title = "++" + entry.uri + "++";
|
|
||||||
}
|
|
||||||
_out << endl << "* xref:dt_" << datetime
|
|
||||||
<< '[' << title << "] _(" << date << ")_" << endl;
|
|
||||||
}
|
|
||||||
_out << endl;
|
|
||||||
}
|
|
||||||
_out << endl;
|
|
||||||
}
|
|
||||||
|
|
||||||
const string Export::AsciiDoc::get_day(const Database::entry &entry) const
|
|
||||||
{
|
|
||||||
const string datetime = timepoint_to_string(entry.datetime);
|
|
||||||
return datetime.substr(0, datetime.find('T'));
|
|
||||||
}
|
|
||||||
|
|
||||||
const string Export::AsciiDoc::get_time(const Database::entry &entry) const
|
|
||||||
{
|
|
||||||
const string datetime = timepoint_to_string(entry.datetime);
|
|
||||||
return datetime.substr(datetime.find('T') + 1);
|
|
||||||
}
|
|
|
@ -1,56 +0,0 @@
|
||||||
/* This file is part of remwharead.
|
|
||||||
* Copyright © 2019 tastytea <tastytea@tastytea.de>
|
|
||||||
*
|
|
||||||
* This program is free software: you can redistribute it and/or modify
|
|
||||||
* it under the terms of the GNU General Public License as published by
|
|
||||||
* the Free Software Foundation, version 3.
|
|
||||||
*
|
|
||||||
* This program is distributed in the hope that it will be useful,
|
|
||||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
||||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
||||||
* GNU General Public License for more details.
|
|
||||||
*
|
|
||||||
* You should have received a copy of the GNU General Public License
|
|
||||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include <chrono>
|
|
||||||
#include <string>
|
|
||||||
#include "sqlite.hpp"
|
|
||||||
#include "bookmarks.hpp"
|
|
||||||
|
|
||||||
using std::chrono::system_clock;
|
|
||||||
using std::chrono::duration_cast;
|
|
||||||
using std::chrono::seconds;
|
|
||||||
using std::string;
|
|
||||||
|
|
||||||
void Export::Bookmarks::print() const
|
|
||||||
{
|
|
||||||
_out << "<!DOCTYPE NETSCAPE-Bookmark-file-1>\n"
|
|
||||||
"<META HTTP-EQUIV=\"Content-Type\" CONTENT=\"text/html; "
|
|
||||||
"charset=UTF-8\">\n"
|
|
||||||
"<TITLE>Bookmarks from remwharead</TITLE>\n"
|
|
||||||
"<H1>Bookmarks from remwharead<H1>\n\n"
|
|
||||||
"<DL><p>\n"
|
|
||||||
"<DT><H3>remwharead</H3>\n"
|
|
||||||
"<DL><p>\n";
|
|
||||||
|
|
||||||
for (const Database::entry & entry : _entries)
|
|
||||||
{
|
|
||||||
string title = entry.title;
|
|
||||||
if (title.empty())
|
|
||||||
{
|
|
||||||
title = entry.uri;
|
|
||||||
}
|
|
||||||
system_clock::time_point tp = entry.datetime;
|
|
||||||
system_clock::duration duration = tp.time_since_epoch();
|
|
||||||
string time_seconds =
|
|
||||||
std::to_string(duration_cast<seconds>(duration).count());
|
|
||||||
|
|
||||||
_out << "<DT><A HREF=\"" << entry.uri << "\" "
|
|
||||||
<< "ADD_DATE=\"" << time_seconds << "\">"
|
|
||||||
<< title << "</A>\n";
|
|
||||||
}
|
|
||||||
_out << "</DL><p>\n"
|
|
||||||
<< "</DL><p>\n";
|
|
||||||
}
|
|
|
@ -0,0 +1,9 @@
|
||||||
|
include_directories(${PROJECT_SOURCE_DIR}/src/cli)
|
||||||
|
|
||||||
|
file(GLOB sources_cli *.cpp)
|
||||||
|
|
||||||
|
add_executable(${PROJECT_NAME}-cli ${sources_cli})
|
||||||
|
target_link_libraries(${PROJECT_NAME}-cli ${PROJECT_NAME})
|
||||||
|
set_target_properties(${PROJECT_NAME}-cli PROPERTIES OUTPUT_NAME ${PROJECT_NAME})
|
||||||
|
|
||||||
|
install(TARGETS ${PROJECT_NAME}-cli DESTINATION ${CMAKE_INSTALL_BINDIR})
|
|
@ -24,12 +24,13 @@
|
||||||
#include "parse_options.hpp"
|
#include "parse_options.hpp"
|
||||||
#include "uri.hpp"
|
#include "uri.hpp"
|
||||||
#include "types.hpp"
|
#include "types.hpp"
|
||||||
#include "csv.hpp"
|
#include "export/csv.hpp"
|
||||||
#include "adoc.hpp"
|
#include "export/adoc.hpp"
|
||||||
#include "bookmarks.hpp"
|
#include "export/bookmarks.hpp"
|
||||||
#include "simple.hpp"
|
#include "export/simple.hpp"
|
||||||
#include "search.hpp"
|
#include "search.hpp"
|
||||||
|
|
||||||
|
using namespace remwharead;
|
||||||
using std::cout;
|
using std::cout;
|
||||||
using std::cerr;
|
using std::cerr;
|
||||||
using std::endl;
|
using std::endl;
|
|
@ -25,6 +25,7 @@
|
||||||
#include "types.hpp"
|
#include "types.hpp"
|
||||||
#include "time.hpp"
|
#include "time.hpp"
|
||||||
|
|
||||||
|
using namespace remwharead;
|
||||||
using std::string;
|
using std::string;
|
||||||
using std::vector;
|
using std::vector;
|
||||||
using std::array;
|
using std::array;
|
|
@ -0,0 +1,37 @@
|
||||||
|
find_package(PkgConfig REQUIRED)
|
||||||
|
pkg_check_modules(LIBXDG_BASEDIR REQUIRED libxdg-basedir)
|
||||||
|
# sqlite3 is not a direct dependency, but vsqlite++ has no cmake- or pkg-config
|
||||||
|
# module. Since it installs in the same directories as sqlite3, I am adding the
|
||||||
|
# module here to add the include- and link directories below. It is not REQUIRED
|
||||||
|
# because the sqlite3 in Debian jessie doesn't come with a pkg-config module.
|
||||||
|
pkg_check_modules(SQLITE3 sqlite3)
|
||||||
|
pkg_check_modules(CURLPP REQUIRED curlpp)
|
||||||
|
find_package(ICU COMPONENTS uc i18n REQUIRED)
|
||||||
|
|
||||||
|
include_directories(${LIBXDG_BASEDIR_INCLUDE_DIRS})
|
||||||
|
include_directories(${SQLITE3_INCLUDE_DIRS})
|
||||||
|
include_directories(${CURLPP_INCLUDE_DIRS})
|
||||||
|
include_directories(${ICU_INCLUDE_DIRS})
|
||||||
|
|
||||||
|
link_directories(${LIBXDG_BASEDIR_LIBRARY_DIRS})
|
||||||
|
link_directories(${SQLITE3_LIBRARY_DIRS})
|
||||||
|
link_directories(${CURLPP_LIBRARY_DIRS})
|
||||||
|
link_directories(${ICU_LIBRARY_DIRS})
|
||||||
|
|
||||||
|
file(GLOB sources_lib *.cpp export/*.cpp)
|
||||||
|
file(GLOB headers_lib *.hpp)
|
||||||
|
file(GLOB headers_lib_export export/*.hpp)
|
||||||
|
|
||||||
|
add_library(${PROJECT_NAME} SHARED ${sources_lib})
|
||||||
|
target_link_libraries(${PROJECT_NAME} ${LIBXDG_BASEDIR_LIBRARIES}
|
||||||
|
vsqlitepp stdc++fs ${CURLPP_LIBRARIES} ${ICU_LIBRARIES})
|
||||||
|
set_target_properties(${PROJECT_NAME} PROPERTIES
|
||||||
|
VERSION ${PROJECT_VERSION}
|
||||||
|
SOVERSION ${${PROJECT_NAME}_VERSION_MAJOR}
|
||||||
|
)
|
||||||
|
|
||||||
|
install(TARGETS ${PROJECT_NAME} LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR})
|
||||||
|
install(FILES ${headers_lib}
|
||||||
|
DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/${PROJECT_NAME})
|
||||||
|
install(FILES ${headers_lib_export}
|
||||||
|
DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/${PROJECT_NAME}/export)
|
|
@ -0,0 +1,255 @@
|
||||||
|
/* This file is part of remwharead.
|
||||||
|
* Copyright © 2019 tastytea <tastytea@tastytea.de>
|
||||||
|
*
|
||||||
|
* This program is free software: you can redistribute it and/or modify
|
||||||
|
* it under the terms of the GNU General Public License as published by
|
||||||
|
* the Free Software Foundation, version 3.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License
|
||||||
|
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <iostream>
|
||||||
|
#include <string>
|
||||||
|
#include <regex>
|
||||||
|
#include <algorithm>
|
||||||
|
#include <utility>
|
||||||
|
#include <locale>
|
||||||
|
#include <curlpp/cURLpp.hpp>
|
||||||
|
#include "version.hpp"
|
||||||
|
#include "time.hpp"
|
||||||
|
#include "adoc.hpp"
|
||||||
|
|
||||||
|
namespace remwharead
|
||||||
|
{
|
||||||
|
using std::string;
|
||||||
|
using std::cerr;
|
||||||
|
using std::endl;
|
||||||
|
using std::regex;
|
||||||
|
using std::regex_replace;
|
||||||
|
using tagpair = std::pair<string,vector<Database::entry>>;
|
||||||
|
|
||||||
|
void Export::AsciiDoc::print() const
|
||||||
|
{
|
||||||
|
try
|
||||||
|
{
|
||||||
|
_out << "= Visited things\n"
|
||||||
|
<< ":Author: remwharead " << global::version << endl
|
||||||
|
<< ":Date: "
|
||||||
|
<< timepoint_to_string(system_clock::now()) << endl
|
||||||
|
<< ":TOC: right\n"
|
||||||
|
<< ":TOCLevels: 2\n"
|
||||||
|
<< ":!webfonts:\n\n";
|
||||||
|
|
||||||
|
tagmap alltags;
|
||||||
|
string day;
|
||||||
|
for (const Database::entry &entry : _entries)
|
||||||
|
{
|
||||||
|
const string newday = get_day(entry);
|
||||||
|
|
||||||
|
if (newday != day)
|
||||||
|
{
|
||||||
|
day = newday;
|
||||||
|
_out << "== " << day << endl << endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
_out << "[[dt_" << timepoint_to_string(entry.datetime)
|
||||||
|
<< "]]\n" << "* link:" << replace_in_uri(entry.uri);
|
||||||
|
if (!entry.title.empty())
|
||||||
|
{
|
||||||
|
_out << '[' << replace_in_title(entry.title) << ']';
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
_out << "[]";
|
||||||
|
}
|
||||||
|
_out << " +" << endl;
|
||||||
|
|
||||||
|
_out << '_' << get_time(entry).substr(0, 5) << '_';
|
||||||
|
if (!entry.archive_uri.empty())
|
||||||
|
{
|
||||||
|
_out << " (link:" << replace_in_uri(entry.archive_uri)
|
||||||
|
<< "[archived version])";
|
||||||
|
}
|
||||||
|
|
||||||
|
bool separator = false;
|
||||||
|
for (const string &tag : entry.tags)
|
||||||
|
{
|
||||||
|
if (tag.empty())
|
||||||
|
{
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if (!separator)
|
||||||
|
{
|
||||||
|
_out << "\n| ";
|
||||||
|
separator = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
auto globaltag = alltags.find(tag);
|
||||||
|
if (globaltag != alltags.end())
|
||||||
|
{
|
||||||
|
globaltag->second.push_back(entry);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
alltags.insert({ tag, { entry } });
|
||||||
|
}
|
||||||
|
|
||||||
|
_out << "xref:t_" << replace_in_tag(tag)
|
||||||
|
<< "[" << tag << ']';
|
||||||
|
if (tag != *(entry.tags.rbegin()))
|
||||||
|
{
|
||||||
|
_out << ", ";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!entry.description.empty())
|
||||||
|
{
|
||||||
|
_out << " +" << endl << entry.description;
|
||||||
|
}
|
||||||
|
_out << endl << endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!alltags.empty())
|
||||||
|
{
|
||||||
|
print_tags(alltags);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
catch (std::exception &e)
|
||||||
|
{
|
||||||
|
cerr << "Error in " << __func__ << ": " << e.what() << endl;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
const string Export::AsciiDoc::replace(string text,
|
||||||
|
const replacemap &replacements) const
|
||||||
|
{
|
||||||
|
for (const std::pair<const string, const string> &sr : replacements)
|
||||||
|
{
|
||||||
|
size_t pos = 0;
|
||||||
|
while ((pos = text.find(sr.first, pos)) != std::string::npos)
|
||||||
|
{
|
||||||
|
text.replace(pos, sr.first.length(), sr.second);
|
||||||
|
pos += sr.second.length();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return text;
|
||||||
|
}
|
||||||
|
const string Export::AsciiDoc::replace_in_tag(const string &text) const
|
||||||
|
{
|
||||||
|
// TODO: Find a better solution.
|
||||||
|
const replacemap replacements =
|
||||||
|
{
|
||||||
|
{ " ", "-" }, { "§", "-" },
|
||||||
|
{ "$", "-" }, { "%", "-" },
|
||||||
|
{ "&", "-" }, { "/", "-" },
|
||||||
|
{ "=", "-" }, { "^", "-" },
|
||||||
|
{ "!", "-" }, { "?", "-" },
|
||||||
|
{ "'", "-" }, { "\"", "-" },
|
||||||
|
{ "´", "-" }, { "`", "-" },
|
||||||
|
{ "’", "-" }, { "#", "-" },
|
||||||
|
{ "₀", "0" }, { "⁰", "0" },
|
||||||
|
{ "₁", "1" }, { "¹", "1" },
|
||||||
|
{ "₂", "2" }, { "²", "2" },
|
||||||
|
{ "₃", "3" }, { "³", "3" },
|
||||||
|
{ "₄", "4" }, { "⁴", "4" },
|
||||||
|
{ "₅", "5" }, { "⁵", "5" },
|
||||||
|
{ "₆", "6" }, { "⁶", "6" },
|
||||||
|
{ "₇", "7" }, { "⁷", "7" },
|
||||||
|
{ "₈", "8" }, { "⁸", "8" },
|
||||||
|
{ "₉", "9" }, { "⁹", "9" }
|
||||||
|
};
|
||||||
|
|
||||||
|
return replace(text, replacements);
|
||||||
|
}
|
||||||
|
|
||||||
|
const string Export::AsciiDoc::replace_in_title(const string &text) const
|
||||||
|
{
|
||||||
|
// [ is implicitly escaped if the corresponding ] is.
|
||||||
|
return replace(text, {{ "]", "\\]" }});
|
||||||
|
}
|
||||||
|
|
||||||
|
const string Export::AsciiDoc::replace_in_uri(const string &text) const
|
||||||
|
{
|
||||||
|
return replace(text,
|
||||||
|
{
|
||||||
|
{ "[", "%5B" }, { "]", "%5D" }
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
void Export::AsciiDoc::print_tags(const tagmap &tags) const
|
||||||
|
{
|
||||||
|
_out << "== Tags\n\n";
|
||||||
|
vector<tagpair> sortedtags(tags.size());
|
||||||
|
std::move(tags.begin(), tags.end(), sortedtags.begin());
|
||||||
|
std::sort(sortedtags.begin(), sortedtags.end(),
|
||||||
|
[](const tagpair &a, tagpair &b)
|
||||||
|
{
|
||||||
|
if (a.second.size() != b.second.size())
|
||||||
|
{ // Sort by number of occurrences if they are different.
|
||||||
|
return a.second.size() > b.second.size();
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{ // Sort by tag names otherwise.
|
||||||
|
std::locale loc;
|
||||||
|
const std::collate<char> &coll =
|
||||||
|
std::use_facet<std::collate<char>>(loc);
|
||||||
|
return (coll.compare(
|
||||||
|
a.first.data(), a.first.data()
|
||||||
|
+ a.first.length(),
|
||||||
|
b.first.data(), b.first.data()
|
||||||
|
+ b.first.length()) == -1);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
bool othertags = false; // Have we printed “Less used tags” already?
|
||||||
|
for (const auto &tag : sortedtags)
|
||||||
|
{
|
||||||
|
// If we have more than 20 tags, group all tags that occur only 1
|
||||||
|
// time under the section “Less used tags”.
|
||||||
|
if (sortedtags.size() > 20 && tag.second.size() == 1)
|
||||||
|
{
|
||||||
|
if (!othertags)
|
||||||
|
{
|
||||||
|
_out << "=== Less used tags\n\n";
|
||||||
|
othertags = true;
|
||||||
|
}
|
||||||
|
_out << "=";
|
||||||
|
}
|
||||||
|
|
||||||
|
_out << "=== [[t_" << replace_in_tag(tag.first) << "]]"
|
||||||
|
<< tag.first << endl;
|
||||||
|
for (const Database::entry &entry : tag.second)
|
||||||
|
{
|
||||||
|
const string datetime = timepoint_to_string(entry.datetime);
|
||||||
|
const string date = datetime.substr(0, datetime.find('T'));
|
||||||
|
string title = replace_in_title(entry.title);
|
||||||
|
if (title.empty())
|
||||||
|
{
|
||||||
|
title = "++" + entry.uri + "++";
|
||||||
|
}
|
||||||
|
_out << endl << "* xref:dt_" << datetime
|
||||||
|
<< '[' << title << "] _(" << date << ")_" << endl;
|
||||||
|
}
|
||||||
|
_out << endl;
|
||||||
|
}
|
||||||
|
_out << endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
const string Export::AsciiDoc::get_day(const Database::entry &entry) const
|
||||||
|
{
|
||||||
|
const string datetime = timepoint_to_string(entry.datetime);
|
||||||
|
return datetime.substr(0, datetime.find('T'));
|
||||||
|
}
|
||||||
|
|
||||||
|
const string Export::AsciiDoc::get_time(const Database::entry &entry) const
|
||||||
|
{
|
||||||
|
const string datetime = timepoint_to_string(entry.datetime);
|
||||||
|
return datetime.substr(datetime.find('T') + 1);
|
||||||
|
}
|
||||||
|
}
|
|
@ -23,10 +23,13 @@
|
||||||
#include "sqlite.hpp"
|
#include "sqlite.hpp"
|
||||||
#include "export.hpp"
|
#include "export.hpp"
|
||||||
|
|
||||||
using std::string;
|
namespace remwharead
|
||||||
|
{
|
||||||
namespace Export
|
namespace Export
|
||||||
{
|
{
|
||||||
|
using std::string;
|
||||||
|
|
||||||
|
//! Export as %AsciiDoc document.
|
||||||
class AsciiDoc : protected ExportBase
|
class AsciiDoc : protected ExportBase
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
|
@ -38,17 +41,28 @@ namespace Export
|
||||||
using tagmap = std::map<string,vector<Database::entry>>;
|
using tagmap = std::map<string,vector<Database::entry>>;
|
||||||
using replacemap = const std::map<const string, const string>;
|
using replacemap = const std::map<const string, const string>;
|
||||||
|
|
||||||
|
//! Replace strings in text.
|
||||||
const string replace(string text, const replacemap &replacements) const;
|
const string replace(string text, const replacemap &replacements) const;
|
||||||
|
|
||||||
//! Replaces characters in tags that asciidoctor doesn't like.
|
//! Replaces characters in tags that asciidoctor doesn't like.
|
||||||
const string replace_in_tag(const string &text) const;
|
const string replace_in_tag(const string &text) const;
|
||||||
|
|
||||||
//! Replaces characters in title that asciidoctor doesn't like.
|
//! Replaces characters in title that asciidoctor doesn't like.
|
||||||
const string replace_in_title(const string &text) const;
|
const string replace_in_title(const string &text) const;
|
||||||
|
|
||||||
//! Replaces characters in URI that asciidoctor doesn't like.
|
//! Replaces characters in URI that asciidoctor doesn't like.
|
||||||
const string replace_in_uri(const string &text) const;
|
const string replace_in_uri(const string &text) const;
|
||||||
|
|
||||||
|
//! Print things sorted by tag.
|
||||||
void print_tags(const tagmap &tags) const;
|
void print_tags(const tagmap &tags) const;
|
||||||
|
|
||||||
|
//! Get ISO-8601 day from Database::entry.
|
||||||
const string get_day(const Database::entry &entry) const;
|
const string get_day(const Database::entry &entry) const;
|
||||||
|
|
||||||
|
//! Get ISO-8601 time from Database::entry.
|
||||||
const string get_time(const Database::entry &entry) const;
|
const string get_time(const Database::entry &entry) const;
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#endif // REMWHAREAD_ADOC_HPP
|
#endif // REMWHAREAD_ADOC_HPP
|
|
@ -0,0 +1,59 @@
|
||||||
|
/* This file is part of remwharead.
|
||||||
|
* Copyright © 2019 tastytea <tastytea@tastytea.de>
|
||||||
|
*
|
||||||
|
* This program is free software: you can redistribute it and/or modify
|
||||||
|
* it under the terms of the GNU General Public License as published by
|
||||||
|
* the Free Software Foundation, version 3.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License
|
||||||
|
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <chrono>
|
||||||
|
#include <string>
|
||||||
|
#include "sqlite.hpp"
|
||||||
|
#include "bookmarks.hpp"
|
||||||
|
|
||||||
|
namespace remwharead
|
||||||
|
{
|
||||||
|
using std::chrono::system_clock;
|
||||||
|
using std::chrono::duration_cast;
|
||||||
|
using std::chrono::seconds;
|
||||||
|
using std::string;
|
||||||
|
|
||||||
|
void Export::Bookmarks::print() const
|
||||||
|
{
|
||||||
|
_out << "<!DOCTYPE NETSCAPE-Bookmark-file-1>\n"
|
||||||
|
"<META HTTP-EQUIV=\"Content-Type\" CONTENT=\"text/html; "
|
||||||
|
"charset=UTF-8\">\n"
|
||||||
|
"<TITLE>Bookmarks from remwharead</TITLE>\n"
|
||||||
|
"<H1>Bookmarks from remwharead<H1>\n\n"
|
||||||
|
"<DL><p>\n"
|
||||||
|
"<DT><H3>remwharead</H3>\n"
|
||||||
|
"<DL><p>\n";
|
||||||
|
|
||||||
|
for (const Database::entry & entry : _entries)
|
||||||
|
{
|
||||||
|
string title = entry.title;
|
||||||
|
if (title.empty())
|
||||||
|
{
|
||||||
|
title = entry.uri;
|
||||||
|
}
|
||||||
|
system_clock::time_point tp = entry.datetime;
|
||||||
|
system_clock::duration duration = tp.time_since_epoch();
|
||||||
|
string time_seconds =
|
||||||
|
std::to_string(duration_cast<seconds>(duration).count());
|
||||||
|
|
||||||
|
_out << "<DT><A HREF=\"" << entry.uri << "\" "
|
||||||
|
<< "ADD_DATE=\"" << time_seconds << "\">"
|
||||||
|
<< title << "</A>\n";
|
||||||
|
}
|
||||||
|
_out << "</DL><p>\n"
|
||||||
|
<< "</DL><p>\n";
|
||||||
|
}
|
||||||
|
}
|
|
@ -19,6 +19,8 @@
|
||||||
|
|
||||||
#include "export.hpp"
|
#include "export.hpp"
|
||||||
|
|
||||||
|
namespace remwharead
|
||||||
|
{
|
||||||
namespace Export
|
namespace Export
|
||||||
{
|
{
|
||||||
//! Export as Netscape bookmark file.
|
//! Export as Netscape bookmark file.
|
||||||
|
@ -29,5 +31,6 @@ namespace Export
|
||||||
virtual void print() const override;
|
virtual void print() const override;
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#endif // REMWHAREAD_BOOKMARKS_HPP
|
#endif // REMWHAREAD_BOOKMARKS_HPP
|
|
@ -17,12 +17,12 @@
|
||||||
#include "time.hpp"
|
#include "time.hpp"
|
||||||
#include "csv.hpp"
|
#include "csv.hpp"
|
||||||
|
|
||||||
using std::cerr;
|
namespace remwharead
|
||||||
using std::endl;
|
|
||||||
|
|
||||||
namespace Export
|
|
||||||
{
|
{
|
||||||
void CSV::print() const
|
using std::cerr;
|
||||||
|
using std::endl;
|
||||||
|
|
||||||
|
void Export::CSV::print() const
|
||||||
{
|
{
|
||||||
try
|
try
|
||||||
{
|
{
|
||||||
|
@ -54,7 +54,7 @@ namespace Export
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
const string CSV::quote(string field) const
|
const string Export::CSV::quote(string field) const
|
||||||
{
|
{
|
||||||
size_t pos = 0;
|
size_t pos = 0;
|
||||||
while ((pos = field.find('"', pos)) != std::string::npos)
|
while ((pos = field.find('"', pos)) != std::string::npos)
|
|
@ -20,10 +20,13 @@
|
||||||
#include <string>
|
#include <string>
|
||||||
#include "export.hpp"
|
#include "export.hpp"
|
||||||
|
|
||||||
using std::string;
|
namespace remwharead
|
||||||
|
{
|
||||||
namespace Export
|
namespace Export
|
||||||
{
|
{
|
||||||
|
using std::string;
|
||||||
|
|
||||||
|
//! Export as Comma Separated Values.
|
||||||
class CSV : protected ExportBase
|
class CSV : protected ExportBase
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
|
@ -36,5 +39,6 @@ namespace Export
|
||||||
const string quote(string field) const;
|
const string quote(string field) const;
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#endif // REMWHAREAD_CSV_HPP
|
#endif // REMWHAREAD_CSV_HPP
|
|
@ -17,6 +17,8 @@
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#include "export.hpp"
|
#include "export.hpp"
|
||||||
|
|
||||||
|
namespace remwharead
|
||||||
|
{
|
||||||
namespace Export
|
namespace Export
|
||||||
{
|
{
|
||||||
ExportBase::ExportBase(const vector<Database::entry> &entries, ostream &out)
|
ExportBase::ExportBase(const vector<Database::entry> &entries, ostream &out)
|
||||||
|
@ -35,3 +37,4 @@ namespace Export
|
||||||
return entries;
|
return entries;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
|
@ -25,24 +25,43 @@ using std::vector;
|
||||||
using std::ostream;
|
using std::ostream;
|
||||||
using std::cout;
|
using std::cout;
|
||||||
|
|
||||||
|
namespace remwharead
|
||||||
|
{
|
||||||
namespace Export
|
namespace Export
|
||||||
{
|
{
|
||||||
|
//! Base class for exports.
|
||||||
class ExportBase
|
class ExportBase
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
|
/*!
|
||||||
|
* @brief Export vector of Database::entry.
|
||||||
|
*
|
||||||
|
* @param entries Vector of Database::entry to export.
|
||||||
|
* @param out Output stream.
|
||||||
|
*/
|
||||||
explicit ExportBase(const vector<Database::entry> &entries,
|
explicit ExportBase(const vector<Database::entry> &entries,
|
||||||
ostream &out = cout);
|
ostream &out = cout);
|
||||||
|
|
||||||
|
/*!
|
||||||
|
* @brief Print output to std::ostream.
|
||||||
|
*/
|
||||||
virtual void print() const = 0;
|
virtual void print() const = 0;
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
const vector<Database::entry> _entries;
|
const vector<Database::entry> _entries;
|
||||||
ostream &_out;
|
ostream &_out;
|
||||||
|
|
||||||
//! Sort entries from newest to oldest.
|
/*!
|
||||||
|
* @brief Sort entries from newest to oldest.
|
||||||
|
*
|
||||||
|
* @param entries Vector of Database::entry to sort.
|
||||||
|
*
|
||||||
|
* @return Sorted vector of Database::entry.
|
||||||
|
*/
|
||||||
const vector<Database::entry>
|
const vector<Database::entry>
|
||||||
sort_entries(vector<Database::entry> entries) const;
|
sort_entries(vector<Database::entry> entries) const;
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#endif // REMWHAREAD_EXPORT_HPP
|
#endif // REMWHAREAD_EXPORT_HPP
|
|
@ -19,20 +19,23 @@
|
||||||
#include "time.hpp"
|
#include "time.hpp"
|
||||||
#include "simple.hpp"
|
#include "simple.hpp"
|
||||||
|
|
||||||
using std::string;
|
namespace remwharead
|
||||||
|
|
||||||
void Export::Simple::print() const
|
|
||||||
{
|
{
|
||||||
for (const Database::entry & entry : _entries)
|
using std::string;
|
||||||
{
|
|
||||||
const string timestring = timepoint_to_string(entry.datetime);
|
|
||||||
_out << timestring.substr(0, timestring.find('T')) << ": ";
|
|
||||||
if (!entry.title.empty())
|
|
||||||
{
|
|
||||||
_out << entry.title << '\n';
|
|
||||||
_out << " ";
|
|
||||||
}
|
|
||||||
|
|
||||||
_out << "<" << entry.uri << ">\n";
|
void Export::Simple::print() const
|
||||||
|
{
|
||||||
|
for (const Database::entry & entry : _entries)
|
||||||
|
{
|
||||||
|
const string timestring = timepoint_to_string(entry.datetime);
|
||||||
|
_out << timestring.substr(0, timestring.find('T')) << ": ";
|
||||||
|
if (!entry.title.empty())
|
||||||
|
{
|
||||||
|
_out << entry.title << '\n';
|
||||||
|
_out << " ";
|
||||||
|
}
|
||||||
|
|
||||||
|
_out << "<" << entry.uri << ">\n";
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
|
@ -19,6 +19,8 @@
|
||||||
|
|
||||||
#include "export.hpp"
|
#include "export.hpp"
|
||||||
|
|
||||||
|
namespace remwharead
|
||||||
|
{
|
||||||
namespace Export
|
namespace Export
|
||||||
{
|
{
|
||||||
//! Export as simple list.
|
//! Export as simple list.
|
||||||
|
@ -29,5 +31,6 @@ namespace Export
|
||||||
virtual void print() const override;
|
virtual void print() const override;
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#endif // REMWHAREAD_SIMPLE_HPP
|
#endif // REMWHAREAD_SIMPLE_HPP
|
|
@ -14,21 +14,33 @@
|
||||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#ifndef REMWHAREAD_TIME_HPP
|
#ifndef REMWHAREAD_HPP
|
||||||
#define REMWHAREAD_TIME_HPP
|
#define REMWHAREAD_HPP
|
||||||
|
|
||||||
#include <string>
|
/*!
|
||||||
#include <chrono>
|
* @mainpage remwharead Reference
|
||||||
|
*
|
||||||
|
* @section using Using the library
|
||||||
|
*
|
||||||
|
* The easiest way is to include remwharead.hpp, which then includes all other
|
||||||
|
* headers.
|
||||||
|
*
|
||||||
|
* @code
|
||||||
|
* #include <remwharead/remwharead.hpp>
|
||||||
|
* @endcode
|
||||||
|
*
|
||||||
|
* Compile your code with `g++ -lremwharead`.
|
||||||
|
*/
|
||||||
|
|
||||||
using std::string;
|
#include "export/adoc.hpp"
|
||||||
using std::chrono::system_clock;
|
#include "export/bookmarks.hpp"
|
||||||
using time_point = system_clock::time_point;
|
#include "export/csv.hpp"
|
||||||
|
#include "export/export.hpp"
|
||||||
|
#include "export/simple.hpp"
|
||||||
|
#include "search.hpp"
|
||||||
|
#include "sqlite.hpp"
|
||||||
|
#include "time.hpp"
|
||||||
|
#include "types.hpp"
|
||||||
|
#include "uri.hpp"
|
||||||
|
|
||||||
// Convert ISO 8601 time-string or SQLite time-string to time_point.
|
#endif // REMWHAREAD_HPP
|
||||||
const time_point string_to_timepoint(const string &strtime,
|
|
||||||
bool sqlite = false);
|
|
||||||
|
|
||||||
// Convert time_point to USO 8601 time-string or SQLite time-string.
|
|
||||||
const string timepoint_to_string(const time_point &tp, bool sqlite = false);
|
|
||||||
|
|
||||||
#endif // REMWHAREAD_TIME_HPP
|
|
|
@ -0,0 +1,194 @@
|
||||||
|
/* This file is part of remwharead.
|
||||||
|
* Copyright © 2019 tastytea <tastytea@tastytea.de>
|
||||||
|
*
|
||||||
|
* This program is free software: you can redistribute it and/or modify
|
||||||
|
* it under the terms of the GNU General Public License as published by
|
||||||
|
* the Free Software Foundation, version 3.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License
|
||||||
|
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <regex>
|
||||||
|
#include <algorithm>
|
||||||
|
#include <locale>
|
||||||
|
#include <unicode/unistr.h>
|
||||||
|
#include "search.hpp"
|
||||||
|
|
||||||
|
namespace remwharead
|
||||||
|
{
|
||||||
|
using std::regex;
|
||||||
|
using std::regex_search;
|
||||||
|
using std::smatch;
|
||||||
|
using std::find;
|
||||||
|
using std::find_if;
|
||||||
|
|
||||||
|
/*!
 *  @brief  Split a search expression into OR-slices of lowercased AND-terms.
 *
 *  The expression is first cut at " OR " / " || ", then every slice is cut at
 *  " AND " / " && ". Each term is passed through to_lowercase().
 *
 *  @return One vector of terms per OR-slice, in the order they appear.
 */
const vector<vector<string>> parse_expression(string expression)
{
    const regex re_or("(.+?) (OR|\\|\\|) ");
    const regex re_and("(.+?) (AND|&&) ");
    smatch found;

    // Pass 1: cut the expression at every OR; the remainder is the last slice.
    vector<string> slices;
    while (regex_search(expression, found, re_or))
    {
        slices.push_back(found[1].str());
        expression = found.suffix().str();
    }
    slices.push_back(expression);

    // Pass 2: cut every slice at AND. The slice is taken by value because it
    // is consumed while matching.
    vector<vector<string>> searchlist;
    for (string slice : slices)
    {
        vector<string> terms;
        while (regex_search(slice, found, re_and))
        {
            terms.push_back(to_lowercase(found[1].str()));
            slice = found.suffix().str();
        }
        terms.push_back(to_lowercase(slice));
        searchlist.push_back(terms);
    }

    return searchlist;
}
|
||||||
|
|
||||||
|
/*!
 *  @brief  Convert a UTF-8 encoded string to lowercase using ICU.
 *
 *  @param  str UTF-8 encoded input.
 *
 *  @return Lowercased copy of str, UTF-8 encoded.
 */
const string to_lowercase(const string &str)
{
    // Decode explicitly as UTF-8. The UnicodeString(const char *) constructor
    // converts using the platform's default codepage and stops at the first
    // NUL byte, which is wrong for UTF-8 input on systems with another default.
    icu::UnicodeString uni = icu::UnicodeString::fromUTF8(str);
    string out;
    uni.toLower().toUTF8String(out);
    return out;
}
|
||||||
|
|
||||||
|
/*!
 *  @brief  Return the entries whose tags satisfy the search expression.
 *
 *  The expression is split with parse_expression(). An entry matches an
 *  OR-slice if every term of the slice equals (or, if is_re is set, matches
 *  as the anchored regular expression "^term$") one of the entry's lowercased
 *  tags. Each entry is added to the result at most once, even if it matches
 *  several OR-slices.
 *
 *  @param  entries    Entries to search.
 *  @param  expression Search expression.
 *  @param  is_re      Treat terms as regular expressions?
 *
 *  @return Matching entries, ordered by OR-slice first, then by input order.
 */
const vector<DB::entry> search_tags(const vector<DB::entry> &entries,
                                    string expression, const bool is_re)
{
    const vector<vector<string>> searchlist = parse_expression(expression);
    vector<DB::entry> result;
    // added[i] becomes true once entries[i] is in result. Tracking by position
    // prevents duplicates without relying on entry equality.
    vector<bool> added(entries.size(), false);

    for (const vector<string> &tags_or : searchlist)
    {
        // Compiled on first use, so that an invalid pattern still only throws
        // once there is a tag to compare against.
        vector<regex> patterns;

        for (std::size_t i = 0; i < entries.size(); ++i)
        {
            if (added[i])
            {   // Already matched by an earlier OR-slice.
                continue;
            }

            // Lowercase the entry's tags once, not once per search term.
            vector<string> entry_tags;
            entry_tags.reserve(entries[i].tags.size());
            for (const string &tag : entries[i].tags)
            {
                entry_tags.push_back(to_lowercase(tag));
            }

            if (is_re && patterns.empty() && !entry_tags.empty())
            {
                for (const string &tag : tags_or)
                {
                    patterns.emplace_back("^" + tag + "$");
                }
            }

            // The entry matches if every tag of the OR-slice is found.
            bool matched = true;
            for (std::size_t t = 0; t < tags_or.size() && matched; ++t)
            {
                const auto it = find_if(
                    entry_tags.begin(), entry_tags.end(),
                    [&tags_or, &patterns, t, is_re](const string &s)
                    {
                        if (is_re)
                        {
                            return regex_search(s, patterns[t]);
                        }
                        return (s == tags_or[t]);
                    });
                matched = (it != entry_tags.end());
            }

            if (matched)
            {
                result.push_back(entries[i]);
                added[i] = true;
            }
        }
    }

    return result;
}
|
||||||
|
|
||||||
|
/*!
 *  @brief  Return the entries whose tags, title, description or full text
 *          satisfy the search expression.
 *
 *  The result starts with everything search_tags() finds. Then, for every
 *  OR-slice, each entry that is not yet in the result is added if all terms of
 *  the slice are found in its lowercased title, or all in its lowercased
 *  description, or all in its lowercased full text. Terms are substrings, or
 *  unanchored regular expressions if is_re is set.
 *
 *  @param  entries    Entries to search.
 *  @param  expression Search expression.
 *  @param  is_re      Treat terms as regular expressions?
 *
 *  @return Matching entries.
 */
const vector<DB::entry> search_all(const vector<DB::entry> &entries,
                                   string expression, const bool is_re)
{
    const vector<vector<string>> searchlist = parse_expression(expression);
    vector<DB::entry> result = search_tags(entries, expression, is_re);

    for (const vector<string> &terms_or : searchlist)
    {
        // Compiled on first use and then shared by all entries of this slice.
        vector<regex> patterns;

        for (const DB::entry &entry : entries)
        {
            if (find(result.begin(), result.end(), entry) != result.end())
            {   // Skip if already in result list.
                continue;
            }

            // Lowercase the searched fields once per entry, not once per term.
            const string title = to_lowercase(entry.title);
            const string description = to_lowercase(entry.description);
            const string fulltext = to_lowercase(entry.fulltext);

            if (is_re && patterns.empty())
            {
                for (const string &term : terms_or)
                {
                    patterns.emplace_back(term);
                }
            }

            // Each flag stays true only while every term is found in that
            // field.
            bool matched_title = true;
            bool matched_description = true;
            bool matched_fulltext = true;

            for (std::size_t t = 0; t < terms_or.size(); ++t)
            {
                if (is_re)
                {
                    const regex &re = patterns[t];
                    matched_title = matched_title
                        && regex_search(title, re);
                    matched_description = matched_description
                        && regex_search(description, re);
                    matched_fulltext = matched_fulltext
                        && regex_search(fulltext, re);
                }
                else
                {
                    const string &term = terms_or[t];
                    matched_title = matched_title
                        && (title.find(term) != string::npos);
                    matched_description = matched_description
                        && (description.find(term) != string::npos);
                    matched_fulltext = matched_fulltext
                        && (fulltext.find(term) != string::npos);
                }
            }

            if (matched_title || matched_description || matched_fulltext)
            {
                result.push_back(entry);
            }
        }
    }

    return result;
}
|
||||||
|
}
|
|
@ -0,0 +1,75 @@
|
||||||
|
/* This file is part of remwharead.
|
||||||
|
* Copyright © 2019 tastytea <tastytea@tastytea.de>
|
||||||
|
*
|
||||||
|
* This program is free software: you can redistribute it and/or modify
|
||||||
|
* it under the terms of the GNU General Public License as published by
|
||||||
|
* the Free Software Foundation, version 3.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License
|
||||||
|
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef REMWHAREAD_SEARCH_HPP
|
||||||
|
#define REMWHAREAD_SEARCH_HPP
|
||||||
|
|
||||||
|
#include <vector>
|
||||||
|
#include <string>
|
||||||
|
#include "sqlite.hpp"
|
||||||
|
|
||||||
|
//! @file
|
||||||
|
|
||||||
|
namespace remwharead
|
||||||
|
{
|
||||||
|
using std::vector;
|
||||||
|
using std::string;
|
||||||
|
|
||||||
|
/*!
|
||||||
|
* @brief  Split expression into subexpressions.
|
||||||
|
*
|
||||||
|
* First it splits at `OR` or `||`, then it splits the subexpressions at
|
||||||
|
* `AND` or `&&`. The first vector contains all tags before the first `OR`.
|
||||||
|
*
|
||||||
|
* @return Vector of `OR`-vectors of `AND`-tags.
|
||||||
|
*/
|
||||||
|
const vector<vector<string>> parse_expression(string expression);
|
||||||
|
|
||||||
|
//! Convert str to lowercase. Works with unicode.
|
||||||
|
const string to_lowercase(const string &str);
|
||||||
|
|
||||||
|
/*!
|
||||||
|
* @brief Search in tags of database entries.
|
||||||
|
*
|
||||||
|
* Only matches whole tags, *Pill* does not match *Pillow*.
|
||||||
|
*
|
||||||
|
* @param entries Vector of Database::entry to search.
|
||||||
|
* @param expression Search expression.
|
||||||
|
* @param is_re Is it a regular expression?
|
||||||
|
*
|
||||||
|
* @return Vector of matching Database::entry.
|
||||||
|
*/
|
||||||
|
const vector<Database::entry>
|
||||||
|
search_tags(const vector<Database::entry> &entries, string expression,
|
||||||
|
const bool is_re);
|
||||||
|
|
||||||
|
/*!
|
||||||
|
* @brief Search in full text of database entries.
|
||||||
|
*
|
||||||
|
* Searches in tags, title, description and full text.
|
||||||
|
*
|
||||||
|
* @param entries Vector of Database::entry to search.
|
||||||
|
* @param expression Search expression.
|
||||||
|
* @param is_re Is it a regular expression?
|
||||||
|
*
|
||||||
|
* @return Vector of matching Database::entry.
|
||||||
|
*/
|
||||||
|
const vector<Database::entry>
|
||||||
|
search_all(const vector<Database::entry> &entries, string expression,
|
||||||
|
const bool is_re);
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif // REMWHAREAD_SEARCH_HPP
|
|
@ -0,0 +1,164 @@
|
||||||
|
/* This file is part of remwharead.
|
||||||
|
* Copyright © 2019 tastytea <tastytea@tastytea.de>
|
||||||
|
*
|
||||||
|
* This program is free software: you can redistribute it and/or modify
|
||||||
|
* it under the terms of the GNU General Public License as published by
|
||||||
|
* the Free Software Foundation, version 3.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License
|
||||||
|
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <exception>
|
||||||
|
#include <iostream>
|
||||||
|
#include <algorithm>
|
||||||
|
#include <basedir.h>
|
||||||
|
#include <sqlite/execute.hpp>
|
||||||
|
#include <sqlite/query.hpp>
|
||||||
|
#include "time.hpp"
|
||||||
|
#include "sqlite.hpp"
|
||||||
|
|
||||||
|
namespace remwharead
|
||||||
|
{
|
||||||
|
using std::cerr;
|
||||||
|
using std::endl;
|
||||||
|
|
||||||
|
/*!
 *  @brief  Open the database, creating directory, file and table if needed.
 *
 *  The database lives in "remwharead/database.sqlite" below the XDG data home.
 *  Errors are reported on stderr and leave the object unconnected, so that
 *  operator bool() returns false.
 */
Database::Database()
    : _connected(false)
{
    try
    {
        xdgHandle xdg;
        // Do not use the handle if it could not be initialized.
        if (xdgInitHandle(&xdg) == nullptr)
        {
            cerr << "Error in " << __func__
                 << ": Could not initialize XDG base directory handle."
                 << endl;
            return;
        }
        _dbpath = xdgDataHome(&xdg) / fs::path("remwharead");
        xdgWipeHandle(&xdg);

        if (!fs::exists(_dbpath))
        {
            fs::create_directories(_dbpath);
        }
        _dbpath /= "database.sqlite";

        _con = std::make_unique<sqlite::connection>(_dbpath);
        sqlite::execute(*_con, "CREATE TABLE IF NOT EXISTS remwharead("
                        "uri TEXT, archive_uri TEXT, datetime TEXT, "
                        "tags TEXT, title TEXT, description TEXT, "
                        "fulltext TEXT);", true);

        _connected = true;
    }
    catch (std::exception &e)
    {
        cerr << "Error in " << __func__ << ": " << e.what() << endl;
    }
}
|
||||||
|
|
||||||
|
//! Report whether the constructor managed to connect to the database.
Database::operator bool() const
{
    return _connected;
}
|
||||||
|
|
||||||
|
//! Two entries compare equal if their datetime is equal; no other member is
//! looked at.
bool operator ==(const Database::entry &a, const Database::entry &b)
{
    return (a.datetime == b.datetime);
}
|
||||||
|
|
||||||
|
//! Return a copy of the full text in which every newline character is
//! replaced by the two characters backslash and 'n'.
const string Database::entry::fulltext_oneline() const
{
    string flattened = fulltext;
    // Continue behind the two inserted characters; neither of them is a
    // newline, so nothing is skipped.
    for (size_t at = flattened.find('\n'); at != std::string::npos;
         at = flattened.find('\n', at + 2))
    {
        flattened.replace(at, 1, "\\n");
    }
    return flattened;
}
|
||||||
|
|
||||||
|
/*!
 *  @brief  Insert an entry as a new row into the table "remwharead".
 *
 *  The datetime is stored in SQLite format and the tags as one
 *  comma-separated string. Exceptions are caught and reported on stderr.
 *
 *  @param  data The entry to store.
 */
void Database::store(const Database::entry &data) const
{
    try
    {
        const string strdatetime = timepoint_to_string(data.datetime, true);

        // Join the tags by position. Comparing each tag's value with the last
        // tag dropped the comma whenever an earlier tag had the same text as
        // the last one.
        string strtags;
        bool first = true;
        for (const string &tag : data.tags)
        {
            if (!first)
            {
                strtags += ",";
            }
            strtags += tag;
            first = false;
        }

        sqlite::execute ins(*_con, "INSERT INTO remwharead "
                            "VALUES(?, ?, ?, ?, ?, ?, ?);");
        ins % data.uri % data.archive_uri % strdatetime % strtags
            % data.title % data.description % data.fulltext;
        ins();
    }
    catch (std::exception &e)
    {
        cerr << "Error in " << __func__ << ": " << e.what() << endl;
    }
}
|
||||||
|
|
||||||
|
/*!
 *  @brief  Read all entries with a datetime between start and end.
 *
 *  Rows are ordered by datetime, newest first. Exceptions are caught and
 *  reported on stderr; in that case an empty vector is returned.
 *
 *  @param  start Lower bound of the datetime range.
 *  @param  end   Upper bound of the datetime range.
 *
 *  @return The entries found.
 */
const vector<Database::entry> Database::retrieve(
    const time_point &start, const time_point &end) const
{
    try
    {
        const string from = timepoint_to_string(start, true);
        const string until = timepoint_to_string(end, true);
        const string query = "SELECT * FROM remwharead WHERE datetime "
            "BETWEEN '" + from + "' AND '" + until
            + "' ORDER BY datetime DESC;";

        sqlite::query q(*_con, query);
        sqlite::result_type rows = q.get_result();
        vector<entry> entries;

        while (rows->next_row())
        {
            // Split the comma-separated tags column.
            // NOTE(review): an empty column yields a single empty tag, not an
            // empty vector -- confirm that callers expect this.
            const string strtags = rows->get_string(3);
            vector<string> tags;
            size_t begin = 0;
            while (true)
            {
                const size_t comma = strtags.find(',', begin);
                tags.push_back(strtags.substr(begin, comma - begin));
                if (comma == std::string::npos)
                {
                    break;
                }
                begin = comma + 1;
            }

            entries.push_back
                ({
                    rows->get_string(0),
                    rows->get_string(1),
                    string_to_timepoint(rows->get_string(2), true),
                    tags,
                    rows->get_string(4),
                    rows->get_string(5),
                    rows->get_string(6)
                });
        }

        return entries;
    }
    catch (std::exception &e)
    {
        cerr << "Error in " << __func__ << ": " << e.what() << endl;
    }

    return {};
}
|
||||||
|
}
|
|
@ -0,0 +1,80 @@
|
||||||
|
/* This file is part of remwharead.
|
||||||
|
* Copyright © 2019 tastytea <tastytea@tastytea.de>
|
||||||
|
*
|
||||||
|
* This program is free software: you can redistribute it and/or modify
|
||||||
|
* it under the terms of the GNU General Public License as published by
|
||||||
|
* the Free Software Foundation, version 3.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License
|
||||||
|
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef REMWHAREAD_SQLITE_HPP
|
||||||
|
#define REMWHAREAD_SQLITE_HPP
|
||||||
|
|
||||||
|
#include <experimental/filesystem>
|
||||||
|
#include <memory>
|
||||||
|
#include <string>
|
||||||
|
#include <vector>
|
||||||
|
#include <chrono>
|
||||||
|
#include <sqlite/connection.hpp>
|
||||||
|
|
||||||
|
namespace remwharead
|
||||||
|
{
|
||||||
|
namespace fs = std::experimental::filesystem;
|
||||||
|
using std::string;
|
||||||
|
using std::vector;
|
||||||
|
using std::chrono::system_clock;
|
||||||
|
using time_point = system_clock::time_point;
|
||||||
|
|
||||||
|
//! Store and retrieve files from/to SQLite.
|
||||||
|
class Database
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
//! Describes a database entry.
|
||||||
|
typedef struct entry
|
||||||
|
{
|
||||||
|
string uri;
|
||||||
|
string archive_uri;
|
||||||
|
time_point datetime;
|
||||||
|
vector<string> tags;
|
||||||
|
string title;
|
||||||
|
string description;
|
||||||
|
string fulltext;
|
||||||
|
|
||||||
|
//! Returns true if date and time are equal.
|
||||||
|
friend bool operator ==(const Database::entry &a,
|
||||||
|
const Database::entry &b);
|
||||||
|
//! The full text in one line.
|
||||||
|
const string fulltext_oneline() const;
|
||||||
|
} entry;
|
||||||
|
|
||||||
|
//! Connects to the database and creates it if necessary.
|
||||||
|
Database();
|
||||||
|
|
||||||
|
//! Returns true if connected to the database.
|
||||||
|
operator bool() const;
|
||||||
|
|
||||||
|
//! Store a Database::entry in the database.
|
||||||
|
void store(const entry &data) const;
|
||||||
|
|
||||||
|
//! Retrieve a vector of Database::entry from the database.
|
||||||
|
const vector<entry> retrieve(
|
||||||
|
const time_point &start = time_point(),
|
||||||
|
const time_point &end = system_clock::now()) const;
|
||||||
|
|
||||||
|
private:
|
||||||
|
fs::path _dbpath;
|
||||||
|
std::unique_ptr<sqlite::connection> _con;
|
||||||
|
bool _connected;
|
||||||
|
};
|
||||||
|
|
||||||
|
using DB = Database;
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif // REMWHAREAD_SQLITE_HPP
|
|
@ -0,0 +1,61 @@
|
||||||
|
/* This file is part of remwharead.
|
||||||
|
* Copyright © 2019 tastytea <tastytea@tastytea.de>
|
||||||
|
*
|
||||||
|
* This program is free software: you can redistribute it and/or modify
|
||||||
|
* it under the terms of the GNU General Public License as published by
|
||||||
|
* the Free Software Foundation, version 3.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License
|
||||||
|
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <ctime>
|
||||||
|
#include <iomanip>
|
||||||
|
#include <sstream>
|
||||||
|
#include <cstdint>
|
||||||
|
#include "time.hpp"
|
||||||
|
|
||||||
|
namespace remwharead
|
||||||
|
{
|
||||||
|
/*!
 *  @brief  Parse a time string and interpret it as local time.
 *
 *  @param  strtime Time string, "YYYY-MM-DDThh:mm:ss" or, if sqlite is set,
 *                  "YYYY-MM-DD hh:mm:ss".
 *  @param  sqlite  Is the string in SQLite format?
 *
 *  @return The corresponding time_point.
 */
const time_point string_to_timepoint(const string &strtime, bool sqlite)
{
    std::stringstream sstime(strtime);
    struct std::tm tm = {};
    tm.tm_isdst = -1;           // Detect daylight saving time.

    const char *format = sqlite ? "%Y-%m-%d %T" : "%Y-%m-%dT%T";
    sstime >> std::get_time(&tm, format);

    // std::mktime() interprets tm as local time. It is the standard
    // equivalent of the non-standard timelocal() used before.
    const std::time_t time = std::mktime(&tm);
    return system_clock::from_time_t(time);
}
|
||||||
|
|
||||||
|
/*!
 *  @brief  Format a time_point as local time.
 *
 *  @param  tp     The time_point to format.
 *  @param  sqlite Use a space instead of 'T' between date and time?
 *
 *  @return The time formatted with "%FT%T" or, if sqlite is set, "%F %T".
 */
const string timepoint_to_string(const time_point &tp, bool sqlite)
{
    constexpr std::uint16_t bufsize = 32;
    const std::time_t unix_time = system_clock::to_time_t(tp);
    const char *format = sqlite ? "%F %T" : "%FT%T";

    char buffer[bufsize];
    std::strftime(buffer, bufsize, format, std::localtime(&unix_time));

    return string(buffer);
}
|
||||||
|
}
|
|
@ -0,0 +1,53 @@
|
||||||
|
/* This file is part of remwharead.
|
||||||
|
* Copyright © 2019 tastytea <tastytea@tastytea.de>
|
||||||
|
*
|
||||||
|
* This program is free software: you can redistribute it and/or modify
|
||||||
|
* it under the terms of the GNU General Public License as published by
|
||||||
|
* the Free Software Foundation, version 3.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License
|
||||||
|
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef REMWHAREAD_TIME_HPP
|
||||||
|
#define REMWHAREAD_TIME_HPP
|
||||||
|
|
||||||
|
#include <string>
|
||||||
|
#include <chrono>
|
||||||
|
|
||||||
|
//! @file
|
||||||
|
|
||||||
|
namespace remwharead
{
    using std::string;
    using std::chrono::system_clock;
    using time_point = system_clock::time_point;

    /*!
     *  @brief  Convert ISO 8601 or SQLite time-string to time_point.
     *
     *          The SQLite format is *YYYY-MM-DD hh:mm:ss* instead of
     *          *YYYY-MM-DDThh:mm:ss*.
     *
     *  @param  strtime Time string in ISO 8601 or SQLite format.
     *  @param  sqlite  Is the string in SQLite format?
     *
     *  @return The parsed time as std::chrono::system_clock::time_point.
     */
    const time_point string_to_timepoint(const string &strtime,
                                         bool sqlite = false);

    /*!
     *  @brief  Convert time_point to ISO 8601 or SQLite time-string.
     *
     *          The SQLite format is *YYYY-MM-DD hh:mm:ss* instead of
     *          *YYYY-MM-DDThh:mm:ss*.
     *
     *  @param  tp      The std::chrono::system_clock::time_point.
     *  @param  sqlite  Should the string be in SQLite format?
     *
     *  @return The formatted time string.
     */
    const string timepoint_to_string(const time_point &tp, bool sqlite = false);
}
|
||||||
|
|
||||||
|
#endif // REMWHAREAD_TIME_HPP
|
|
@ -17,13 +17,19 @@
|
||||||
#ifndef REMWHAREAD_TYPES_HPP
|
#ifndef REMWHAREAD_TYPES_HPP
|
||||||
#define REMWHAREAD_TYPES_HPP
|
#define REMWHAREAD_TYPES_HPP
|
||||||
|
|
||||||
enum class export_format
|
//! @file
|
||||||
|
|
||||||
|
namespace remwharead
|
||||||
{
|
{
|
||||||
undefined,
|
//! Format of the export.
|
||||||
csv,
|
enum class export_format
|
||||||
asciidoc,
|
{
|
||||||
bookmarks,
|
undefined,
|
||||||
simple
|
csv,
|
||||||
};
|
asciidoc,
|
||||||
|
bookmarks,
|
||||||
|
simple
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
#endif // REMWHAREAD_TYPES_HPP
|
#endif // REMWHAREAD_TYPES_HPP
|
|
@ -0,0 +1,531 @@
|
||||||
|
/* This file is part of remwharead.
|
||||||
|
* Copyright © 2019 tastytea <tastytea@tastytea.de>
|
||||||
|
*
|
||||||
|
* This program is free software: you can redistribute it and/or modify
|
||||||
|
* it under the terms of the GNU General Public License as published by
|
||||||
|
* the Free Software Foundation, version 3.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License
|
||||||
|
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <sstream>
|
||||||
|
#include <cstdint>
|
||||||
|
#include <iostream>
|
||||||
|
#include <regex>
|
||||||
|
#include <locale>
|
||||||
|
#include <codecvt>
|
||||||
|
#include <curlpp/cURLpp.hpp>
|
||||||
|
#include <curlpp/Options.hpp>
|
||||||
|
#include <curlpp/Exception.hpp>
|
||||||
|
#include <curlpp/Infos.hpp>
|
||||||
|
#include <version.hpp>
|
||||||
|
#include "uri.hpp"
|
||||||
|
|
||||||
|
namespace remwharead
|
||||||
|
{
|
||||||
|
namespace curlopts = curlpp::options;
|
||||||
|
using std::uint64_t;
|
||||||
|
using std::cerr;
|
||||||
|
using std::endl;
|
||||||
|
using std::regex;
|
||||||
|
using std::regex_replace;
|
||||||
|
using std::regex_search;
|
||||||
|
using std::smatch;
|
||||||
|
using std::regex_constants::icase;
|
||||||
|
using std::array;
|
||||||
|
|
||||||
|
URI::URI(const string &uri)
|
||||||
|
:_uri(uri)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
const html_extract URI::get()
|
||||||
|
{
|
||||||
|
try
|
||||||
|
{
|
||||||
|
std::ostringstream oss;
|
||||||
|
curlpp::Easy request;
|
||||||
|
set_curlpp_options(request);
|
||||||
|
request.setOpt<curlopts::Url>(_uri);
|
||||||
|
request.setOpt<curlopts::WriteStream>(&oss);
|
||||||
|
request.perform();
|
||||||
|
|
||||||
|
const string answer = oss.str();
|
||||||
|
if (answer.empty())
|
||||||
|
{
|
||||||
|
cerr << "Error: Could not download page. Response code: "
|
||||||
|
<< curlpp::infos::ResponseCode::get(request) << endl;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
return
|
||||||
|
{
|
||||||
|
extract_title(answer),
|
||||||
|
extract_description(answer),
|
||||||
|
strip_html(answer)
|
||||||
|
};
|
||||||
|
}
|
||||||
|
}
|
||||||
|
catch (const std::exception &e)
|
||||||
|
{
|
||||||
|
cerr << "Error in " << __func__ << ": " << e.what() << endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
return { "", "", "" };
|
||||||
|
}
|
||||||
|
|
||||||
|
void URI::set_curlpp_options(curlpp::Easy &request)
|
||||||
|
{
|
||||||
|
request.setOpt<curlopts::UserAgent>(string("remwharead/")
|
||||||
|
+ global::version);
|
||||||
|
request.setOpt<curlopts::HttpHeader>({ "Connection: close" });
|
||||||
|
request.setOpt<curlopts::FollowLocation>(true);
|
||||||
|
}
|
||||||
|
|
||||||
|
const string URI::extract_title(const string &html)
|
||||||
|
{
|
||||||
|
const regex re_htmlfile("\\.(.?html?|xml|rss)$");
|
||||||
|
if (_uri.substr(0, 4) == "http" || regex_search(_uri, re_htmlfile))
|
||||||
|
{
|
||||||
|
smatch match;
|
||||||
|
regex_search(html, match, regex("<title>([^<]+)", icase));
|
||||||
|
return remove_newlines(unescape_html(match[1].str()));
|
||||||
|
}
|
||||||
|
|
||||||
|
return "";
|
||||||
|
}
|
||||||
|
|
||||||
|
const string URI::extract_description(const string &html)
|
||||||
|
{
|
||||||
|
const regex re_htmlfile("\\.(.?html?|xml|rss)$");
|
||||||
|
if (_uri.substr(0, 4) == "http" || regex_search(_uri, re_htmlfile))
|
||||||
|
{
|
||||||
|
smatch match;
|
||||||
|
const regex re("description\"[^>]+content=\"([^\"]+)", icase);
|
||||||
|
regex_search(html, match, re);
|
||||||
|
return remove_newlines(strip_html(match[1].str()));
|
||||||
|
}
|
||||||
|
|
||||||
|
return "";
|
||||||
|
}
|
||||||
|
|
||||||
|
const string URI::strip_html(const string &html)
|
||||||
|
{
|
||||||
|
string out;
|
||||||
|
|
||||||
|
out = remove_html_tags(html, "script"); // Remove JavaScript.
|
||||||
|
out = remove_html_tags(out, "style"); // Remove CSS.
|
||||||
|
out = remove_html_tags(out); // Remove tags.
|
||||||
|
|
||||||
|
size_t pos = 0;
|
||||||
|
while ((pos = out.find("\r", pos)) != std::string::npos) // Remove CR.
|
||||||
|
{
|
||||||
|
out.replace(pos, 1, "");
|
||||||
|
}
|
||||||
|
|
||||||
|
out = regex_replace(out, regex("\\s+\n"), "\n"); // Remove space at eol.
|
||||||
|
out = regex_replace(out, regex("\n{2,}"), "\n"); // Reduce newlines.
|
||||||
|
|
||||||
|
return unescape_html(out);
|
||||||
|
}
|
||||||
|
const string URI::remove_html_tags(const string &html, const string &tag)
|
||||||
|
{
|
||||||
|
// NOTE: I did this with regex_replace before, but libstdc++ segfaulted.
|
||||||
|
string out;
|
||||||
|
if (tag.empty())
|
||||||
|
{
|
||||||
|
size_t pos = 0;
|
||||||
|
while (pos != std::string::npos)
|
||||||
|
{
|
||||||
|
size_t startpos = html.find('<', pos);
|
||||||
|
size_t endpos = html.find('>', startpos);
|
||||||
|
out += html.substr(pos, startpos - pos);
|
||||||
|
pos = endpos;
|
||||||
|
if (pos != std::string::npos)
|
||||||
|
{
|
||||||
|
++pos;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
size_t pos = 0;
|
||||||
|
out = html;
|
||||||
|
while ((pos = out.find("<" + tag)) != std::string::npos)
|
||||||
|
{
|
||||||
|
size_t endpos = out.find("</" + tag, pos);
|
||||||
|
if (endpos == std::string::npos)
|
||||||
|
{
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
endpos += 3 + tag.length(); // tag + </ + >
|
||||||
|
out.replace(pos, endpos - pos, "");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return out;
|
||||||
|
}
|
||||||
|
|
||||||
|
const string URI::unescape_html(const string &html)
|
||||||
|
{
|
||||||
|
string buffer = html;
|
||||||
|
string output;
|
||||||
|
|
||||||
|
// Used to convert int to utf-8 char.
|
||||||
|
std::wstring_convert<std::codecvt_utf8<char32_t>, char32_t> u8c;
|
||||||
|
regex re_entity("&#(x)?([[:alnum:]]{1,8});");
|
||||||
|
smatch match;
|
||||||
|
|
||||||
|
while (regex_search(buffer, match, re_entity))
|
||||||
|
{
|
||||||
|
char32_t codepoint = 0;
|
||||||
|
// 'x' in front of the number means it's hexadecimal, else decimal.
|
||||||
|
if (match[1].length() == 1)
|
||||||
|
{
|
||||||
|
codepoint = std::stoi(match[2].str(), nullptr, 16);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
codepoint = std::stoi(match[2].str(), nullptr, 10);
|
||||||
|
}
|
||||||
|
output += match.prefix().str() + u8c.to_bytes(codepoint);
|
||||||
|
buffer = match.suffix().str();
|
||||||
|
}
|
||||||
|
output += buffer;
|
||||||
|
|
||||||
|
// Source: https://en.wikipedia.org/wiki/List_of_XML_and_HTML_character_
|
||||||
|
// entity_references#Character_entity_references_in_HTML
|
||||||
|
const array<const std::pair<const string, const char32_t>, 258> names =
|
||||||
|
{{
|
||||||
|
{ "exclamation", 0x0021 },
|
||||||
|
{ "quot", 0x0022 },
|
||||||
|
{ "percent", 0x0025 },
|
||||||
|
{ "amp", 0x0026 },
|
||||||
|
{ "apos", 0x0027 },
|
||||||
|
{ "add", 0x002B },
|
||||||
|
{ "lt", 0x003C },
|
||||||
|
{ "equal", 0x003D },
|
||||||
|
{ "gt", 0x003E },
|
||||||
|
{ "nbsp", 0x00A0 },
|
||||||
|
{ "iexcl", 0x00A1 },
|
||||||
|
{ "cent", 0x00A2 },
|
||||||
|
{ "pound", 0x00A3 },
|
||||||
|
{ "curren", 0x00A4 },
|
||||||
|
{ "yen", 0x00A5 },
|
||||||
|
{ "brvbar", 0x00A6 },
|
||||||
|
{ "sect", 0x00A7 },
|
||||||
|
{ "uml", 0x00A8 },
|
||||||
|
{ "copy", 0x00A9 },
|
||||||
|
{ "ordf", 0x00AA },
|
||||||
|
{ "laquo", 0x00AB },
|
||||||
|
{ "not", 0x00AC },
|
||||||
|
{ "shy", 0x00AD },
|
||||||
|
{ "reg", 0x00AE },
|
||||||
|
{ "macr", 0x00AF },
|
||||||
|
{ "deg", 0x00B0 },
|
||||||
|
{ "plusmn", 0x00B1 },
|
||||||
|
{ "sup2", 0x00B2 },
|
||||||
|
{ "sup3", 0x00B3 },
|
||||||
|
{ "acute", 0x00B4 },
|
||||||
|
{ "micro", 0x00B5 },
|
||||||
|
{ "para", 0x00B6 },
|
||||||
|
{ "middot", 0x00B7 },
|
||||||
|
{ "cedil", 0x00B8 },
|
||||||
|
{ "sup1", 0x00B9 },
|
||||||
|
{ "ordm", 0x00BA },
|
||||||
|
{ "raquo", 0x00BB },
|
||||||
|
{ "frac14", 0x00BC },
|
||||||
|
{ "frac12", 0x00BD },
|
||||||
|
{ "frac34", 0x00BE },
|
||||||
|
{ "iquest", 0x00BF },
|
||||||
|
{ "Agrave", 0x00C0 },
|
||||||
|
{ "Aacute", 0x00C1 },
|
||||||
|
{ "Acirc", 0x00C2 },
|
||||||
|
{ "Atilde", 0x00C3 },
|
||||||
|
{ "Auml", 0x00C4 },
|
||||||
|
{ "Aring", 0x00C5 },
|
||||||
|
{ "AElig", 0x00C6 },
|
||||||
|
{ "Ccedil", 0x00C7 },
|
||||||
|
{ "Egrave", 0x00C8 },
|
||||||
|
{ "Eacute", 0x00C9 },
|
||||||
|
{ "Ecirc", 0x00CA },
|
||||||
|
{ "Euml", 0x00CB },
|
||||||
|
{ "Igrave", 0x00CC },
|
||||||
|
{ "Iacute", 0x00CD },
|
||||||
|
{ "Icirc", 0x00CE },
|
||||||
|
{ "Iuml", 0x00CF },
|
||||||
|
{ "ETH", 0x00D0 },
|
||||||
|
{ "Ntilde", 0x00D1 },
|
||||||
|
{ "Ograve", 0x00D2 },
|
||||||
|
{ "Oacute", 0x00D3 },
|
||||||
|
{ "Ocirc", 0x00D4 },
|
||||||
|
{ "Otilde", 0x00D5 },
|
||||||
|
{ "Ouml", 0x00D6 },
|
||||||
|
{ "times", 0x00D7 },
|
||||||
|
{ "Oslash", 0x00D8 },
|
||||||
|
{ "Ugrave", 0x00D9 },
|
||||||
|
{ "Uacute", 0x00DA },
|
||||||
|
{ "Ucirc", 0x00DB },
|
||||||
|
{ "Uuml", 0x00DC },
|
||||||
|
{ "Yacute", 0x00DD },
|
||||||
|
{ "THORN", 0x00DE },
|
||||||
|
{ "szlig", 0x00DF },
|
||||||
|
{ "agrave", 0x00E0 },
|
||||||
|
{ "aacute", 0x00E1 },
|
||||||
|
{ "acirc", 0x00E2 },
|
||||||
|
{ "atilde", 0x00E3 },
|
||||||
|
{ "auml", 0x00E4 },
|
||||||
|
{ "aring", 0x00E5 },
|
||||||
|
{ "aelig", 0x00E6 },
|
||||||
|
{ "ccedil", 0x00E7 },
|
||||||
|
{ "egrave", 0x00E8 },
|
||||||
|
{ "eacute", 0x00E9 },
|
||||||
|
{ "ecirc", 0x00EA },
|
||||||
|
{ "euml", 0x00EB },
|
||||||
|
{ "igrave", 0x00EC },
|
||||||
|
{ "iacute", 0x00ED },
|
||||||
|
{ "icirc", 0x00EE },
|
||||||
|
{ "iuml", 0x00EF },
|
||||||
|
{ "eth", 0x00F0 },
|
||||||
|
{ "ntilde", 0x00F1 },
|
||||||
|
{ "ograve", 0x00F2 },
|
||||||
|
{ "oacute", 0x00F3 },
|
||||||
|
{ "ocirc", 0x00F4 },
|
||||||
|
{ "otilde", 0x00F5 },
|
||||||
|
{ "ouml", 0x00F6 },
|
||||||
|
{ "divide", 0x00F7 },
|
||||||
|
{ "oslash", 0x00F8 },
|
||||||
|
{ "ugrave", 0x00F9 },
|
||||||
|
{ "uacute", 0x00FA },
|
||||||
|
{ "ucirc", 0x00FB },
|
||||||
|
{ "uuml", 0x00FC },
|
||||||
|
{ "yacute", 0x00FD },
|
||||||
|
{ "thorn", 0x00FE },
|
||||||
|
{ "yuml", 0x00FF },
|
||||||
|
{ "OElig", 0x0152 },
|
||||||
|
{ "oelig", 0x0153 },
|
||||||
|
{ "Scaron", 0x0160 },
|
||||||
|
{ "scaron", 0x0161 },
|
||||||
|
{ "Yuml", 0x0178 },
|
||||||
|
{ "fnof", 0x0192 },
|
||||||
|
{ "circ", 0x02C6 },
|
||||||
|
{ "tilde", 0x02DC },
|
||||||
|
{ "Alpha", 0x0391 },
|
||||||
|
{ "Beta", 0x0392 },
|
||||||
|
{ "Gamma", 0x0393 },
|
||||||
|
{ "Delta", 0x0394 },
|
||||||
|
{ "Epsilon", 0x0395 },
|
||||||
|
{ "Zeta", 0x0396 },
|
||||||
|
{ "Eta", 0x0397 },
|
||||||
|
{ "Theta", 0x0398 },
|
||||||
|
{ "Iota", 0x0399 },
|
||||||
|
{ "Kappa", 0x039A },
|
||||||
|
{ "Lambda", 0x039B },
|
||||||
|
{ "Mu", 0x039C },
|
||||||
|
{ "Nu", 0x039D },
|
||||||
|
{ "Xi", 0x039E },
|
||||||
|
{ "Omicron", 0x039F },
|
||||||
|
{ "Pi", 0x03A0 },
|
||||||
|
{ "Rho", 0x03A1 },
|
||||||
|
{ "Sigma", 0x03A3 },
|
||||||
|
{ "Tau", 0x03A4 },
|
||||||
|
{ "Upsilon", 0x03A5 },
|
||||||
|
{ "Phi", 0x03A6 },
|
||||||
|
{ "Chi", 0x03A7 },
|
||||||
|
{ "Psi", 0x03A8 },
|
||||||
|
{ "Omega", 0x03A9 },
|
||||||
|
{ "alpha", 0x03B1 },
|
||||||
|
{ "beta", 0x03B2 },
|
||||||
|
{ "gamma", 0x03B3 },
|
||||||
|
{ "delta", 0x03B4 },
|
||||||
|
{ "epsilon", 0x03B5 },
|
||||||
|
{ "zeta", 0x03B6 },
|
||||||
|
{ "eta", 0x03B7 },
|
||||||
|
{ "theta", 0x03B8 },
|
||||||
|
{ "iota", 0x03B9 },
|
||||||
|
{ "kappa", 0x03BA },
|
||||||
|
{ "lambda", 0x03BB },
|
||||||
|
{ "mu", 0x03BC },
|
||||||
|
{ "nu", 0x03BD },
|
||||||
|
{ "xi", 0x03BE },
|
||||||
|
{ "omicron", 0x03BF },
|
||||||
|
{ "pi", 0x03C0 },
|
||||||
|
{ "rho", 0x03C1 },
|
||||||
|
{ "sigmaf", 0x03C2 },
|
||||||
|
{ "sigma", 0x03C3 },
|
||||||
|
{ "tau", 0x03C4 },
|
||||||
|
{ "upsilon", 0x03C5 },
|
||||||
|
{ "phi", 0x03C6 },
|
||||||
|
{ "chi", 0x03C7 },
|
||||||
|
{ "psi", 0x03C8 },
|
||||||
|
{ "omega", 0x03C9 },
|
||||||
|
{ "thetasym", 0x03D1 },
|
||||||
|
{ "upsih", 0x03D2 },
|
||||||
|
{ "piv", 0x03D6 },
|
||||||
|
{ "ensp", 0x2002 },
|
||||||
|
{ "emsp", 0x2003 },
|
||||||
|
{ "thinsp", 0x2009 },
|
||||||
|
{ "zwnj", 0x200C },
|
||||||
|
{ "zwj", 0x200D },
|
||||||
|
{ "lrm", 0x200E },
|
||||||
|
{ "rlm", 0x200F },
|
||||||
|
{ "ndash", 0x2013 },
|
||||||
|
{ "mdash", 0x2014 },
|
||||||
|
{ "horbar", 0x2015 },
|
||||||
|
{ "lsquo", 0x2018 },
|
||||||
|
{ "rsquo", 0x2019 },
|
||||||
|
{ "sbquo", 0x201A },
|
||||||
|
{ "ldquo", 0x201C },
|
||||||
|
{ "rdquo", 0x201D },
|
||||||
|
{ "bdquo", 0x201E },
|
||||||
|
{ "dagger", 0x2020 },
|
||||||
|
{ "Dagger", 0x2021 },
|
||||||
|
{ "bull", 0x2022 },
|
||||||
|
{ "hellip", 0x2026 },
|
||||||
|
{ "permil", 0x2030 },
|
||||||
|
{ "prime", 0x2032 },
|
||||||
|
{ "Prime", 0x2033 },
|
||||||
|
{ "lsaquo", 0x2039 },
|
||||||
|
{ "rsaquo", 0x203A },
|
||||||
|
{ "oline", 0x203E },
|
||||||
|
{ "frasl", 0x2044 },
|
||||||
|
{ "euro", 0x20AC },
|
||||||
|
{ "image", 0x2111 },
|
||||||
|
{ "weierp", 0x2118 },
|
||||||
|
{ "real", 0x211C },
|
||||||
|
{ "trade", 0x2122 },
|
||||||
|
{ "alefsym", 0x2135 },
|
||||||
|
{ "larr", 0x2190 },
|
||||||
|
{ "uarr", 0x2191 },
|
||||||
|
{ "rarr", 0x2192 },
|
||||||
|
{ "darr", 0x2193 },
|
||||||
|
{ "harr", 0x2194 },
|
||||||
|
{ "crarr", 0x21B5 },
|
||||||
|
{ "lArr", 0x21D0 },
|
||||||
|
{ "uArr", 0x21D1 },
|
||||||
|
{ "rArr", 0x21D2 },
|
||||||
|
{ "dArr", 0x21D3 },
|
||||||
|
{ "hArr", 0x21D4 },
|
||||||
|
{ "forall", 0x2200 },
|
||||||
|
{ "part", 0x2202 },
|
||||||
|
{ "exist", 0x2203 },
|
||||||
|
{ "empty", 0x2205 },
|
||||||
|
{ "nabla", 0x2207 },
|
||||||
|
{ "isin", 0x2208 },
|
||||||
|
{ "notin", 0x2209 },
|
||||||
|
{ "ni", 0x220B },
|
||||||
|
{ "prod", 0x220F },
|
||||||
|
{ "sum", 0x2211 },
|
||||||
|
{ "minus", 0x2212 },
|
||||||
|
{ "lowast", 0x2217 },
|
||||||
|
{ "radic", 0x221A },
|
||||||
|
{ "prop", 0x221D },
|
||||||
|
{ "infin", 0x221E },
|
||||||
|
{ "ang", 0x2220 },
|
||||||
|
{ "and", 0x2227 },
|
||||||
|
{ "or", 0x2228 },
|
||||||
|
{ "cap", 0x2229 },
|
||||||
|
{ "cup", 0x222A },
|
||||||
|
{ "int", 0x222B },
|
||||||
|
{ "there4", 0x2234 },
|
||||||
|
{ "sim", 0x223C },
|
||||||
|
{ "cong", 0x2245 },
|
||||||
|
{ "asymp", 0x2248 },
|
||||||
|
{ "ne", 0x2260 },
|
||||||
|
{ "equiv", 0x2261 },
|
||||||
|
{ "le", 0x2264 },
|
||||||
|
{ "ge", 0x2265 },
|
||||||
|
{ "sub", 0x2282 },
|
||||||
|
{ "sup", 0x2283 },
|
||||||
|
{ "nsub", 0x2284 },
|
||||||
|
{ "sube", 0x2286 },
|
||||||
|
{ "supe", 0x2287 },
|
||||||
|
{ "oplus", 0x2295 },
|
||||||
|
{ "otimes", 0x2297 },
|
||||||
|
{ "perp", 0x22A5 },
|
||||||
|
{ "sdot", 0x22C5 },
|
||||||
|
{ "lceil", 0x2308 },
|
||||||
|
{ "rceil", 0x2309 },
|
||||||
|
{ "lfloor", 0x230A },
|
||||||
|
{ "rfloor", 0x230B },
|
||||||
|
{ "lang", 0x2329 },
|
||||||
|
{ "rang", 0x232A },
|
||||||
|
{ "loz", 0x25CA },
|
||||||
|
{ "spades", 0x2660 },
|
||||||
|
{ "clubs", 0x2663 },
|
||||||
|
{ "hearts", 0x2665 },
|
||||||
|
{ "diams", 0x2666 }
|
||||||
|
}};
|
||||||
|
|
||||||
|
for (auto &pair : names)
|
||||||
|
{
|
||||||
|
const regex re('&' + pair.first + ';');
|
||||||
|
output = regex_replace(output, re, u8c.to_bytes(pair.second));
|
||||||
|
}
|
||||||
|
|
||||||
|
return output;
|
||||||
|
}
|
||||||
|
|
||||||
|
const string URI::archive()
|
||||||
|
{
|
||||||
|
if (_uri.substr(0, 4) != "http")
|
||||||
|
{
|
||||||
|
return "";
|
||||||
|
}
|
||||||
|
|
||||||
|
try
|
||||||
|
{
|
||||||
|
std::ostringstream oss;
|
||||||
|
curlpp::Easy request;
|
||||||
|
set_curlpp_options(request);
|
||||||
|
request.setOpt<curlopts::Url>("https://web.archive.org/save/"
|
||||||
|
+ _uri);
|
||||||
|
request.setOpt<curlopts::WriteStream>(&oss);
|
||||||
|
request.setOpt<curlopts::NoBody>(true); // Make HEAD request.
|
||||||
|
request.setOpt<curlpp::options::Header>(true); // Keep headers.
|
||||||
|
request.perform();
|
||||||
|
|
||||||
|
smatch match;
|
||||||
|
const string answer = oss.str();
|
||||||
|
if (regex_search(answer, match, regex("Content-Location: (.+)\r")))
|
||||||
|
{
|
||||||
|
return "https://web.archive.org" + match[1].str();
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
cerr << "Error: Could not archive page. HTTP status: "
|
||||||
|
<< curlpp::infos::ResponseCode::get(request) << endl;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
catch (const std::exception &e)
|
||||||
|
{
|
||||||
|
cerr << "Error in " << __func__ << ": " << e.what() << endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
return "";
|
||||||
|
}
|
||||||
|
|
||||||
|
const string URI::remove_newlines(string text)
|
||||||
|
{
|
||||||
|
size_t posn = 0;
|
||||||
|
while ((posn = text.find('\n', posn)) != std::string::npos)
|
||||||
|
{
|
||||||
|
text.replace(posn, 1, " ");
|
||||||
|
|
||||||
|
size_t posr = posn - 1;
|
||||||
|
if (text[posr] == '\r')
|
||||||
|
{
|
||||||
|
text.replace(posr, 1, " ");
|
||||||
|
}
|
||||||
|
++posn;
|
||||||
|
}
|
||||||
|
|
||||||
|
return text;
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,80 @@
|
||||||
|
/* This file is part of remwharead.
|
||||||
|
* Copyright © 2019 tastytea <tastytea@tastytea.de>
|
||||||
|
*
|
||||||
|
* This program is free software: you can redistribute it and/or modify
|
||||||
|
* it under the terms of the GNU General Public License as published by
|
||||||
|
* the Free Software Foundation, version 3.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License
|
||||||
|
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef REMWHAREAD_URI_HPP
|
||||||
|
#define REMWHAREAD_URI_HPP
|
||||||
|
|
||||||
|
#include <string>
|
||||||
|
#include <curlpp/Easy.hpp>
|
||||||
|
|
||||||
|
namespace remwharead
|
||||||
|
{
|
||||||
|
using std::string;
|
||||||
|
|
||||||
|
//! A processed HTML page.
|
||||||
|
typedef struct html_extract
|
||||||
|
{
|
||||||
|
string title;
|
||||||
|
string description;
|
||||||
|
string fulltext;
|
||||||
|
} html_extract;
|
||||||
|
|
||||||
|
    //! Download, archive and process an %URI.
    class URI
    {
    public:
        //! Construct object and set URL.
        explicit URI(const string &uri);

        /*!
         *  @brief  Download %URI and extract title, description and full text.
         *
         *  @return The extracted data. All fields are empty if the download
         *          failed.
         */
        const html_extract get();

        /*!
         *  @brief  Save %URI in archive and return archive-URI.
         *
         *  @return The archive-URI, or an empty string on failure.
         */
        const string archive();

    protected:
        //! The %URI this object works on.
        string _uri;

        //! Sets common curlpp options.
        void set_curlpp_options(curlpp::Easy &request);

        //! Extract the title from an HTML page.
        const string extract_title(const string &html);

        //! Extract the description from an HTML page.
        const string extract_description(const string &html);

        //! Removes HTML tags and superfluous spaces from an HTML page.
        const string strip_html(const string &html);

        /*!
         *  @brief  Remove HTML tags.
         *
         *  @param  html HTML page.
         *  @param  tag  If set, only remove this tag.
         *
         *  @return The page without the removed tags.
         */
        const string remove_html_tags(const string &html,
                                      const string &tag = "");

        //! Convert HTML entities to UTF-8.
        const string unescape_html(const string &html);

        //! Replace newlines with spaces.
        const string remove_newlines(string text);
    };
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif // REMWHAREAD_URI_HPP
|
192
src/search.cpp
192
src/search.cpp
|
@ -1,192 +0,0 @@
|
||||||
/* This file is part of remwharead.
|
|
||||||
* Copyright © 2019 tastytea <tastytea@tastytea.de>
|
|
||||||
*
|
|
||||||
* This program is free software: you can redistribute it and/or modify
|
|
||||||
* it under the terms of the GNU General Public License as published by
|
|
||||||
* the Free Software Foundation, version 3.
|
|
||||||
*
|
|
||||||
* This program is distributed in the hope that it will be useful,
|
|
||||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
||||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
||||||
* GNU General Public License for more details.
|
|
||||||
*
|
|
||||||
* You should have received a copy of the GNU General Public License
|
|
||||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include <regex>
|
|
||||||
#include <algorithm>
|
|
||||||
#include <locale>
|
|
||||||
#include <unicode/unistr.h>
|
|
||||||
#include "search.hpp"
|
|
||||||
|
|
||||||
using std::regex;
|
|
||||||
using std::regex_search;
|
|
||||||
using std::smatch;
|
|
||||||
using std::find;
|
|
||||||
using std::find_if;
|
|
||||||
|
|
||||||
// Split a search expression into OR-slices, and each OR-slice into its
// AND-terms. " OR " and " || " separate slices; " AND " and " && " separate
// terms. All terms are converted to lowercase.
const vector<vector<string>> parse_expression(string expression)
{
    const regex re_or("(.+?) (OR|\\|\\|) ");
    const regex re_and("(.+?) (AND|&&) ");

    // Cut text at every match of separator. Whatever follows the last
    // separator becomes the final piece.
    const auto split = [](string text, const regex &separator)
    {
        vector<string> pieces;
        smatch match;
        while (regex_search(text, match, separator))
        {
            pieces.push_back(match[1].str());
            text = match.suffix().str();
        }
        pieces.push_back(text);
        return pieces;
    };

    vector<vector<string>> searchlist;
    for (const string &slice : split(expression, re_or))
    {
        vector<string> terms;
        for (const string &term : split(slice, re_and))
        {
            terms.push_back(to_lowercase(term));
        }
        searchlist.push_back(terms);
    }

    return searchlist;
}
|
|
||||||
|
|
||||||
// Convert a UTF-8 string to lowercase.
const string to_lowercase(const string &str)
{
    // Decode explicitly as UTF-8. The UnicodeString(const char *)
    // constructor would use the platform's default codepage, which does not
    // match the UTF-8 output produced below.
    icu::UnicodeString uni = icu::UnicodeString::fromUTF8(str);
    string out;
    uni.toLower().toUTF8String(out);
    return out;
}
|
|
||||||
|
|
||||||
// Return the entries whose tags match the search expression.
//
// An entry is added once for every OR-slice of the expression for which each
// term equals one of its tags (is_re == false) or matches one of its tags as
// an anchored regular expression (is_re == true). Tags are compared in
// lowercase. With is_re == true an invalid pattern throws std::regex_error
// when it is compiled, even if there are no entries to search.
const vector<Database::entry>
search_tags(const vector<Database::entry> &entries, string expression,
            const bool is_re)
{
    const vector<vector<string>> searchlist = parse_expression(expression);
    vector<Database::entry> result;

    for (const vector<string> &tags_or : searchlist)
    {
        // Compile each pattern once per OR-slice rather than once per
        // entry and tag.
        vector<regex> patterns;
        if (is_re)
        {
            for (const string &tag : tags_or)
            {
                patterns.emplace_back("^" + tag + "$");
            }
        }

        for (const Database::entry &entry : entries)
        {
            // Lowercase the tags of the entry once for all terms.
            vector<string> entry_tags;
            for (const string &tag : entry.tags)
            {
                entry_tags.push_back(to_lowercase(tag));
            }

            // Add entry to result if all tags in an OR-slice match. Stop at
            // the first term that matches nothing.
            bool matched = true;
            for (size_t i = 0; matched && i < tags_or.size(); ++i)
            {
                const auto it = find_if(entry_tags.begin(), entry_tags.end(),
                                        [&](const string &s)
                                        {
                                            if (is_re)
                                            {
                                                return regex_search(s, patterns[i]);
                                            }
                                            return (s == tags_or[i]);
                                        });
                matched = (it != entry_tags.end());
            }

            if (matched)
            {
                result.push_back(entry);
            }
        }
    }

    return result;
}
|
|
||||||
|
|
||||||
// Search tags, title, description and full text.
//
// The result starts with the entries found by search_tags(). After that, an
// entry that is not yet in the result is added if all terms of an OR-slice
// are found in its title, or all in its description, or all in its full
// text. Text is compared in lowercase. With is_re == true the terms are
// regular expressions, otherwise substrings.
const vector<Database::entry>
search_all(const vector<Database::entry> &entries, string expression,
           const bool is_re)
{
    const vector<vector<string>> searchlist = parse_expression(expression);
    vector<Database::entry> result = search_tags(entries, expression, is_re);

    for (const vector<string> &terms_or : searchlist)
    {
        // Compile each pattern once per OR-slice rather than once per entry.
        vector<regex> patterns;
        if (is_re)
        {
            for (const string &term : terms_or)
            {
                patterns.emplace_back(term);
            }
        }

        for (const Database::entry &entry : entries)
        {
            if (find(result.begin(), result.end(), entry) != result.end())
            {   // Skip if already in result list.
                continue;
            }

            // The lowercase texts do not depend on the term, so convert
            // them once per entry.
            const string title = to_lowercase(entry.title);
            const string description = to_lowercase(entry.description);
            const string fulltext = to_lowercase(entry.fulltext);

            bool matched_title = true;
            bool matched_description = true;
            bool matched_fulltext = true;

            for (size_t i = 0; i < terms_or.size(); ++i)
            {
                // Is term number i found in text?
                const auto found = [&](const string &text)
                {
                    if (is_re)
                    {
                        return regex_search(text, patterns[i]);
                    }
                    return (text.find(terms_or[i]) != string::npos);
                };

                // A field stays matched only while every term is found.
                matched_title = matched_title && found(title);
                matched_description = matched_description
                    && found(description);
                matched_fulltext = matched_fulltext && found(fulltext);
            }

            if (matched_title || matched_description || matched_fulltext)
            {
                result.push_back(entry);
            }
        }
    }

    return result;
}
|
|
|
@ -1,40 +0,0 @@
|
||||||
/* This file is part of remwharead.
|
|
||||||
* Copyright © 2019 tastytea <tastytea@tastytea.de>
|
|
||||||
*
|
|
||||||
* This program is free software: you can redistribute it and/or modify
|
|
||||||
* it under the terms of the GNU General Public License as published by
|
|
||||||
* the Free Software Foundation, version 3.
|
|
||||||
*
|
|
||||||
* This program is distributed in the hope that it will be useful,
|
|
||||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
||||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
||||||
* GNU General Public License for more details.
|
|
||||||
*
|
|
||||||
* You should have received a copy of the GNU General Public License
|
|
||||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ifndef REMWHAREAD_SEARCH_HPP
|
|
||||||
#define REMWHAREAD_SEARCH_HPP
|
|
||||||
|
|
||||||
#include <vector>
|
|
||||||
#include <string>
|
|
||||||
#include "sqlite.hpp"
|
|
||||||
|
|
||||||
using std::vector;
|
|
||||||
using std::string;
|
|
||||||
|
|
||||||
const vector<vector<string>> parse_expression(string expression);
|
|
||||||
const string to_lowercase(const string &str);
|
|
||||||
|
|
||||||
//! Search database entries for tags.
|
|
||||||
const vector<Database::entry>
|
|
||||||
search_tags(const vector<Database::entry> &entries, string expression,
|
|
||||||
const bool is_re);
|
|
||||||
|
|
||||||
//! Search tags, title, description and full text.
|
|
||||||
const vector<Database::entry>
|
|
||||||
search_all(const vector<Database::entry> &entries, string expression,
|
|
||||||
const bool is_re);
|
|
||||||
|
|
||||||
#endif // REMWHAREAD_SEARCH_HPP
|
|
160
src/sqlite.cpp
160
src/sqlite.cpp
|
@ -1,160 +0,0 @@
|
||||||
/* This file is part of remwharead.
|
|
||||||
* Copyright © 2019 tastytea <tastytea@tastytea.de>
|
|
||||||
*
|
|
||||||
* This program is free software: you can redistribute it and/or modify
|
|
||||||
* it under the terms of the GNU General Public License as published by
|
|
||||||
* the Free Software Foundation, version 3.
|
|
||||||
*
|
|
||||||
* This program is distributed in the hope that it will be useful,
|
|
||||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
||||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
||||||
* GNU General Public License for more details.
|
|
||||||
*
|
|
||||||
* You should have received a copy of the GNU General Public License
|
|
||||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include <exception>
|
|
||||||
#include <iostream>
|
|
||||||
#include <algorithm>
|
|
||||||
#include <basedir.h>
|
|
||||||
#include <sqlite/execute.hpp>
|
|
||||||
#include <sqlite/query.hpp>
|
|
||||||
#include "time.hpp"
|
|
||||||
#include "sqlite.hpp"
|
|
||||||
|
|
||||||
using std::cerr;
|
|
||||||
using std::endl;
|
|
||||||
|
|
||||||
// Open the database, creating its directory, the database file's table and
// the connection as needed. _connected is only set to true if every step
// succeeded; exceptions are reported on stderr and not propagated.
Database::Database()
    : _connected(false)
{
    try
    {
        // The database lives in the "remwharead" directory below the path
        // returned by xdgDataHome(). The handle is only needed for this
        // lookup and is released right away.
        xdgHandle xdg;
        xdgInitHandle(&xdg);
        _dbpath = xdgDataHome(&xdg) / fs::path("remwharead");
        xdgWipeHandle(&xdg);

        if (!fs::exists(_dbpath))
        {
            fs::create_directories(_dbpath);
        }
        _dbpath /= "database.sqlite";

        _con = std::make_unique<sqlite::connection>(_dbpath);
        // Create the table on first use.
        sqlite::execute(*_con, "CREATE TABLE IF NOT EXISTS remwharead("
                        "uri TEXT, archive_uri TEXT, datetime TEXT, tags TEXT, "
                        "title TEXT, description TEXT, fulltext TEXT);", true);

        _connected = true;
    }
    catch (std::exception &e)
    {
        cerr << "Error in " << __func__ << ": " << e.what() << endl;
    }
}
|
|
||||||
|
|
||||||
// Returns true if the constructor completed without an exception.
Database::operator bool() const
{
    return _connected;
}
|
|
||||||
|
|
||||||
//! Two entries compare equal if their date & time are equal; no other field
//! is taken into account.
bool operator ==(const Database::entry &a, const Database::entry &b)
{
    return a.datetime == b.datetime;
}
|
|
||||||
|
|
||||||
//! Returns the full text with every newline character replaced by the two
//! characters backslash and 'n', so that the text fits on a single line.
const string Database::entry::fulltext_oneline() const
{
    string oneline;
    oneline.reserve(fulltext.size());

    for (const char c : fulltext)
    {
        if (c == '\n')
        {
            oneline += "\\n";
        }
        else
        {
            oneline += c;
        }
    }

    return oneline;
}
|
|
||||||
|
|
||||||
/*!
 *  Inserts one entry into the table "remwharead".
 *
 *  The tags are stored as a single comma separated string, the date & time
 *  in the format produced by timepoint_to_string(…, true). Errors are
 *  reported on stderr.
 *
 *  @param data The entry to store.
 */
void Database::store(const Database::entry &data) const
{
    try
    {
        const string strdatetime = timepoint_to_string(data.datetime, true);

        // Join the tags by position, not by value: comparing each tag with
        // the value of the last one would drop the separator after any
        // earlier tag that happens to equal the last tag.
        string strtags;
        for (auto it = data.tags.begin(); it != data.tags.end(); ++it)
        {
            if (it != data.tags.begin())
            {
                strtags += ",";
            }
            strtags += *it;
        }

        sqlite::execute ins(*_con, "INSERT INTO remwharead "
                            "VALUES(?, ?, ?, ?, ?, ?, ?);");
        ins % data.uri % data.archive_uri % strdatetime % strtags
            % data.title % data.description % data.fulltext;
        ins();
    }
    catch (const std::exception &e)
    {
        cerr << "Error in " << __func__ << ": " << e.what() << endl;
    }
}
|
|
||||||
|
|
||||||
/*!
 *  Fetches all entries whose date & time lies between start and end, newest
 *  first.
 *
 *  The comma separated tag column is split back into a vector; an empty
 *  column yields a vector with one empty tag. On error a message is written
 *  to stderr and an empty vector is returned.
 *
 *  @param start Beginning of the time span.
 *  @param end   End of the time span.
 */
const vector<Database::entry> Database::retrieve(const time_point &start,
                                                 const time_point &end) const
{
    try
    {
        string statement = "SELECT * FROM remwharead WHERE datetime ";
        statement += "BETWEEN '" + timepoint_to_string(start, true);
        statement += "' AND '" + timepoint_to_string(end, true);
        statement += "' ORDER BY datetime DESC;";

        sqlite::query q(*_con, statement);
        sqlite::result_type rows = q.get_result();
        vector<entry> found;

        while (rows->next_row())
        {
            // Column 3 holds the tags, separated by commas.
            const string joined = rows->get_string(3);
            vector<string> tags;
            size_t begin = 0;
            for (;;)
            {
                const size_t comma = joined.find(',', begin);
                tags.push_back(joined.substr(begin, comma - begin));
                if (comma == std::string::npos)
                {
                    break;
                }
                begin = comma + 1;
            }

            found.push_back
                ({
                    rows->get_string(0),
                    rows->get_string(1),
                    string_to_timepoint(rows->get_string(2), true),
                    tags,
                    rows->get_string(4),
                    rows->get_string(5),
                    rows->get_string(6)
                });
        }

        return found;
    }
    catch (std::exception &e)
    {
        cerr << "Error in " << __func__ << ": " << e.what() << endl;
    }

    return {};
}
|
|
|
@ -1,71 +0,0 @@
|
||||||
/* This file is part of remwharead.
|
|
||||||
* Copyright © 2019 tastytea <tastytea@tastytea.de>
|
|
||||||
*
|
|
||||||
* This program is free software: you can redistribute it and/or modify
|
|
||||||
* it under the terms of the GNU General Public License as published by
|
|
||||||
* the Free Software Foundation, version 3.
|
|
||||||
*
|
|
||||||
* This program is distributed in the hope that it will be useful,
|
|
||||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
||||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
||||||
* GNU General Public License for more details.
|
|
||||||
*
|
|
||||||
* You should have received a copy of the GNU General Public License
|
|
||||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ifndef REMWHAREAD_SQLITE_HPP
|
|
||||||
#define REMWHAREAD_SQLITE_HPP
|
|
||||||
|
|
||||||
#include <experimental/filesystem>
|
|
||||||
#include <memory>
|
|
||||||
#include <string>
|
|
||||||
#include <vector>
|
|
||||||
#include <chrono>
|
|
||||||
#include <sqlite/connection.hpp>
|
|
||||||
#include "types.hpp"
|
|
||||||
|
|
||||||
namespace fs = std::experimental::filesystem;
|
|
||||||
using std::string;
|
|
||||||
using std::vector;
|
|
||||||
using std::chrono::system_clock;
|
|
||||||
using time_point = system_clock::time_point;
|
|
||||||
|
|
||||||
class Database
|
|
||||||
{
|
|
||||||
public:
|
|
||||||
typedef struct entry
|
|
||||||
{
|
|
||||||
string uri;
|
|
||||||
string archive_uri;
|
|
||||||
time_point datetime;
|
|
||||||
vector<string> tags;
|
|
||||||
string title;
|
|
||||||
string description;
|
|
||||||
string fulltext;
|
|
||||||
|
|
||||||
//! Returns true if date & time are equal.
|
|
||||||
friend bool operator ==(const Database::entry &a,
|
|
||||||
const Database::entry &b);
|
|
||||||
//! The full text in one line.
|
|
||||||
const string fulltext_oneline() const;
|
|
||||||
} entry;
|
|
||||||
|
|
||||||
Database();
|
|
||||||
operator bool() const;
|
|
||||||
|
|
||||||
//! Store in database.
|
|
||||||
void store(const entry &data) const;
|
|
||||||
|
|
||||||
//! retrieve from database.
|
|
||||||
const vector<entry> retrieve(const time_point &start = time_point(),
|
|
||||||
const time_point &end = system_clock::now())
|
|
||||||
const;
|
|
||||||
|
|
||||||
private:
|
|
||||||
fs::path _dbpath;
|
|
||||||
std::unique_ptr<sqlite::connection> _con;
|
|
||||||
bool _connected;
|
|
||||||
};
|
|
||||||
|
|
||||||
#endif // REMWHAREAD_SQLITE_HPP
|
|
58
src/time.cpp
58
src/time.cpp
|
@ -1,58 +0,0 @@
|
||||||
/* This file is part of remwharead.
|
|
||||||
* Copyright © 2019 tastytea <tastytea@tastytea.de>
|
|
||||||
*
|
|
||||||
* This program is free software: you can redistribute it and/or modify
|
|
||||||
* it under the terms of the GNU General Public License as published by
|
|
||||||
* the Free Software Foundation, version 3.
|
|
||||||
*
|
|
||||||
* This program is distributed in the hope that it will be useful,
|
|
||||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
||||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
||||||
* GNU General Public License for more details.
|
|
||||||
*
|
|
||||||
* You should have received a copy of the GNU General Public License
|
|
||||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include <ctime>
|
|
||||||
#include <iomanip>
|
|
||||||
#include <sstream>
|
|
||||||
#include <cstdint>
|
|
||||||
#include "time.hpp"
|
|
||||||
|
|
||||||
/*!
 *  Converts a date & time string, interpreted as local time, into a
 *  time_point.
 *
 *  @param strtime The text to parse.
 *  @param sqlite  If true, the expected format is "%Y-%m-%d %T" (date and
 *                 time separated by a space), otherwise "%Y-%m-%dT%T".
 *
 *  @return The parsed point in time.
 */
const time_point string_to_timepoint(const string &strtime, bool sqlite)
{
    std::stringstream sstime(strtime);
    struct std::tm tm = {};
    tm.tm_isdst = -1;           // Detect daylight saving time.

    sstime >> std::get_time(&tm, sqlite ? "%Y-%m-%d %T" : "%Y-%m-%dT%T");

    // std::mktime() interprets tm as local time, exactly like the
    // non-standard timelocal() it replaces, but is available everywhere.
    const std::time_t time = std::mktime(&tm);
    return system_clock::from_time_t(time);
}
|
|
||||||
|
|
||||||
/*!
 *  Formats a time_point as local date & time.
 *
 *  @param tp     The point in time to format.
 *  @param sqlite If true, date and time are separated by a space ("%F %T"),
 *                otherwise by the letter T ("%FT%T").
 */
const string timepoint_to_string(const time_point &tp, bool sqlite)
{
    const std::time_t seconds = system_clock::to_time_t(tp);
    const std::tm *local = std::localtime(&seconds);

    char text[32];
    const char *format = sqlite ? "%F %T" : "%FT%T";
    std::strftime(text, sizeof(text), format, local);

    return string(text);
}
|
|
526
src/uri.cpp
526
src/uri.cpp
|
@ -1,526 +0,0 @@
|
||||||
/* This file is part of remwharead.
|
|
||||||
* Copyright © 2019 tastytea <tastytea@tastytea.de>
|
|
||||||
*
|
|
||||||
* This program is free software: you can redistribute it and/or modify
|
|
||||||
* it under the terms of the GNU General Public License as published by
|
|
||||||
* the Free Software Foundation, version 3.
|
|
||||||
*
|
|
||||||
* This program is distributed in the hope that it will be useful,
|
|
||||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
||||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
||||||
* GNU General Public License for more details.
|
|
||||||
*
|
|
||||||
* You should have received a copy of the GNU General Public License
|
|
||||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include <sstream>
|
|
||||||
#include <cstdint>
|
|
||||||
#include <iostream>
|
|
||||||
#include <regex>
|
|
||||||
#include <locale>
|
|
||||||
#include <codecvt>
|
|
||||||
#include <curlpp/cURLpp.hpp>
|
|
||||||
#include <curlpp/Options.hpp>
|
|
||||||
#include <curlpp/Exception.hpp>
|
|
||||||
#include <curlpp/Infos.hpp>
|
|
||||||
#include <version.hpp>
|
|
||||||
#include "uri.hpp"
|
|
||||||
|
|
||||||
namespace curlopts = curlpp::options;
|
|
||||||
using std::uint64_t;
|
|
||||||
using std::cerr;
|
|
||||||
using std::endl;
|
|
||||||
using std::regex;
|
|
||||||
using std::regex_replace;
|
|
||||||
using std::regex_search;
|
|
||||||
using std::smatch;
|
|
||||||
using std::regex_constants::icase;
|
|
||||||
|
|
||||||
//! Remembers the URI; nothing is downloaded until get() or archive() is
//! called.
URI::URI(const string &uri)
    : _uri(uri)
{}
|
|
||||||
|
|
||||||
/*!
 *  Downloads the URI and extracts title, description and full text from the
 *  answer.
 *
 *  If the answer is empty or an exception occurs, a message is written to
 *  stderr and three empty strings are returned.
 */
const html_extract URI::get()
{
    try
    {
        std::ostringstream body;
        curlpp::Easy request;
        set_curlpp_options(request);
        request.setOpt<curlopts::Url>(_uri);
        request.setOpt<curlopts::WriteStream>(&body);
        request.perform();

        const string answer = body.str();
        if (!answer.empty())
        {
            return
            {
                extract_title(answer),
                extract_description(answer),
                strip_html(answer)
            };
        }

        cerr << "Error: Could not download page. Response code: "
             << curlpp::infos::ResponseCode::get(request) << endl;
    }
    catch (const std::exception &e)
    {
        cerr << "Error in " << __func__ << ": " << e.what() << endl;
    }

    return { "", "", "" };
}
|
|
||||||
|
|
||||||
//! Applies the options shared by all requests: user agent
//! "remwharead/<version>", the header "Connection: close" and following of
//! redirects.
void URI::set_curlpp_options(curlpp::Easy &request)
{
    const string user_agent = string("remwharead/") + global::version;

    request.setOpt<curlopts::UserAgent>(user_agent);
    request.setOpt<curlopts::HttpHeader>({ "Connection: close" });
    request.setOpt<curlopts::FollowLocation>(true);
}
|
|
||||||
|
|
||||||
/*!
 *  Returns the text following the first "<title>" up to the next '<', with
 *  HTML entities decoded and newlines replaced by spaces.
 *
 *  An empty string is returned if the URI neither starts with "http" nor
 *  ends in an HTML/XML/RSS file extension, or if no title is found.
 */
const string URI::extract_title(const string &html)
{
    const regex htmlfile_suffix("\\.(.?html?|xml|rss)$");
    const bool is_http = (_uri.substr(0, 4) == "http");

    if (!is_http && !regex_search(_uri, htmlfile_suffix))
    {
        return "";
    }

    const regex re_title("<title>([^<]+)", icase);
    smatch found;
    regex_search(html, found, re_title);

    // found[1] is empty if there was no match.
    const string raw_title = found[1].str();
    return remove_newlines(unescape_html(raw_title));
}
|
|
||||||
|
|
||||||
/*!
 *  Returns the content attribute of the first element that looks like a
 *  description meta tag, with HTML stripped and newlines replaced by spaces.
 *
 *  An empty string is returned if the URI neither starts with "http" nor
 *  ends in an HTML/XML/RSS file extension, or if no description is found.
 */
const string URI::extract_description(const string &html)
{
    const regex htmlfile_suffix("\\.(.?html?|xml|rss)$");
    const bool is_http = (_uri.substr(0, 4) == "http");

    if (!is_http && !regex_search(_uri, htmlfile_suffix))
    {
        return "";
    }

    const regex re_description("description\"[^>]+content=\"([^\"]+)", icase);
    smatch found;
    regex_search(html, found, re_description);

    // found[1] is empty if there was no match.
    const string raw_description = found[1].str();
    return remove_newlines(strip_html(raw_description));
}
|
|
||||||
|
|
||||||
/*!
 *  Reduces an HTML document to its text: script and style elements are
 *  removed together with their content, all remaining tags are removed,
 *  carriage returns are deleted, whitespace in front of newlines is dropped,
 *  runs of newlines are collapsed and HTML entities are decoded.
 */
const string URI::strip_html(const string &html)
{
    const string without_js = remove_html_tags(html, "script");
    const string without_css = remove_html_tags(without_js, "style");
    string text = remove_html_tags(without_css);

    // Delete every carriage return.
    for (size_t cr = text.find("\r", 0); cr != std::string::npos;
         cr = text.find("\r", cr))
    {
        text.erase(cr, 1);
    }

    const regex re_trailing_space("\\s+\n");
    const regex re_newlines("\n{2,}");
    text = regex_replace(text, re_trailing_space, "\n");
    text = regex_replace(text, re_newlines, "\n");

    return unescape_html(text);
}
|
|
||||||
/*!
 *  Removes HTML tags.
 *
 *  @param html The text to process.
 *  @param tag  If empty, everything between '<' and '>' is removed. If not
 *              empty, every occurrence of that element is removed together
 *              with its content.
 */
const string URI::remove_html_tags(const string &html, const string &tag)
{
    // NOTE: This used to be done with regex_replace, but libstdc++
    //       segfaulted.
    if (!tag.empty())
    {
        const string opening = "<" + tag;
        const string closing = "</" + tag;
        // "</" + tag + ">"
        const size_t closing_length = 3 + tag.length();

        string out = html;
        size_t start = 0;
        while ((start = out.find(opening)) != std::string::npos)
        {
            const size_t stop = out.find(closing, start);
            if (stop == std::string::npos)
            {
                break;
            }
            out.erase(start, stop + closing_length - start);
        }
        return out;
    }

    string out;
    size_t cursor = 0;
    do
    {
        // Copy the text in front of the next tag, then skip the tag.
        const size_t open = html.find('<', cursor);
        const size_t close = html.find('>', open);
        out.append(html, cursor, open - cursor);
        cursor = (close == std::string::npos) ? close : close + 1;
    } while (cursor != std::string::npos);

    return out;
}
|
|
||||||
|
|
||||||
/*!
 *  Decodes HTML character references into UTF-8.
 *
 *  Understood are decimal ("&#8364;") and hexadecimal ("&#x20AC;")
 *  references with up to 8 digits as well as the named references listed
 *  below. The input is decoded in a single pass, so the result of a
 *  decoded reference is never decoded a second time ("&amp;lt;" becomes
 *  "&lt;", not "<"). Anything that is not a valid, known reference is
 *  copied to the output unchanged instead of raising an exception.
 *
 *  @param html The text to decode.
 *
 *  @return The decoded text.
 */
const string URI::unescape_html(const string &html)
{
    struct named_entity
    {
        const char *name;
        char32_t codepoint;
    };

    // Source: https://en.wikipedia.org/wiki/List_of_XML_and_HTML_character_
    // entity_references#Character_entity_references_in_HTML
    static const named_entity names[] =
    {
        { "exclamation", 0x0021 },
        { "quot", 0x0022 },
        { "percent", 0x0025 },
        { "amp", 0x0026 },
        { "apos", 0x0027 },
        { "add", 0x002B },
        { "lt", 0x003C },
        { "equal", 0x003D },
        { "gt", 0x003E },
        { "nbsp", 0x00A0 },
        { "iexcl", 0x00A1 },
        { "cent", 0x00A2 },
        { "pound", 0x00A3 },
        { "curren", 0x00A4 },
        { "yen", 0x00A5 },
        { "brvbar", 0x00A6 },
        { "sect", 0x00A7 },
        { "uml", 0x00A8 },
        { "copy", 0x00A9 },
        { "ordf", 0x00AA },
        { "laquo", 0x00AB },
        { "not", 0x00AC },
        { "shy", 0x00AD },
        { "reg", 0x00AE },
        { "macr", 0x00AF },
        { "deg", 0x00B0 },
        { "plusmn", 0x00B1 },
        { "sup2", 0x00B2 },
        { "sup3", 0x00B3 },
        { "acute", 0x00B4 },
        { "micro", 0x00B5 },
        { "para", 0x00B6 },
        { "middot", 0x00B7 },
        { "cedil", 0x00B8 },
        { "sup1", 0x00B9 },
        { "ordm", 0x00BA },
        { "raquo", 0x00BB },
        { "frac14", 0x00BC },
        { "frac12", 0x00BD },
        { "frac34", 0x00BE },
        { "iquest", 0x00BF },
        { "Agrave", 0x00C0 },
        { "Aacute", 0x00C1 },
        { "Acirc", 0x00C2 },
        { "Atilde", 0x00C3 },
        { "Auml", 0x00C4 },
        { "Aring", 0x00C5 },
        { "AElig", 0x00C6 },
        { "Ccedil", 0x00C7 },
        { "Egrave", 0x00C8 },
        { "Eacute", 0x00C9 },
        { "Ecirc", 0x00CA },
        { "Euml", 0x00CB },
        { "Igrave", 0x00CC },
        { "Iacute", 0x00CD },
        { "Icirc", 0x00CE },
        { "Iuml", 0x00CF },
        { "ETH", 0x00D0 },
        { "Ntilde", 0x00D1 },
        { "Ograve", 0x00D2 },
        { "Oacute", 0x00D3 },
        { "Ocirc", 0x00D4 },
        { "Otilde", 0x00D5 },
        { "Ouml", 0x00D6 },
        { "times", 0x00D7 },
        { "Oslash", 0x00D8 },
        { "Ugrave", 0x00D9 },
        { "Uacute", 0x00DA },
        { "Ucirc", 0x00DB },
        { "Uuml", 0x00DC },
        { "Yacute", 0x00DD },
        { "THORN", 0x00DE },
        { "szlig", 0x00DF },
        { "agrave", 0x00E0 },
        { "aacute", 0x00E1 },
        { "acirc", 0x00E2 },
        { "atilde", 0x00E3 },
        { "auml", 0x00E4 },
        { "aring", 0x00E5 },
        { "aelig", 0x00E6 },
        { "ccedil", 0x00E7 },
        { "egrave", 0x00E8 },
        { "eacute", 0x00E9 },
        { "ecirc", 0x00EA },
        { "euml", 0x00EB },
        { "igrave", 0x00EC },
        { "iacute", 0x00ED },
        { "icirc", 0x00EE },
        { "iuml", 0x00EF },
        { "eth", 0x00F0 },
        { "ntilde", 0x00F1 },
        { "ograve", 0x00F2 },
        { "oacute", 0x00F3 },
        { "ocirc", 0x00F4 },
        { "otilde", 0x00F5 },
        { "ouml", 0x00F6 },
        { "divide", 0x00F7 },
        { "oslash", 0x00F8 },
        { "ugrave", 0x00F9 },
        { "uacute", 0x00FA },
        { "ucirc", 0x00FB },
        { "uuml", 0x00FC },
        { "yacute", 0x00FD },
        { "thorn", 0x00FE },
        { "yuml", 0x00FF },
        { "OElig", 0x0152 },
        { "oelig", 0x0153 },
        { "Scaron", 0x0160 },
        { "scaron", 0x0161 },
        { "Yuml", 0x0178 },
        { "fnof", 0x0192 },
        { "circ", 0x02C6 },
        { "tilde", 0x02DC },
        { "Alpha", 0x0391 },
        { "Beta", 0x0392 },
        { "Gamma", 0x0393 },
        { "Delta", 0x0394 },
        { "Epsilon", 0x0395 },
        { "Zeta", 0x0396 },
        { "Eta", 0x0397 },
        { "Theta", 0x0398 },
        { "Iota", 0x0399 },
        { "Kappa", 0x039A },
        { "Lambda", 0x039B },
        { "Mu", 0x039C },
        { "Nu", 0x039D },
        { "Xi", 0x039E },
        { "Omicron", 0x039F },
        { "Pi", 0x03A0 },
        { "Rho", 0x03A1 },
        { "Sigma", 0x03A3 },
        { "Tau", 0x03A4 },
        { "Upsilon", 0x03A5 },
        { "Phi", 0x03A6 },
        { "Chi", 0x03A7 },
        { "Psi", 0x03A8 },
        { "Omega", 0x03A9 },
        { "alpha", 0x03B1 },
        { "beta", 0x03B2 },
        { "gamma", 0x03B3 },
        { "delta", 0x03B4 },
        { "epsilon", 0x03B5 },
        { "zeta", 0x03B6 },
        { "eta", 0x03B7 },
        { "theta", 0x03B8 },
        { "iota", 0x03B9 },
        { "kappa", 0x03BA },
        { "lambda", 0x03BB },
        { "mu", 0x03BC },
        { "nu", 0x03BD },
        { "xi", 0x03BE },
        { "omicron", 0x03BF },
        { "pi", 0x03C0 },
        { "rho", 0x03C1 },
        { "sigmaf", 0x03C2 },
        { "sigma", 0x03C3 },
        { "tau", 0x03C4 },
        { "upsilon", 0x03C5 },
        { "phi", 0x03C6 },
        { "chi", 0x03C7 },
        { "psi", 0x03C8 },
        { "omega", 0x03C9 },
        { "thetasym", 0x03D1 },
        { "upsih", 0x03D2 },
        { "piv", 0x03D6 },
        { "ensp", 0x2002 },
        { "emsp", 0x2003 },
        { "thinsp", 0x2009 },
        { "zwnj", 0x200C },
        { "zwj", 0x200D },
        { "lrm", 0x200E },
        { "rlm", 0x200F },
        { "ndash", 0x2013 },
        { "mdash", 0x2014 },
        { "horbar", 0x2015 },
        { "lsquo", 0x2018 },
        { "rsquo", 0x2019 },
        { "sbquo", 0x201A },
        { "ldquo", 0x201C },
        { "rdquo", 0x201D },
        { "bdquo", 0x201E },
        { "dagger", 0x2020 },
        { "Dagger", 0x2021 },
        { "bull", 0x2022 },
        { "hellip", 0x2026 },
        { "permil", 0x2030 },
        { "prime", 0x2032 },
        { "Prime", 0x2033 },
        { "lsaquo", 0x2039 },
        { "rsaquo", 0x203A },
        { "oline", 0x203E },
        { "frasl", 0x2044 },
        { "euro", 0x20AC },
        { "image", 0x2111 },
        { "weierp", 0x2118 },
        { "real", 0x211C },
        { "trade", 0x2122 },
        { "alefsym", 0x2135 },
        { "larr", 0x2190 },
        { "uarr", 0x2191 },
        { "rarr", 0x2192 },
        { "darr", 0x2193 },
        { "harr", 0x2194 },
        { "crarr", 0x21B5 },
        { "lArr", 0x21D0 },
        { "uArr", 0x21D1 },
        { "rArr", 0x21D2 },
        { "dArr", 0x21D3 },
        { "hArr", 0x21D4 },
        { "forall", 0x2200 },
        { "part", 0x2202 },
        { "exist", 0x2203 },
        { "empty", 0x2205 },
        { "nabla", 0x2207 },
        { "isin", 0x2208 },
        { "notin", 0x2209 },
        { "ni", 0x220B },
        { "prod", 0x220F },
        { "sum", 0x2211 },
        { "minus", 0x2212 },
        { "lowast", 0x2217 },
        { "radic", 0x221A },
        { "prop", 0x221D },
        { "infin", 0x221E },
        { "ang", 0x2220 },
        { "and", 0x2227 },
        { "or", 0x2228 },
        { "cap", 0x2229 },
        { "cup", 0x222A },
        { "int", 0x222B },
        { "there4", 0x2234 },
        { "sim", 0x223C },
        { "cong", 0x2245 },
        { "asymp", 0x2248 },
        { "ne", 0x2260 },
        { "equiv", 0x2261 },
        { "le", 0x2264 },
        { "ge", 0x2265 },
        { "sub", 0x2282 },
        { "sup", 0x2283 },
        { "nsub", 0x2284 },
        { "sube", 0x2286 },
        { "supe", 0x2287 },
        { "oplus", 0x2295 },
        { "otimes", 0x2297 },
        { "perp", 0x22A5 },
        { "sdot", 0x22C5 },
        { "lceil", 0x2308 },
        { "rceil", 0x2309 },
        { "lfloor", 0x230A },
        { "rfloor", 0x230B },
        { "lang", 0x2329 },
        { "rang", 0x232A },
        { "loz", 0x25CA },
        { "spades", 0x2660 },
        { "clubs", 0x2663 },
        { "hearts", 0x2665 },
        { "diams", 0x2666 }
    };

    // Longest text between '&' and ';' that can still be a reference:
    // "exclamation" (11 characters); "#x" plus 8 digits is shorter.
    constexpr size_t max_body = 11;
    constexpr size_t max_digits = 8;

    // Used to convert a code point to UTF-8.
    std::wstring_convert<std::codecvt_utf8<char32_t>, char32_t> u8c;

    string output;
    output.reserve(html.size());

    size_t pos = 0;
    while (pos < html.size())
    {
        const size_t amp = html.find('&', pos);
        if (amp == std::string::npos)
        {
            output.append(html, pos, std::string::npos);
            break;
        }
        output.append(html, pos, amp - pos);

        // Look for the terminating ';' within the next max_body characters.
        size_t semi = amp + 1;
        while (semi < html.size() && html[semi] != ';'
               && (semi - amp - 1) < max_body)
        {
            ++semi;
        }

        bool decoded = false;
        if (semi < html.size() && html[semi] == ';' && semi > amp + 1)
        {
            const string body = html.substr(amp + 1, semi - amp - 1);
            char32_t codepoint = 0;
            bool known = false;

            if (body[0] == '#')
            {
                // 'x' in front of the number means it's hexadecimal, else
                // decimal.
                const bool hex = (body.size() > 2
                                  && (body[1] == 'x' || body[1] == 'X'));
                const string digits = body.substr(hex ? 2 : 1);
                const char *allowed = hex ? "0123456789abcdefABCDEF"
                                          : "0123456789";
                if (!digits.empty() && digits.size() <= max_digits
                    && digits.find_first_not_of(allowed) == std::string::npos)
                {
                    // At most 8 digits, so this can not overflow.
                    const unsigned long value
                        = std::stoul(digits, nullptr, hex ? 16 : 10);
                    // Only Unicode scalar values can be encoded as UTF-8.
                    if (value <= 0x10FFFF
                        && !(value >= 0xD800 && value <= 0xDFFF))
                    {
                        codepoint = static_cast<char32_t>(value);
                        known = true;
                    }
                }
            }
            else
            {
                for (const named_entity &candidate : names)
                {
                    if (body == candidate.name)
                    {
                        codepoint = candidate.codepoint;
                        known = true;
                        break;
                    }
                }
            }

            if (known)
            {
                output += u8c.to_bytes(codepoint);
                pos = semi + 1;
                decoded = true;
            }
        }

        if (!decoded)
        {
            // Not a reference; keep the '&' and continue right behind it.
            output += '&';
            pos = amp + 1;
        }
    }

    return output;
}
|
|
||||||
|
|
||||||
/*!
 *  Asks the Wayback Machine to save the URI.
 *
 *  A HEAD request is sent to https://web.archive.org/save/<URI> and the
 *  Content-Location header of the answer is used to build the URI of the
 *  archived page.
 *
 *  @return The URI of the archived page, or an empty string if the URI does
 *          not start with "http" or the page could not be archived. Errors
 *          are reported on stderr.
 */
const string URI::archive()
{
    if (_uri.substr(0, 4) != "http")
    {
        return "";
    }

    try
    {
        std::ostringstream oss;
        curlpp::Easy request;
        set_curlpp_options(request);
        request.setOpt<curlopts::Url>("https://web.archive.org/save/" + _uri);
        request.setOpt<curlopts::WriteStream>(&oss);
        request.setOpt<curlopts::NoBody>(true); // Make a HEAD request.
        request.setOpt<curlpp::options::Header>(true); // Save headers in oss.
        request.perform();

        smatch match;
        const string answer = oss.str();
        // HTTP field names are case-insensitive and arrive in lowercase
        // with HTTP/2, therefore the header is matched ignoring case.
        const regex re_location("Content-Location: (.+)\r\n", icase);
        if (regex_search(answer, match, re_location))
        {
            return "https://web.archive.org" + match[1].str();
        }

        cerr << "Error: Could not archive page. HTTP status: "
             << curlpp::infos::ResponseCode::get(request) << endl;
    }
    catch (const std::exception &e)
    {
        cerr << "Error in " << __func__ << ": " << e.what() << endl;
    }

    return "";
}
|
|
||||||
|
|
||||||
/*!
 *  Replaces every newline with a space. A carriage return directly in front
 *  of a newline is replaced by a space as well, so "\r\n" becomes two
 *  spaces.
 *
 *  @param text The text to process.
 *
 *  @return The text without newlines.
 */
const string URI::remove_newlines(string text)
{
    size_t posn = 0;
    while ((posn = text.find('\n', posn)) != std::string::npos)
    {
        text[posn] = ' ';

        // Only look at the preceding character if there is one; a newline
        // at the very beginning would otherwise lead to an out-of-range
        // access.
        if (posn > 0 && text[posn - 1] == '\r')
        {
            text[posn - 1] = ' ';
        }
        ++posn;
    }

    return text;
}
|
|
56
src/uri.hpp
56
src/uri.hpp
|
@ -1,56 +0,0 @@
|
||||||
/* This file is part of remwharead.
|
|
||||||
* Copyright © 2019 tastytea <tastytea@tastytea.de>
|
|
||||||
*
|
|
||||||
* This program is free software: you can redistribute it and/or modify
|
|
||||||
* it under the terms of the GNU General Public License as published by
|
|
||||||
* the Free Software Foundation, version 3.
|
|
||||||
*
|
|
||||||
* This program is distributed in the hope that it will be useful,
|
|
||||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
||||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
||||||
* GNU General Public License for more details.
|
|
||||||
*
|
|
||||||
* You should have received a copy of the GNU General Public License
|
|
||||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ifndef REMWHAREAD_URI_HPP
|
|
||||||
#define REMWHAREAD_URI_HPP
|
|
||||||
|
|
||||||
#include <string>
|
|
||||||
#include <curlpp/Easy.hpp>
|
|
||||||
|
|
||||||
using std::string;
|
|
||||||
|
|
||||||
typedef struct html_extract
|
|
||||||
{
|
|
||||||
string title;
|
|
||||||
string description;
|
|
||||||
string fulltext;
|
|
||||||
} html_extract;
|
|
||||||
|
|
||||||
class URI
|
|
||||||
{
|
|
||||||
public:
|
|
||||||
explicit URI(const string &uri);
|
|
||||||
|
|
||||||
//! Download URI and extract title, description and full text.
|
|
||||||
const html_extract get();
|
|
||||||
//! Save URI in archive and return URI.
|
|
||||||
const string archive();
|
|
||||||
|
|
||||||
protected:
|
|
||||||
string _uri;
|
|
||||||
|
|
||||||
void set_curlpp_options(curlpp::Easy &request);
|
|
||||||
const string extract_title(const string &html);
|
|
||||||
const string extract_description(const string &html);
|
|
||||||
const string strip_html(const string &html);
|
|
||||||
//! Remove all HTML tags. If tag is not empty, remove tag and its content.
|
|
||||||
const string remove_html_tags(const string &html, const string &tag = "");
|
|
||||||
const string unescape_html(const string &html);
|
|
||||||
//! Replace newlines with spaces.
|
|
||||||
const string remove_newlines(string text);
|
|
||||||
};
|
|
||||||
|
|
||||||
#endif // REMWHAREAD_URI_HPP
|
|
|
@ -5,7 +5,8 @@ find_package(Catch2)
|
||||||
if(Catch2_FOUND) # Catch 2.x
|
if(Catch2_FOUND) # Catch 2.x
|
||||||
include(Catch)
|
include(Catch)
|
||||||
add_executable(all_tests main.cpp ${sources_tests})
|
add_executable(all_tests main.cpp ${sources_tests})
|
||||||
target_link_libraries(all_tests Catch2::Catch2 ${PROJECT_NAME}_testlib)
|
target_link_libraries(all_tests
|
||||||
|
Catch2::Catch2 ${PROJECT_NAME} ${PROJECT_NAME}_testlib)
|
||||||
target_include_directories(all_tests PRIVATE "/usr/include/catch2")
|
target_include_directories(all_tests PRIVATE "/usr/include/catch2")
|
||||||
catch_discover_tests(all_tests EXTRA_ARGS "${EXTRA_TEST_ARGS}")
|
catch_discover_tests(all_tests EXTRA_ARGS "${EXTRA_TEST_ARGS}")
|
||||||
else() # Catch 1.x
|
else() # Catch 1.x
|
||||||
|
@ -14,7 +15,7 @@ else() # Catch 1.x
|
||||||
foreach(src ${sources_tests})
|
foreach(src ${sources_tests})
|
||||||
get_filename_component(bin ${src} NAME_WE)
|
get_filename_component(bin ${src} NAME_WE)
|
||||||
add_executable(${bin} main.cpp ${src})
|
add_executable(${bin} main.cpp ${src})
|
||||||
target_link_libraries(${bin} ${PROJECT_NAME}_testlib)
|
target_link_libraries(${bin} ${PROJECT_NAME} ${PROJECT_NAME}_testlib)
|
||||||
add_test(${bin} ${bin} "${EXTRA_TEST_ARGS}")
|
add_test(${bin} ${bin} "${EXTRA_TEST_ARGS}")
|
||||||
endforeach()
|
endforeach()
|
||||||
else()
|
else()
|
||||||
|
|
|
@ -22,8 +22,9 @@
|
||||||
#include <catch.hpp>
|
#include <catch.hpp>
|
||||||
#include "time.hpp"
|
#include "time.hpp"
|
||||||
#include "sqlite.hpp"
|
#include "sqlite.hpp"
|
||||||
#include "adoc.hpp"
|
#include "export/adoc.hpp"
|
||||||
|
|
||||||
|
using namespace remwharead;
|
||||||
using std::string;
|
using std::string;
|
||||||
using std::chrono::system_clock;
|
using std::chrono::system_clock;
|
||||||
using std::regex;
|
using std::regex;
|
||||||
|
|
|
@ -21,8 +21,9 @@
|
||||||
#include <chrono>
|
#include <chrono>
|
||||||
#include <catch.hpp>
|
#include <catch.hpp>
|
||||||
#include "sqlite.hpp"
|
#include "sqlite.hpp"
|
||||||
#include "bookmarks.hpp"
|
#include "export/bookmarks.hpp"
|
||||||
|
|
||||||
|
using namespace remwharead;
|
||||||
using std::string;
|
using std::string;
|
||||||
using std::chrono::system_clock;
|
using std::chrono::system_clock;
|
||||||
using std::regex;
|
using std::regex;
|
||||||
|
|
|
@ -22,8 +22,9 @@
|
||||||
#include <catch.hpp>
|
#include <catch.hpp>
|
||||||
#include "time.hpp"
|
#include "time.hpp"
|
||||||
#include "sqlite.hpp"
|
#include "sqlite.hpp"
|
||||||
#include "csv.hpp"
|
#include "export/csv.hpp"
|
||||||
|
|
||||||
|
using namespace remwharead;
|
||||||
using std::string;
|
using std::string;
|
||||||
using std::chrono::system_clock;
|
using std::chrono::system_clock;
|
||||||
using std::regex;
|
using std::regex;
|
||||||
|
|
|
@ -20,6 +20,7 @@
|
||||||
#include <catch.hpp>
|
#include <catch.hpp>
|
||||||
#include "parse_options.hpp"
|
#include "parse_options.hpp"
|
||||||
|
|
||||||
|
using namespace remwharead;
|
||||||
using std::string;
|
using std::string;
|
||||||
using std::vector;
|
using std::vector;
|
||||||
|
|
||||||
|
@ -90,7 +91,7 @@ SCENARIO ("The option parser works correctly")
|
||||||
|
|
||||||
THEN ("No exception is thrown")
|
THEN ("No exception is thrown")
|
||||||
AND_THEN ("status code is 0")
|
AND_THEN ("status code is 0")
|
||||||
AND_THEN ("options.file is empty")
|
AND_THEN ("Tag and URI are right")
|
||||||
{
|
{
|
||||||
REQUIRE_FALSE(exception);
|
REQUIRE_FALSE(exception);
|
||||||
REQUIRE(opts.status_code == 0);
|
REQUIRE(opts.status_code == 0);
|
||||||
|
@ -126,7 +127,7 @@ SCENARIO ("The option parser works correctly")
|
||||||
|
|
||||||
THEN ("No exception is thrown")
|
THEN ("No exception is thrown")
|
||||||
AND_THEN ("status code is 0")
|
AND_THEN ("status code is 0")
|
||||||
AND_THEN ("options.file is empty")
|
AND_THEN ("Tag and URI are right")
|
||||||
{
|
{
|
||||||
REQUIRE_FALSE(exception);
|
REQUIRE_FALSE(exception);
|
||||||
REQUIRE(opts.status_code == 0);
|
REQUIRE(opts.status_code == 0);
|
||||||
|
|
|
@ -22,6 +22,7 @@
|
||||||
#include "sqlite.hpp"
|
#include "sqlite.hpp"
|
||||||
#include "search.hpp"
|
#include "search.hpp"
|
||||||
|
|
||||||
|
using namespace remwharead;
|
||||||
using std::string;
|
using std::string;
|
||||||
using std::chrono::system_clock;
|
using std::chrono::system_clock;
|
||||||
using std::vector;
|
using std::vector;
|
||||||
|
|
|
@ -21,8 +21,9 @@
|
||||||
#include <chrono>
|
#include <chrono>
|
||||||
#include <catch.hpp>
|
#include <catch.hpp>
|
||||||
#include "sqlite.hpp"
|
#include "sqlite.hpp"
|
||||||
#include "simple.hpp"
|
#include "export/simple.hpp"
|
||||||
|
|
||||||
|
using namespace remwharead;
|
||||||
using std::string;
|
using std::string;
|
||||||
using std::chrono::system_clock;
|
using std::chrono::system_clock;
|
||||||
using std::regex;
|
using std::regex;
|
||||||
|
|
|
@ -20,6 +20,7 @@
|
||||||
#include <catch.hpp>
|
#include <catch.hpp>
|
||||||
#include "time.hpp"
|
#include "time.hpp"
|
||||||
|
|
||||||
|
using namespace remwharead;
|
||||||
using std::string;
|
using std::string;
|
||||||
using std::chrono::system_clock;
|
using std::chrono::system_clock;
|
||||||
using std::chrono::seconds;
|
using std::chrono::seconds;
|
||||||
|
|
|
@ -17,6 +17,7 @@
|
||||||
#include <catch.hpp>
|
#include <catch.hpp>
|
||||||
#include "search.hpp"
|
#include "search.hpp"
|
||||||
|
|
||||||
|
using namespace remwharead;
|
||||||
|
|
||||||
SCENARIO ("Unicode is handled correctly")
|
SCENARIO ("Unicode is handled correctly")
|
||||||
{
|
{
|
||||||
|
|
|
@ -19,6 +19,7 @@
|
||||||
#include <catch.hpp>
|
#include <catch.hpp>
|
||||||
#include "uri.hpp"
|
#include "uri.hpp"
|
||||||
|
|
||||||
|
using namespace remwharead;
|
||||||
using std::string;
|
using std::string;
|
||||||
|
|
||||||
SCENARIO ("URI works correctly")
|
SCENARIO ("URI works correctly")
|
||||||
|
|
Loading…
Reference in New Issue