Renamed URL to URI.
This commit is contained in:
parent
98743148f9
commit
914d22569f
|
@ -13,20 +13,20 @@ remwharead - Remember what you read, and when
|
||||||
|
|
||||||
== SYNOPSIS
|
== SYNOPSIS
|
||||||
|
|
||||||
*remwharead* [*-t* _tags_] _URL_
|
*remwharead* [*-t* _tags_] _URI_
|
||||||
|
|
||||||
*remwharead* *-e* _format_ [*-f* _file_] [*-s* _start_,_end_]
|
*remwharead* *-e* _format_ [*-f* _file_] [*-s* _start_,_end_]
|
||||||
|
|
||||||
== DESCRIPTION
|
== DESCRIPTION
|
||||||
|
|
||||||
*remwharead* saves URLs of things you read in a database along with an URL to
|
*remwharead* saves URIs of things you read in a database along with a URI to
|
||||||
the archived version, the current date and time, title, description, the full
|
the archived version, the current date and time, title, description, the full
|
||||||
text of the page and optional tags.
|
text of the page and optional tags.
|
||||||
|
|
||||||
== OPTIONS
|
== OPTIONS
|
||||||
|
|
||||||
*-t* _tags_, *--tags* _tags_::
|
*-t* _tags_, *--tags* _tags_::
|
||||||
Add tags to _URL_, delimited by commas.
|
Add tags to _URI_, delimited by commas.
|
||||||
|
|
||||||
*-e* _format_, *--export* _format_::
|
*-e* _format_, *--export* _format_::
|
||||||
Export to _format_. Possible values are _csv_ and _asciidoc_. See _FORMATS_.
|
Export to _format_. Possible values are _csv_ and _asciidoc_. See _FORMATS_.
|
||||||
|
|
10
src/main.cpp
10
src/main.cpp
|
@ -19,8 +19,8 @@
|
||||||
#include <chrono>
|
#include <chrono>
|
||||||
#include "sqlite.hpp"
|
#include "sqlite.hpp"
|
||||||
#include "parse_options.hpp"
|
#include "parse_options.hpp"
|
||||||
#include "url.hpp"
|
|
||||||
#include "csv.hpp"
|
#include "csv.hpp"
|
||||||
|
#include "uri.hpp"
|
||||||
|
|
||||||
using std::cout;
|
using std::cout;
|
||||||
using std::cerr;
|
using std::cerr;
|
||||||
|
@ -44,11 +44,11 @@ int main(const int argc, const char *argv[])
|
||||||
return 2;
|
return 2;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!opts.url.empty())
|
if (!opts.uri.empty())
|
||||||
{
|
{
|
||||||
URL url(opts.url);
|
URI uri(opts.uri);
|
||||||
html_extract page = url.get();
|
html_extract page = uri.get();
|
||||||
db.store({opts.url, url.archive(), system_clock::now(), opts.tags,
|
db.store({opts.uri, uri.archive(), system_clock::now(), opts.tags,
|
||||||
page.title, page.description, page.fulltext});
|
page.title, page.description, page.fulltext});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -44,7 +44,7 @@ const options parse_options(const int argc, const char *argv[])
|
||||||
{
|
{
|
||||||
popl::OptionParser op("Available options");
|
popl::OptionParser op("Available options");
|
||||||
op.add<popl::Value<string>>
|
op.add<popl::Value<string>>
|
||||||
("t", "tags", "Add tags to URL, delimited by commas.", "", &tags);
|
("t", "tags", "Add tags to URI, delimited by commas.", "", &tags);
|
||||||
op.add<popl::Value<string>>
|
op.add<popl::Value<string>>
|
||||||
("e", "export", "Export to format.", "", &format);
|
("e", "export", "Export to format.", "", &format);
|
||||||
op.add<popl::Value<string>>
|
op.add<popl::Value<string>>
|
||||||
|
@ -62,7 +62,7 @@ const options parse_options(const int argc, const char *argv[])
|
||||||
|
|
||||||
if (option_help->is_set())
|
if (option_help->is_set())
|
||||||
{
|
{
|
||||||
cout << "Usage: " << argv[0] << " [-t tags] URL\n"
|
cout << "Usage: " << argv[0] << " [-t tags] URI\n"
|
||||||
<< " " << argv[0]
|
<< " " << argv[0]
|
||||||
<< " -e format [-f file] [-s start,end]\n";
|
<< " -e format [-f file] [-s start,end]\n";
|
||||||
cout << op;
|
cout << op;
|
||||||
|
@ -155,12 +155,12 @@ const options parse_options(const int argc, const char *argv[])
|
||||||
|
|
||||||
if (op.non_option_args().size() > 0)
|
if (op.non_option_args().size() > 0)
|
||||||
{
|
{
|
||||||
opts.url = op.non_option_args().front();
|
opts.uri = op.non_option_args().front();
|
||||||
}
|
}
|
||||||
|
|
||||||
if (opts.url == "" && opts.format == export_format::undefined)
|
if (opts.uri == "" && opts.format == export_format::undefined)
|
||||||
{
|
{
|
||||||
cerr << "Error: You have to specify either URL or --export.\n";
|
cerr << "Error: You have to specify either URI or --export.\n";
|
||||||
return options(1);
|
return options(1);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -39,7 +39,7 @@ typedef struct options
|
||||||
string file;
|
string file;
|
||||||
array<time_point, 2> span = { time_point(), system_clock::now() };
|
array<time_point, 2> span = { time_point(), system_clock::now() };
|
||||||
sort_attribute sort = sort_attribute::both;
|
sort_attribute sort = sort_attribute::both;
|
||||||
string url;
|
string uri;
|
||||||
uint8_t status_code = 0;
|
uint8_t status_code = 0;
|
||||||
|
|
||||||
options();
|
options();
|
||||||
|
|
|
@ -26,7 +26,7 @@
|
||||||
#include <curlpp/Exception.hpp>
|
#include <curlpp/Exception.hpp>
|
||||||
#include <curlpp/Infos.hpp>
|
#include <curlpp/Infos.hpp>
|
||||||
#include <version.hpp>
|
#include <version.hpp>
|
||||||
#include "url.hpp"
|
#include "uri.hpp"
|
||||||
|
|
||||||
namespace curlopts = curlpp::options;
|
namespace curlopts = curlpp::options;
|
||||||
using std::uint64_t;
|
using std::uint64_t;
|
||||||
|
@ -37,12 +37,12 @@ using std::regex_replace;
|
||||||
using std::regex_search;
|
using std::regex_search;
|
||||||
using std::smatch;
|
using std::smatch;
|
||||||
|
|
||||||
URL::URL(const string &url)
|
URI::URI(const string &uri)
|
||||||
:_url(url)
|
:_uri(uri)
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
const html_extract URL::get()
|
const html_extract URI::get()
|
||||||
{
|
{
|
||||||
try
|
try
|
||||||
{
|
{
|
||||||
|
@ -52,7 +52,7 @@ const html_extract URL::get()
|
||||||
+ global::version);
|
+ global::version);
|
||||||
request.setOpt<curlopts::HttpHeader>({ "Connection: close" });
|
request.setOpt<curlopts::HttpHeader>({ "Connection: close" });
|
||||||
request.setOpt<curlopts::FollowLocation>(true);
|
request.setOpt<curlopts::FollowLocation>(true);
|
||||||
request.setOpt<curlopts::Url>(_url);
|
request.setOpt<curlopts::Url>(_uri);
|
||||||
request.setOpt<curlopts::WriteStream>(&oss);
|
request.setOpt<curlopts::WriteStream>(&oss);
|
||||||
request.perform();
|
request.perform();
|
||||||
|
|
||||||
|
@ -80,21 +80,21 @@ const html_extract URL::get()
|
||||||
return { "", "", "" };
|
return { "", "", "" };
|
||||||
}
|
}
|
||||||
|
|
||||||
const string URL::extract_title(const string &html)
|
const string URI::extract_title(const string &html)
|
||||||
{
|
{
|
||||||
smatch match;
|
smatch match;
|
||||||
regex_search(html, match, regex("<title>([^<]+)"));
|
regex_search(html, match, regex("<title>([^<]+)"));
|
||||||
return match[1].str();
|
return remove_newlines(match[1].str());
|
||||||
}
|
}
|
||||||
|
|
||||||
const string URL::extract_description(const string &html)
|
const string URI::extract_description(const string &html)
|
||||||
{
|
{
|
||||||
smatch match;
|
smatch match;
|
||||||
regex_search(html, match, regex("description\"[^>]+content=\"([^\"]+)"));
|
regex_search(html, match, regex("description\"[^>]+content=\"([^\"]+)"));
|
||||||
return match[1].str();
|
return remove_newlines(match[1].str());
|
||||||
}
|
}
|
||||||
|
|
||||||
const string URL::strip_html(const string &html)
|
const string URI::strip_html(const string &html)
|
||||||
{
|
{
|
||||||
string out;
|
string out;
|
||||||
out = regex_replace(html, regex("<script[^<]+"), ""); // Remove JavaScript.
|
out = regex_replace(html, regex("<script[^<]+"), ""); // Remove JavaScript.
|
||||||
|
@ -107,7 +107,7 @@ const string URL::strip_html(const string &html)
|
||||||
return unescape_html(out);
|
return unescape_html(out);
|
||||||
}
|
}
|
||||||
|
|
||||||
const string URL::unescape_html(const string &html)
|
const string URI::unescape_html(const string &html)
|
||||||
{
|
{
|
||||||
string buffer = html;
|
string buffer = html;
|
||||||
string output;
|
string output;
|
||||||
|
@ -407,9 +407,9 @@ const string URL::unescape_html(const string &html)
|
||||||
return output;
|
return output;
|
||||||
}
|
}
|
||||||
|
|
||||||
const string URL::archive()
|
const string URI::archive()
|
||||||
{
|
{
|
||||||
if (_url.substr(0, 4) != "http")
|
if (_uri.substr(0, 4) != "http")
|
||||||
{
|
{
|
||||||
return "";
|
return "";
|
||||||
}
|
}
|
||||||
|
@ -422,7 +422,7 @@ const string URL::archive()
|
||||||
+ global::version);
|
+ global::version);
|
||||||
request.setOpt<curlopts::HttpHeader>({ "Connection: close" });
|
request.setOpt<curlopts::HttpHeader>({ "Connection: close" });
|
||||||
request.setOpt<curlopts::FollowLocation>(true);
|
request.setOpt<curlopts::FollowLocation>(true);
|
||||||
request.setOpt<curlopts::Url>("https://web.archive.org/save/" + _url);
|
request.setOpt<curlopts::Url>("https://web.archive.org/save/" + _uri);
|
||||||
request.setOpt<curlopts::WriteStream>(&oss);
|
request.setOpt<curlopts::WriteStream>(&oss);
|
||||||
request.setOpt<curlopts::NoBody>(true); // Make a HEAD request.
|
request.setOpt<curlopts::NoBody>(true); // Make a HEAD request.
|
||||||
request.setOpt<curlpp::options::Header>(true); // Save headers in oss.
|
request.setOpt<curlpp::options::Header>(true); // Save headers in oss.
|
||||||
|
@ -442,3 +442,8 @@ const string URL::archive()
|
||||||
|
|
||||||
return "";
|
return "";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const string URI::remove_newlines(const string &text)
|
||||||
|
{
|
||||||
|
return regex_replace(text, regex("\n"), " ");
|
||||||
|
}
|
|
@ -14,8 +14,8 @@
|
||||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#ifndef REMWHAREAD_URL_HPP
|
#ifndef REMWHAREAD_URI_HPP
|
||||||
#define REMWHAREAD_URL_HPP
|
#define REMWHAREAD_URI_HPP
|
||||||
|
|
||||||
#include <string>
|
#include <string>
|
||||||
|
|
||||||
|
@ -28,23 +28,24 @@ typedef struct html_extract
|
||||||
string fulltext;
|
string fulltext;
|
||||||
} html_extract;
|
} html_extract;
|
||||||
|
|
||||||
class URL
|
class URI
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
explicit URL(const string &url);
|
explicit URI(const string &uri);
|
||||||
|
|
||||||
//! Download URL and extract title, description and full text.
|
//! Download URI and extract title, description and full text.
|
||||||
const html_extract get();
|
const html_extract get();
|
||||||
//! Save URL in archive and return URL.
|
//! Save URI in archive and return the URI of the archived version.
|
||||||
const string archive();
|
const string archive();
|
||||||
|
|
||||||
private:
|
private:
|
||||||
string _url;
|
string _uri;
|
||||||
|
|
||||||
const string extract_title(const string &html);
|
const string extract_title(const string &html);
|
||||||
const string extract_description(const string &html);
|
const string extract_description(const string &html);
|
||||||
const string strip_html(const string &html);
|
const string strip_html(const string &html);
|
||||||
const string unescape_html(const string &html);
|
const string unescape_html(const string &html);
|
||||||
|
const string remove_newlines(const string &text);
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif // REMWHAREAD_URL_HPP
|
#endif // REMWHAREAD_URI_HPP
|
|
@ -27,7 +27,7 @@ SCENARIO ("The option parser works correctly")
|
||||||
{
|
{
|
||||||
bool exception = false;
|
bool exception = false;
|
||||||
options opts;
|
options opts;
|
||||||
const string url = "https://example.com/article.html";
|
const string uri = "https://example.com/article.html";
|
||||||
|
|
||||||
WHEN ("The options are --help --file test")
|
WHEN ("The options are --help --file test")
|
||||||
{
|
{
|
||||||
|
@ -75,12 +75,12 @@ SCENARIO ("The option parser works correctly")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
WHEN ("The options are -t 💩 " + url)
|
WHEN ("The options are -t 💩 " + uri)
|
||||||
{
|
{
|
||||||
try
|
try
|
||||||
{
|
{
|
||||||
const char *argv[]
|
const char *argv[]
|
||||||
= { "remwharead", "-t", "💩", url.c_str() };
|
= { "remwharead", "-t", "💩", uri.c_str() };
|
||||||
opts = parse_options(4, argv);
|
opts = parse_options(4, argv);
|
||||||
}
|
}
|
||||||
catch (const std::exception &e)
|
catch (const std::exception &e)
|
||||||
|
@ -95,7 +95,7 @@ SCENARIO ("The option parser works correctly")
|
||||||
REQUIRE_FALSE(exception);
|
REQUIRE_FALSE(exception);
|
||||||
REQUIRE(opts.status_code == 0);
|
REQUIRE(opts.status_code == 0);
|
||||||
REQUIRE(opts.tags == vector<string>{ "💩" });
|
REQUIRE(opts.tags == vector<string>{ "💩" });
|
||||||
REQUIRE(opts.url == url);
|
REQUIRE(opts.uri == uri);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -115,7 +115,7 @@ SCENARIO ("The option parser works correctly")
|
||||||
"remwharead",
|
"remwharead",
|
||||||
"-t",
|
"-t",
|
||||||
tags.c_str(),
|
tags.c_str(),
|
||||||
url.c_str()
|
uri.c_str()
|
||||||
};
|
};
|
||||||
opts = parse_options(4, argv);
|
opts = parse_options(4, argv);
|
||||||
}
|
}
|
||||||
|
@ -131,7 +131,7 @@ SCENARIO ("The option parser works correctly")
|
||||||
REQUIRE_FALSE(exception);
|
REQUIRE_FALSE(exception);
|
||||||
REQUIRE(opts.status_code == 0);
|
REQUIRE(opts.status_code == 0);
|
||||||
REQUIRE(opts.tags == vector<string>{ "tag1", longstring, "tag3" });
|
REQUIRE(opts.tags == vector<string>{ "tag1", longstring, "tag3" });
|
||||||
REQUIRE(opts.url == url);
|
REQUIRE(opts.uri == uri);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue