Renamed URL to URI.

This commit is contained in:
tastytea 2019-05-16 08:36:35 +02:00
parent 98743148f9
commit 914d22569f
Signed by: tastytea
GPG Key ID: CFC39497F1B26E07
7 changed files with 48 additions and 42 deletions

View File

@ -13,20 +13,20 @@ remwharead - Remember what you read, and when
== SYNOPSIS
*remwharead* [*-t* _tags_] _URL_
*remwharead* [*-t* _tags_] _URI_
*remwharead* *-e* _format_ [*-f* _file_] [*-s* _start_,_end_]
== DESCRIPTION
*remwharead* saves URLs of things you read in a database along with a URL to
*remwharead* saves URIs of things you read in a database along with a URI to
the archived version, the current date and time, title, description, the full
text of the page and optional tags.
== OPTIONS
*-t* _tags_, *--tags* _tags_::
Add tags to _URL_, delimited by commas.
Add tags to _URI_, delimited by commas.
*-e* _format_, *--export* _format_::
Export to _format_. Possible values are _csv_ and _asciidoc_. See _FORMATS_.

View File

@ -19,8 +19,8 @@
#include <chrono>
#include "sqlite.hpp"
#include "parse_options.hpp"
#include "url.hpp"
#include "csv.hpp"
#include "uri.hpp"
using std::cout;
using std::cerr;
@ -44,11 +44,11 @@ int main(const int argc, const char *argv[])
return 2;
}
if (!opts.url.empty())
if (!opts.uri.empty())
{
URL url(opts.url);
html_extract page = url.get();
db.store({opts.url, url.archive(), system_clock::now(), opts.tags,
URI uri(opts.uri);
html_extract page = uri.get();
db.store({opts.uri, uri.archive(), system_clock::now(), opts.tags,
page.title, page.description, page.fulltext});
}

View File

@ -44,7 +44,7 @@ const options parse_options(const int argc, const char *argv[])
{
popl::OptionParser op("Available options");
op.add<popl::Value<string>>
("t", "tags", "Add tags to URL, delimited by commas.", "", &tags);
("t", "tags", "Add tags to URI, delimited by commas.", "", &tags);
op.add<popl::Value<string>>
("e", "export", "Export to format.", "", &format);
op.add<popl::Value<string>>
@ -62,7 +62,7 @@ const options parse_options(const int argc, const char *argv[])
if (option_help->is_set())
{
cout << "Usage: " << argv[0] << " [-t tags] URL\n"
cout << "Usage: " << argv[0] << " [-t tags] URI\n"
<< " " << argv[0]
<< " -e format [-f file] [-s start,end]\n";
cout << op;
@ -155,12 +155,12 @@ const options parse_options(const int argc, const char *argv[])
if (op.non_option_args().size() > 0)
{
opts.url = op.non_option_args().front();
opts.uri = op.non_option_args().front();
}
if (opts.url == "" && opts.format == export_format::undefined)
if (opts.uri == "" && opts.format == export_format::undefined)
{
cerr << "Error: You have to specify either URL or --export.\n";
cerr << "Error: You have to specify either URI or --export.\n";
return options(1);
}
}

View File

@ -39,7 +39,7 @@ typedef struct options
string file;
array<time_point, 2> span = { time_point(), system_clock::now() };
sort_attribute sort = sort_attribute::both;
string url;
string uri;
uint8_t status_code = 0;
options();

View File

@ -26,7 +26,7 @@
#include <curlpp/Exception.hpp>
#include <curlpp/Infos.hpp>
#include <version.hpp>
#include "url.hpp"
#include "uri.hpp"
namespace curlopts = curlpp::options;
using std::uint64_t;
@ -37,12 +37,12 @@ using std::regex_replace;
using std::regex_search;
using std::smatch;
URL::URL(const string &url)
:_url(url)
URI::URI(const string &uri)
:_uri(uri)
{
}
const html_extract URL::get()
const html_extract URI::get()
{
try
{
@ -52,7 +52,7 @@ const html_extract URL::get()
+ global::version);
request.setOpt<curlopts::HttpHeader>({ "Connection: close" });
request.setOpt<curlopts::FollowLocation>(true);
request.setOpt<curlopts::Url>(_url);
request.setOpt<curlopts::Url>(_uri);
request.setOpt<curlopts::WriteStream>(&oss);
request.perform();
@ -80,21 +80,21 @@ const html_extract URL::get()
return { "", "", "" };
}
const string URL::extract_title(const string &html)
const string URI::extract_title(const string &html)
{
smatch match;
regex_search(html, match, regex("<title>([^<]+)"));
return match[1].str();
return remove_newlines(match[1].str());
}
const string URL::extract_description(const string &html)
const string URI::extract_description(const string &html)
{
smatch match;
regex_search(html, match, regex("description\"[^>]+content=\"([^\"]+)"));
return match[1].str();
return remove_newlines(match[1].str());
}
const string URL::strip_html(const string &html)
const string URI::strip_html(const string &html)
{
string out;
out = regex_replace(html, regex("<script[^<]+"), ""); // Remove JavaScript.
@ -107,7 +107,7 @@ const string URL::strip_html(const string &html)
return unescape_html(out);
}
const string URL::unescape_html(const string &html)
const string URI::unescape_html(const string &html)
{
string buffer = html;
string output;
@ -407,9 +407,9 @@ const string URL::unescape_html(const string &html)
return output;
}
const string URL::archive()
const string URI::archive()
{
if (_url.substr(0, 4) != "http")
if (_uri.substr(0, 4) != "http")
{
return "";
}
@ -422,7 +422,7 @@ const string URL::archive()
+ global::version);
request.setOpt<curlopts::HttpHeader>({ "Connection: close" });
request.setOpt<curlopts::FollowLocation>(true);
request.setOpt<curlopts::Url>("https://web.archive.org/save/" + _url);
request.setOpt<curlopts::Url>("https://web.archive.org/save/" + _uri);
request.setOpt<curlopts::WriteStream>(&oss);
request.setOpt<curlopts::NoBody>(true); // Make a HEAD request.
request.setOpt<curlpp::options::Header>(true); // Save headers in oss.
@ -442,3 +442,8 @@ const string URL::archive()
return "";
}
//! Return a copy of text in which every newline character is replaced by a
//! single space.
const string URI::remove_newlines(const string &text)
{
    string flattened = text;
    for (char &ch : flattened)
    {
        if (ch == '\n')
        {
            ch = ' ';
        }
    }
    return flattened;
}

View File

@ -14,8 +14,8 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef REMWHAREAD_URL_HPP
#define REMWHAREAD_URL_HPP
#ifndef REMWHAREAD_URI_HPP
#define REMWHAREAD_URI_HPP
#include <string>
@ -28,23 +28,24 @@ typedef struct html_extract
string fulltext;
} html_extract;
class URL
class URI
{
public:
explicit URL(const string &url);
explicit URI(const string &uri);
//! Download URL and extract title, description and full text.
//! Download URI and extract title, description and full text.
const html_extract get();
//! Save URL in archive and return URL.
//! Save URI in the archive and return the URI of the archived version.
const string archive();
private:
string _url;
string _uri;
const string extract_title(const string &html);
const string extract_description(const string &html);
const string strip_html(const string &html);
const string unescape_html(const string &html);
const string remove_newlines(const string &text);
};
#endif // REMWHAREAD_URL_HPP
#endif // REMWHAREAD_URI_HPP

View File

@ -27,7 +27,7 @@ SCENARIO ("The option parser works correctly")
{
bool exception = false;
options opts;
const string url = "https://example.com/article.html";
const string uri = "https://example.com/article.html";
WHEN ("The options are --help --file test")
{
@ -75,12 +75,12 @@ SCENARIO ("The option parser works correctly")
}
}
WHEN ("The options are -t 💩 " + url)
WHEN ("The options are -t 💩 " + uri)
{
try
{
const char *argv[]
= { "remwharead", "-t", "💩", url.c_str() };
= { "remwharead", "-t", "💩", uri.c_str() };
opts = parse_options(4, argv);
}
catch (const std::exception &e)
@ -95,7 +95,7 @@ SCENARIO ("The option parser works correctly")
REQUIRE_FALSE(exception);
REQUIRE(opts.status_code == 0);
REQUIRE(opts.tags == vector<string>{ "💩" });
REQUIRE(opts.url == url);
REQUIRE(opts.uri == uri);
}
}
@ -115,7 +115,7 @@ SCENARIO ("The option parser works correctly")
"remwharead",
"-t",
tags.c_str(),
url.c_str()
uri.c_str()
};
opts = parse_options(4, argv);
}
@ -131,7 +131,7 @@ SCENARIO ("The option parser works correctly")
REQUIRE_FALSE(exception);
REQUIRE(opts.status_code == 0);
REQUIRE(opts.tags == vector<string>{ "tag1", longstring, "tag3" });
REQUIRE(opts.url == url);
REQUIRE(opts.uri == uri);
}
}
}