Renamed URL to URI.
This commit is contained in:
parent
98743148f9
commit
914d22569f
|
@ -13,20 +13,20 @@ remwharead - Remember what you read, and when
|
|||
|
||||
== SYNOPSIS
|
||||
|
||||
*remwharead* [*-t* _tags_] _URL_
|
||||
*remwharead* [*-t* _tags_] _URI_
|
||||
|
||||
*remwharead* *-e* _format_ [*-f* _file_] [*-s* _start_,_end_]
|
||||
|
||||
== DESCRIPTION
|
||||
|
||||
*remwharead* saves URLs of things you read in a database along with a URL to
|
||||
*remwharead* saves URIs of things you read in a database along with a URI to
|
||||
the archived version, the current date and time, title, description, the full
|
||||
text of the page and optional tags.
|
||||
|
||||
== OPTIONS
|
||||
|
||||
*-t* _tags_, *--tags* _tags_::
|
||||
Add tags to _URL_, delimited by commas.
|
||||
Add tags to _URI_, delimited by commas.
|
||||
|
||||
*-e* _format_, *--export* _format_::
|
||||
Export to _format_. Possible values are _csv_ and _asciidoc_. See _FORMATS_.
|
||||
|
|
10
src/main.cpp
10
src/main.cpp
|
@ -19,8 +19,8 @@
|
|||
#include <chrono>
|
||||
#include "sqlite.hpp"
|
||||
#include "parse_options.hpp"
|
||||
#include "url.hpp"
|
||||
#include "csv.hpp"
|
||||
#include "uri.hpp"
|
||||
|
||||
using std::cout;
|
||||
using std::cerr;
|
||||
|
@ -44,11 +44,11 @@ int main(const int argc, const char *argv[])
|
|||
return 2;
|
||||
}
|
||||
|
||||
if (!opts.url.empty())
|
||||
if (!opts.uri.empty())
|
||||
{
|
||||
URL url(opts.url);
|
||||
html_extract page = url.get();
|
||||
db.store({opts.url, url.archive(), system_clock::now(), opts.tags,
|
||||
URI uri(opts.uri);
|
||||
html_extract page = uri.get();
|
||||
db.store({opts.uri, uri.archive(), system_clock::now(), opts.tags,
|
||||
page.title, page.description, page.fulltext});
|
||||
}
|
||||
|
||||
|
|
|
@ -44,7 +44,7 @@ const options parse_options(const int argc, const char *argv[])
|
|||
{
|
||||
popl::OptionParser op("Available options");
|
||||
op.add<popl::Value<string>>
|
||||
("t", "tags", "Add tags to URL, delimited by commas.", "", &tags);
|
||||
("t", "tags", "Add tags to URI, delimited by commas.", "", &tags);
|
||||
op.add<popl::Value<string>>
|
||||
("e", "export", "Export to format.", "", &format);
|
||||
op.add<popl::Value<string>>
|
||||
|
@ -62,7 +62,7 @@ const options parse_options(const int argc, const char *argv[])
|
|||
|
||||
if (option_help->is_set())
|
||||
{
|
||||
cout << "Usage: " << argv[0] << " [-t tags] URL\n"
|
||||
cout << "Usage: " << argv[0] << " [-t tags] URI\n"
|
||||
<< " " << argv[0]
|
||||
<< " -e format [-f file] [-s start,end]\n";
|
||||
cout << op;
|
||||
|
@ -155,12 +155,12 @@ const options parse_options(const int argc, const char *argv[])
|
|||
|
||||
if (op.non_option_args().size() > 0)
|
||||
{
|
||||
opts.url = op.non_option_args().front();
|
||||
opts.uri = op.non_option_args().front();
|
||||
}
|
||||
|
||||
if (opts.url == "" && opts.format == export_format::undefined)
|
||||
if (opts.uri == "" && opts.format == export_format::undefined)
|
||||
{
|
||||
cerr << "Error: You have to specify either URL or --export.\n";
|
||||
cerr << "Error: You have to specify either URI or --export.\n";
|
||||
return options(1);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -39,7 +39,7 @@ typedef struct options
|
|||
string file;
|
||||
array<time_point, 2> span = { time_point(), system_clock::now() };
|
||||
sort_attribute sort = sort_attribute::both;
|
||||
string url;
|
||||
string uri;
|
||||
uint8_t status_code = 0;
|
||||
|
||||
options();
|
||||
|
|
|
@ -26,7 +26,7 @@
|
|||
#include <curlpp/Exception.hpp>
|
||||
#include <curlpp/Infos.hpp>
|
||||
#include <version.hpp>
|
||||
#include "url.hpp"
|
||||
#include "uri.hpp"
|
||||
|
||||
namespace curlopts = curlpp::options;
|
||||
using std::uint64_t;
|
||||
|
@ -37,12 +37,12 @@ using std::regex_replace;
|
|||
using std::regex_search;
|
||||
using std::smatch;
|
||||
|
||||
URL::URL(const string &url)
|
||||
:_url(url)
|
||||
URI::URI(const string &uri)
|
||||
:_uri(uri)
|
||||
{
|
||||
}
|
||||
|
||||
const html_extract URL::get()
|
||||
const html_extract URI::get()
|
||||
{
|
||||
try
|
||||
{
|
||||
|
@ -52,7 +52,7 @@ const html_extract URL::get()
|
|||
+ global::version);
|
||||
request.setOpt<curlopts::HttpHeader>({ "Connection: close" });
|
||||
request.setOpt<curlopts::FollowLocation>(true);
|
||||
request.setOpt<curlopts::Url>(_url);
|
||||
request.setOpt<curlopts::Url>(_uri);
|
||||
request.setOpt<curlopts::WriteStream>(&oss);
|
||||
request.perform();
|
||||
|
||||
|
@ -80,21 +80,21 @@ const html_extract URL::get()
|
|||
return { "", "", "" };
|
||||
}
|
||||
|
||||
const string URL::extract_title(const string &html)
|
||||
const string URI::extract_title(const string &html)
|
||||
{
|
||||
smatch match;
|
||||
regex_search(html, match, regex("<title>([^<]+)"));
|
||||
return match[1].str();
|
||||
return remove_newlines(match[1].str());
|
||||
}
|
||||
|
||||
const string URL::extract_description(const string &html)
|
||||
const string URI::extract_description(const string &html)
|
||||
{
|
||||
smatch match;
|
||||
regex_search(html, match, regex("description\"[^>]+content=\"([^\"]+)"));
|
||||
return match[1].str();
|
||||
return remove_newlines(match[1].str());
|
||||
}
|
||||
|
||||
const string URL::strip_html(const string &html)
|
||||
const string URI::strip_html(const string &html)
|
||||
{
|
||||
string out;
|
||||
out = regex_replace(html, regex("<script[^<]+"), ""); // Remove JavaScript.
|
||||
|
@ -107,7 +107,7 @@ const string URL::strip_html(const string &html)
|
|||
return unescape_html(out);
|
||||
}
|
||||
|
||||
const string URL::unescape_html(const string &html)
|
||||
const string URI::unescape_html(const string &html)
|
||||
{
|
||||
string buffer = html;
|
||||
string output;
|
||||
|
@ -407,9 +407,9 @@ const string URL::unescape_html(const string &html)
|
|||
return output;
|
||||
}
|
||||
|
||||
const string URL::archive()
|
||||
const string URI::archive()
|
||||
{
|
||||
if (_url.substr(0, 4) != "http")
|
||||
if (_uri.substr(0, 4) != "http")
|
||||
{
|
||||
return "";
|
||||
}
|
||||
|
@ -422,7 +422,7 @@ const string URL::archive()
|
|||
+ global::version);
|
||||
request.setOpt<curlopts::HttpHeader>({ "Connection: close" });
|
||||
request.setOpt<curlopts::FollowLocation>(true);
|
||||
request.setOpt<curlopts::Url>("https://web.archive.org/save/" + _url);
|
||||
request.setOpt<curlopts::Url>("https://web.archive.org/save/" + _uri);
|
||||
request.setOpt<curlopts::WriteStream>(&oss);
|
||||
request.setOpt<curlopts::NoBody>(true); // Make a HEAD request.
|
||||
request.setOpt<curlpp::options::Header>(true); // Save headers in oss.
|
||||
|
@ -442,3 +442,8 @@ const string URL::archive()
|
|||
|
||||
return "";
|
||||
}
|
||||
|
||||
const string URI::remove_newlines(const string &text)
|
||||
{
|
||||
return regex_replace(text, regex("\n"), " ");
|
||||
}
|
|
@ -14,8 +14,8 @@
|
|||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifndef REMWHAREAD_URL_HPP
|
||||
#define REMWHAREAD_URL_HPP
|
||||
#ifndef REMWHAREAD_URI_HPP
|
||||
#define REMWHAREAD_URI_HPP
|
||||
|
||||
#include <string>
|
||||
|
||||
|
@ -28,23 +28,24 @@ typedef struct html_extract
|
|||
string fulltext;
|
||||
} html_extract;
|
||||
|
||||
class URL
|
||||
class URI
|
||||
{
|
||||
public:
|
||||
explicit URL(const string &url);
|
||||
explicit URI(const string &uri);
|
||||
|
||||
//! Download URL and extract title, description and full text.
|
||||
//! Download URI and extract title, description and full text.
|
||||
const html_extract get();
|
||||
//! Save URL in archive and return URL.
|
||||
//! Save URI in archive and return the URI of the archived version.
|
||||
const string archive();
|
||||
|
||||
private:
|
||||
string _url;
|
||||
string _uri;
|
||||
|
||||
const string extract_title(const string &html);
|
||||
const string extract_description(const string &html);
|
||||
const string strip_html(const string &html);
|
||||
const string unescape_html(const string &html);
|
||||
const string remove_newlines(const string &text);
|
||||
};
|
||||
|
||||
#endif // REMWHAREAD_URL_HPP
|
||||
#endif // REMWHAREAD_URI_HPP
|
|
@ -27,7 +27,7 @@ SCENARIO ("The option parser works correctly")
|
|||
{
|
||||
bool exception = false;
|
||||
options opts;
|
||||
const string url = "https://example.com/article.html";
|
||||
const string uri = "https://example.com/article.html";
|
||||
|
||||
WHEN ("The options are --help --file test")
|
||||
{
|
||||
|
@ -75,12 +75,12 @@ SCENARIO ("The option parser works correctly")
|
|||
}
|
||||
}
|
||||
|
||||
WHEN ("The options are -t 💩 " + url)
|
||||
WHEN ("The options are -t 💩 " + uri)
|
||||
{
|
||||
try
|
||||
{
|
||||
const char *argv[]
|
||||
= { "remwharead", "-t", "💩", url.c_str() };
|
||||
= { "remwharead", "-t", "💩", uri.c_str() };
|
||||
opts = parse_options(4, argv);
|
||||
}
|
||||
catch (const std::exception &e)
|
||||
|
@ -95,7 +95,7 @@ SCENARIO ("The option parser works correctly")
|
|||
REQUIRE_FALSE(exception);
|
||||
REQUIRE(opts.status_code == 0);
|
||||
REQUIRE(opts.tags == vector<string>{ "💩" });
|
||||
REQUIRE(opts.url == url);
|
||||
REQUIRE(opts.uri == uri);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -115,7 +115,7 @@ SCENARIO ("The option parser works correctly")
|
|||
"remwharead",
|
||||
"-t",
|
||||
tags.c_str(),
|
||||
url.c_str()
|
||||
uri.c_str()
|
||||
};
|
||||
opts = parse_options(4, argv);
|
||||
}
|
||||
|
@ -131,7 +131,7 @@ SCENARIO ("The option parser works correctly")
|
|||
REQUIRE_FALSE(exception);
|
||||
REQUIRE(opts.status_code == 0);
|
||||
REQUIRE(opts.tags == vector<string>{ "tag1", longstring, "tag3" });
|
||||
REQUIRE(opts.url == url);
|
||||
REQUIRE(opts.uri == uri);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue