Renamed URL to URI.

2019-05-16 08:36:35 +02:00 · 2019-05-16 08:36:35 +02:00 · 914d22569f
parent 98743148f9
commit 914d22569f
7 changed files with 48 additions and 42 deletions
--- a/remwharead.1.adoc
+++ b/remwharead.1.adoc
@ -13,20 +13,20 @@ remwharead - Remember what you read, and when

 == SYNOPSIS

-*remwharead* [*-t* _tags_] _URL_
+*remwharead* [*-t* _tags_] _URI_

 *remwharead* *-e* _format_ [*-f* _file_] [*-s* _start_,_end_]

 == DESCRIPTION

-*remwharead* saves URLs of things you read in a database along with an URL to
+*remwharead* saves URIs of things you read in a database along with a URI to
 the archived version, the current date and time, title, description, the full
 text of the page and optional tags.

 == OPTIONS

 *-t* _tags_, *--tags* _tags_::
-Add tags to _URL_, delimited by commas.
+Add tags to _URI_, delimited by commas.

 *-e* _format_, *--export* _format_::
 Export to _format_. Possible values are _csv_ and _asciidoc_. See _FORMATS_.
--- a/src/main.cpp
+++ b/src/main.cpp
@ -19,8 +19,8 @@
 #include <chrono>
 #include "sqlite.hpp"
 #include "parse_options.hpp"
-#include "url.hpp"
 #include "csv.hpp"
+#include "uri.hpp"

 using std::cout;
 using std::cerr;
@ -44,11 +44,11 @@ int main(const int argc, const char *argv[])
        return 2;
    }

-    if (!opts.url.empty())
+    if (!opts.uri.empty())
    {
-        URL url(opts.url);
-        html_extract page = url.get();
-        db.store({opts.url, url.archive(), system_clock::now(), opts.tags,
+        URI uri(opts.uri);
+        html_extract page = uri.get();
+        db.store({opts.uri, uri.archive(), system_clock::now(), opts.tags,
                  page.title, page.description, page.fulltext});
    }

--- a/src/parse_options.cpp
+++ b/src/parse_options.cpp
@ -44,7 +44,7 @@ const options parse_options(const int argc, const char *argv[])
    {
        popl::OptionParser op("Available options");
        op.add<popl::Value<string>>
-            ("t", "tags", "Add tags to URL, delimited by commas.", "", &tags);
+            ("t", "tags", "Add tags to URI, delimited by commas.", "", &tags);
        op.add<popl::Value<string>>
            ("e", "export", "Export to format.", "", &format);
        op.add<popl::Value<string>>
@ -62,7 +62,7 @@ const options parse_options(const int argc, const char *argv[])

        if (option_help->is_set())
        {
-            cout << "Usage: " << argv[0] << " [-t tags] URL\n"
+            cout << "Usage: " << argv[0] << " [-t tags] URI\n"
                 << "       " << argv[0]
                 << " -e format [-f file] [-s start,end]\n";
            cout << op;
@ -155,12 +155,12 @@ const options parse_options(const int argc, const char *argv[])

        if (op.non_option_args().size() > 0)
        {
-            opts.url = op.non_option_args().front();
+            opts.uri = op.non_option_args().front();
        }

-        if (opts.url == "" && opts.format == export_format::undefined)
+        if (opts.uri == "" && opts.format == export_format::undefined)
        {
-            cerr << "Error: You have to specify either URL or --export.\n";
+            cerr << "Error: You have to specify either URI or --export.\n";
            return options(1);
        }
    }
--- a/src/parse_options.hpp
+++ b/src/parse_options.hpp
@ -39,7 +39,7 @@ typedef struct options
    string file;
    array<time_point, 2> span = { time_point(), system_clock::now() };
    sort_attribute sort = sort_attribute::both;
-    string url;
+    string uri;
    uint8_t status_code = 0;

    options();
--- a/src/uri.cpp
+++ b/src/uri.cpp
@ -26,7 +26,7 @@
 #include <curlpp/Exception.hpp>
 #include <curlpp/Infos.hpp>
 #include <version.hpp>
-#include "url.hpp"
+#include "uri.hpp"

 namespace curlopts = curlpp::options;
 using std::uint64_t;
@ -37,12 +37,12 @@ using std::regex_replace;
 using std::regex_search;
 using std::smatch;

-URL::URL(const string &url)
-    :_url(url)
+URI::URI(const string &uri)
+    :_uri(uri)
 {
 }

-const html_extract URL::get()
+const html_extract URI::get()
 {
    try
    {
@ -52,7 +52,7 @@ const html_extract URL::get()
                                            + global::version);
        request.setOpt<curlopts::HttpHeader>({ "Connection: close" });
        request.setOpt<curlopts::FollowLocation>(true);
-        request.setOpt<curlopts::Url>(_url);
+        request.setOpt<curlopts::Url>(_uri);
        request.setOpt<curlopts::WriteStream>(&oss);
        request.perform();

@ -80,21 +80,21 @@ const html_extract URL::get()
    return { "", "", "" };
 }

-const string URL::extract_title(const string &html)
+const string URI::extract_title(const string &html)
 {
    smatch match;
    regex_search(html, match, regex("<title>([^<]+)"));
-    return match[1].str();
+    return remove_newlines(match[1].str());
 }

-const string URL::extract_description(const string &html)
+const string URI::extract_description(const string &html)
 {
    smatch match;
    regex_search(html, match, regex("description\"[^>]+content=\"([^\"]+)"));
-    return match[1].str();
+    return remove_newlines(match[1].str());
 }

-const string URL::strip_html(const string &html)
+const string URI::strip_html(const string &html)
 {
    string out;
    out = regex_replace(html, regex("<script[^<]+"), ""); // Remove JavaScript.
@ -107,7 +107,7 @@ const string URL::strip_html(const string &html)
    return unescape_html(out);
 }

-const string URL::unescape_html(const string &html)
+const string URI::unescape_html(const string &html)
 {
    string buffer = html;
    string output;
@ -407,9 +407,9 @@ const string URL::unescape_html(const string &html)
    return output;
 }

-const string URL::archive()
+const string URI::archive()
 {
-    if (_url.substr(0, 4) != "http")
+    if (_uri.substr(0, 4) != "http")
    {
        return "";
    }
@ -422,7 +422,7 @@ const string URL::archive()
                                            + global::version);
        request.setOpt<curlopts::HttpHeader>({ "Connection: close" });
        request.setOpt<curlopts::FollowLocation>(true);
-        request.setOpt<curlopts::Url>("https://web.archive.org/save/" + _url);
+        request.setOpt<curlopts::Url>("https://web.archive.org/save/" + _uri);
        request.setOpt<curlopts::WriteStream>(&oss);
        request.setOpt<curlopts::NoBody>(true);        // Make a HEAD request.
        request.setOpt<curlpp::options::Header>(true); // Save headers in oss.
@ -442,3 +442,8 @@ const string URL::archive()

    return "";
 }
+
+const string URI::remove_newlines(const string &text)
+{
+    return regex_replace(text, regex("\n"), " ");
+}
--- a/src/uri.hpp
+++ b/src/uri.hpp
@ -14,8 +14,8 @@
 *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

-#ifndef REMWHAREAD_URL_HPP
-#define REMWHAREAD_URL_HPP
+#ifndef REMWHAREAD_URI_HPP
+#define REMWHAREAD_URI_HPP

 #include <string>

@ -28,23 +28,24 @@ typedef struct html_extract
    string fulltext;
 } html_extract;

-class URL
+class URI
 {
 public:
-    explicit URL(const string &url);
+    explicit URI(const string &uri);

-    //! Download URL and extract title, description and full text.
+    //! Download URI and extract title, description and full text.
    const html_extract get();
-    //! Save URL in archive and return URL.
+    //! Save URI in archive and return the URI of the archived version.
    const string archive();

 private:
-    string _url;
+    string _uri;

    const string extract_title(const string &html);
    const string extract_description(const string &html);
    const string strip_html(const string &html);
    const string unescape_html(const string &html);
+    const string remove_newlines(const string &text);
 };

-#endif  // REMWHAREAD_URL_HPP
+#endif  // REMWHAREAD_URI_HPP
--- a/tests/test_parse_options.cpp
+++ b/tests/test_parse_options.cpp
@ -27,7 +27,7 @@ SCENARIO ("The option parser works correctly")
 {
    bool exception = false;
    options opts;
-    const string url = "https://example.com/article.html";
+    const string uri = "https://example.com/article.html";

    WHEN ("The options are --help --file test")
    {
@ -75,12 +75,12 @@ SCENARIO ("The option parser works correctly")
        }
    }

-    WHEN ("The options are -t 💩 " + url)
+    WHEN ("The options are -t 💩 " + uri)
    {
        try
        {
            const char *argv[]
-                = { "remwharead", "-t", "💩", url.c_str() };
+                = { "remwharead", "-t", "💩", uri.c_str() };
            opts = parse_options(4, argv);
        }
        catch (const std::exception &e)
@ -95,7 +95,7 @@ SCENARIO ("The option parser works correctly")
            REQUIRE_FALSE(exception);
            REQUIRE(opts.status_code == 0);
            REQUIRE(opts.tags == vector<string>{ "💩" });
-            REQUIRE(opts.url == url);
+            REQUIRE(opts.uri == uri);
        }
    }

@ -115,7 +115,7 @@ SCENARIO ("The option parser works correctly")
                    "remwharead",
                    "-t",
                    tags.c_str(),
-                    url.c_str()
+                    uri.c_str()
                };
            opts = parse_options(4, argv);
        }
@ -131,7 +131,7 @@ SCENARIO ("The option parser works correctly")
            REQUIRE_FALSE(exception);
            REQUIRE(opts.status_code == 0);
            REQUIRE(opts.tags == vector<string>{ "tag1", longstring, "tag3" });
-            REQUIRE(opts.url == url);
+            REQUIRE(opts.uri == uri);
        }
    }
 }