Documented URI.

2019-07-28 02:13:23 +02:00 · 2019-07-28 02:13:23 +02:00 · 4991f7b006
parent 610173179d
commit 4991f7b006
1 changed file with 22 additions and 2 deletions
--- a/src/lib/uri.hpp
+++ b/src/lib/uri.hpp
@@ -24,6 +24,7 @@ namespace remwharead
 {
    using std::string;

+    //! A processed HTML page.
    typedef struct html_extract
    {
        string title;
@@ -31,27 +32,46 @@ namespace remwharead
        string fulltext;
    } html_extract;

+    //! Download, archive and process a URI.
    class URI
    {
    public:
+        //! Construct object and set URI.
        explicit URI(const string &uri);

        //! Download URI and extract title, description and full text.
        const html_extract get();
-        //! Save URI in archive and return URI.
+
+        //! Save URI in archive and return archive-URI.
        const string archive();

    protected:
        string _uri;

+        //! Set common curlpp options.
        void set_curlpp_options(curlpp::Easy &request);
+
+        //! Extract the title from an HTML page.
        const string extract_title(const string &html);
+
+        //! Extract the description from an HTML page.
        const string extract_description(const string &html);
+
+        //! Remove HTML tags and superfluous spaces from an HTML page.
        const string strip_html(const string &html);
-        //! Remove all HTML tags. If tag is not empty, remove only this tag.
+
+        /*!
+         *  @brief  Remove HTML tags.
+         *
+         *  @param  html HTML page.
+         *  @param  tag  If not empty, only remove this tag.
+         */
        const string remove_html_tags(const string &html,
                                      const string &tag = "");
+
+        //! Convert HTML entities to UTF-8.
        const string unescape_html(const string &html);
+
        //! Replace newlines with spaces.
        const string remove_newlines(string text);
    };