From 4991f7b0064016dde79ea46a1ffb9fc0eece7762 Mon Sep 17 00:00:00 2001 From: tastytea Date: Sun, 28 Jul 2019 02:13:23 +0200 Subject: [PATCH] Documented URI. --- src/lib/uri.hpp | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/src/lib/uri.hpp b/src/lib/uri.hpp index 593b5df..c1cf24e 100644 --- a/src/lib/uri.hpp +++ b/src/lib/uri.hpp @@ -24,6 +24,7 @@ namespace remwharead { using std::string; + //! A processed HTML page. typedef struct html_extract { string title; @@ -31,27 +32,46 @@ namespace remwharead string fulltext; } html_extract; + //! Download, archive and process a URI. class URI { public: + //! Construct object and set URI. explicit URI(const string &uri); //! Download URI and extract title, description and full text. const html_extract get(); - //! Save URI in archive and return URI. + + //! Save URI in archive and return archive-URI. const string archive(); protected: string _uri; + //! Sets common curlpp options. void set_curlpp_options(curlpp::Easy &request); + + //! Extract the title from an HTML page. const string extract_title(const string &html); + + //! Extract the description from an HTML page. const string extract_description(const string &html); + + //! Removes HTML tags and superfluous spaces from an HTML page. const string strip_html(const string &html); - //! Remove all HTML tags. If tag is not empty, remove only this tag. + + /*! + * @brief Remove HTML tags. + * + * @param html HTML page. + * @param tag If set, only remove this tag. + */ const string remove_html_tags(const string &html, const string &tag = ""); + + //! Convert HTML entities to UTF-8. const string unescape_html(const string &html); + //! Replace newlines with spaces. const string remove_newlines(string text); };