Documented URI.

This commit is contained in:
tastytea 2019-07-28 02:13:23 +02:00
parent 610173179d
commit 4991f7b006
Signed by: tastytea
GPG Key ID: CFC39497F1B26E07
1 changed file with 22 additions and 2 deletions

View File

@ -24,6 +24,7 @@ namespace remwharead
{ {
using std::string; using std::string;
//! A processed HTML page.
typedef struct html_extract typedef struct html_extract
{ {
string title; string title;
@ -31,27 +32,46 @@ namespace remwharead
string fulltext; string fulltext;
} html_extract; } html_extract;
//! Download, archive and process a URI.
class URI class URI
{ {
public: public:
//! Construct object and set URL.
explicit URI(const string &uri); explicit URI(const string &uri);
//! Download URI and extract title, description and full text. //! Download URI and extract title, description and full text.
const html_extract get(); const html_extract get();
//! Save URI in archive and return URI.
//! Save URI in archive and return archive-URI.
const string archive(); const string archive();
protected: protected:
string _uri; string _uri;
//! Set common curlpp options.
void set_curlpp_options(curlpp::Easy &request); void set_curlpp_options(curlpp::Easy &request);
//! Extract the title from an HTML page.
const string extract_title(const string &html); const string extract_title(const string &html);
//! Extract the description from an HTML page.
const string extract_description(const string &html); const string extract_description(const string &html);
//! Remove HTML tags and superfluous spaces from an HTML page.
const string strip_html(const string &html); const string strip_html(const string &html);
//! Remove all HTML tags. If tag is not empty, remove only this tag.
/*!
* @brief Remove HTML tags.
*
* @param html HTML page.
* @param tag If set, only remove this tag.
*/
const string remove_html_tags(const string &html, const string remove_html_tags(const string &html,
const string &tag = ""); const string &tag = "");
//! Convert HTML entities to UTF-8.
const string unescape_html(const string &html); const string unescape_html(const string &html);
//! Replace newlines with spaces. //! Replace newlines with spaces.
const string remove_newlines(string text); const string remove_newlines(string text);
}; };