17 #ifndef REMWHAREAD_URI_HPP 18 #define REMWHAREAD_URI_HPP 21 #include <curlpp/Easy.hpp> 40 explicit URI(
const string &uri);
70 const string &tag =
"");
80 #endif // REMWHAREAD_URI_HPP
URI(const string &uri)
Construct object and set URL.
Definition: uri.cpp:43
const string remove_html_tags(const string &html, const string &tag="")
Remove HTML tags.
Definition: uri.cpp:137
Definition: search.cpp:23
const string strip_html(const string &html)
Removes HTML tags and superfluous spaces from an HTML page.
Definition: uri.cpp:118
const string remove_newlines(string text)
Replace newlines with spaces.
Definition: uri.cpp:514
const string unescape_html(const string &html)
Convert HTML entities to UTF-8.
Definition: uri.cpp:175
void set_curlpp_options(curlpp::Easy &request)
Sets common curlpp options.
Definition: uri.cpp:83
const string extract_title(const string &html)
Extract the title from an HTML page.
Definition: uri.cpp:91
Download, archive and process a URI.
Definition: uri.hpp:36
const string archive()
Save URI in archive and return archive-URI.
Definition: uri.cpp:475
const string extract_description(const string &html)
Extract the description from an HTML page.
Definition: uri.cpp:104