2019-08-08 22:13:06 +02:00
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "https://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
2019-07-27 22:46:58 +02:00
< html xmlns = "http://www.w3.org/1999/xhtml" >
< head >
< meta http-equiv = "Content-Type" content = "text/xhtml;charset=UTF-8" / >
< meta http-equiv = "X-UA-Compatible" content = "IE=9" / >
2019-08-08 22:13:06 +02:00
< meta name = "generator" content = "Doxygen 1.8.15" / >
2019-07-27 22:46:58 +02:00
< meta name = "viewport" content = "width=device-width, initial-scale=1" / >
< title > remwharead: remwharead::URI Class Reference< / title >
< link href = "tabs.css" rel = "stylesheet" type = "text/css" / >
< script type = "text/javascript" src = "jquery.js" > < / script >
< script type = "text/javascript" src = "dynsections.js" > < / script >
< link href = "search/search.css" rel = "stylesheet" type = "text/css" / >
< script type = "text/javascript" src = "search/searchdata.js" > < / script >
< script type = "text/javascript" src = "search/search.js" > < / script >
< link href = "doxygen.css" rel = "stylesheet" type = "text/css" / >
< / head >
< body >
< div id = "top" > <!-- do not remove this div, it is closed by doxygen! -->
< div id = "titlearea" >
< table cellspacing = "0" cellpadding = "0" >
< tbody >
< tr style = "height: 56px;" >
< td id = "projectalign" style = "padding-left: 0.5em;" >
< div id = "projectname" > remwharead
2019-09-20 21:35:37 +02:00
  < span id = "projectnumber" > 0.8.1< / span >
2019-07-27 22:46:58 +02:00
< / div >
< / td >
< / tr >
< / tbody >
< / table >
< / div >
<!-- end header part -->
2019-08-08 22:13:06 +02:00
<!-- Generated by Doxygen 1.8.15 -->
2019-07-27 22:46:58 +02:00
< script type = "text/javascript" >
/* @license magnet:?xt=urn:btih:cf05388f2679ee054f2beb29a391d25f4e673ac3& dn=gpl-2.0.txt GPL-v2 */
var searchBox = new SearchBox("searchBox", "search",false,'Search');
/* @license-end */
< / script >
< script type = "text/javascript" src = "menudata.js" > < / script >
< script type = "text/javascript" src = "menu.js" > < / script >
< script type = "text/javascript" >
/* @license magnet:?xt=urn:btih:cf05388f2679ee054f2beb29a391d25f4e673ac3& dn=gpl-2.0.txt GPL-v2 */
$(function() {
initMenu('',true,false,'search.php','Search');
$(document).ready(function() { init_search(); });
});
/* @license-end */< / script >
< div id = "main-nav" > < / div >
<!-- window showing the filter options -->
< div id = "MSearchSelectWindow"
onmouseover="return searchBox.OnSearchSelectShow()"
onmouseout="return searchBox.OnSearchSelectHide()"
onkeydown="return searchBox.OnSearchSelectKey(event)">
< / div >
<!-- iframe showing the search results (closed by default) -->
< div id = "MSearchResultsWindow" >
< iframe src = "javascript:void(0)" frameborder = "0"
name="MSearchResults" id="MSearchResults">
< / iframe >
< / div >
< div id = "nav-path" class = "navpath" >
< ul >
< li class = "navelem" > < b > remwharead< / b > < / li > < li class = "navelem" > < a class = "el" href = "classremwharead_1_1URI.html" > URI< / a > < / li > < / ul >
< / div >
< / div > <!-- top -->
< div class = "header" >
< div class = "summary" >
< a href = "#pub-methods" > Public Member Functions< / a > |
< a href = "#pro-methods" > Protected Member Functions< / a > |
< a href = "#pro-attribs" > Protected Attributes< / a > |
< a href = "classremwharead_1_1URI-members.html" > List of all members< / a > < / div >
< div class = "headertitle" >
< div class = "title" > remwharead::URI Class Reference< / div > < / div >
< / div > <!-- header -->
< div class = "contents" >
2019-07-28 02:30:30 +02:00
< p > Download, archive and process a URI.
< a href = "classremwharead_1_1URI.html#details" > More...< / a > < / p >
2019-08-05 23:28:43 +02:00
< p > < code > #include < < a class = "el" href = "uri_8hpp_source.html" > remwharead/uri.hpp< / a > > < / code > < / p >
2019-07-27 22:46:58 +02:00
< table class = "memberdecls" >
< tr class = "heading" > < td colspan = "2" > < h2 class = "groupheader" > < a name = "pub-methods" > < / a >
Public Member Functions< / h2 > < / td > < / tr >
2019-07-28 02:30:30 +02:00
< tr class = "memitem:acda508768b1fd3b4df81ea66dd4fab87" > < td class = "memItemLeft" align = "right" valign = "top" >   < / td > < td class = "memItemRight" valign = "bottom" > < a class = "el" href = "classremwharead_1_1URI.html#acda508768b1fd3b4df81ea66dd4fab87" > URI< / a > (const string & uri)< / td > < / tr >
< tr class = "memdesc:acda508768b1fd3b4df81ea66dd4fab87" > < td class = "mdescLeft" >   < / td > < td class = "mdescRight" > Construct object and set URL. < a href = "#acda508768b1fd3b4df81ea66dd4fab87" > More...< / a > < br / > < / td > < / tr >
2019-07-27 22:46:58 +02:00
< tr class = "separator:acda508768b1fd3b4df81ea66dd4fab87" > < td class = "memSeparator" colspan = "2" >   < / td > < / tr >
2019-07-28 02:30:30 +02:00
< tr class = "memitem:a8d6ac084a823749ed38c12e7bf8f3461" > < td class = "memItemLeft" align = "right" valign = "top" > const < a class = "el" href = "structremwharead_1_1html__extract.html" > html_extract< / a >   < / td > < td class = "memItemRight" valign = "bottom" > < a class = "el" href = "classremwharead_1_1URI.html#a8d6ac084a823749ed38c12e7bf8f3461" > get< / a > ()< / td > < / tr >
< tr class = "memdesc:a8d6ac084a823749ed38c12e7bf8f3461" > < td class = "mdescLeft" >   < / td > < td class = "mdescRight" > Download URI and extract title, description and full text. < a href = "#a8d6ac084a823749ed38c12e7bf8f3461" > More...< / a > < br / > < / td > < / tr >
2019-07-27 22:46:58 +02:00
< tr class = "separator:a8d6ac084a823749ed38c12e7bf8f3461" > < td class = "memSeparator" colspan = "2" >   < / td > < / tr >
2019-08-08 11:28:50 +02:00
< tr class = "memitem:a4252486ed37ea89083b87dbdb0763e45" > < td class = "memItemLeft" align = "right" valign = "top" > const < a class = "el" href = "structremwharead_1_1archive__answer.html" > archive_answer< / a >   < / td > < td class = "memItemRight" valign = "bottom" > < a class = "el" href = "classremwharead_1_1URI.html#a4252486ed37ea89083b87dbdb0763e45" > archive< / a > ()< / td > < / tr >
< tr class = "memdesc:a4252486ed37ea89083b87dbdb0763e45" > < td class = "mdescLeft" >   < / td > < td class = "mdescRight" > Save URI in archive and return archive-URI. < a href = "#a4252486ed37ea89083b87dbdb0763e45" > More...< / a > < br / > < / td > < / tr >
< tr class = "separator:a4252486ed37ea89083b87dbdb0763e45" > < td class = "memSeparator" colspan = "2" >   < / td > < / tr >
2019-07-27 22:46:58 +02:00
< / table > < table class = "memberdecls" >
< tr class = "heading" > < td colspan = "2" > < h2 class = "groupheader" > < a name = "pro-methods" > < / a >
Protected Member Functions< / h2 > < / td > < / tr >
2019-08-08 11:28:50 +02:00
< tr class = "memitem:a9f4e1777bfbff72f098d7e1a7623a3c5" > < td class = "memItemLeft" align = "right" valign = "top" > const string  < / td > < td class = "memItemRight" valign = "bottom" > < a class = "el" href = "classremwharead_1_1URI.html#a9f4e1777bfbff72f098d7e1a7623a3c5" > make_request< / a > (const string & uri, bool < a class = "el" href = "classremwharead_1_1URI.html#a4252486ed37ea89083b87dbdb0763e45" > archive< / a > =false) const< / td > < / tr >
< tr class = "memdesc:a9f4e1777bfbff72f098d7e1a7623a3c5" > < td class = "mdescLeft" >   < / td > < td class = "mdescRight" > Make a HTTP(S) request. < a href = "#a9f4e1777bfbff72f098d7e1a7623a3c5" > More...< / a > < br / > < / td > < / tr >
< tr class = "separator:a9f4e1777bfbff72f098d7e1a7623a3c5" > < td class = "memSeparator" colspan = "2" >   < / td > < / tr >
2019-07-28 02:30:30 +02:00
< tr class = "memitem:a37f93c46371d9b3753ae04bd2ef2c362" > < td class = "memItemLeft" align = "right" valign = "top" > const string  < / td > < td class = "memItemRight" valign = "bottom" > < a class = "el" href = "classremwharead_1_1URI.html#a37f93c46371d9b3753ae04bd2ef2c362" > extract_title< / a > (const string & html)< / td > < / tr >
< tr class = "memdesc:a37f93c46371d9b3753ae04bd2ef2c362" > < td class = "mdescLeft" >   < / td > < td class = "mdescRight" > Extract the title from an HTML page. < a href = "#a37f93c46371d9b3753ae04bd2ef2c362" > More...< / a > < br / > < / td > < / tr >
2019-07-27 22:46:58 +02:00
< tr class = "separator:a37f93c46371d9b3753ae04bd2ef2c362" > < td class = "memSeparator" colspan = "2" >   < / td > < / tr >
2019-07-28 02:30:30 +02:00
< tr class = "memitem:ae6ff7a41b9529eb8f4c7f2ace7260dc7" > < td class = "memItemLeft" align = "right" valign = "top" > const string  < / td > < td class = "memItemRight" valign = "bottom" > < a class = "el" href = "classremwharead_1_1URI.html#ae6ff7a41b9529eb8f4c7f2ace7260dc7" > extract_description< / a > (const string & html)< / td > < / tr >
< tr class = "memdesc:ae6ff7a41b9529eb8f4c7f2ace7260dc7" > < td class = "mdescLeft" >   < / td > < td class = "mdescRight" > Extract the description from an HTML page. < a href = "#ae6ff7a41b9529eb8f4c7f2ace7260dc7" > More...< / a > < br / > < / td > < / tr >
2019-07-27 22:46:58 +02:00
< tr class = "separator:ae6ff7a41b9529eb8f4c7f2ace7260dc7" > < td class = "memSeparator" colspan = "2" >   < / td > < / tr >
2019-07-28 02:30:30 +02:00
< tr class = "memitem:ad6ad5351ecf2983e01f9f4a51c2057a5" > < td class = "memItemLeft" align = "right" valign = "top" > const string  < / td > < td class = "memItemRight" valign = "bottom" > < a class = "el" href = "classremwharead_1_1URI.html#ad6ad5351ecf2983e01f9f4a51c2057a5" > strip_html< / a > (const string & html)< / td > < / tr >
< tr class = "memdesc:ad6ad5351ecf2983e01f9f4a51c2057a5" > < td class = "mdescLeft" >   < / td > < td class = "mdescRight" > Remove HTML tags and superfluous spaces from an HTML page. < a href = "#ad6ad5351ecf2983e01f9f4a51c2057a5" > More...< / a > < br / > < / td > < / tr >
2019-07-27 22:46:58 +02:00
< tr class = "separator:ad6ad5351ecf2983e01f9f4a51c2057a5" > < td class = "memSeparator" colspan = "2" >   < / td > < / tr >
2019-07-28 02:30:30 +02:00
< tr class = "memitem:a8b340b13ccf0bc3ae9059872ce48e06a" > < td class = "memItemLeft" align = "right" valign = "top" > const string  < / td > < td class = "memItemRight" valign = "bottom" > < a class = "el" href = "classremwharead_1_1URI.html#a8b340b13ccf0bc3ae9059872ce48e06a" > remove_html_tags< / a > (const string & html, const string & tag=" " )< / td > < / tr >
< tr class = "memdesc:a8b340b13ccf0bc3ae9059872ce48e06a" > < td class = "mdescLeft" >   < / td > < td class = "mdescRight" > Remove HTML tags. < a href = "#a8b340b13ccf0bc3ae9059872ce48e06a" > More...< / a > < br / > < / td > < / tr >
2019-07-27 22:46:58 +02:00
< tr class = "separator:a8b340b13ccf0bc3ae9059872ce48e06a" > < td class = "memSeparator" colspan = "2" >   < / td > < / tr >
2019-09-20 21:35:37 +02:00
< tr class = "memitem:ac98523e5fb23ca4adab57a7caa473eaa" > < td class = "memItemLeft" align = "right" valign = "top" > const string  < / td > < td class = "memItemRight" valign = "bottom" > < a class = "el" href = "classremwharead_1_1URI.html#ac98523e5fb23ca4adab57a7caa473eaa" > unescape_html< / a > (string html)< / td > < / tr >
< tr class = "memdesc:ac98523e5fb23ca4adab57a7caa473eaa" > < td class = "mdescLeft" >   < / td > < td class = "mdescRight" > Convert HTML entities to UTF-8. < a href = "#ac98523e5fb23ca4adab57a7caa473eaa" > More...< / a > < br / > < / td > < / tr >
< tr class = "separator:ac98523e5fb23ca4adab57a7caa473eaa" > < td class = "memSeparator" colspan = "2" >   < / td > < / tr >
2019-07-28 02:30:30 +02:00
< tr class = "memitem:a9373cb28de198ae2db624980273ece4a" > < td class = "memItemLeft" align = "right" valign = "top" > const string  < / td > < td class = "memItemRight" valign = "bottom" > < a class = "el" href = "classremwharead_1_1URI.html#a9373cb28de198ae2db624980273ece4a" > remove_newlines< / a > (string text)< / td > < / tr >
< tr class = "memdesc:a9373cb28de198ae2db624980273ece4a" > < td class = "mdescLeft" >   < / td > < td class = "mdescRight" > Replace newlines with spaces. < a href = "#a9373cb28de198ae2db624980273ece4a" > More...< / a > < br / > < / td > < / tr >
2019-07-27 22:46:58 +02:00
< tr class = "separator:a9373cb28de198ae2db624980273ece4a" > < td class = "memSeparator" colspan = "2" >   < / td > < / tr >
< / table > < table class = "memberdecls" >
< tr class = "heading" > < td colspan = "2" > < h2 class = "groupheader" > < a name = "pro-attribs" > < / a >
Protected Attributes< / h2 > < / td > < / tr >
< tr class = "memitem:a6d76848066779348084046a63bdaedc0" > < td class = "memItemLeft" align = "right" valign = "top" > < a id = "a6d76848066779348084046a63bdaedc0" > < / a >
string  < / td > < td class = "memItemRight" valign = "bottom" > < b > _uri< / b > < / td > < / tr >
< tr class = "separator:a6d76848066779348084046a63bdaedc0" > < td class = "memSeparator" colspan = "2" >   < / td > < / tr >
< / table >
2019-07-28 02:30:30 +02:00
< a name = "details" id = "details" > < / a > < h2 class = "groupheader" > Detailed Description< / h2 >
< div class = "textblock" > < p > Download, archive and process a URI. < / p >
2019-08-05 23:28:43 +02:00
< dl class = "section since" > < dt > Since< / dt > < dd > 0.6.0 < / dd > < / dl >
2019-07-28 02:30:30 +02:00
< / div > < h2 class = "groupheader" > Constructor & Destructor Documentation< / h2 >
< a id = "acda508768b1fd3b4df81ea66dd4fab87" > < / a >
< h2 class = "memtitle" > < span class = "permalink" > < a href = "#acda508768b1fd3b4df81ea66dd4fab87" > ◆ < / a > < / span > URI()< / h2 >
< div class = "memitem" >
< div class = "memproto" >
< table class = "mlabels" >
< tr >
< td class = "mlabels-left" >
< table class = "memname" >
< tr >
< td class = "memname" > remwharead::URI::URI < / td >
< td > (< / td >
< td class = "paramtype" > const string &   < / td >
< td class = "paramname" > < em > uri< / em > < / td > < td > )< / td >
< td > < / td >
< / tr >
< / table >
< / td >
< td class = "mlabels-right" >
< span class = "mlabels" > < span class = "mlabel" > explicit< / span > < / span > < / td >
< / tr >
< / table >
< / div > < div class = "memdoc" >
< p > Construct object and set URL. < / p >
2019-08-08 11:28:50 +02:00
< p > Initializes TLS and sets proxy from the environment variable < code > http_proxy< / code > , if possible.< / p >
< dl class = "section since" > < dt > Since< / dt > < dd > 0.6.0 < / dd > < / dl >
2019-09-20 21:35:37 +02:00
< div class = "fragment" > < div class = "line" > < a name = "l00063" > < / a > < span class = "lineno" > 63< / span >   :_uri(uri)< / div > < div class = "line" > < a name = "l00064" > < / a > < span class = "lineno" > 64< / span >   {< / div > < div class = "line" > < a name = "l00065" > < / a > < span class = "lineno" > 65< / span >   Poco::Net::initializeSSL();< / div > < div class = "line" > < a name = "l00066" > < / a > < span class = "lineno" > 66< / span >   < / div > < div class = "line" > < a name = "l00067" > < / a > < span class = "lineno" > 67< / span >   < span class = "keywordflow" > try< / span > < / div > < div class = "line" > < a name = "l00068" > < / a > < span class = "lineno" > 68< / span >   {< / div > < div class = "line" > < a name = "l00069" > < / a > < span class = "lineno" > 69< / span >   HTTPClientSession::ProxyConfig proxy;< / div > < div class = "line" > < a name = "l00070" > < / a > < span class = "lineno" > 70< / span >   < span class = "keywordtype" > string< / span > proxy_env = Environment::get(< span class = "stringliteral" > " http_proxy" < / span > );< / div > < div class = "line" > < a name = "l00071" > < / a > < span class = "lineno" > 71< / span >   < span class = "keywordtype" > size_t< / span > pos;< / div > < div class = "line" > < a name = "l00072" > < / a > < span class = "lineno" > 72< / span >   < / div > < div class = "line" > < a name = "l00073" > < / a > < span class = "lineno" > 73< / span >   < span class = "comment" > // Only keep text between // and /.< / span > < / div > < div class = "line" > < a name = "l00074" > < / a > < span class = "lineno" > 74< / span >   < span class = "keywordflow" > if< / span > ((pos = proxy_env.find(< span class = "stringliteral" > " //" < / span > )) != string::npos)< / div > < div class = "line" > < a name = "l00075" > < / a > < span class = "lineno" > 75< / span >   {< / div > < div class = "line" > < a name = "l00076" > < / a > < span class = "lineno" > 76< / span >   
proxy_env = proxy_env.substr(pos + 2);< / div > < div class = "line" > < a name = "l00077" > < / a > < span class = "lineno" > 77< / span >   }< / div > < div class = "line" > < a name = "l00078" > < / a > < span class = "lineno" > 78< / span >   < span class = "keywordflow" > if< / span > ((pos = proxy_env.find(< span class = "charliteral" > ' /' < / span > )) != string::npos)< / div > < div class = "line" > < a name = "l00079" > < / a > < span class = "lineno" > 79< / span >   {< / div > < div class = "line" > < a name = "l00080" > < / a > < span class = "lineno" > 80< / span >   proxy_env = proxy_env.substr(0, pos);< / div > < div class = "line" > < a name = "l00081" > < / a > < span class = "lineno" > 81< / span >   }< / div > < div class = "line" > < a name = "l00082" > < / a > < span class = "lineno" > 82< / span >   < / div > < div class = "line" > < a name = "l00083" > < / a > < span class = "lineno" > 83< / span >   < span class = "keywordflow" > if< / span > ((pos = proxy_env.find(< span class = "charliteral" > ' :' < / span > )) != string::npos)< / div > < div class = "line" > < a name = "l00084" > < / a > < span class = "lineno" > 84< / span >   {< / div > < div class = "line" > < a name = "l00085" > < / a > < span class = "lineno" > 85< / span >   proxy.host = proxy_env.substr(0, pos);< / div > < div class = "line" > < a name = "l00086" > < / a > < span class = "lineno" > 86< / span >   proxy.port = std::stoi(proxy_env.substr(pos + 1));< / div > < div class = "line" > < a name = "l00087" > < / a > < span class = "lineno" > 87< / span >   }< / div > < div class = "line" > < a name = "l00088" > < / a > < span class = "lineno" > 88< / span >   < span class = "keywordflow" > else< / span > < / div > < div class = "line" > < a name = "l00089" > < / a > < span class = "lineno" > 89< / span >   {< / div > < div class = "line" > < a name = "l00090" > < / a > < span class = "lineno" > 90< / span >   proxy.host = proxy_env;< / div > < div class = "line" > < a 
name = "l00091" > < / a > < span class = "lineno" > 91< / span >   }< / div > < div class = "line" > < a name = "l00092" > < / a > < span class = "lineno" > 92< / span >   < / div > < div class = "line" > < a name = "l00093" > < / a > < span class = "lineno" > 93< / span >   HTTPClientSession::setGlobalProxyConfig(proxy);< / div > < div class = "line" > < a name = "l00094" > < / a > < span class = "lineno" > 94< / span >   }< / div > < div class = "line" > < a name = "l00095" > < / a > < span class = "lineno"
2019-07-28 02:30:30 +02:00
< / div >
< / div >
< h2 class = "groupheader" > Member Function Documentation< / h2 >
2019-08-08 11:28:50 +02:00
< a id = "a4252486ed37ea89083b87dbdb0763e45" > < / a >
< h2 class = "memtitle" > < span class = "permalink" > < a href = "#a4252486ed37ea89083b87dbdb0763e45" > ◆ < / a > < / span > archive()< / h2 >
2019-07-28 02:30:30 +02:00
< div class = "memitem" >
< div class = "memproto" >
< table class = "memname" >
< tr >
2019-08-08 11:28:50 +02:00
< td class = "memname" > const < a class = "el" href = "structremwharead_1_1archive__answer.html" > archive_answer< / a > remwharead::URI::archive < / td >
2019-07-28 02:30:30 +02:00
< td > (< / td >
< td class = "paramname" > < / td > < td > )< / td >
< td > < / td >
< / tr >
< / table >
< / div > < div class = "memdoc" >
< p > Save URI in archive and return archive-URI. < / p >
2019-08-08 11:28:50 +02:00
< dl class = "section since" > < dt > Since< / dt > < dd > 0.6.0 < / dd > < / dl >
2019-09-20 21:35:37 +02:00
< div class = "fragment" > < div class = "line" > < a name = "l00599" > < / a > < span class = "lineno" > 599< / span >   {< / div > < div class = "line" > < a name = "l00600" > < / a > < span class = "lineno" > 600< / span >   < span class = "keywordflow" > if< / span > (_uri.substr(0, 4) != < span class = "stringliteral" > " http" < / span > )< / div > < div class = "line" > < a name = "l00601" > < / a > < span class = "lineno" > 601< / span >   {< / div > < div class = "line" > < a name = "l00602" > < / a > < span class = "lineno" > 602< / span >   < span class = "keywordflow" > return< / span > { < span class = "keyword" > false< / span > , < span class = "stringliteral" > " Only HTTP(S) is archivable." < / span > , < span class = "stringliteral" > " " < / span > };< / div > < div class = "line" > < a name = "l00603" > < / a > < span class = "lineno" > 603< / span >   }< / div > < div class = "line" > < a name = "l00604" > < / a > < span class = "lineno" > 604< / span >   < / div > < div class = "line" > < a name = "l00605" > < / a > < span class = "lineno" > 605< / span >   < span class = "keywordflow" > try< / span > < / div > < div class = "line" > < a name = "l00606" > < / a > < span class = "lineno" > 606< / span >   {< / div > < div class = "line" > < a name = "l00607" > < / a > < span class = "lineno" > 607< / span >   < span class = "keyword" > const< / span > < span class = "keywordtype" > string< / span > answer = < a class = "code" href = "classremwharead_1_1URI.html#a9f4e1777bfbff72f098d7e1a7623a3c5" > make_request< / a > (< span class = "stringliteral" > " https://web.archive.org/save/" < / span > < / div > < div class = "line" > < a name = "l00608" > < / a > < span class = "lineno" > 608< / span >   + _uri, < span class = "keyword" > true< / span > );< / div > < div class = "line" > < a name = "l00609" > < / a > < span class = "lineno" > 609< / span >   < / div > < div class = "line" > < a name = "l00610" > < / a > < span class = "lineno" > 610< / 
span >   < span class = "keywordflow" > if< / span > (!answer.empty())< / div > < div class = "line" > < a name = "l00611" > < / a > < span class = "lineno" > 611< / span >   {< / div > < div class = "line" > < a name = "l00612" > < / a > < span class = "lineno" > 612< / span >   < span class = "keywordflow" > return< / span > { < span class = "keyword" > true< / span > , < span class = "stringliteral" > " " < / span > , < span class = "stringliteral" > " https://web.archive.org" < / span > + answer };< / div > < div class = "line" > < a name = "l00613" > < / a > < span class = "lineno" > 613< / span >   }< / div > < div class = "line" > < a name = "l00614" > < / a > < span class = "lineno" > 614< / span >   }< / div > < div class = "line" > < a name = "l00615" > < / a > < span class = "lineno" > 615< / span >   < span class = "keywordflow" > catch< / span > (< span class = "keyword" > const< / span > Poco::Exception & e)< / div > < div class = "line" > < a name = "l00616" > < / a > < span class = "lineno" > 616< / span >   {< / div > < div class = "line" > < a name = "l00617" > < / a > < span class = "lineno" > 617< / span >   < span class = "keywordflow" > return< / span > { < span class = "keyword" > false< / span > , e.displayText(), < span class = "stringliteral" > " " < / span > };< / div > < div class = "line" > < a name = "l00618" > < / a > < span class = "lineno" > 618< / span >   }< / div > < div class = "line" > < a name = "l00619" > < / a > < span class = "lineno" > 619< / span >   < / div > < div class = "line" > < a name = "l00620" > < / a > < span class = "lineno" > 620< / span >   < span class = "keywordflow" > return< / span > { < span class = "keyword" > false< / span > , < span class = "stringliteral" > " Unknown error." 
< / span > , < span class = "stringliteral" > " " < / span > };< / div > < div class = "line" > < a name = "l00621" > < / a > < span class = "lineno" > 621< / span >   }< / div > < div class = "ttc" id = "classremwharead_1_1URI_html_a9f4e1777bfbff72f098d7e1a7623a3c5" > < div class = "ttname" > < a href = "classremwharead_1_1URI.html#a9f4e1777bfbff72f098d7e1a7623a3c5" > remwharead::URI::make_request< / a > < / div > < div class = "ttdeci" > const string make_request(const string & uri, bool archive=false) const< / div > < div class = "ttdoc" > Make a HTTP(S) request.< / div > < div class = "ttdef" > < b > Definition:< / b > uri.cpp:131< / div > < / div >
2019-07-28 02:30:30 +02:00
< / div > <!-- fragment -->
< / div >
< / div >
< a id = "ae6ff7a41b9529eb8f4c7f2ace7260dc7" > < / a >
< h2 class = "memtitle" > < span class = "permalink" > < a href = "#ae6ff7a41b9529eb8f4c7f2ace7260dc7" > ◆ < / a > < / span > extract_description()< / h2 >
< div class = "memitem" >
< div class = "memproto" >
< table class = "mlabels" >
< tr >
< td class = "mlabels-left" >
< table class = "memname" >
< tr >
< td class = "memname" > const string remwharead::URI::extract_description < / td >
< td > (< / td >
< td class = "paramtype" > const string &   < / td >
< td class = "paramname" > < em > html< / em > < / td > < td > )< / td >
< td > < / td >
< / tr >
< / table >
< / td >
< td class = "mlabels-right" >
< span class = "mlabels" > < span class = "mlabel" > protected< / span > < / span > < / td >
< / tr >
< / table >
< / div > < div class = "memdoc" >
< p > Extract the description from an HTML page. < / p >
2019-08-08 11:28:50 +02:00
< dl class = "section since" > < dt > Since< / dt > < dd > 0.6.0 < / dd > < / dl >
2019-09-20 21:35:37 +02:00
< div class = "fragment" > < div class = "line" > < a name = "l00222" > < / a > < span class = "lineno" > 222< / span >   {< / div > < div class = "line" > < a name = "l00223" > < / a > < span class = "lineno" > 223< / span >   < span class = "keyword" > const< / span > RegEx re_htmlfile(< span class = "stringliteral" > " .*\\.(.?html?|xml|rss)$" < / span > , RegEx::RE_CASELESS);< / div > < div class = "line" > < a name = "l00224" > < / a > < span class = "lineno" > 224< / span >   < span class = "keywordflow" > if< / span > (_uri.substr(0, 4) == < span class = "stringliteral" > " http" < / span > || re_htmlfile.match(_uri))< / div > < div class = "line" > < a name = "l00225" > < / a > < span class = "lineno" > 225< / span >   {< / div > < div class = "line" > < a name = "l00226" > < / a > < span class = "lineno" > 226< / span >   < span class = "keyword" > const< / span > RegEx re_desc(< span class = "stringliteral" > " description\" [^> ]+content=\" ([^\" ]+)" < / span > ,< / div > < div class = "line" > < a name = "l00227" > < / a > < span class = "lineno" > 227< / span >   RegEx::RE_CASELESS);< / div > < div class = "line" > < a name = "l00228" > < / a > < span class = "lineno" > 228< / span >   vector< string> matches;< / div > < div class = "line" > < a name = "l00229" > < / a > < span class = "lineno" > 229< / span >   re_desc.split(html, matches);< / div > < div class = "line" > < a name = "l00230" > < / a > < span class = "lineno" > 230< / span >   < span class = "keywordflow" > if< / span > (matches.size() > = 2)< / div > < div class = "line" > < a name = "l00231" > < / a > < span class = "lineno" > 231< / span >   {< / div > < div class = "line" > < a name = "l00232" > < / a > < span class = "lineno" > 232< / span >   < span class = "keywordflow" > return< / span > < a class = "code" href = "classremwharead_1_1URI.html#a9373cb28de198ae2db624980273ece4a" > remove_newlines< / a > (< a class = "code" href = 
"classremwharead_1_1URI.html#ac98523e5fb23ca4adab57a7caa473eaa" > unescape_html< / a > (matches[1]));< / div > < div class = "line" > < a name = "l00233" > < / a > < span class = "lineno" > 233< / span >   }< / div > < div class = "line" > < a name = "l00234" > < / a > < span class = "lineno" > 234< / span >   }< / div > < div class = "line" > < a name = "l00235" > < / a > < span class = "lineno" > 235< / span >   < / div > < div class = "line" > < a name = "l00236" > < / a > < span class = "lineno" > 236< / span >   < span class = "keywordflow" > return< / span > < span class = "stringliteral" > " " < / span > ;< / div > < div class = "line" > < a name = "l00237" > < / a > < span class = "lineno" > 237< / span >   }< / div > < div class = "ttc" id = "classremwharead_1_1URI_html_ac98523e5fb23ca4adab57a7caa473eaa" > < div class = "ttname" > < a href = "classremwharead_1_1URI.html#ac98523e5fb23ca4adab57a7caa473eaa" > remwharead::URI::unescape_html< / a > < / div > < div class = "ttdeci" > const string unescape_html(string html)< / div > < div class = "ttdoc" > Convert HTML entities to UTF-8.< / div > < div class = "ttdef" > < b > Definition:< / b > uri.cpp:298< / div > < / div >
< div class = "ttc" id = "classremwharead_1_1URI_html_a9373cb28de198ae2db624980273ece4a" > < div class = "ttname" > < a href = "classremwharead_1_1URI.html#a9373cb28de198ae2db624980273ece4a" > remwharead::URI::remove_newlines< / a > < / div > < div class = "ttdeci" > const string remove_newlines(string text)< / div > < div class = "ttdoc" > Replace newlines with spaces.< / div > < div class = "ttdef" > < b > Definition:< / b > uri.cpp:623< / div > < / div >
2019-07-28 02:30:30 +02:00
< / div > <!-- fragment -->
< / div >
< / div >
< a id = "a37f93c46371d9b3753ae04bd2ef2c362" > < / a >
< h2 class = "memtitle" > < span class = "permalink" > < a href = "#a37f93c46371d9b3753ae04bd2ef2c362" > ◆ < / a > < / span > extract_title()< / h2 >
< div class = "memitem" >
< div class = "memproto" >
< table class = "mlabels" >
< tr >
< td class = "mlabels-left" >
< table class = "memname" >
< tr >
< td class = "memname" > const string remwharead::URI::extract_title < / td >
< td > (< / td >
< td class = "paramtype" > const string &   < / td >
< td class = "paramname" > < em > html< / em > < / td > < td > )< / td >
< td > < / td >
< / tr >
< / table >
< / td >
< td class = "mlabels-right" >
< span class = "mlabels" > < span class = "mlabel" > protected< / span > < / span > < / td >
< / tr >
< / table >
< / div > < div class = "memdoc" >
< p > Extract the title from an HTML page. < / p >
2019-08-08 11:28:50 +02:00
< dl class = "section since" > < dt > Since< / dt > < dd > 0.6.0 < / dd > < / dl >
2019-09-20 21:35:37 +02:00
< div class = "fragment" > < div class = "line" > < a name = "l00205" > < / a > < span class = "lineno" > 205< / span >   {< / div > < div class = "line" > < a name = "l00206" > < / a > < span class = "lineno" > 206< / span >   < span class = "keyword" > const< / span > RegEx re_htmlfile(< span class = "stringliteral" > " .*\\.(.?html?|xml|rss)$" < / span > , RegEx::RE_CASELESS);< / div > < div class = "line" > < a name = "l00207" > < / a > < span class = "lineno" > 207< / span >   < span class = "keywordflow" > if< / span > (_uri.substr(0, 4) == < span class = "stringliteral" > " http" < / span > || re_htmlfile.match(_uri))< / div > < div class = "line" > < a name = "l00208" > < / a > < span class = "lineno" > 208< / span >   {< / div > < div class = "line" > < a name = "l00209" > < / a > < span class = "lineno" > 209< / span >   < span class = "keyword" > const< / span > RegEx re_title(< span class = "stringliteral" > " < title> ([^< ]+)" < / span > , RegEx::RE_CASELESS);< / div > < div class = "line" > < a name = "l00210" > < / a > < span class = "lineno" > 210< / span >   vector< string> matches;< / div > < div class = "line" > < a name = "l00211" > < / a > < span class = "lineno" > 211< / span >   re_title.split(html, matches);< / div > < div class = "line" > < a name = "l00212" > < / a > < span class = "lineno" > 212< / span >   < span class = "keywordflow" > if< / span > (matches.size() > = 2)< / div > < div class = "line" > < a name = "l00213" > < / a > < span class = "lineno" > 213< / span >   {< / div > < div class = "line" > < a name = "l00214" > < / a > < span class = "lineno" > 214< / span >   < span class = "keywordflow" > return< / span > < a class = "code" href = "classremwharead_1_1URI.html#a9373cb28de198ae2db624980273ece4a" > remove_newlines< / a > (< a class = "code" href = "classremwharead_1_1URI.html#ac98523e5fb23ca4adab57a7caa473eaa" > unescape_html< / a > (matches[1]));< / div > < div class = "line" > < a name = "l00215" > < / a > < span 
class = "lineno" > 215< / span >   }< / div > < div class = "line" > < a name = "l00216" > < / a > < span class = "lineno" > 216< / span >   }< / div > < div class = "line" > < a name = "l00217" > < / a > < span class = "lineno" > 217< / span >   < / div > < div class = "line" > < a name = "l00218" > < / a > < span class = "lineno" > 218< / span >   < span class = "keywordflow" > return< / span > < span class = "stringliteral" > " " < / span > ;< / div > < div class = "line" > < a name = "l00219" > < / a > < span class = "lineno" > 219< / span >   }< / div > < div class = "ttc" id = "classremwharead_1_1URI_html_ac98523e5fb23ca4adab57a7caa473eaa" > < div class = "ttname" > < a href = "classremwharead_1_1URI.html#ac98523e5fb23ca4adab57a7caa473eaa" > remwharead::URI::unescape_html< / a > < / div > < div class = "ttdeci" > const string unescape_html(string html)< / div > < div class = "ttdoc" > Convert HTML entities to UTF-8.< / div > < div class = "ttdef" > < b > Definition:< / b > uri.cpp:298< / div > < / div >
< div class = "ttc" id = "classremwharead_1_1URI_html_a9373cb28de198ae2db624980273ece4a" > < div class = "ttname" > < a href = "classremwharead_1_1URI.html#a9373cb28de198ae2db624980273ece4a" > remwharead::URI::remove_newlines< / a > < / div > < div class = "ttdeci" > const string remove_newlines(string text)< / div > < div class = "ttdoc" > Replace newlines with spaces.< / div > < div class = "ttdef" > < b > Definition:< / b > uri.cpp:623< / div > < / div >
2019-07-28 02:30:30 +02:00
< / div > <!-- fragment -->
< / div >
< / div >
< a id = "a8d6ac084a823749ed38c12e7bf8f3461" > < / a >
< h2 class = "memtitle" > < span class = "permalink" > < a href = "#a8d6ac084a823749ed38c12e7bf8f3461" > ◆ < / a > < / span > get()< / h2 >
< div class = "memitem" >
< div class = "memproto" >
< table class = "memname" >
< tr >
< td class = "memname" > const < a class = "el" href = "structremwharead_1_1html__extract.html" > html_extract< / a > remwharead::URI::get < / td >
< td > (< / td >
< td class = "paramname" > < / td > < td > )< / td >
< td > < / td >
< / tr >
< / table >
< / div > < div class = "memdoc" >
< p > Download URI and extract title, description and full text. < / p >
2019-08-08 11:28:50 +02:00
< dl class = "section since" > < dt > Since< / dt > < dd > 0.6.0 < / dd > < / dl >
2019-09-20 21:35:37 +02:00
< div class = "fragment" > < div class = "line" > < a name = "l00107" > < / a > < span class = "lineno" > 107< / span >   {< / div > < div class = "line" > < a name = "l00108" > < / a > < span class = "lineno" > 108< / span >   < span class = "keywordflow" > try< / span > < / div > < div class = "line" > < a name = "l00109" > < / a > < span class = "lineno" > 109< / span >   {< / div > < div class = "line" > < a name = "l00110" > < / a > < span class = "lineno" > 110< / span >   < span class = "keyword" > const< / span > < span class = "keywordtype" > string< / span > answer = < a class = "code" href = "classremwharead_1_1URI.html#a9f4e1777bfbff72f098d7e1a7623a3c5" > make_request< / a > (_uri);< / div > < div class = "line" > < a name = "l00111" > < / a > < span class = "lineno" > 111< / span >   < span class = "keywordflow" > if< / span > (!answer.empty())< / div > < div class = "line" > < a name = "l00112" > < / a > < span class = "lineno" > 112< / span >   {< / div > < div class = "line" > < a name = "l00113" > < / a > < span class = "lineno" > 113< / span >   < span class = "keywordflow" > return< / span > < / div > < div class = "line" > < a name = "l00114" > < / a > < span class = "lineno" > 114< / span >   {< / div > < div class = "line" > < a name = "l00115" > < / a > < span class = "lineno" > 115< / span >   < span class = "keyword" > true< / span > ,< / div > < div class = "line" > < a name = "l00116" > < / a > < span class = "lineno" > 116< / span >   < span class = "stringliteral" > " " < / span > ,< / div > < div class = "line" > < a name = "l00117" > < / a > < span class = "lineno" > 117< / span >   < a class = "code" href = "classremwharead_1_1URI.html#a37f93c46371d9b3753ae04bd2ef2c362" > extract_title< / a > (answer),< / div > < div class = "line" > < a name = "l00118" > < / a > < span class = "lineno" > 118< / span >   < a class = "code" href = "classremwharead_1_1URI.html#ae6ff7a41b9529eb8f4c7f2ace7260dc7" > extract_description< / a > (answer),< / 
div > < div class = "line" > < a name = "l00119" > < / a > < span class = "lineno" > 119< / span >   < a class = "code" href = "classremwharead_1_1URI.html#ad6ad5351ecf2983e01f9f4a51c2057a5" > strip_html< / a > (answer)< / div > < div class = "line" > < a name = "l00120" > < / a > < span class = "lineno" > 120< / span >   };< / div > < div class = "line" > < a name = "l00121" > < / a > < span class = "lineno" > 121< / span >   }< / div > < div class = "line" > < a name = "l00122" > < / a > < span class = "lineno" > 122< / span >   }< / div > < div class = "line" > < a name = "l00123" > < / a > < span class = "lineno" > 123< / span >   < span class = "keywordflow" > catch< / span > (< span class = "keyword" > const< / span > Poco::Exception & e)< / div > < div class = "line" > < a name = "l00124" > < / a > < span class = "lineno" > 124< / span >   {< / div > < div class = "line" > < a name = "l00125" > < / a > < span class = "lineno" > 125< / span >   < span class = "keywordflow" > return< / span > { < span class = "keyword" > false< / span > , e.displayText(), < span class = "stringliteral" > " " < / span > , < span class = "stringliteral" > " " < / span > , < span class = "stringliteral" > " " < / span > };< / div > < div class = "line" > < a name = "l00126" > < / a > < span class = "lineno" > 126< / span >   }< / div > < div class = "line" > < a name = "l00127" > < / a > < span class = "lineno" > 127< / span >   < / div > < div class = "line" > < a name = "l00128" > < / a > < span class = "lineno" > 128< / span >   < span class = "keywordflow" > return< / span > { < span class = "keyword" > false< / span > , < span class = "stringliteral" > " Unknown error." 
< / span > , < span class = "stringliteral" > " " < / span > , < span class = "stringliteral" > " " < / span > , < span class = "stringliteral" > " " < / span > };< / div > < div class = "line" > < a name = "l00129" > < / a > < span class = "lineno" > 129< / span >   }< / div > < div class = "ttc" id = "classremwharead_1_1URI_html_ad6ad5351ecf2983e01f9f4a51c2057a5" > < div class = "ttname" > < a href = "classremwharead_1_1URI.html#ad6ad5351ecf2983e01f9f4a51c2057a5" > remwharead::URI::strip_html< / a > < / div > < div class = "ttdeci" > const string strip_html(const string & html)< / div > < div class = "ttdoc" > Removes HTML tags and superflous spaces from an HTML page.< / div > < div class = "ttdef" > < b > Definition:< / b > uri.cpp:239< / div > < / div >
< div class = "ttc" id = "classremwharead_1_1URI_html_a37f93c46371d9b3753ae04bd2ef2c362" > < div class = "ttname" > < a href = "classremwharead_1_1URI.html#a37f93c46371d9b3753ae04bd2ef2c362" > remwharead::URI::extract_title< / a > < / div > < div class = "ttdeci" > const string extract_title(const string & html)< / div > < div class = "ttdoc" > Extract the title from an HTML page.< / div > < div class = "ttdef" > < b > Definition:< / b > uri.cpp:204< / div > < / div >
< div class = "ttc" id = "classremwharead_1_1URI_html_a9f4e1777bfbff72f098d7e1a7623a3c5" > < div class = "ttname" > < a href = "classremwharead_1_1URI.html#a9f4e1777bfbff72f098d7e1a7623a3c5" > remwharead::URI::make_request< / a > < / div > < div class = "ttdeci" > const string make_request(const string & uri, bool archive=false) const< / div > < div class = "ttdoc" > Make an HTTP(S) request.< / div > < div class = "ttdef" > < b > Definition:< / b > uri.cpp:131< / div > < / div >
< div class = "ttc" id = "classremwharead_1_1URI_html_ae6ff7a41b9529eb8f4c7f2ace7260dc7" > < div class = "ttname" > < a href = "classremwharead_1_1URI.html#ae6ff7a41b9529eb8f4c7f2ace7260dc7" > remwharead::URI::extract_description< / a > < / div > < div class = "ttdeci" > const string extract_description(const string & html)< / div > < div class = "ttdoc" > Extract the description from an HTML page.< / div > < div class = "ttdef" > < b > Definition:< / b > uri.cpp:221< / div > < / div >
2019-08-05 22:01:08 +02:00
< / div > <!-- fragment -->
< / div >
< / div >
2019-08-08 11:28:50 +02:00
< a id = "a9f4e1777bfbff72f098d7e1a7623a3c5" > < / a >
< h2 class = "memtitle" > < span class = "permalink" > < a href = "#a9f4e1777bfbff72f098d7e1a7623a3c5" > ◆ < / a > < / span > make_request()< / h2 >
2019-08-05 22:01:08 +02:00
< div class = "memitem" >
< div class = "memproto" >
< table class = "mlabels" >
< tr >
< td class = "mlabels-left" >
< table class = "memname" >
< tr >
< td class = "memname" > const string remwharead::URI::make_request < / td >
< td > (< / td >
< td class = "paramtype" > const string &   < / td >
2019-08-08 11:28:50 +02:00
< td class = "paramname" > < em > uri< / em > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > bool  < / td >
< td class = "paramname" > < em > archive< / em > = < code > false< / code >   < / td >
< / tr >
< tr >
< td > < / td >
< td > )< / td >
< td > < / td > < td > const< / td >
2019-08-05 22:01:08 +02:00
< / tr >
< / table >
< / td >
< td class = "mlabels-right" >
< span class = "mlabels" > < span class = "mlabel" > protected< / span > < / span > < / td >
< / tr >
< / table >
< / div > < div class = "memdoc" >
< p > Make an HTTP(S) request. < / p >
2019-08-08 11:28:50 +02:00
< dl class = "section since" > < dt > Since< / dt > < dd > 0.6.0 < / dd > < / dl >
2019-09-20 21:35:37 +02:00
< div class = "fragment" > < div class = "line" > < a name = "l00132" > < / a > < span class = "lineno" > 132< / span >   {< / div > < div class = "line" > < a name = "l00133" > < / a > < span class = "lineno" > 133< / span >   Poco::URI poco_uri(uri);< / div > < div class = "line" > < a name = "l00134" > < / a > < span class = "lineno" > 134< / span >   < span class = "keywordtype" > string< / span > method =< / div > < div class = "line" > < a name = "l00135" > < / a > < span class = "lineno" > 135< / span >   < a class = "code" href = "classremwharead_1_1URI.html#a4252486ed37ea89083b87dbdb0763e45" > archive< / a > ? HTTPRequest::HTTP_HEAD : HTTPRequest::HTTP_GET;< / div > < div class = "line" > < a name = "l00136" > < / a > < span class = "lineno" > 136< / span >   < span class = "keywordtype" > string< / span > path = poco_uri.getPathAndQuery();< / div > < div class = "line" > < a name = "l00137" > < / a > < span class = "lineno" > 137< / span >   < span class = "keywordflow" > if< / span > (path.empty())< / div > < div class = "line" > < a name = "l00138" > < / a > < span class = "lineno" > 138< / span >   {< / div > < div class = "line" > < a name = "l00139" > < / a > < span class = "lineno" > 139< / span >   path = < span class = "stringliteral" > " /" < / span > ;< / div > < div class = "line" > < a name = "l00140" > < / a > < span class = "lineno" > 140< / span >   }< / div > < div class = "line" > < a name = "l00141" > < / a > < span class = "lineno" > 141< / span >   < / div > < div class = "line" > < a name = "l00142" > < / a > < span class = "lineno" > 142< / span >   unique_ptr< HTTPClientSession> session;< / div > < div class = "line" > < a name = "l00143" > < / a > < span class = "lineno" > 143< / span >   < span class = "keywordflow" > if< / span > (poco_uri.getScheme() == < span class = "stringliteral" > " https" < / span > )< / div > < div class = "line" > < a name = "l00144" > < / a > < span class = "lineno" > 144< / span >   {< / div > < div 
class = "line" > < a name = "l00145" > < / a > < span class = "lineno" > 145< / span >   session = make_unique< HTTPSClientSession> (poco_uri.getHost(),< / div > < div class = "line" > < a name = "l00146" > < / a > < span class = "lineno" > 146< / span >   poco_uri.getPort());< / div > < div class = "line" > < a name = "l00147" > < / a > < span class = "lineno" > 147< / span >   }< / div > < div class = "line" > < a name = "l00148" > < / a > < span class = "lineno" > 148< / span >   < span class = "keywordflow" > else< / span > < span class = "keywordflow" > if< / span > (poco_uri.getScheme() == < span class = "stringliteral" > " http" < / span > )< / div > < div class = "line" > < a name = "l00149" > < / a > < span class = "lineno" > 149< / span >   {< / div > < div class = "line" > < a name = "l00150" > < / a > < span class = "lineno" > 150< / span >   session = make_unique< HTTPClientSession> (poco_uri.getHost(),< / div > < div class = "line" > < a name = "l00151" > < / a > < span class = "lineno" > 151< / span >   poco_uri.getPort());< / div > < div class = "line" > < a name = "l00152" > < / a > < span class = "lineno" > 152< / span >   }< / div > < div class = "line" > < a name = "l00153" > < / a > < span class = "lineno" > 153< / span >   < span class = "keywordflow" > else< / span > < / div > < div class = "line" > < a name = "l00154" > < / a > < span class = "lineno" > 154< / span >   {< / div > < div class = "line" > < a name = "l00155" > < / a > < span class = "lineno" > 155< / span >   < span class = "keywordflow" > throw< / span > Poco::Exception(< span class = "stringliteral" > " Protocol not supported." 
< / span > );< / div > < div class = "line" > < a name = "l00156" > < / a > < span class = "lineno" > 156< / span >   }< / div > < div class = "line" > < a name = "l00157" > < / a > < span class = "lineno" > 157< / span >   < / div > < div class = "line" > < a name = "l00158" > < / a > < span class = "lineno" > 158< / span >   HTTPRequest request(method, path, HTTPMessage::HTTP_1_1);< / div > < div class = "line" > < a name = "l00159" > < / a > < span class = "lineno" > 159< / span >   request.set(< span class = "stringliteral" > " User-Agent" < / span > , < span class = "keywordtype" > string< / span > (< span class = "stringliteral" > " remwharead/" < / span > ) + global::version);< / div > < div class = "line" > < a name = "l00160" > < / a > < span class = "lineno" > 160< / span >   < / div > < div class = "line" > < a n
< div class = "ttc" id = "classremwharead_1_1URI_html_a4252486ed37ea89083b87dbdb0763e45" > < div class = "ttname" > < a href = "classremwharead_1_1URI.html#a4252486ed37ea89083b87dbdb0763e45" > remwharead::URI::archive< / a > < / div > < div class = "ttdeci" > const archive_answer archive()< / div > < div class = "ttdoc" > Save URI in archive and return archive-URI.< / div > < div class = "ttdef" > < b > Definition:< / b > uri.cpp:598< / div > < / div >
2019-07-28 02:30:30 +02:00
< / div > <!-- fragment -->
< / div >
< / div >
< a id = "a8b340b13ccf0bc3ae9059872ce48e06a" > < / a >
< h2 class = "memtitle" > < span class = "permalink" > < a href = "#a8b340b13ccf0bc3ae9059872ce48e06a" > ◆ < / a > < / span > remove_html_tags()< / h2 >
< div class = "memitem" >
< div class = "memproto" >
< table class = "mlabels" >
< tr >
< td class = "mlabels-left" >
< table class = "memname" >
< tr >
< td class = "memname" > const string remwharead::URI::remove_html_tags < / td >
< td > (< / td >
< td class = "paramtype" > const string &   < / td >
< td class = "paramname" > < em > html< / em > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > const string &   < / td >
< td class = "paramname" > < em > tag< / em > = < code > " " < / code >   < / td >
< / tr >
< tr >
< td > < / td >
< td > )< / td >
< td > < / td > < td > < / td >
< / tr >
< / table >
< / td >
< td class = "mlabels-right" >
< span class = "mlabels" > < span class = "mlabel" > protected< / span > < / span > < / td >
< / tr >
< / table >
< / div > < div class = "memdoc" >
< p > Remove HTML tags. < / p >
< dl class = "params" > < dt > Parameters< / dt > < dd >
< table class = "params" >
< tr > < td class = "paramname" > html< / td > < td > HTML page. < / td > < / tr >
2019-08-08 11:28:50 +02:00
< tr > < td class = "paramname" > tag< / td > < td > If set, only remove this tag.< / td > < / tr >
2019-07-28 02:30:30 +02:00
< / table >
< / dd >
< / dl >
2019-08-08 11:28:50 +02:00
< dl class = "section since" > < dt > Since< / dt > < dd > 0.6.0 < / dd > < / dl >
2019-09-20 21:35:37 +02:00
< div class = "fragment" > < div class = "line" > < a name = "l00261" > < / a > < span class = "lineno" > 261< / span >   {< / div > < div class = "line" > < a name = "l00262" > < / a > < span class = "lineno" > 262< / span >   < span class = "comment" > // NOTE: I did this with regex_replace before, but libstdc++ segfaulted.< / span > < / div > < div class = "line" > < a name = "l00263" > < / a > < span class = "lineno" > 263< / span >   < span class = "keywordtype" > string< / span > out;< / div > < div class = "line" > < a name = "l00264" > < / a > < span class = "lineno" > 264< / span >   < span class = "keywordflow" > if< / span > (tag.empty())< / div > < div class = "line" > < a name = "l00265" > < / a > < span class = "lineno" > 265< / span >   {< / div > < div class = "line" > < a name = "l00266" > < / a > < span class = "lineno" > 266< / span >   < span class = "keywordtype" > size_t< / span > pos = 0;< / div > < div class = "line" > < a name = "l00267" > < / a > < span class = "lineno" > 267< / span >   < span class = "keywordflow" > while< / span > (pos != std::string::npos)< / div > < div class = "line" > < a name = "l00268" > < / a > < span class = "lineno" > 268< / span >   {< / div > < div class = "line" > < a name = "l00269" > < / a > < span class = "lineno" > 269< / span >   < span class = "keywordtype" > size_t< / span > startpos = html.find(< span class = "charliteral" > ' < ' < / span > , pos);< / div > < div class = "line" > < a name = "l00270" > < / a > < span class = "lineno" > 270< / span >   < span class = "keywordtype" > size_t< / span > endpos = html.find(< span class = "charliteral" > ' > ' < / span > , startpos);< / div > < div class = "line" > < a name = "l00271" > < / a > < span class = "lineno" > 271< / span >   out += html.substr(pos, startpos - pos);< / div > < div class = "line" > < a name = "l00272" > < / a > < span class = "lineno" > 272< / span >   pos = endpos;< / div > < div class = "line" > < a name = "l00273" > < / a > < 
span class = "lineno" > 273< / span >   < span class = "keywordflow" > if< / span > (pos != std::string::npos)< / div > < div class = "line" > < a name = "l00274" > < / a > < span class = "lineno" > 274< / span >   {< / div > < div class = "line" > < a name = "l00275" > < / a > < span class = "lineno" > 275< / span >   ++pos;< / div > < div class = "line" > < a name = "l00276" > < / a > < span class = "lineno" > 276< / span >   }< / div > < div class = "line" > < a name = "l00277" > < / a > < span class = "lineno" > 277< / span >   }< / div > < div class = "line" > < a name = "l00278" > < / a > < span class = "lineno" > 278< / span >   }< / div > < div class = "line" > < a name = "l00279" > < / a > < span class = "lineno" > 279< / span >   < span class = "keywordflow" > else< / span > < / div > < div class = "line" > < a name = "l00280" > < / a > < span class = "lineno" > 280< / span >   {< / div > < div class = "line" > < a name = "l00281" > < / a > < span class = "lineno" > 281< / span >   < span class = "keywordtype" > size_t< / span > pos = 0;< / div > < div class = "line" > < a name = "l00282" > < / a > < span class = "lineno" > 282< / span >   out = html;< / div > < div class = "line" > < a name = "l00283" > < / a > < span class = "lineno" > 283< / span >   < span class = "keywordflow" > while< / span > ((pos = out.find(< span class = "stringliteral" > " < " < / span > + tag)) != std::string::npos)< / div > < div class = "line" > < a name = "l00284" > < / a > < span class = "lineno" > 284< / span >   {< / div > < div class = "line" > < a name = "l00285" > < / a > < span class = "lineno" > 285< / span >   < span class = "keywordtype" > size_t< / span > endpos = out.find(< span class = "stringliteral" > " < /" < / span > + tag, pos);< / div > < div class = "line" > < a name = "l00286" > < / a > < span class = "lineno" > 286< / span >   < span class = "keywordflow" > if< / span > (endpos == std::string::npos)< / div > < div class = "line" > < a name = "l00287" > 
< / a > < span class = "lineno" > 287< / span >   {< / div > < div class = "line" > < a name = "l00288" > < / a > < span class = "lineno" > 288< / span >   < span class = "keywordflow" > break< / span > ;< / div > < div class = "line" > < a name = "l00289" > < / a > < span class = "lineno" > 289< / span >   }< / div > < div class = "line" > < a name = "l00290" > < / a > < span class = "lineno" > 290< / span >   endpos += 3 + tag.length(); < span class
2019-07-28 02:30:30 +02:00
< / div >
< / div >
< a id = "a9373cb28de198ae2db624980273ece4a" > < / a >
< h2 class = "memtitle" > < span class = "permalink" > < a href = "#a9373cb28de198ae2db624980273ece4a" > ◆ < / a > < / span > remove_newlines()< / h2 >
< div class = "memitem" >
< div class = "memproto" >
< table class = "mlabels" >
< tr >
< td class = "mlabels-left" >
< table class = "memname" >
< tr >
< td class = "memname" > const string remwharead::URI::remove_newlines < / td >
< td > (< / td >
< td class = "paramtype" > string  < / td >
< td class = "paramname" > < em > text< / em > < / td > < td > )< / td >
< td > < / td >
< / tr >
< / table >
< / td >
< td class = "mlabels-right" >
< span class = "mlabels" > < span class = "mlabel" > protected< / span > < / span > < / td >
< / tr >
< / table >
< / div > < div class = "memdoc" >
< p > Replace newlines with spaces. < / p >
2019-08-08 11:28:50 +02:00
< dl class = "section since" > < dt > Since< / dt > < dd > 0.6.0 < / dd > < / dl >
2019-09-20 21:35:37 +02:00
< div class = "fragment" > < div class = "line" > < a name = "l00624" > < / a > < span class = "lineno" > 624< / span >   {< / div > < div class = "line" > < a name = "l00625" > < / a > < span class = "lineno" > 625< / span >   < span class = "keywordtype" > size_t< / span > posn = 0;< / div > < div class = "line" > < a name = "l00626" > < / a > < span class = "lineno" > 626< / span >   < span class = "keywordflow" > while< / span > ((posn = text.find(< span class = "charliteral" > ' \n' < / span > , posn)) != std::string::npos)< / div > < div class = "line" > < a name = "l00627" > < / a > < span class = "lineno" > 627< / span >   {< / div > < div class = "line" > < a name = "l00628" > < / a > < span class = "lineno" > 628< / span >   text.replace(posn, 1, < span class = "stringliteral" > " " < / span > );< / div > < div class = "line" > < a name = "l00629" > < / a > < span class = "lineno" > 629< / span >   < / div > < div class = "line" > < a name = "l00630" > < / a > < span class = "lineno" > 630< / span >   < span class = "keywordtype" > size_t< / span > posr = posn - 1;< / div > < div class = "line" > < a name = "l00631" > < / a > < span class = "lineno" > 631< / span >   < span class = "keywordflow" > if< / span > (text[posr] == < span class = "charliteral" > ' \r' < / span > )< / div > < div class = "line" > < a name = "l00632" > < / a > < span class = "lineno" > 632< / span >   {< / div > < div class = "line" > < a name = "l00633" > < / a > < span class = "lineno" > 633< / span >   text.replace(posr, 1, < span class = "stringliteral" > " " < / span > );< / div > < div class = "line" > < a name = "l00634" > < / a > < span class = "lineno" > 634< / span >   }< / div > < div class = "line" > < a name = "l00635" > < / a > < span class = "lineno" > 635< / span >   ++posn;< / div > < div class = "line" > < a name = "l00636" > < / a > < span class = "lineno" > 636< / span >   }< / div > < div class = "line" > < a name = "l00637" > < / a > < span class = "lineno" > 
637< / span >   < / div > < div class = "line" > < a name = "l00638" > < / a > < span class = "lineno" > 638< / span >   < span class = "keywordflow" > return< / span > text;< / div > < div class = "line" > < a name = "l00639" > < / a > < span class = "lineno" > 639< / span >   }< / div > < / div > <!-- fragment -->
2019-07-28 02:30:30 +02:00
< / div >
< / div >
< a id = "ad6ad5351ecf2983e01f9f4a51c2057a5" > < / a >
< h2 class = "memtitle" > < span class = "permalink" > < a href = "#ad6ad5351ecf2983e01f9f4a51c2057a5" > ◆ < / a > < / span > strip_html()< / h2 >
< div class = "memitem" >
< div class = "memproto" >
< table class = "mlabels" >
< tr >
< td class = "mlabels-left" >
< table class = "memname" >
< tr >
< td class = "memname" > const string remwharead::URI::strip_html < / td >
< td > (< / td >
< td class = "paramtype" > const string &   < / td >
< td class = "paramname" > < em > html< / em > < / td > < td > )< / td >
< td > < / td >
< / tr >
< / table >
< / td >
< td class = "mlabels-right" >
< span class = "mlabels" > < span class = "mlabel" > protected< / span > < / span > < / td >
< / tr >
< / table >
< / div > < div class = "memdoc" >
< p > Removes HTML tags and superfluous spaces from an HTML page. < / p >
2019-08-08 11:28:50 +02:00
< dl class = "section since" > < dt > Since< / dt > < dd > 0.6.0 < / dd > < / dl >
2019-09-20 21:35:37 +02:00
< div class = "fragment" > < div class = "line" > < a name = "l00240" > < / a > < span class = "lineno" > 240< / span >   {< / div > < div class = "line" > < a name = "l00241" > < / a > < span class = "lineno" > 241< / span >   < span class = "keywordtype" > string< / span > out;< / div > < div class = "line" > < a name = "l00242" > < / a > < span class = "lineno" > 242< / span >   < / div > < div class = "line" > < a name = "l00243" > < / a > < span class = "lineno" > 243< / span >   out = < a class = "code" href = "classremwharead_1_1URI.html#a8b340b13ccf0bc3ae9059872ce48e06a" > remove_html_tags< / a > (html, < span class = "stringliteral" > " script" < / span > ); < span class = "comment" > // Remove JavaScript.< / span > < / div > < div class = "line" > < a name = "l00244" > < / a > < span class = "lineno" > 244< / span >   out = < a class = "code" href = "classremwharead_1_1URI.html#a8b340b13ccf0bc3ae9059872ce48e06a" > remove_html_tags< / a > (out, < span class = "stringliteral" > " style" < / span > ); < span class = "comment" > // Remove CSS.< / span > < / div > < div class = "line" > < a name = "l00245" > < / a > < span class = "lineno" > 245< / span >   out = < a class = "code" href = "classremwharead_1_1URI.html#a8b340b13ccf0bc3ae9059872ce48e06a" > remove_html_tags< / a > (out); < span class = "comment" > // Remove tags.< / span > < / div > < div class = "line" > < a name = "l00246" > < / a > < span class = "lineno" > 246< / span >   < / div > < div class = "line" > < a name = "l00247" > < / a > < span class = "lineno" > 247< / span >   < span class = "keywordtype" > size_t< / span > pos = 0;< / div > < div class = "line" > < a name = "l00248" > < / a > < span class = "lineno" > 248< / span >   < span class = "keywordflow" > while< / span > ((pos = out.find(< span class = "stringliteral" > " \r" < / span > , pos)) != std::string::npos) < span class = "comment" > // Remove CR.< / span > < / div > < div class = "line" > < a name = "l00249" > < / a > < span 
class = "lineno" > 249< / span >   {< / div > < div class = "line" > < a name = "l00250" > < / a > < span class = "lineno" > 250< / span >   out.replace(pos, 1, < span class = "stringliteral" > " " < / span > );< / div > < div class = "line" > < a name = "l00251" > < / a > < span class = "lineno" > 251< / span >   }< / div > < div class = "line" > < a name = "l00252" > < / a > < span class = "lineno" > 252< / span >   < / div > < div class = "line" > < a name = "l00253" > < / a > < span class = "lineno" > 253< / span >   < span class = "comment" > // Remove whitespace at eol.< / span > < / div > < div class = "line" > < a name = "l00254" > < / a > < span class = "lineno" > 254< / span >   RegEx(< span class = "stringliteral" > " \\s+\n" < / span > ).subst(out, < span class = "stringliteral" > " \n" < / span > , RegEx::RE_GLOBAL);< / div > < div class = "line" > < a name = "l00255" > < / a > < span class = "lineno" > 255< / span >   RegEx(< span class = "stringliteral" > " \n{2,}" < / span > ).subst(out, < span class = "stringliteral" > " \n" < / span > , RegEx::RE_GLOBAL); < span class = "comment" > // Reduce newlines.< / span > < / div > < div class = "line" > < a name = "l00256" > < / a > < span class = "lineno" > 256< / span >   < / div > < div class = "line" > < a name = "l00257" > < / a > < span class = "lineno" > 257< / span >   < span class = "keywordflow" > return< / span > < a class = "code" href = "classremwharead_1_1URI.html#ac98523e5fb23ca4adab57a7caa473eaa" > unescape_html< / a > (out);< / div > < div class = "line" > < a name = "l00258" > < / a > < span class = "lineno" > 258< / span >   }< / div > < div class = "ttc" id = "classremwharead_1_1URI_html_ac98523e5fb23ca4adab57a7caa473eaa" > < div class = "ttname" > < a href = "classremwharead_1_1URI.html#ac98523e5fb23ca4adab57a7caa473eaa" > remwharead::URI::unescape_html< / a > < / div > < div class = "ttdeci" > const string unescape_html(string html)< / div > < div class = "ttdoc" > Convert HTML 
entities to UTF-8.< / div > < div class = "ttdef" > < b > Definition:< / b > uri.cpp:298< / div > < / div >
< div class = "ttc" id = "classremwharead_1_1URI_html_a8b340b13ccf0bc3ae9059872ce48e06a" > < div class = "ttname" > < a href = "classremwharead_1_1URI.html#a8b340b13ccf0bc3ae9059872ce48e06a" > remwharead::URI::remove_html_tags< / a > < / div > < div class = "ttdeci" > const string remove_html_tags(const string & html, const string & tag=" " )< / div > < div class = "ttdoc" > Remove HTML tags.< / div > < div class = "ttdef" > < b > Definition:< / b > uri.cpp:260< / div > < / div >
2019-07-28 02:30:30 +02:00
< / div > <!-- fragment -->
< / div >
< / div >
2019-09-20 21:35:37 +02:00
< a id = "ac98523e5fb23ca4adab57a7caa473eaa" > < / a >
< h2 class = "memtitle" > < span class = "permalink" > < a href = "#ac98523e5fb23ca4adab57a7caa473eaa" > ◆ < / a > < / span > unescape_html()< / h2 >
2019-07-28 02:30:30 +02:00
< div class = "memitem" >
< div class = "memproto" >
< table class = "mlabels" >
< tr >
< td class = "mlabels-left" >
< table class = "memname" >
< tr >
< td class = "memname" > const string remwharead::URI::unescape_html < / td >
< td > (< / td >
2019-09-20 21:35:37 +02:00
< td class = "paramtype" > string  < / td >
2019-07-28 02:30:30 +02:00
< td class = "paramname" > < em > html< / em > < / td > < td > )< / td >
< td > < / td >
< / tr >
< / table >
< / td >
< td class = "mlabels-right" >
< span class = "mlabels" > < span class = "mlabel" > protected< / span > < / span > < / td >
< / tr >
< / table >
< / div > < div class = "memdoc" >
< p > Convert HTML entities to UTF-8. < / p >
2019-08-08 11:28:50 +02:00
< dl class = "section since" > < dt > Since< / dt > < dd > 0.6.0 < / dd > < / dl >
2019-09-20 21:35:37 +02:00
< div class = "fragment" > < div class = "line" > < a name = "l00299" > < / a > < span class = "lineno" > 299< / span >   {< / div > < div class = "line" > < a name = "l00300" > < / a > < span class = "lineno" > 300< / span >   < span class = "comment" > // Used to convert int to utf-8 char.< / span > < / div > < div class = "line" > < a name = "l00301" > < / a > < span class = "lineno" > 301< / span >   std::wstring_convert< std::codecvt_utf8< char32_t> , char32_t> u8c;< / div > < div class = "line" > < a name = "l00302" > < / a > < span class = "lineno" > 302< / span >   < span class = "keyword" > const< / span > RegEx re_entity(< span class = "stringliteral" > " & #(x)?([[:alnum:]]{1,8});" < / span > );< / div > < div class = "line" > < a name = "l00303" > < / a > < span class = "lineno" > 303< / span >   RegEx::MatchVec matches;< / div > < div class = "line" > < a name = "l00304" > < / a > < span class = "lineno" > 304< / span >   string::size_type pos = 0;< / div > < div class = "line" > < a name = "l00305" > < / a > < span class = "lineno" > 305< / span >   < / div > < div class = "line" > < a name = "l00306" > < / a > < span class = "lineno" > 306< / span >   < span class = "keywordflow" > while< / span > (re_entity.match(html, pos, matches) != 0)< / div > < div class = "line" > < a name = "l00307" > < / a > < span class = "lineno" > 307< / span >   {< / div > < div class = "line" > < a name = "l00308" > < / a > < span class = "lineno" > 308< / span >   char32_t codepoint = 0;< / div > < div class = "line" > < a name = "l00309" > < / a > < span class = "lineno" > 309< / span >   < span class = "keyword" > const< / span > < span class = "keywordtype" > string< / span > number = html.substr(matches[2].offset,< / div > < div class = "line" > < a name = "l00310" > < / a > < span class = "lineno" > 310< / span >   matches[2].length);< / div > < div class = "line" > < a name = "l00311" > < / a > < span class = "lineno" > 311< / span >   < span class = "comment" > 
// ' x' in front of the number means it' s hexadecimal, else decimal.< / span > < / div > < div class = "line" > < a name = "l00312" > < / a > < span class = "lineno" > 312< / span >   < span class = "keywordflow" > if< / span > (matches[1].length != 0)< / div > < div class = "line" > < a name = "l00313" > < / a > < span class = "lineno" > 313< / span >   {< / div > < div class = "line" > < a name = "l00314" > < / a > < span class = "lineno" > 314< / span >   codepoint = std::stoi(number, < span class = "keyword" > nullptr< / span > , 16);< / div > < div class = "line" > < a name = "l00315" > < / a > < span class = "lineno" > 315< / span >   }< / div > < div class = "line" > < a name = "l00316" > < / a > < span class = "lineno" > 316< / span >   < span class = "keywordflow" > else< / span > < / div > < div class = "line" > < a name = "l00317" > < / a > < span class = "lineno" > 317< / span >   {< / div > < div class = "line" > < a name = "l00318" > < / a > < span class = "lineno" > 318< / span >   codepoint = std::stoi(number, < span class = "keyword" > nullptr< / span > , 10);< / div > < div class = "line" > < a name = "l00319" > < / a > < span class = "lineno" > 319< / span >   }< / div > < div class = "line" > < a name = "l00320" > < / a > < span class = "lineno" > 320< / span >   < span class = "keyword" > const< / span > < span class = "keywordtype" > string< / span > unicode = u8c.to_bytes(codepoint);< / div > < div class = "line" > < a name = "l00321" > < / a > < span class = "lineno" > 321< / span >   html.replace(matches[0].offset, matches[0].length, unicode);< / div > < div class = "line" > < a name = "l00322" > < / a > < span class = "lineno" > 322< / span >   pos = matches[0].offset + unicode.length();< / div > < div class = "line" > < a name = "l00323" > < / a > < span class = "lineno" > 323< / span >   }< / div > < div class = "line" > < a name = "l00324" > < / a > < span class = "lineno" > 324< / span >   < / div > < div class = "line" > < a name = 
"l00325" > < / a > < span class = "lineno" > 325< / span >   < span class = "comment" > // Source: https://en.wikipedia.org/wiki/List_of_XML_and_HTML_character_< / span > < / div > < div class = "line" > < a name = "l00326" > < / a > < span class = "lineno" > 326< / span >   < span class = "comment" > // entity_references#Character_entity_references_in_HTML< / span > < / div > < div class = "line" > < a name = "l00327" > < / a > < span class = "lineno" > 327< / span >   < span cla
2019-07-28 02:30:30 +02:00
< / div >
< / div >
2019-07-27 22:46:58 +02:00
< hr / > The documentation for this class was generated from the following files:< ul >
2019-08-05 22:01:08 +02:00
<li>include/<a class="el" href="uri_8hpp_source.html">uri.hpp</a></li>
2019-07-27 22:46:58 +02:00
<li>src/lib/uri.cpp</li>
< / ul >
< / div > <!-- contents -->
<!-- start footer part -->
< hr class = "footer" / > < address class = "footer" > < small >
Generated by   < a href = "https://www.doxygen.org/index.html" >
< img class = "footer" src = "doxygen.png" alt = "doxygen" / >
2019-08-08 22:13:06 +02:00
< / a > 1.8.15
2019-07-27 22:46:58 +02:00
< / small > < / address >
< / body >
< / html >