remwharead/docs/classremwharead_1_1URI.html

502 lines
114 KiB
HTML
Raw Normal View History

2019-08-08 22:13:06 +02:00
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "https://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
2019-07-27 22:46:58 +02:00
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
<meta http-equiv="X-UA-Compatible" content="IE=9"/>
2019-08-08 22:13:06 +02:00
<meta name="generator" content="Doxygen 1.8.15"/>
2019-07-27 22:46:58 +02:00
<meta name="viewport" content="width=device-width, initial-scale=1"/>
<title>remwharead: remwharead::URI Class Reference</title>
<link href="tabs.css" rel="stylesheet" type="text/css"/>
<script type="text/javascript" src="jquery.js"></script>
<script type="text/javascript" src="dynsections.js"></script>
<link href="search/search.css" rel="stylesheet" type="text/css"/>
<script type="text/javascript" src="search/searchdata.js"></script>
<script type="text/javascript" src="search/search.js"></script>
<link href="doxygen.css" rel="stylesheet" type="text/css" />
</head>
<body>
<div id="top"><!-- do not remove this div, it is closed by doxygen! -->
<div id="titlearea">
<table cellspacing="0" cellpadding="0">
<tbody>
<tr style="height: 56px;">
<td id="projectalign" style="padding-left: 0.5em;">
<div id="projectname">remwharead
2019-09-23 00:14:17 +02:00
&#160;<span id="projectnumber">0.8.4</span>
2019-07-27 22:46:58 +02:00
</div>
</td>
</tr>
</tbody>
</table>
</div>
<!-- end header part -->
2019-08-08 22:13:06 +02:00
<!-- Generated by Doxygen 1.8.15 -->
2019-07-27 22:46:58 +02:00
<script type="text/javascript">
/* @license magnet:?xt=urn:btih:cf05388f2679ee054f2beb29a391d25f4e673ac3&amp;dn=gpl-2.0.txt GPL-v2 */
var searchBox = new SearchBox("searchBox", "search",false,'Search');
/* @license-end */
</script>
<script type="text/javascript" src="menudata.js"></script>
<script type="text/javascript" src="menu.js"></script>
<script type="text/javascript">
/* @license magnet:?xt=urn:btih:cf05388f2679ee054f2beb29a391d25f4e673ac3&amp;dn=gpl-2.0.txt GPL-v2 */
$(function() {
initMenu('',true,false,'search.php','Search');
$(document).ready(function() { init_search(); });
});
/* @license-end */</script>
<div id="main-nav"></div>
<!-- window showing the filter options -->
<div id="MSearchSelectWindow"
onmouseover="return searchBox.OnSearchSelectShow()"
onmouseout="return searchBox.OnSearchSelectHide()"
onkeydown="return searchBox.OnSearchSelectKey(event)">
</div>
<!-- iframe showing the search results (closed by default) -->
<div id="MSearchResultsWindow">
<iframe src="javascript:void(0)" frameborder="0"
name="MSearchResults" id="MSearchResults">
</iframe>
</div>
<div id="nav-path" class="navpath">
<ul>
<li class="navelem"><b>remwharead</b></li><li class="navelem"><a class="el" href="classremwharead_1_1URI.html">URI</a></li> </ul>
</div>
</div><!-- top -->
<div class="header">
<div class="summary">
<a href="#pub-methods">Public Member Functions</a> &#124;
<a href="#pro-methods">Protected Member Functions</a> &#124;
<a href="#pro-attribs">Protected Attributes</a> &#124;
<a href="classremwharead_1_1URI-members.html">List of all members</a> </div>
<div class="headertitle">
<div class="title">remwharead::URI Class Reference</div> </div>
</div><!--header-->
<div class="contents">
2019-07-28 02:30:30 +02:00
<p>Download, archive and process a URI.
<a href="classremwharead_1_1URI.html#details">More...</a></p>
2019-08-05 23:28:43 +02:00
<p><code>#include &lt;<a class="el" href="uri_8hpp_source.html">remwharead/uri.hpp</a>&gt;</code></p>
2019-07-27 22:46:58 +02:00
<table class="memberdecls">
<tr class="heading"><td colspan="2"><h2 class="groupheader"><a name="pub-methods"></a>
Public Member Functions</h2></td></tr>
2019-09-30 16:09:31 +02:00
<tr class="memitem:a34633f88b14dcd2c3a618794040bc154"><td class="memItemLeft" align="right" valign="top">&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="classremwharead_1_1URI.html#a34633f88b14dcd2c3a618794040bc154">URI</a> (string uri)</td></tr>
<tr class="memdesc:a34633f88b14dcd2c3a618794040bc154"><td class="mdescLeft">&#160;</td><td class="mdescRight">Construct object and set URL. <a href="#a34633f88b14dcd2c3a618794040bc154">More...</a><br /></td></tr>
<tr class="separator:a34633f88b14dcd2c3a618794040bc154"><td class="memSeparator" colspan="2">&#160;</td></tr>
<tr class="memitem:a232fb7afd928e7d531924e1de41d141c"><td class="memItemLeft" align="right" valign="top"><a id="a232fb7afd928e7d531924e1de41d141c"></a>
&#160;</td><td class="memItemRight" valign="bottom"><b>URI</b> (const <a class="el" href="classremwharead_1_1URI.html">URI</a> &amp;other)=default</td></tr>
<tr class="separator:a232fb7afd928e7d531924e1de41d141c"><td class="memSeparator" colspan="2">&#160;</td></tr>
<tr class="memitem:a412348c5f7fe88a193e94e1463aef7a5"><td class="memItemLeft" align="right" valign="top"><a id="a412348c5f7fe88a193e94e1463aef7a5"></a>
<a class="el" href="classremwharead_1_1URI.html">URI</a> &amp;&#160;</td><td class="memItemRight" valign="bottom"><b>operator=</b> (const <a class="el" href="classremwharead_1_1URI.html">URI</a> &amp;other)=default</td></tr>
<tr class="separator:a412348c5f7fe88a193e94e1463aef7a5"><td class="memSeparator" colspan="2">&#160;</td></tr>
<tr class="memitem:afd5bb3d2ba39246a9ab5e6fc30040d46"><td class="memItemLeft" align="right" valign="top"><a id="afd5bb3d2ba39246a9ab5e6fc30040d46"></a>
&#160;</td><td class="memItemRight" valign="bottom"><b>URI</b> (<a class="el" href="classremwharead_1_1URI.html">URI</a> &amp;&amp;other)=default</td></tr>
<tr class="separator:afd5bb3d2ba39246a9ab5e6fc30040d46"><td class="memSeparator" colspan="2">&#160;</td></tr>
<tr class="memitem:a02a4bd54312b39219d0c51371978518f"><td class="memItemLeft" align="right" valign="top"><a id="a02a4bd54312b39219d0c51371978518f"></a>
<a class="el" href="classremwharead_1_1URI.html">URI</a> &amp;&#160;</td><td class="memItemRight" valign="bottom"><b>operator=</b> (<a class="el" href="classremwharead_1_1URI.html">URI</a> &amp;&amp;other)=default</td></tr>
<tr class="separator:a02a4bd54312b39219d0c51371978518f"><td class="memSeparator" colspan="2">&#160;</td></tr>
<tr class="memitem:a32de53a91487e5cc68550d1479cbd081"><td class="memItemLeft" align="right" valign="top"><a class="el" href="structremwharead_1_1html__extract.html">html_extract</a>&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="classremwharead_1_1URI.html#a32de53a91487e5cc68550d1479cbd081">get</a> ()</td></tr>
<tr class="memdesc:a32de53a91487e5cc68550d1479cbd081"><td class="mdescLeft">&#160;</td><td class="mdescRight">Download URI and extract title, description and full text. <a href="#a32de53a91487e5cc68550d1479cbd081">More...</a><br /></td></tr>
<tr class="separator:a32de53a91487e5cc68550d1479cbd081"><td class="memSeparator" colspan="2">&#160;</td></tr>
<tr class="memitem:a20f272b5b1638269e5ef1bec69a7032e"><td class="memItemLeft" align="right" valign="top"><a class="el" href="structremwharead_1_1archive__answer.html">archive_answer</a>&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="classremwharead_1_1URI.html#a20f272b5b1638269e5ef1bec69a7032e">archive</a> ()</td></tr>
<tr class="memdesc:a20f272b5b1638269e5ef1bec69a7032e"><td class="mdescLeft">&#160;</td><td class="mdescRight">Save URI in archive and return archive-URI. <a href="#a20f272b5b1638269e5ef1bec69a7032e">More...</a><br /></td></tr>
<tr class="separator:a20f272b5b1638269e5ef1bec69a7032e"><td class="memSeparator" colspan="2">&#160;</td></tr>
2019-07-27 22:46:58 +02:00
</table><table class="memberdecls">
<tr class="heading"><td colspan="2"><h2 class="groupheader"><a name="pro-methods"></a>
Protected Member Functions</h2></td></tr>
2019-09-30 16:09:31 +02:00
<tr class="memitem:ad13540201da297653f5a8608e3b0df07"><td class="memItemLeft" align="right" valign="top">string&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="classremwharead_1_1URI.html#ad13540201da297653f5a8608e3b0df07">make_request</a> (const string &amp;uri, bool <a class="el" href="classremwharead_1_1URI.html#a20f272b5b1638269e5ef1bec69a7032e">archive</a>=false) const</td></tr>
<tr class="memdesc:ad13540201da297653f5a8608e3b0df07"><td class="mdescLeft">&#160;</td><td class="mdescRight">Make an HTTP(S) request. <a href="#ad13540201da297653f5a8608e3b0df07">More...</a><br /></td></tr>
<tr class="separator:ad13540201da297653f5a8608e3b0df07"><td class="memSeparator" colspan="2">&#160;</td></tr>
<tr class="memitem:aa67d8399d1637c09ba59e1af020055b7"><td class="memItemLeft" align="right" valign="top">string&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="classremwharead_1_1URI.html#aa67d8399d1637c09ba59e1af020055b7">extract_title</a> (const string &amp;html)</td></tr>
<tr class="memdesc:aa67d8399d1637c09ba59e1af020055b7"><td class="mdescLeft">&#160;</td><td class="mdescRight">Extract the title from an HTML page. <a href="#aa67d8399d1637c09ba59e1af020055b7">More...</a><br /></td></tr>
<tr class="separator:aa67d8399d1637c09ba59e1af020055b7"><td class="memSeparator" colspan="2">&#160;</td></tr>
<tr class="memitem:a77b680aacab6dbcbfe7d357b60e87842"><td class="memItemLeft" align="right" valign="top">string&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="classremwharead_1_1URI.html#a77b680aacab6dbcbfe7d357b60e87842">extract_description</a> (const string &amp;html)</td></tr>
<tr class="memdesc:a77b680aacab6dbcbfe7d357b60e87842"><td class="mdescLeft">&#160;</td><td class="mdescRight">Extract the description from an HTML page. <a href="#a77b680aacab6dbcbfe7d357b60e87842">More...</a><br /></td></tr>
<tr class="separator:a77b680aacab6dbcbfe7d357b60e87842"><td class="memSeparator" colspan="2">&#160;</td></tr>
<tr class="memitem:afe80e4fafe35023c25d109517ffd9388"><td class="memItemLeft" align="right" valign="top">string&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="classremwharead_1_1URI.html#afe80e4fafe35023c25d109517ffd9388">strip_html</a> (const string &amp;html)</td></tr>
<tr class="memdesc:afe80e4fafe35023c25d109517ffd9388"><td class="mdescLeft">&#160;</td><td class="mdescRight">Removes HTML tags and superfluous spaces from an HTML page. <a href="#afe80e4fafe35023c25d109517ffd9388">More...</a><br /></td></tr>
<tr class="separator:afe80e4fafe35023c25d109517ffd9388"><td class="memSeparator" colspan="2">&#160;</td></tr>
<tr class="memitem:a8b02cabdb493ba0db7931b9f51c05590"><td class="memItemLeft" align="right" valign="top">string&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="classremwharead_1_1URI.html#a8b02cabdb493ba0db7931b9f51c05590">remove_html_tags</a> (const string &amp;html, const string &amp;tag=&quot;&quot;)</td></tr>
<tr class="memdesc:a8b02cabdb493ba0db7931b9f51c05590"><td class="mdescLeft">&#160;</td><td class="mdescRight">Remove HTML tags. <a href="#a8b02cabdb493ba0db7931b9f51c05590">More...</a><br /></td></tr>
<tr class="separator:a8b02cabdb493ba0db7931b9f51c05590"><td class="memSeparator" colspan="2">&#160;</td></tr>
<tr class="memitem:a870da973d8a91bcaab28aa27e7cc3888"><td class="memItemLeft" align="right" valign="top">string&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="classremwharead_1_1URI.html#a870da973d8a91bcaab28aa27e7cc3888">unescape_html</a> (string html)</td></tr>
<tr class="memdesc:a870da973d8a91bcaab28aa27e7cc3888"><td class="mdescLeft">&#160;</td><td class="mdescRight">Convert HTML entities to UTF-8. <a href="#a870da973d8a91bcaab28aa27e7cc3888">More...</a><br /></td></tr>
<tr class="separator:a870da973d8a91bcaab28aa27e7cc3888"><td class="memSeparator" colspan="2">&#160;</td></tr>
<tr class="memitem:a063a1f5a633c1016883f134215809d27"><td class="memItemLeft" align="right" valign="top">string&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="classremwharead_1_1URI.html#a063a1f5a633c1016883f134215809d27">remove_newlines</a> (string text)</td></tr>
<tr class="memdesc:a063a1f5a633c1016883f134215809d27"><td class="mdescLeft">&#160;</td><td class="mdescRight">Replace newlines with spaces. <a href="#a063a1f5a633c1016883f134215809d27">More...</a><br /></td></tr>
<tr class="separator:a063a1f5a633c1016883f134215809d27"><td class="memSeparator" colspan="2">&#160;</td></tr>
<tr class="memitem:a754c3e988f7d8890d6e9794bc1e69e2c"><td class="memItemLeft" align="right" valign="top">void&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="classremwharead_1_1URI.html#a754c3e988f7d8890d6e9794bc1e69e2c">set_proxy</a> ()</td></tr>
<tr class="memdesc:a754c3e988f7d8890d6e9794bc1e69e2c"><td class="mdescLeft">&#160;</td><td class="mdescRight">Set proxy server. <a href="#a754c3e988f7d8890d6e9794bc1e69e2c">More...</a><br /></td></tr>
<tr class="separator:a754c3e988f7d8890d6e9794bc1e69e2c"><td class="memSeparator" colspan="2">&#160;</td></tr>
2019-07-27 22:46:58 +02:00
</table><table class="memberdecls">
<tr class="heading"><td colspan="2"><h2 class="groupheader"><a name="pro-attribs"></a>
Protected Attributes</h2></td></tr>
<tr class="memitem:a6d76848066779348084046a63bdaedc0"><td class="memItemLeft" align="right" valign="top"><a id="a6d76848066779348084046a63bdaedc0"></a>
string&#160;</td><td class="memItemRight" valign="bottom"><b>_uri</b></td></tr>
<tr class="separator:a6d76848066779348084046a63bdaedc0"><td class="memSeparator" colspan="2">&#160;</td></tr>
</table>
2019-07-28 02:30:30 +02:00
<a name="details" id="details"></a><h2 class="groupheader">Detailed Description</h2>
<div class="textblock"><p>Download, archive and process a URI. </p>
2019-08-05 23:28:43 +02:00
<dl class="section since"><dt>Since</dt><dd>0.6.0 </dd></dl>
2019-07-28 02:30:30 +02:00
</div><h2 class="groupheader">Constructor &amp; Destructor Documentation</h2>
2019-09-30 16:09:31 +02:00
<a id="a34633f88b14dcd2c3a618794040bc154"></a>
<h2 class="memtitle"><span class="permalink"><a href="#a34633f88b14dcd2c3a618794040bc154">&#9670;&nbsp;</a></span>URI()</h2>
2019-07-28 02:30:30 +02:00
<div class="memitem">
<div class="memproto">
<table class="mlabels">
<tr>
<td class="mlabels-left">
<table class="memname">
<tr>
<td class="memname">remwharead::URI::URI </td>
<td>(</td>
2019-09-30 16:09:31 +02:00
<td class="paramtype">string&#160;</td>
2019-07-28 02:30:30 +02:00
<td class="paramname"><em>uri</em></td><td>)</td>
<td></td>
</tr>
</table>
</td>
<td class="mlabels-right">
<span class="mlabels"><span class="mlabel">explicit</span></span> </td>
</tr>
</table>
</div><div class="memdoc">
<p>Construct object and set URL. </p>
2019-08-08 11:28:50 +02:00
<p>Initializes TLS and sets proxy from the environment variable <code>http_proxy</code>, if possible.</p>
<dl class="section since"><dt>Since</dt><dd>0.6.0 </dd></dl>
2019-09-30 16:09:31 +02:00
<div class="fragment"><div class="line"><a name="l00067"></a><span class="lineno"> 67</span>&#160; :_uri(move(uri))</div><div class="line"><a name="l00068"></a><span class="lineno"> 68</span>&#160;{</div><div class="line"><a name="l00069"></a><span class="lineno"> 69</span>&#160; Poco::Net::initializeSSL();</div><div class="line"><a name="l00070"></a><span class="lineno"> 70</span>&#160;</div><div class="line"><a name="l00071"></a><span class="lineno"> 71</span>&#160; <a class="code" href="classremwharead_1_1URI.html#a754c3e988f7d8890d6e9794bc1e69e2c">set_proxy</a>();</div><div class="line"><a name="l00072"></a><span class="lineno"> 72</span>&#160;}</div><div class="ttc" id="classremwharead_1_1URI_html_a754c3e988f7d8890d6e9794bc1e69e2c"><div class="ttname"><a href="classremwharead_1_1URI.html#a754c3e988f7d8890d6e9794bc1e69e2c">remwharead::URI::set_proxy</a></div><div class="ttdeci">void set_proxy()</div><div class="ttdoc">Set proxy server.</div><div class="ttdef"><b>Definition:</b> uri.cpp:74</div></div>
</div><!-- fragment -->
2019-07-28 02:30:30 +02:00
</div>
</div>
<h2 class="groupheader">Member Function Documentation</h2>
2019-09-30 16:09:31 +02:00
<a id="a20f272b5b1638269e5ef1bec69a7032e"></a>
<h2 class="memtitle"><span class="permalink"><a href="#a20f272b5b1638269e5ef1bec69a7032e">&#9670;&nbsp;</a></span>archive()</h2>
2019-07-28 02:30:30 +02:00
<div class="memitem">
<div class="memproto">
<table class="memname">
<tr>
2019-09-30 16:09:31 +02:00
<td class="memname"><a class="el" href="structremwharead_1_1archive__answer.html">archive_answer</a> remwharead::URI::archive </td>
2019-07-28 02:30:30 +02:00
<td>(</td>
<td class="paramname"></td><td>)</td>
<td></td>
</tr>
</table>
</div><div class="memdoc">
<p>Save URI in archive and return archive-URI. </p>
2019-08-08 11:28:50 +02:00
<dl class="section since"><dt>Since</dt><dd>0.6.0 </dd></dl>
2019-09-30 16:09:31 +02:00
2019-07-28 02:30:30 +02:00
</div>
</div>
2019-09-30 16:09:31 +02:00
<a id="a77b680aacab6dbcbfe7d357b60e87842"></a>
<h2 class="memtitle"><span class="permalink"><a href="#a77b680aacab6dbcbfe7d357b60e87842">&#9670;&nbsp;</a></span>extract_description()</h2>
2019-07-28 02:30:30 +02:00
<div class="memitem">
<div class="memproto">
<table class="mlabels">
<tr>
<td class="mlabels-left">
<table class="memname">
<tr>
2019-09-30 16:09:31 +02:00
<td class="memname">string remwharead::URI::extract_description </td>
2019-07-28 02:30:30 +02:00
<td>(</td>
<td class="paramtype">const string &amp;&#160;</td>
<td class="paramname"><em>html</em></td><td>)</td>
<td></td>
</tr>
</table>
</td>
<td class="mlabels-right">
<span class="mlabels"><span class="mlabel">protected</span></span> </td>
</tr>
</table>
</div><div class="memdoc">
<p>Extract the description from an HTML page. </p>
2019-08-08 11:28:50 +02:00
<dl class="section since"><dt>Since</dt><dd>0.6.0 </dd></dl>
2019-09-30 16:09:31 +02:00
<div class="fragment"><div class="line"><a name="l00241"></a><span class="lineno"> 241</span>&#160;{</div><div class="line"><a name="l00242"></a><span class="lineno"> 242</span>&#160; <span class="keyword">const</span> RegEx re_htmlfile(<span class="stringliteral">&quot;.*\\.(.?html?|xml|rss)$&quot;</span>, RegEx::RE_CASELESS);</div><div class="line"><a name="l00243"></a><span class="lineno"> 243</span>&#160; <span class="keywordflow">if</span> (_uri.substr(0, 4) == <span class="stringliteral">&quot;http&quot;</span> || re_htmlfile.match(_uri))</div><div class="line"><a name="l00244"></a><span class="lineno"> 244</span>&#160; {</div><div class="line"><a name="l00245"></a><span class="lineno"> 245</span>&#160; <span class="keyword">const</span> RegEx re_desc(R<span class="stringliteral">&quot;(description&quot;[^&gt;]+content=&quot;([^&quot;]+))&quot;,</span></div><div class="line"><a name="l00246"></a><span class="lineno"> 246</span>&#160;<span class="stringliteral"> RegEx::RE_CASELESS);</span></div><div class="line"><a name="l00247"></a><span class="lineno"> 247</span>&#160;<span class="stringliteral"> vector&lt;string&gt; matches;</span></div><div class="line"><a name="l00248"></a><span class="lineno"> 248</span>&#160;<span class="stringliteral"> re_desc.split(html, matches);</span></div><div class="line"><a name="l00249"></a><span class="lineno"> 249</span>&#160;<span class="stringliteral"> </span><span class="keywordflow">if</span> (matches.size() &gt;= 2)</div><div class="line"><a name="l00250"></a><span class="lineno"> 250</span>&#160; {</div><div class="line"><a name="l00251"></a><span class="lineno"> 251</span>&#160; <span class="keywordflow">return</span> <a class="code" href="classremwharead_1_1URI.html#a063a1f5a633c1016883f134215809d27">remove_newlines</a>(<a class="code" href="classremwharead_1_1URI.html#a870da973d8a91bcaab28aa27e7cc3888">unescape_html</a>(matches[1]));</div><div class="line"><a name="l00252"></a><span class="lineno"> 252</span>&#160; 
}</div><div class="line"><a name="l00253"></a><span class="lineno"> 253</span>&#160; }</div><div class="line"><a name="l00254"></a><span class="lineno"> 254</span>&#160;</div><div class="line"><a name="l00255"></a><span class="lineno"> 255</span>&#160; <span class="keywordflow">return</span> <span class="stringliteral">&quot;&quot;</span>;</div><div class="line"><a name="l00256"></a><span class="lineno"> 256</span>&#160;}</div><div class="line"><a name="l00257"></a><span class="lineno"> 257</span>&#160;</div><div class="line"><a name="l00258"></a><span class="lineno"> 258</span>&#160;<span class="keywordtype">string</span> <a class="code" href="classremwharead_1_1URI.html#afe80e4fafe35023c25d109517ffd9388">URI::strip_html</a>(<span class="keyword">const</span> <span class="keywordtype">string</span> &amp;html)</div><div class="line"><a name="l00259"></a><span class="lineno"> 259</span>&#160;{</div><div class="line"><a name="l00260"></a><span class="lineno"> 260</span>&#160; <span class="keywordtype">string</span> out;</div><div class="line"><a name="l00261"></a><span class="lineno"> 261</span>&#160;</div><div class="line"><a name="l00262"></a><span class="lineno"> 262</span>&#160; out = <a class="code" href="classremwharead_1_1URI.html#a8b02cabdb493ba0db7931b9f51c05590">remove_html_tags</a>(html, <span class="stringliteral">&quot;script&quot;</span>); <span class="comment">// Remove JavaScript.</span></div><div class="line"><a name="l00263"></a><span class="lineno"> 263</span>&#160; out = <a class="code" href="classremwharead_1_1URI.html#a8b02cabdb493ba0db7931b9f51c05590">remove_html_tags</a>(out, <span class="stringliteral">&quot;style&quot;</span>); <span class="comment">// Remove CSS.</span></div><div class="line"><a name="l00264"></a><span class="lineno"> 264</span>&#160; out = <a class="code" href="classremwharead_1_1URI.html#a8b02cabdb493ba0db7931b9f51c05590">remove_html_tags</a>(out); <span class="comment">// Rem
<div class="ttc" id="classremwharead_1_1URI_html_ad13540201da297653f5a8608e3b0df07"><div class="ttname"><a href="classremwharead_1_1URI.html#ad13540201da297653f5a8608e3b0df07">remwharead::URI::make_request</a></div><div class="ttdeci">string make_request(const string &amp;uri, bool archive=false) const</div><div class="ttdoc">Make an HTTP(S) request.</div><div class="ttdef"><b>Definition:</b> uri.cpp:151</div></div>
<div class="ttc" id="classremwharead_1_1URI_html_a870da973d8a91bcaab28aa27e7cc3888"><div class="ttname"><a href="classremwharead_1_1URI.html#a870da973d8a91bcaab28aa27e7cc3888">remwharead::URI::unescape_html</a></div><div class="ttdeci">string unescape_html(string html)</div><div class="ttdoc">Convert HTML entities to UTF-8.</div></div>
<div class="ttc" id="classremwharead_1_1URI_html_afe80e4fafe35023c25d109517ffd9388"><div class="ttname"><a href="classremwharead_1_1URI.html#afe80e4fafe35023c25d109517ffd9388">remwharead::URI::strip_html</a></div><div class="ttdeci">string strip_html(const string &amp;html)</div><div class="ttdoc">Removes HTML tags and superfluous spaces from an HTML page.</div></div>
<div class="ttc" id="classremwharead_1_1URI_html_a063a1f5a633c1016883f134215809d27"><div class="ttname"><a href="classremwharead_1_1URI.html#a063a1f5a633c1016883f134215809d27">remwharead::URI::remove_newlines</a></div><div class="ttdeci">string remove_newlines(string text)</div><div class="ttdoc">Replace newlines with spaces.</div><div class="ttdef"><b>Definition:</b> uri.cpp:641</div></div>
<div class="ttc" id="classremwharead_1_1URI_html_a8b02cabdb493ba0db7931b9f51c05590"><div class="ttname"><a href="classremwharead_1_1URI.html#a8b02cabdb493ba0db7931b9f51c05590">remwharead::URI::remove_html_tags</a></div><div class="ttdeci">string remove_html_tags(const string &amp;html, const string &amp;tag=&quot;&quot;)</div><div class="ttdoc">Remove HTML tags.</div></div>
2019-07-28 02:30:30 +02:00
</div><!-- fragment -->
</div>
</div>
2019-09-30 16:09:31 +02:00
<a id="aa67d8399d1637c09ba59e1af020055b7"></a>
<h2 class="memtitle"><span class="permalink"><a href="#aa67d8399d1637c09ba59e1af020055b7">&#9670;&nbsp;</a></span>extract_title()</h2>
2019-07-28 02:30:30 +02:00
<div class="memitem">
<div class="memproto">
<table class="mlabels">
<tr>
<td class="mlabels-left">
<table class="memname">
<tr>
2019-09-30 16:09:31 +02:00
<td class="memname">string remwharead::URI::extract_title </td>
2019-07-28 02:30:30 +02:00
<td>(</td>
<td class="paramtype">const string &amp;&#160;</td>
<td class="paramname"><em>html</em></td><td>)</td>
<td></td>
</tr>
</table>
</td>
<td class="mlabels-right">
<span class="mlabels"><span class="mlabel">protected</span></span> </td>
</tr>
</table>
</div><div class="memdoc">
<p>Extract the title from an HTML page. </p>
2019-08-08 11:28:50 +02:00
<dl class="section since"><dt>Since</dt><dd>0.6.0 </dd></dl>
2019-09-30 16:09:31 +02:00
<div class="fragment"><div class="line"><a name="l00224"></a><span class="lineno"> 224</span>&#160;{</div><div class="line"><a name="l00225"></a><span class="lineno"> 225</span>&#160; <span class="keyword">const</span> RegEx re_htmlfile(<span class="stringliteral">&quot;.*\\.(.?html?|xml|rss)$&quot;</span>, RegEx::RE_CASELESS);</div><div class="line"><a name="l00226"></a><span class="lineno"> 226</span>&#160; <span class="keywordflow">if</span> (_uri.substr(0, 4) == <span class="stringliteral">&quot;http&quot;</span> || re_htmlfile.match(_uri))</div><div class="line"><a name="l00227"></a><span class="lineno"> 227</span>&#160; {</div><div class="line"><a name="l00228"></a><span class="lineno"> 228</span>&#160; <span class="keyword">const</span> RegEx re_title(<span class="stringliteral">&quot;&lt;title&gt;([^&lt;]+)&quot;</span>, RegEx::RE_CASELESS);</div><div class="line"><a name="l00229"></a><span class="lineno"> 229</span>&#160; vector&lt;string&gt; matches;</div><div class="line"><a name="l00230"></a><span class="lineno"> 230</span>&#160; re_title.split(html, matches);</div><div class="line"><a name="l00231"></a><span class="lineno"> 231</span>&#160; <span class="keywordflow">if</span> (matches.size() &gt;= 2)</div><div class="line"><a name="l00232"></a><span class="lineno"> 232</span>&#160; {</div><div class="line"><a name="l00233"></a><span class="lineno"> 233</span>&#160; <span class="keywordflow">return</span> <a class="code" href="classremwharead_1_1URI.html#a063a1f5a633c1016883f134215809d27">remove_newlines</a>(<a class="code" href="classremwharead_1_1URI.html#a870da973d8a91bcaab28aa27e7cc3888">unescape_html</a>(matches[1]));</div><div class="line"><a name="l00234"></a><span class="lineno"> 234</span>&#160; }</div><div class="line"><a name="l00235"></a><span class="lineno"> 235</span>&#160; }</div><div class="line"><a name="l00236"></a><span class="lineno"> 236</span>&#160;</div><div class="line"><a name="l00237"></a><span class="lineno"> 237</span>&#160; 
<span class="keywordflow">return</span> <span class="stringliteral">&quot;&quot;</span>;</div><div class="line"><a name="l00238"></a><span class="lineno"> 238</span>&#160;}</div><div class="ttc" id="classremwharead_1_1URI_html_a870da973d8a91bcaab28aa27e7cc3888"><div class="ttname"><a href="classremwharead_1_1URI.html#a870da973d8a91bcaab28aa27e7cc3888">remwharead::URI::unescape_html</a></div><div class="ttdeci">string unescape_html(string html)</div><div class="ttdoc">Convert HTML entities to UTF-8.</div></div>
<div class="ttc" id="classremwharead_1_1URI_html_a063a1f5a633c1016883f134215809d27"><div class="ttname"><a href="classremwharead_1_1URI.html#a063a1f5a633c1016883f134215809d27">remwharead::URI::remove_newlines</a></div><div class="ttdeci">string remove_newlines(string text)</div><div class="ttdoc">Replace newlines with spaces.</div><div class="ttdef"><b>Definition:</b> uri.cpp:641</div></div>
2019-07-28 02:30:30 +02:00
</div><!-- fragment -->
</div>
</div>
2019-09-30 16:09:31 +02:00
<a id="a32de53a91487e5cc68550d1479cbd081"></a>
<h2 class="memtitle"><span class="permalink"><a href="#a32de53a91487e5cc68550d1479cbd081">&#9670;&nbsp;</a></span>get()</h2>
2019-07-28 02:30:30 +02:00
<div class="memitem">
<div class="memproto">
<table class="memname">
<tr>
2019-09-30 16:09:31 +02:00
<td class="memname"><a class="el" href="structremwharead_1_1html__extract.html">html_extract</a> remwharead::URI::get </td>
2019-07-28 02:30:30 +02:00
<td>(</td>
<td class="paramname"></td><td>)</td>
<td></td>
</tr>
</table>
</div><div class="memdoc">
<p>Download URI and extract title, description and full text. </p>
2019-08-08 11:28:50 +02:00
<dl class="section since"><dt>Since</dt><dd>0.6.0 </dd></dl>
2019-09-30 16:09:31 +02:00
<div class="fragment"><div class="line"><a name="l00127"></a><span class="lineno"> 127</span>&#160;{</div><div class="line"><a name="l00128"></a><span class="lineno"> 128</span>&#160; <span class="keywordflow">try</span></div><div class="line"><a name="l00129"></a><span class="lineno"> 129</span>&#160; {</div><div class="line"><a name="l00130"></a><span class="lineno"> 130</span>&#160; <span class="keyword">const</span> <span class="keywordtype">string</span> answer = <a class="code" href="classremwharead_1_1URI.html#ad13540201da297653f5a8608e3b0df07">make_request</a>(_uri);</div><div class="line"><a name="l00131"></a><span class="lineno"> 131</span>&#160; <span class="keywordflow">if</span> (!answer.empty())</div><div class="line"><a name="l00132"></a><span class="lineno"> 132</span>&#160; {</div><div class="line"><a name="l00133"></a><span class="lineno"> 133</span>&#160; <span class="keywordflow">return</span></div><div class="line"><a name="l00134"></a><span class="lineno"> 134</span>&#160; {</div><div class="line"><a name="l00135"></a><span class="lineno"> 135</span>&#160; <span class="keyword">true</span>,</div><div class="line"><a name="l00136"></a><span class="lineno"> 136</span>&#160; <span class="stringliteral">&quot;&quot;</span>,</div><div class="line"><a name="l00137"></a><span class="lineno"> 137</span>&#160; <a class="code" href="classremwharead_1_1URI.html#aa67d8399d1637c09ba59e1af020055b7">extract_title</a>(answer),</div><div class="line"><a name="l00138"></a><span class="lineno"> 138</span>&#160; <a class="code" href="classremwharead_1_1URI.html#a77b680aacab6dbcbfe7d357b60e87842">extract_description</a>(answer),</div><div class="line"><a name="l00139"></a><span class="lineno"> 139</span>&#160; <a class="code" href="classremwharead_1_1URI.html#afe80e4fafe35023c25d109517ffd9388">strip_html</a>(answer)</div><div class="line"><a name="l00140"></a><span class="lineno"> 140</span>&#160; };</div><div class="line"><a name="l00141"></a><span 
class="lineno"> 141</span>&#160; }</div><div class="line"><a name="l00142"></a><span class="lineno"> 142</span>&#160; }</div><div class="line"><a name="l00143"></a><span class="lineno"> 143</span>&#160; <span class="keywordflow">catch</span> (<span class="keyword">const</span> Poco::Exception &amp;e)</div><div class="line"><a name="l00144"></a><span class="lineno"> 144</span>&#160; {</div><div class="line"><a name="l00145"></a><span class="lineno"> 145</span>&#160; <span class="keywordflow">return</span> { <span class="keyword">false</span>, e.displayText(), <span class="stringliteral">&quot;&quot;</span>, <span class="stringliteral">&quot;&quot;</span>, <span class="stringliteral">&quot;&quot;</span> };</div><div class="line"><a name="l00146"></a><span class="lineno"> 146</span>&#160; }</div><div class="line"><a name="l00147"></a><span class="lineno"> 147</span>&#160;</div><div class="line"><a name="l00148"></a><span class="lineno"> 148</span>&#160; <span class="keywordflow">return</span> { <span class="keyword">false</span>, <span class="stringliteral">&quot;Unknown error.&quot;</span>, <span class="stringliteral">&quot;&quot;</span>, <span class="stringliteral">&quot;&quot;</span>, <span class="stringliteral">&quot;&quot;</span> };</div><div class="line"><a name="l00149"></a><span class="lineno"> 149</span>&#160;}</div><div class="ttc" id="classremwharead_1_1URI_html_ad13540201da297653f5a8608e3b0df07"><div class="ttname"><a href="classremwharead_1_1URI.html#ad13540201da297653f5a8608e3b0df07">remwharead::URI::make_request</a></div><div class="ttdeci">string make_request(const string &amp;uri, bool archive=false) const</div><div class="ttdoc">Make a HTTP(S) request.</div><div class="ttdef"><b>Definition:</b> uri.cpp:151</div></div>
<div class="ttc" id="classremwharead_1_1URI_html_afe80e4fafe35023c25d109517ffd9388"><div class="ttname"><a href="classremwharead_1_1URI.html#afe80e4fafe35023c25d109517ffd9388">remwharead::URI::strip_html</a></div><div class="ttdeci">string strip_html(const string &amp;html)</div><div class="ttdoc">Removes HTML tags and superfluous spaces from an HTML page.</div></div>
<div class="ttc" id="classremwharead_1_1URI_html_aa67d8399d1637c09ba59e1af020055b7"><div class="ttname"><a href="classremwharead_1_1URI.html#aa67d8399d1637c09ba59e1af020055b7">remwharead::URI::extract_title</a></div><div class="ttdeci">string extract_title(const string &amp;html)</div><div class="ttdoc">Extract the title from an HTML page.</div><div class="ttdef"><b>Definition:</b> uri.cpp:223</div></div>
<div class="ttc" id="classremwharead_1_1URI_html_a77b680aacab6dbcbfe7d357b60e87842"><div class="ttname"><a href="classremwharead_1_1URI.html#a77b680aacab6dbcbfe7d357b60e87842">remwharead::URI::extract_description</a></div><div class="ttdeci">string extract_description(const string &amp;html)</div><div class="ttdoc">Extract the description from an HTML page.</div><div class="ttdef"><b>Definition:</b> uri.cpp:240</div></div>
2019-08-05 22:01:08 +02:00
</div><!-- fragment -->
</div>
</div>
2019-09-30 16:09:31 +02:00
<a id="ad13540201da297653f5a8608e3b0df07"></a>
<h2 class="memtitle"><span class="permalink"><a href="#ad13540201da297653f5a8608e3b0df07">&#9670;&nbsp;</a></span>make_request()</h2>
2019-08-05 22:01:08 +02:00
<div class="memitem">
<div class="memproto">
<table class="mlabels">
<tr>
<td class="mlabels-left">
<table class="memname">
<tr>
2019-09-30 16:09:31 +02:00
<td class="memname">string remwharead::URI::make_request </td>
2019-08-05 22:01:08 +02:00
<td>(</td>
<td class="paramtype">const string &amp;&#160;</td>
2019-08-08 11:28:50 +02:00
<td class="paramname"><em>uri</em>, </td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">bool&#160;</td>
<td class="paramname"><em>archive</em> = <code>false</code>&#160;</td>
</tr>
<tr>
<td></td>
<td>)</td>
<td></td><td> const</td>
2019-08-05 22:01:08 +02:00
</tr>
</table>
</td>
<td class="mlabels-right">
<span class="mlabels"><span class="mlabel">protected</span></span> </td>
</tr>
</table>
</div><div class="memdoc">
<p>Make an HTTP(S) request. </p>
2019-08-08 11:28:50 +02:00
<dl class="section since"><dt>Since</dt><dd>0.6.0 </dd></dl>
2019-09-30 16:09:31 +02:00
<div class="fragment"><div class="line"><a name="l00152"></a><span class="lineno"> 152</span>&#160;{</div><div class="line"><a name="l00153"></a><span class="lineno"> 153</span>&#160; Poco::URI poco_uri(uri);</div><div class="line"><a name="l00154"></a><span class="lineno"> 154</span>&#160; <span class="keywordtype">string</span> method = <a class="code" href="classremwharead_1_1URI.html#a20f272b5b1638269e5ef1bec69a7032e">archive</a> ? HTTPRequest::HTTP_HEAD : HTTPRequest::HTTP_GET;</div><div class="line"><a name="l00155"></a><span class="lineno"> 155</span>&#160; <span class="keywordtype">string</span> path = poco_uri.getPathAndQuery();</div><div class="line"><a name="l00156"></a><span class="lineno"> 156</span>&#160; <span class="keywordflow">if</span> (path.empty())</div><div class="line"><a name="l00157"></a><span class="lineno"> 157</span>&#160; {</div><div class="line"><a name="l00158"></a><span class="lineno"> 158</span>&#160; path = <span class="stringliteral">&quot;/&quot;</span>;</div><div class="line"><a name="l00159"></a><span class="lineno"> 159</span>&#160; }</div><div class="line"><a name="l00160"></a><span class="lineno"> 160</span>&#160;</div><div class="line"><a name="l00161"></a><span class="lineno"> 161</span>&#160; unique_ptr&lt;HTTPClientSession&gt; session;</div><div class="line"><a name="l00162"></a><span class="lineno"> 162</span>&#160; <span class="keywordflow">if</span> (poco_uri.getScheme() == <span class="stringliteral">&quot;https&quot;</span>)</div><div class="line"><a name="l00163"></a><span class="lineno"> 163</span>&#160; {</div><div class="line"><a name="l00164"></a><span class="lineno"> 164</span>&#160; session = make_unique&lt;HTTPSClientSession&gt;(poco_uri.getHost(),</div><div class="line"><a name="l00165"></a><span class="lineno"> 165</span>&#160; poco_uri.getPort());</div><div class="line"><a name="l00166"></a><span class="lineno"> 166</span>&#160; }</div><div class="line"><a name="l00167"></a><span class="lineno"> 
167</span>&#160; <span class="keywordflow">else</span> <span class="keywordflow">if</span> (poco_uri.getScheme() == <span class="stringliteral">&quot;http&quot;</span>)</div><div class="line"><a name="l00168"></a><span class="lineno"> 168</span>&#160; {</div><div class="line"><a name="l00169"></a><span class="lineno"> 169</span>&#160; session = make_unique&lt;HTTPClientSession&gt;(poco_uri.getHost(),</div><div class="line"><a name="l00170"></a><span class="lineno"> 170</span>&#160; poco_uri.getPort());</div><div class="line"><a name="l00171"></a><span class="lineno"> 171</span>&#160; }</div><div class="line"><a name="l00172"></a><span class="lineno"> 172</span>&#160; <span class="keywordflow">else</span></div><div class="line"><a name="l00173"></a><span class="lineno"> 173</span>&#160; {</div><div class="line"><a name="l00174"></a><span class="lineno"> 174</span>&#160; <span class="keywordflow">throw</span> Poco::Exception(<span class="stringliteral">&quot;Protocol not supported.&quot;</span>);</div><div class="line"><a name="l00175"></a><span class="lineno"> 175</span>&#160; }</div><div class="line"><a name="l00176"></a><span class="lineno"> 176</span>&#160;</div><div class="line"><a name="l00177"></a><span class="lineno"> 177</span>&#160; HTTPRequest request(method, path, HTTPMessage::HTTP_1_1);</div><div class="line"><a name="l00178"></a><span class="lineno"> 178</span>&#160; request.set(<span class="stringliteral">&quot;User-Agent&quot;</span>, <span class="keywordtype">string</span>(<span class="stringliteral">&quot;remwharead/&quot;</span>) + global::version);</div><div class="line"><a name="l00179"></a><span class="lineno"> 179</span>&#160;</div><div class="line"><a name="l00180"></a><span class="lineno"> 180</span>&#160; HTTPResponse response;</div><div class="line"><a name="l00181"></a><span class="lineno"> 181</span>&#160;</div><div class="line"><a na
<div class="ttc" id="classremwharead_1_1URI_html_ad13540201da297653f5a8608e3b0df07"><div class="ttname"><a href="classremwharead_1_1URI.html#ad13540201da297653f5a8608e3b0df07">remwharead::URI::make_request</a></div><div class="ttdeci">string make_request(const string &amp;uri, bool archive=false) const</div><div class="ttdoc">Make an HTTP(S) request.</div><div class="ttdef"><b>Definition:</b> uri.cpp:151</div></div>
2019-07-28 02:30:30 +02:00
</div><!-- fragment -->
</div>
</div>
2019-09-30 16:09:31 +02:00
<a id="a8b02cabdb493ba0db7931b9f51c05590"></a>
<h2 class="memtitle"><span class="permalink"><a href="#a8b02cabdb493ba0db7931b9f51c05590">&#9670;&nbsp;</a></span>remove_html_tags()</h2>
2019-07-28 02:30:30 +02:00
<div class="memitem">
<div class="memproto">
<table class="mlabels">
<tr>
<td class="mlabels-left">
<table class="memname">
<tr>
2019-09-30 16:09:31 +02:00
<td class="memname">string remwharead::URI::remove_html_tags </td>
2019-07-28 02:30:30 +02:00
<td>(</td>
<td class="paramtype">const string &amp;&#160;</td>
<td class="paramname"><em>html</em>, </td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const string &amp;&#160;</td>
<td class="paramname"><em>tag</em> = <code>&quot;&quot;</code>&#160;</td>
</tr>
<tr>
<td></td>
<td>)</td>
<td></td><td></td>
</tr>
</table>
</td>
<td class="mlabels-right">
<span class="mlabels"><span class="mlabel">protected</span></span> </td>
</tr>
</table>
</div><div class="memdoc">
<p>Remove HTML tags. </p>
<dl class="params"><dt>Parameters</dt><dd>
<table class="params">
<tr><td class="paramname">html</td><td>HTML page. </td></tr>
2019-08-08 11:28:50 +02:00
<tr><td class="paramname">tag</td><td>If set, only remove this tag.</td></tr>
2019-07-28 02:30:30 +02:00
</table>
</dd>
</dl>
2019-08-08 11:28:50 +02:00
<dl class="section since"><dt>Since</dt><dd>0.6.0 </dd></dl>
2019-09-30 16:09:31 +02:00
2019-07-28 02:30:30 +02:00
</div>
</div>
2019-09-30 16:09:31 +02:00
<a id="a063a1f5a633c1016883f134215809d27"></a>
<h2 class="memtitle"><span class="permalink"><a href="#a063a1f5a633c1016883f134215809d27">&#9670;&nbsp;</a></span>remove_newlines()</h2>
2019-07-28 02:30:30 +02:00
<div class="memitem">
<div class="memproto">
<table class="mlabels">
<tr>
<td class="mlabels-left">
<table class="memname">
<tr>
2019-09-30 16:09:31 +02:00
<td class="memname">string remwharead::URI::remove_newlines </td>
2019-07-28 02:30:30 +02:00
<td>(</td>
<td class="paramtype">string&#160;</td>
<td class="paramname"><em>text</em></td><td>)</td>
<td></td>
</tr>
</table>
</td>
<td class="mlabels-right">
<span class="mlabels"><span class="mlabel">protected</span></span> </td>
</tr>
</table>
</div><div class="memdoc">
<p>Replace newlines with spaces. </p>
2019-08-08 11:28:50 +02:00
<dl class="section since"><dt>Since</dt><dd>0.6.0 </dd></dl>
2019-09-30 16:09:31 +02:00
<div class="fragment"><div class="line"><a name="l00642"></a><span class="lineno"> 642</span>&#160;{</div><div class="line"><a name="l00643"></a><span class="lineno"> 643</span>&#160; <span class="keywordtype">size_t</span> posn = 0;</div><div class="line"><a name="l00644"></a><span class="lineno"> 644</span>&#160; <span class="keywordflow">while</span> ((posn = text.find(<span class="charliteral">&#39;\n&#39;</span>, posn)) != std::string::npos)</div><div class="line"><a name="l00645"></a><span class="lineno"> 645</span>&#160; {</div><div class="line"><a name="l00646"></a><span class="lineno"> 646</span>&#160; text.replace(posn, 1, <span class="stringliteral">&quot; &quot;</span>);</div><div class="line"><a name="l00647"></a><span class="lineno"> 647</span>&#160;</div><div class="line"><a name="l00648"></a><span class="lineno"> 648</span>&#160; <span class="keywordtype">size_t</span> posr = posn - 1;</div><div class="line"><a name="l00649"></a><span class="lineno"> 649</span>&#160; <span class="keywordflow">if</span> (text[posr] == <span class="charliteral">&#39;\r&#39;</span>)</div><div class="line"><a name="l00650"></a><span class="lineno"> 650</span>&#160; {</div><div class="line"><a name="l00651"></a><span class="lineno"> 651</span>&#160; text.replace(posr, 1, <span class="stringliteral">&quot; &quot;</span>);</div><div class="line"><a name="l00652"></a><span class="lineno"> 652</span>&#160; }</div><div class="line"><a name="l00653"></a><span class="lineno"> 653</span>&#160; ++posn;</div><div class="line"><a name="l00654"></a><span class="lineno"> 654</span>&#160; }</div><div class="line"><a name="l00655"></a><span class="lineno"> 655</span>&#160;</div><div class="line"><a name="l00656"></a><span class="lineno"> 656</span>&#160; <span class="keywordflow">return</span> text;</div><div class="line"><a name="l00657"></a><span class="lineno"> 657</span>&#160;}</div></div><!-- fragment -->
2019-07-28 02:30:30 +02:00
</div>
</div>
2019-09-30 16:09:31 +02:00
<a id="a754c3e988f7d8890d6e9794bc1e69e2c"></a>
<h2 class="memtitle"><span class="permalink"><a href="#a754c3e988f7d8890d6e9794bc1e69e2c">&#9670;&nbsp;</a></span>set_proxy()</h2>
2019-07-28 02:30:30 +02:00
<div class="memitem">
<div class="memproto">
<table class="mlabels">
<tr>
<td class="mlabels-left">
<table class="memname">
<tr>
2019-09-30 16:09:31 +02:00
<td class="memname">void remwharead::URI::set_proxy </td>
<td>(</td>
<td class="paramname"></td><td>)</td>
<td></td>
</tr>
</table>
</td>
<td class="mlabels-right">
<span class="mlabels"><span class="mlabel">protected</span></span> </td>
</tr>
</table>
</div><div class="memdoc">
<p>Set proxy server. </p>
<dl class="section since"><dt>Since</dt><dd>0.8.5 </dd></dl>
<div class="fragment"><div class="line"><a name="l00075"></a><span class="lineno"> 75</span>&#160;{</div><div class="line"><a name="l00076"></a><span class="lineno"> 76</span>&#160; <span class="keywordflow">try</span></div><div class="line"><a name="l00077"></a><span class="lineno"> 77</span>&#160; {</div><div class="line"><a name="l00078"></a><span class="lineno"> 78</span>&#160; HTTPClientSession::ProxyConfig proxy;</div><div class="line"><a name="l00079"></a><span class="lineno"> 79</span>&#160; <span class="keyword">const</span> <span class="keywordtype">string</span> env_proxy = Environment::get(<span class="stringliteral">&quot;http_proxy&quot;</span>);</div><div class="line"><a name="l00080"></a><span class="lineno"> 80</span>&#160; <span class="keyword">const</span> RegEx re_proxy(<span class="stringliteral">&quot;^(?:https?://)?(?:([^:]+):?([^@]*)@)?&quot;</span> <span class="comment">// user:pw</span></div><div class="line"><a name="l00081"></a><span class="lineno"> 81</span>&#160; <span class="stringliteral">&quot;([^:/]+)(?::([\\d]{1,5}))?/?$&quot;</span>); <span class="comment">// host:port</span></div><div class="line"><a name="l00082"></a><span class="lineno"> 82</span>&#160; vector&lt;string&gt; matches;</div><div class="line"><a name="l00083"></a><span class="lineno"> 83</span>&#160;</div><div class="line"><a name="l00084"></a><span class="lineno"> 84</span>&#160; <span class="keywordflow">if</span> (re_proxy.split(env_proxy, matches) &lt; 4)</div><div class="line"><a name="l00085"></a><span class="lineno"> 85</span>&#160; {</div><div class="line"><a name="l00086"></a><span class="lineno"> 86</span>&#160; <span class="keywordflow">return</span>;</div><div class="line"><a name="l00087"></a><span class="lineno"> 87</span>&#160; }</div><div class="line"><a name="l00088"></a><span class="lineno"> 88</span>&#160;</div><div class="line"><a name="l00089"></a><span class="lineno"> 89</span>&#160; proxy.username = matches[1];</div><div class="line"><a 
name="l00090"></a><span class="lineno"> 90</span>&#160; proxy.password = matches[2];</div><div class="line"><a name="l00091"></a><span class="lineno"> 91</span>&#160; proxy.host = matches[3];</div><div class="line"><a name="l00092"></a><span class="lineno"> 92</span>&#160; <span class="keywordflow">if</span> (!matches[4].empty())</div><div class="line"><a name="l00093"></a><span class="lineno"> 93</span>&#160; {</div><div class="line"><a name="l00094"></a><span class="lineno"> 94</span>&#160; <span class="keyword">const</span> std::uint32_t &amp;port = std::stoul(matches[4]);</div><div class="line"><a name="l00095"></a><span class="lineno"> 95</span>&#160; <span class="keywordflow">if</span> (port &gt; 65535)</div><div class="line"><a name="l00096"></a><span class="lineno"> 96</span>&#160; {</div><div class="line"><a name="l00097"></a><span class="lineno"> 97</span>&#160; <span class="keywordflow">throw</span> std::invalid_argument(<span class="stringliteral">&quot;Proxy port number out of range&quot;</span>);</div><div class="line"><a name="l00098"></a><span class="lineno"> 98</span>&#160; }</div><div class="line"><a name="l00099"></a><span class="lineno"> 99</span>&#160; proxy.port = port;</div><div class="line"><a name="l00100"></a><span class="lineno"> 100</span>&#160; }</div><div class="line"><a name="l00101"></a><span class="lineno"> 101</span>&#160; HTTPClientSession::setGlobalProxyConfig(proxy);</div><div class="line"><a name="l00102"></a><span class="lineno"> 102</span>&#160; }</div><div class="line"><a name="l00103"></a><span class="lineno"> 103</span>&#160; <span class="keywordflow">catch</span> (<span class="keyword">const</span> Poco::RegularExpressionException &amp;e)</div><div class="line"><a name="l00104"></a><span class="lineno"> 104</span>&#160; {</div><div class="line"><a n
</div>
</div>
<a id="afe80e4fafe35023c25d109517ffd9388"></a>
<h2 class="memtitle"><span class="permalink"><a href="#afe80e4fafe35023c25d109517ffd9388">&#9670;&nbsp;</a></span>strip_html()</h2>
<div class="memitem">
<div class="memproto">
<table class="mlabels">
<tr>
<td class="mlabels-left">
<table class="memname">
<tr>
<td class="memname">string remwharead::URI::strip_html </td>
2019-07-28 02:30:30 +02:00
<td>(</td>
<td class="paramtype">const string &amp;&#160;</td>
<td class="paramname"><em>html</em></td><td>)</td>
<td></td>
</tr>
</table>
</td>
<td class="mlabels-right">
<span class="mlabels"><span class="mlabel">protected</span></span> </td>
</tr>
</table>
</div><div class="memdoc">
<p>Removes HTML tags and superfluous spaces from an HTML page. </p>
2019-08-08 11:28:50 +02:00
<dl class="section since"><dt>Since</dt><dd>0.6.0 </dd></dl>
2019-09-30 16:09:31 +02:00
2019-07-28 02:30:30 +02:00
</div>
</div>
2019-09-30 16:09:31 +02:00
<a id="a870da973d8a91bcaab28aa27e7cc3888"></a>
<h2 class="memtitle"><span class="permalink"><a href="#a870da973d8a91bcaab28aa27e7cc3888">&#9670;&nbsp;</a></span>unescape_html()</h2>
2019-07-28 02:30:30 +02:00
<div class="memitem">
<div class="memproto">
<table class="mlabels">
<tr>
<td class="mlabels-left">
<table class="memname">
<tr>
2019-09-30 16:09:31 +02:00
<td class="memname">string remwharead::URI::unescape_html </td>
2019-07-28 02:30:30 +02:00
<td>(</td>
2019-09-20 21:35:37 +02:00
<td class="paramtype">string&#160;</td>
2019-07-28 02:30:30 +02:00
<td class="paramname"><em>html</em></td><td>)</td>
<td></td>
</tr>
</table>
</td>
<td class="mlabels-right">
<span class="mlabels"><span class="mlabel">protected</span></span> </td>
</tr>
</table>
</div><div class="memdoc">
<p>Convert HTML entities to UTF-8. </p>
2019-08-08 11:28:50 +02:00
<dl class="section since"><dt>Since</dt><dd>0.6.0 </dd></dl>
2019-09-30 16:09:31 +02:00
2019-07-28 02:30:30 +02:00
</div>
</div>
2019-07-27 22:46:58 +02:00
<hr/>The documentation for this class was generated from the following files:<ul>
2019-08-05 22:01:08 +02:00
<li>include/<a class="el" href="uri_8hpp_source.html">uri.hpp</a></li>
2019-07-27 22:46:58 +02:00
<li>src/lib/uri.cpp</li>
</ul>
</div><!-- contents -->
<!-- start footer part -->
<hr class="footer"/><address class="footer"><small>
Generated by &#160;<a href="http://www.doxygen.org/index.html">
<img class="footer" src="doxygen.png" alt="doxygen"/>
2019-08-08 22:13:06 +02:00
</a> 1.8.15
2019-07-27 22:46:58 +02:00
</small></address>
</body>
</html>