remwharead/docs/classremwharead_1_1URI.html

545 lines
120 KiB
HTML
Raw Normal View History

2019-08-08 22:13:06 +02:00
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "https://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
2019-07-27 22:46:58 +02:00
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
<meta http-equiv="X-UA-Compatible" content="IE=9"/>
2019-08-08 22:13:06 +02:00
<meta name="generator" content="Doxygen 1.8.15"/>
2019-07-27 22:46:58 +02:00
<meta name="viewport" content="width=device-width, initial-scale=1"/>
<title>remwharead: remwharead::URI Class Reference</title>
<link href="tabs.css" rel="stylesheet" type="text/css"/>
<script type="text/javascript" src="jquery.js"></script>
<script type="text/javascript" src="dynsections.js"></script>
<link href="search/search.css" rel="stylesheet" type="text/css"/>
<script type="text/javascript" src="search/searchdata.js"></script>
<script type="text/javascript" src="search/search.js"></script>
<link href="doxygen.css" rel="stylesheet" type="text/css" />
</head>
<body>
<div id="top"><!-- do not remove this div, it is closed by doxygen! -->
<div id="titlearea">
<table cellspacing="0" cellpadding="0">
<tbody>
<tr style="height: 56px;">
<td id="projectalign" style="padding-left: 0.5em;">
<div id="projectname">remwharead
2019-12-08 18:29:39 +01:00
&#160;<span id="projectnumber">0.9.1</span>
2019-07-27 22:46:58 +02:00
</div>
</td>
</tr>
</tbody>
</table>
</div>
<!-- end header part -->
2019-08-08 22:13:06 +02:00
<!-- Generated by Doxygen 1.8.15 -->
2019-07-27 22:46:58 +02:00
<script type="text/javascript">
/* @license magnet:?xt=urn:btih:cf05388f2679ee054f2beb29a391d25f4e673ac3&amp;dn=gpl-2.0.txt GPL-v2 */
var searchBox = new SearchBox("searchBox", "search",false,'Search');
/* @license-end */
</script>
<script type="text/javascript" src="menudata.js"></script>
<script type="text/javascript" src="menu.js"></script>
<script type="text/javascript">
/* @license magnet:?xt=urn:btih:cf05388f2679ee054f2beb29a391d25f4e673ac3&amp;dn=gpl-2.0.txt GPL-v2 */
$(function() {
initMenu('',true,false,'search.php','Search');
$(document).ready(function() { init_search(); });
});
/* @license-end */</script>
<div id="main-nav"></div>
<!-- window showing the filter options -->
<div id="MSearchSelectWindow"
onmouseover="return searchBox.OnSearchSelectShow()"
onmouseout="return searchBox.OnSearchSelectHide()"
onkeydown="return searchBox.OnSearchSelectKey(event)">
</div>
<!-- iframe showing the search results (closed by default) -->
<div id="MSearchResultsWindow">
<iframe src="javascript:void(0)" frameborder="0"
name="MSearchResults" id="MSearchResults">
</iframe>
</div>
<div id="nav-path" class="navpath">
<ul>
<li class="navelem"><b>remwharead</b></li><li class="navelem"><a class="el" href="classremwharead_1_1URI.html">URI</a></li> </ul>
</div>
</div><!-- top -->
<div class="header">
<div class="summary">
<a href="#pub-methods">Public Member Functions</a> &#124;
<a href="#pro-methods">Protected Member Functions</a> &#124;
<a href="#pro-attribs">Protected Attributes</a> &#124;
<a href="classremwharead_1_1URI-members.html">List of all members</a> </div>
<div class="headertitle">
<div class="title">remwharead::URI Class Reference</div> </div>
</div><!--header-->
<div class="contents">
2019-07-28 02:30:30 +02:00
<p>Download, archive and process a URI.
<a href="classremwharead_1_1URI.html#details">More...</a></p>
2019-08-05 23:28:43 +02:00
<p><code>#include &lt;<a class="el" href="uri_8hpp_source.html">remwharead/uri.hpp</a>&gt;</code></p>
2019-07-27 22:46:58 +02:00
<table class="memberdecls">
<tr class="heading"><td colspan="2"><h2 class="groupheader"><a name="pub-methods"></a>
Public Member Functions</h2></td></tr>
2019-09-30 16:09:31 +02:00
<tr class="memitem:a34633f88b14dcd2c3a618794040bc154"><td class="memItemLeft" align="right" valign="top">&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="classremwharead_1_1URI.html#a34633f88b14dcd2c3a618794040bc154">URI</a> (string uri)</td></tr>
<tr class="memdesc:a34633f88b14dcd2c3a618794040bc154"><td class="mdescLeft">&#160;</td><td class="mdescRight">Construct object and set URL. <a href="#a34633f88b14dcd2c3a618794040bc154">More...</a><br /></td></tr>
<tr class="separator:a34633f88b14dcd2c3a618794040bc154"><td class="memSeparator" colspan="2">&#160;</td></tr>
<tr class="memitem:a232fb7afd928e7d531924e1de41d141c"><td class="memItemLeft" align="right" valign="top"><a id="a232fb7afd928e7d531924e1de41d141c"></a>
&#160;</td><td class="memItemRight" valign="bottom"><b>URI</b> (const <a class="el" href="classremwharead_1_1URI.html">URI</a> &amp;other)=default</td></tr>
<tr class="separator:a232fb7afd928e7d531924e1de41d141c"><td class="memSeparator" colspan="2">&#160;</td></tr>
<tr class="memitem:a412348c5f7fe88a193e94e1463aef7a5"><td class="memItemLeft" align="right" valign="top"><a id="a412348c5f7fe88a193e94e1463aef7a5"></a>
<a class="el" href="classremwharead_1_1URI.html">URI</a> &amp;&#160;</td><td class="memItemRight" valign="bottom"><b>operator=</b> (const <a class="el" href="classremwharead_1_1URI.html">URI</a> &amp;other)=default</td></tr>
<tr class="separator:a412348c5f7fe88a193e94e1463aef7a5"><td class="memSeparator" colspan="2">&#160;</td></tr>
<tr class="memitem:afd5bb3d2ba39246a9ab5e6fc30040d46"><td class="memItemLeft" align="right" valign="top"><a id="afd5bb3d2ba39246a9ab5e6fc30040d46"></a>
&#160;</td><td class="memItemRight" valign="bottom"><b>URI</b> (<a class="el" href="classremwharead_1_1URI.html">URI</a> &amp;&amp;other)=default</td></tr>
<tr class="separator:afd5bb3d2ba39246a9ab5e6fc30040d46"><td class="memSeparator" colspan="2">&#160;</td></tr>
<tr class="memitem:a02a4bd54312b39219d0c51371978518f"><td class="memItemLeft" align="right" valign="top"><a id="a02a4bd54312b39219d0c51371978518f"></a>
<a class="el" href="classremwharead_1_1URI.html">URI</a> &amp;&#160;</td><td class="memItemRight" valign="bottom"><b>operator=</b> (<a class="el" href="classremwharead_1_1URI.html">URI</a> &amp;&amp;other)=default</td></tr>
<tr class="separator:a02a4bd54312b39219d0c51371978518f"><td class="memSeparator" colspan="2">&#160;</td></tr>
<tr class="memitem:a32de53a91487e5cc68550d1479cbd081"><td class="memItemLeft" align="right" valign="top"><a class="el" href="structremwharead_1_1html__extract.html">html_extract</a>&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="classremwharead_1_1URI.html#a32de53a91487e5cc68550d1479cbd081">get</a> ()</td></tr>
<tr class="memdesc:a32de53a91487e5cc68550d1479cbd081"><td class="mdescLeft">&#160;</td><td class="mdescRight">Download URI and extract title, description and full text. <a href="#a32de53a91487e5cc68550d1479cbd081">More...</a><br /></td></tr>
<tr class="separator:a32de53a91487e5cc68550d1479cbd081"><td class="memSeparator" colspan="2">&#160;</td></tr>
2019-11-02 08:13:06 +01:00
<tr class="memitem:ac07375bc85ce0f8583b071fdd9a00b33"><td class="memItemLeft" align="right" valign="top"><a class="el" href="structremwharead_1_1archive__answer.html">archive_answer</a>&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="classremwharead_1_1URI.html#ac07375bc85ce0f8583b071fdd9a00b33">archive</a> () const</td></tr>
<tr class="memdesc:ac07375bc85ce0f8583b071fdd9a00b33"><td class="mdescLeft">&#160;</td><td class="mdescRight">Save URI in archive and return archive-URI. <a href="#ac07375bc85ce0f8583b071fdd9a00b33">More...</a><br /></td></tr>
<tr class="separator:ac07375bc85ce0f8583b071fdd9a00b33"><td class="memSeparator" colspan="2">&#160;</td></tr>
2019-07-27 22:46:58 +02:00
</table><table class="memberdecls">
<tr class="heading"><td colspan="2"><h2 class="groupheader"><a name="pro-methods"></a>
Protected Member Functions</h2></td></tr>
2019-11-02 08:13:06 +01:00
<tr class="memitem:ad13540201da297653f5a8608e3b0df07"><td class="memItemLeft" align="right" valign="top">string&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="classremwharead_1_1URI.html#ad13540201da297653f5a8608e3b0df07">make_request</a> (const string &amp;uri, bool <a class="el" href="classremwharead_1_1URI.html#ac07375bc85ce0f8583b071fdd9a00b33">archive</a>=false) const</td></tr>
2019-09-30 16:09:31 +02:00
<tr class="memdesc:ad13540201da297653f5a8608e3b0df07"><td class="mdescLeft">&#160;</td><td class="mdescRight">Make an HTTP(S) request. <a href="#ad13540201da297653f5a8608e3b0df07">More...</a><br /></td></tr>
<tr class="separator:ad13540201da297653f5a8608e3b0df07"><td class="memSeparator" colspan="2">&#160;</td></tr>
2019-11-02 08:13:06 +01:00
<tr class="memitem:a5de040798613f0c5d57a0671e3138934"><td class="memItemLeft" align="right" valign="top">string&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="classremwharead_1_1URI.html#a5de040798613f0c5d57a0671e3138934">extract_title</a> (const string &amp;html) const</td></tr>
<tr class="memdesc:a5de040798613f0c5d57a0671e3138934"><td class="mdescLeft">&#160;</td><td class="mdescRight">Extract the title from an HTML page. <a href="#a5de040798613f0c5d57a0671e3138934">More...</a><br /></td></tr>
<tr class="separator:a5de040798613f0c5d57a0671e3138934"><td class="memSeparator" colspan="2">&#160;</td></tr>
<tr class="memitem:ac4d795032889378334b49ca4edc4a772"><td class="memItemLeft" align="right" valign="top">string&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="classremwharead_1_1URI.html#ac4d795032889378334b49ca4edc4a772">extract_description</a> (const string &amp;html) const</td></tr>
<tr class="memdesc:ac4d795032889378334b49ca4edc4a772"><td class="mdescLeft">&#160;</td><td class="mdescRight">Extract the description from an HTML page. <a href="#ac4d795032889378334b49ca4edc4a772">More...</a><br /></td></tr>
<tr class="separator:ac4d795032889378334b49ca4edc4a772"><td class="memSeparator" colspan="2">&#160;</td></tr>
<tr class="memitem:a8a7bffcca80333bfec92ae4c2e794d36"><td class="memItemLeft" align="right" valign="top">string&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="classremwharead_1_1URI.html#a8a7bffcca80333bfec92ae4c2e794d36">strip_html</a> (const string &amp;html) const</td></tr>
<tr class="memdesc:a8a7bffcca80333bfec92ae4c2e794d36"><td class="mdescLeft">&#160;</td><td class="mdescRight">Removes HTML tags and superfluous spaces from an HTML page. <a href="#a8a7bffcca80333bfec92ae4c2e794d36">More...</a><br /></td></tr>
<tr class="separator:a8a7bffcca80333bfec92ae4c2e794d36"><td class="memSeparator" colspan="2">&#160;</td></tr>
<tr class="memitem:ac44c57dc8925f2601285c8b69574319d"><td class="memItemLeft" align="right" valign="top">string&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="classremwharead_1_1URI.html#ac44c57dc8925f2601285c8b69574319d">remove_html_tags</a> (const string &amp;html, const string &amp;tag=&quot;&quot;) const</td></tr>
<tr class="memdesc:ac44c57dc8925f2601285c8b69574319d"><td class="mdescLeft">&#160;</td><td class="mdescRight">Remove HTML tags. <a href="#ac44c57dc8925f2601285c8b69574319d">More...</a><br /></td></tr>
<tr class="separator:ac44c57dc8925f2601285c8b69574319d"><td class="memSeparator" colspan="2">&#160;</td></tr>
<tr class="memitem:a8bfe665841ab414e8682b4d1f90a4c13"><td class="memItemLeft" align="right" valign="top">string&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="classremwharead_1_1URI.html#a8bfe665841ab414e8682b4d1f90a4c13">unescape_html</a> (string html) const</td></tr>
<tr class="memdesc:a8bfe665841ab414e8682b4d1f90a4c13"><td class="mdescLeft">&#160;</td><td class="mdescRight">Convert HTML entities to UTF-8. <a href="#a8bfe665841ab414e8682b4d1f90a4c13">More...</a><br /></td></tr>
<tr class="separator:a8bfe665841ab414e8682b4d1f90a4c13"><td class="memSeparator" colspan="2">&#160;</td></tr>
<tr class="memitem:ab607928f099bcaa3f50033cf059d7a46"><td class="memItemLeft" align="right" valign="top">string&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="classremwharead_1_1URI.html#ab607928f099bcaa3f50033cf059d7a46">remove_newlines</a> (string text) const</td></tr>
<tr class="memdesc:ab607928f099bcaa3f50033cf059d7a46"><td class="mdescLeft">&#160;</td><td class="mdescRight">Replace newlines with spaces. <a href="#ab607928f099bcaa3f50033cf059d7a46">More...</a><br /></td></tr>
<tr class="separator:ab607928f099bcaa3f50033cf059d7a46"><td class="memSeparator" colspan="2">&#160;</td></tr>
2019-09-30 16:09:31 +02:00
<tr class="memitem:a754c3e988f7d8890d6e9794bc1e69e2c"><td class="memItemLeft" align="right" valign="top">void&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="classremwharead_1_1URI.html#a754c3e988f7d8890d6e9794bc1e69e2c">set_proxy</a> ()</td></tr>
<tr class="memdesc:a754c3e988f7d8890d6e9794bc1e69e2c"><td class="mdescLeft">&#160;</td><td class="mdescRight">Set proxy server. <a href="#a754c3e988f7d8890d6e9794bc1e69e2c">More...</a><br /></td></tr>
<tr class="separator:a754c3e988f7d8890d6e9794bc1e69e2c"><td class="memSeparator" colspan="2">&#160;</td></tr>
2019-11-02 08:13:06 +01:00
<tr class="memitem:aee67ad31c3578b1df86dacc6f1eedf1a"><td class="memItemLeft" align="right" valign="top">string&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="classremwharead_1_1URI.html#aee67ad31c3578b1df86dacc6f1eedf1a">cut_text</a> (const string &amp;text, uint16_t n_chars) const</td></tr>
<tr class="memdesc:aee67ad31c3578b1df86dacc6f1eedf1a"><td class="mdescLeft">&#160;</td><td class="mdescRight">Limits text to N characters, cuts at space. <a href="#aee67ad31c3578b1df86dacc6f1eedf1a">More...</a><br /></td></tr>
<tr class="separator:aee67ad31c3578b1df86dacc6f1eedf1a"><td class="memSeparator" colspan="2">&#160;</td></tr>
2019-07-27 22:46:58 +02:00
</table><table class="memberdecls">
<tr class="heading"><td colspan="2"><h2 class="groupheader"><a name="pro-attribs"></a>
Protected Attributes</h2></td></tr>
<tr class="memitem:a6d76848066779348084046a63bdaedc0"><td class="memItemLeft" align="right" valign="top"><a id="a6d76848066779348084046a63bdaedc0"></a>
string&#160;</td><td class="memItemRight" valign="bottom"><b>_uri</b></td></tr>
<tr class="separator:a6d76848066779348084046a63bdaedc0"><td class="memSeparator" colspan="2">&#160;</td></tr>
</table>
2019-07-28 02:30:30 +02:00
<a name="details" id="details"></a><h2 class="groupheader">Detailed Description</h2>
<div class="textblock"><p>Download, archive and process a URI. </p>
2019-08-05 23:28:43 +02:00
<dl class="section since"><dt>Since</dt><dd>0.6.0 </dd></dl>
2019-07-28 02:30:30 +02:00
</div><h2 class="groupheader">Constructor &amp; Destructor Documentation</h2>
2019-09-30 16:09:31 +02:00
<a id="a34633f88b14dcd2c3a618794040bc154"></a>
<h2 class="memtitle"><span class="permalink"><a href="#a34633f88b14dcd2c3a618794040bc154">&#9670;&nbsp;</a></span>URI()</h2>
2019-07-28 02:30:30 +02:00
<div class="memitem">
<div class="memproto">
<table class="mlabels">
<tr>
<td class="mlabels-left">
<table class="memname">
<tr>
<td class="memname">remwharead::URI::URI </td>
<td>(</td>
2019-09-30 16:09:31 +02:00
<td class="paramtype">string&#160;</td>
2019-07-28 02:30:30 +02:00
<td class="paramname"><em>uri</em></td><td>)</td>
<td></td>
</tr>
</table>
</td>
<td class="mlabels-right">
<span class="mlabels"><span class="mlabel">explicit</span></span> </td>
</tr>
</table>
</div><div class="memdoc">
<p>Construct object and set URL. </p>
2019-08-08 11:28:50 +02:00
<p>Initializes TLS and sets proxy from the environment variable <code>http_proxy</code>, if possible.</p>
<dl class="section since"><dt>Since</dt><dd>0.6.0 </dd></dl>
2019-11-02 08:13:06 +01:00
<div class="fragment"><div class="line"><a name="l00069"></a><span class="lineno"> 69</span>&#160; :_uri(move(uri))</div><div class="line"><a name="l00070"></a><span class="lineno"> 70</span>&#160;{</div><div class="line"><a name="l00071"></a><span class="lineno"> 71</span>&#160; Poco::Net::initializeSSL();</div><div class="line"><a name="l00072"></a><span class="lineno"> 72</span>&#160;</div><div class="line"><a name="l00073"></a><span class="lineno"> 73</span>&#160; <a class="code" href="classremwharead_1_1URI.html#a754c3e988f7d8890d6e9794bc1e69e2c">set_proxy</a>();</div><div class="line"><a name="l00074"></a><span class="lineno"> 74</span>&#160;}</div><div class="ttc" id="classremwharead_1_1URI_html_a754c3e988f7d8890d6e9794bc1e69e2c"><div class="ttname"><a href="classremwharead_1_1URI.html#a754c3e988f7d8890d6e9794bc1e69e2c">remwharead::URI::set_proxy</a></div><div class="ttdeci">void set_proxy()</div><div class="ttdoc">Set proxy server.</div><div class="ttdef"><b>Definition:</b> uri.cpp:76</div></div>
2019-09-30 16:09:31 +02:00
</div><!-- fragment -->
2019-07-28 02:30:30 +02:00
</div>
</div>
<h2 class="groupheader">Member Function Documentation</h2>
2019-11-02 08:13:06 +01:00
<a id="ac07375bc85ce0f8583b071fdd9a00b33"></a>
<h2 class="memtitle"><span class="permalink"><a href="#ac07375bc85ce0f8583b071fdd9a00b33">&#9670;&nbsp;</a></span>archive()</h2>
2019-07-28 02:30:30 +02:00
<div class="memitem">
<div class="memproto">
<table class="memname">
<tr>
2019-09-30 16:09:31 +02:00
<td class="memname"><a class="el" href="structremwharead_1_1archive__answer.html">archive_answer</a> remwharead::URI::archive </td>
2019-07-28 02:30:30 +02:00
<td>(</td>
<td class="paramname"></td><td>)</td>
2019-11-02 08:13:06 +01:00
<td> const</td>
2019-07-28 02:30:30 +02:00
</tr>
</table>
</div><div class="memdoc">
<p>Save URI in archive and return archive-URI. </p>
2019-08-08 11:28:50 +02:00
<dl class="section since"><dt>Since</dt><dd>0.6.0 </dd></dl>
2019-09-30 16:09:31 +02:00
2019-07-28 02:30:30 +02:00
</div>
</div>
2019-11-02 08:13:06 +01:00
<a id="aee67ad31c3578b1df86dacc6f1eedf1a"></a>
<h2 class="memtitle"><span class="permalink"><a href="#aee67ad31c3578b1df86dacc6f1eedf1a">&#9670;&nbsp;</a></span>cut_text()</h2>
<div class="memitem">
<div class="memproto">
<table class="mlabels">
<tr>
<td class="mlabels-left">
<table class="memname">
<tr>
<td class="memname">string remwharead::URI::cut_text </td>
<td>(</td>
<td class="paramtype">const string &amp;&#160;</td>
<td class="paramname"><em>text</em>, </td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">uint16_t&#160;</td>
<td class="paramname"><em>n_chars</em>&#160;</td>
</tr>
<tr>
<td></td>
<td>)</td>
<td></td><td> const</td>
</tr>
</table>
</td>
<td class="mlabels-right">
<span class="mlabels"><span class="mlabel">protected</span></span> </td>
</tr>
</table>
</div><div class="memdoc">
<p>Limits text to N characters, cuts at space. </p>
<dl class="section since"><dt>Since</dt><dd>0.8.5 </dd></dl>
<div class="fragment"><div class="line"><a name="l00664"></a><span class="lineno"> 664</span>&#160;{</div><div class="line"><a name="l00665"></a><span class="lineno"> 665</span>&#160; <span class="keywordflow">if</span> (text.size() &gt; n_chars)</div><div class="line"><a name="l00666"></a><span class="lineno"> 666</span>&#160; {</div><div class="line"><a name="l00667"></a><span class="lineno"> 667</span>&#160; constexpr <span class="keywordtype">char</span> suffix[] = <span class="stringliteral">&quot; […]&quot;</span>;</div><div class="line"><a name="l00668"></a><span class="lineno"> 668</span>&#160; constexpr <span class="keyword">auto</span> suffix_len = std::end(suffix) - std::begin(suffix) - 1;</div><div class="line"><a name="l00669"></a><span class="lineno"> 669</span>&#160; <span class="keywordflow">if</span> (n_chars &lt;= suffix_len)</div><div class="line"><a name="l00670"></a><span class="lineno"> 670</span>&#160; {</div><div class="line"><a name="l00671"></a><span class="lineno"> 671</span>&#160; <span class="keywordflow">throw</span> std::invalid_argument(<span class="stringliteral">&quot;n_chars has to be greater than &quot;</span></div><div class="line"><a name="l00672"></a><span class="lineno"> 672</span>&#160; + std::to_string(suffix_len));</div><div class="line"><a name="l00673"></a><span class="lineno"> 673</span>&#160; }</div><div class="line"><a name="l00674"></a><span class="lineno"> 674</span>&#160;</div><div class="line"><a name="l00675"></a><span class="lineno"> 675</span>&#160; <span class="keyword">const</span> <span class="keywordtype">size_t</span> pos =</div><div class="line"><a name="l00676"></a><span class="lineno"> 676</span>&#160; text.rfind(<span class="charliteral">&#39; &#39;</span>, static_cast&lt;size_t&gt;(n_chars - suffix_len));</div><div class="line"><a name="l00677"></a><span class="lineno"> 677</span>&#160;</div><div class="line"><a name="l00678"></a><span class="lineno"> 678</span>&#160; <span 
class="keywordflow">return</span> text.substr(0, pos) + suffix;</div><div class="line"><a name="l00679"></a><span class="lineno"> 679</span>&#160; }</div><div class="line"><a name="l00680"></a><span class="lineno"> 680</span>&#160;</div><div class="line"><a name="l00681"></a><span class="lineno"> 681</span>&#160; <span class="keywordflow">return</span> text;</div><div class="line"><a name="l00682"></a><span class="lineno"> 682</span>&#160;}</div></div><!-- fragment -->
</div>
</div>
<a id="ac4d795032889378334b49ca4edc4a772"></a>
<h2 class="memtitle"><span class="permalink"><a href="#ac4d795032889378334b49ca4edc4a772">&#9670;&nbsp;</a></span>extract_description()</h2>
2019-07-28 02:30:30 +02:00
<div class="memitem">
<div class="memproto">
<table class="mlabels">
<tr>
<td class="mlabels-left">
<table class="memname">
<tr>
2019-09-30 16:09:31 +02:00
<td class="memname">string remwharead::URI::extract_description </td>
2019-07-28 02:30:30 +02:00
<td>(</td>
<td class="paramtype">const string &amp;&#160;</td>
<td class="paramname"><em>html</em></td><td>)</td>
2019-11-02 08:13:06 +01:00
<td> const</td>
2019-07-28 02:30:30 +02:00
</tr>
</table>
</td>
<td class="mlabels-right">
<span class="mlabels"><span class="mlabel">protected</span></span> </td>
</tr>
</table>
</div><div class="memdoc">
<p>Extract the description from an HTML page. </p>
2019-08-08 11:28:50 +02:00
<dl class="section since"><dt>Since</dt><dd>0.6.0 </dd></dl>
2019-11-02 08:13:06 +01:00
<div class="fragment"><div class="line"><a name="l00245"></a><span class="lineno"> 245</span>&#160;{</div><div class="line"><a name="l00246"></a><span class="lineno"> 246</span>&#160; <span class="keyword">const</span> RegEx re_htmlfile(<span class="stringliteral">&quot;.*\\.(.?html?|xml|rss)$&quot;</span>, RegEx::RE_CASELESS);</div><div class="line"><a name="l00247"></a><span class="lineno"> 247</span>&#160; <span class="keywordflow">if</span> (_uri.substr(0, 4) == <span class="stringliteral">&quot;http&quot;</span> || re_htmlfile.match(_uri))</div><div class="line"><a name="l00248"></a><span class="lineno"> 248</span>&#160; {</div><div class="line"><a name="l00249"></a><span class="lineno"> 249</span>&#160; <span class="keyword">const</span> RegEx re_desc(R<span class="stringliteral">&quot;(description&quot;[^&gt;]+content=&quot;([^&quot;]+))&quot;,</span></div><div class="line"><a name="l00250"></a><span class="lineno"> 250</span>&#160;<span class="stringliteral"> RegEx::RE_CASELESS);</span></div><div class="line"><a name="l00251"></a><span class="lineno"> 251</span>&#160;<span class="stringliteral"> vector&lt;string&gt; matches;</span></div><div class="line"><a name="l00252"></a><span class="lineno"> 252</span>&#160;<span class="stringliteral"> re_desc.split(html, matches);</span></div><div class="line"><a name="l00253"></a><span class="lineno"> 253</span>&#160;<span class="stringliteral"> </span><span class="keywordflow">if</span> (matches.size() &gt;= 2)</div><div class="line"><a name="l00254"></a><span class="lineno"> 254</span>&#160; {</div><div class="line"><a name="l00255"></a><span class="lineno"> 255</span>&#160; <span class="keywordflow">return</span> <a class="code" href="classremwharead_1_1URI.html#ab607928f099bcaa3f50033cf059d7a46">remove_newlines</a>(<a class="code" href="classremwharead_1_1URI.html#aee67ad31c3578b1df86dacc6f1eedf1a">cut_text</a>(<a class="code" 
href="classremwharead_1_1URI.html#a8bfe665841ab414e8682b4d1f90a4c13">unescape_html</a>(matches[1]), 500));</div><div class="line"><a name="l00256"></a><span class="lineno"> 256</span>&#160; }</div><div class="line"><a name="l00257"></a><span class="lineno"> 257</span>&#160; }</div><div class="line"><a name="l00258"></a><span class="lineno"> 258</span>&#160;</div><div class="line"><a name="l00259"></a><span class="lineno"> 259</span>&#160; <span class="keywordflow">return</span> <span class="stringliteral">&quot;&quot;</span>;</div><div class="line"><a name="l00260"></a><span class="lineno"> 260</span>&#160;}</div><div class="line"><a name="l00261"></a><span class="lineno"> 261</span>&#160;</div><div class="line"><a name="l00262"></a><span class="lineno"> 262</span>&#160;<span class="keywordtype">string</span> <a class="code" href="classremwharead_1_1URI.html#a8a7bffcca80333bfec92ae4c2e794d36">URI::strip_html</a>(<span class="keyword">const</span> <span class="keywordtype">string</span> &amp;html)<span class="keyword"> const</span></div><div class="line"><a name="l00263"></a><span class="lineno"> 263</span>&#160;<span class="keyword"></span>{</div><div class="line"><a name="l00264"></a><span class="lineno"> 264</span>&#160; <span class="keywordtype">string</span> out;</div><div class="line"><a name="l00265"></a><span class="lineno"> 265</span>&#160;</div><div class="line"><a name="l00266"></a><span class="lineno"> 266</span>&#160; out = <a class="code" href="classremwharead_1_1URI.html#ac44c57dc8925f2601285c8b69574319d">remove_html_tags</a>(html, <span class="stringliteral">&quot;script&quot;</span>); <span class="comment">// Remove JavaScript.</span></div><div class="line"><a name="l00267"></a><span class="lineno"> 267</span>&#160; out = <a class="code" href="classremwharead_1_1URI.html#ac44c57dc8925f2601285c8b69574319d">remove_html_tags</a>(out, <span class="stringliteral">&quot;style&quot;</span>); <span class="comment">// Remove CSS.</span></div><div 
class="line"><a name="l00268"></a><span class="lineno"> 268</span>&#160;</div>
<div class="ttc" id="classremwharead_1_1URI_html_ad13540201da297653f5a8608e3b0df07"><div class="ttname"><a href="classremwharead_1_1URI.html#ad13540201da297653f5a8608e3b0df07">remwharead::URI::make_request</a></div><div class="ttdeci">string make_request(const string &amp;uri, bool archive=false) const</div><div class="ttdoc">Make an HTTP(S) request.</div><div class="ttdef"><b>Definition:</b> uri.cpp:154</div></div>
<div class="ttc" id="classremwharead_1_1URI_html_ab607928f099bcaa3f50033cf059d7a46"><div class="ttname"><a href="classremwharead_1_1URI.html#ab607928f099bcaa3f50033cf059d7a46">remwharead::URI::remove_newlines</a></div><div class="ttdeci">string remove_newlines(string text) const</div><div class="ttdoc">Replace newlines with spaces.</div><div class="ttdef"><b>Definition:</b> uri.cpp:645</div></div>
<div class="ttc" id="classremwharead_1_1URI_html_ac44c57dc8925f2601285c8b69574319d"><div class="ttname"><a href="classremwharead_1_1URI.html#ac44c57dc8925f2601285c8b69574319d">remwharead::URI::remove_html_tags</a></div><div class="ttdeci">string remove_html_tags(const string &amp;html, const string &amp;tag=&quot;&quot;) const</div><div class="ttdoc">Remove HTML tags.</div></div>
<div class="ttc" id="classremwharead_1_1URI_html_a8a7bffcca80333bfec92ae4c2e794d36"><div class="ttname"><a href="classremwharead_1_1URI.html#a8a7bffcca80333bfec92ae4c2e794d36">remwharead::URI::strip_html</a></div><div class="ttdeci">string strip_html(const string &amp;html) const</div><div class="ttdoc">Removes HTML tags and superfluous spaces from an HTML page.</div></div>
<div class="ttc" id="classremwharead_1_1URI_html_aee67ad31c3578b1df86dacc6f1eedf1a"><div class="ttname"><a href="classremwharead_1_1URI.html#aee67ad31c3578b1df86dacc6f1eedf1a">remwharead::URI::cut_text</a></div><div class="ttdeci">string cut_text(const string &amp;text, uint16_t n_chars) const</div><div class="ttdoc">Limits text to N characters, cuts at space.</div><div class="ttdef"><b>Definition:</b> uri.cpp:663</div></div>
<div class="ttc" id="classremwharead_1_1URI_html_ac07375bc85ce0f8583b071fdd9a00b33"><div class="ttname"><a href="classremwharead_1_1URI.html#ac07375bc85ce0f8583b071fdd9a00b33">remwharead::URI::archive</a></div><div class="ttdeci">archive_answer archive() const</div><div class="ttdoc">Save URI in archive and return archive-URI.</div></div>
2019-07-28 02:30:30 +02:00
</div><!-- fragment -->
</div>
</div>
2019-11-02 08:13:06 +01:00
<a id="a5de040798613f0c5d57a0671e3138934"></a>
<h2 class="memtitle"><span class="permalink"><a href="#a5de040798613f0c5d57a0671e3138934">&#9670;&nbsp;</a></span>extract_title()</h2>
2019-07-28 02:30:30 +02:00
<div class="memitem">
<div class="memproto">
<table class="mlabels">
<tr>
<td class="mlabels-left">
<table class="memname">
<tr>
2019-09-30 16:09:31 +02:00
<td class="memname">string remwharead::URI::extract_title </td>
2019-07-28 02:30:30 +02:00
<td>(</td>
<td class="paramtype">const string &amp;&#160;</td>
<td class="paramname"><em>html</em></td><td>)</td>
2019-11-02 08:13:06 +01:00
<td> const</td>
2019-07-28 02:30:30 +02:00
</tr>
</table>
</td>
<td class="mlabels-right">
<span class="mlabels"><span class="mlabel">protected</span></span> </td>
</tr>
</table>
</div><div class="memdoc">
<p>Extract the title from an HTML page. </p>
2019-08-08 11:28:50 +02:00
<dl class="section since"><dt>Since</dt><dd>0.6.0 </dd></dl>
2019-12-08 18:29:39 +01:00
<div class="fragment"><div class="line"><a name="l00228"></a><span class="lineno"> 228</span>&#160;{</div><div class="line"><a name="l00229"></a><span class="lineno"> 229</span>&#160; <span class="keyword">const</span> RegEx re_htmlfile(<span class="stringliteral">&quot;.*\\.(.?html?|xml|rss)$&quot;</span>, RegEx::RE_CASELESS);</div><div class="line"><a name="l00230"></a><span class="lineno"> 230</span>&#160; <span class="keywordflow">if</span> (_uri.substr(0, 4) == <span class="stringliteral">&quot;http&quot;</span> || re_htmlfile.match(_uri))</div><div class="line"><a name="l00231"></a><span class="lineno"> 231</span>&#160; {</div><div class="line"><a name="l00232"></a><span class="lineno"> 232</span>&#160; <span class="keyword">const</span> RegEx re_title(<span class="stringliteral">&quot;&lt;title(?: [^&gt;]+)?&gt;([^&lt;]+)&quot;</span>, RegEx::RE_CASELESS);</div><div class="line"><a name="l00233"></a><span class="lineno"> 233</span>&#160; vector&lt;string&gt; matches;</div><div class="line"><a name="l00234"></a><span class="lineno"> 234</span>&#160; re_title.split(html, matches);</div><div class="line"><a name="l00235"></a><span class="lineno"> 235</span>&#160; <span class="keywordflow">if</span> (matches.size() &gt;= 2)</div><div class="line"><a name="l00236"></a><span class="lineno"> 236</span>&#160; {</div><div class="line"><a name="l00237"></a><span class="lineno"> 237</span>&#160; <span class="keywordflow">return</span> <a class="code" href="classremwharead_1_1URI.html#ab607928f099bcaa3f50033cf059d7a46">remove_newlines</a>(<a class="code" href="classremwharead_1_1URI.html#a8bfe665841ab414e8682b4d1f90a4c13">unescape_html</a>(matches[1]));</div><div class="line"><a name="l00238"></a><span class="lineno"> 238</span>&#160; }</div><div class="line"><a name="l00239"></a><span class="lineno"> 239</span>&#160; }</div><div class="line"><a name="l00240"></a><span class="lineno"> 240</span>&#160;</div><div class="line"><a name="l00241"></a><span class="lineno"> 
241</span>&#160; <span class="keywordflow">return</span> <span class="stringliteral">&quot;&quot;</span>;</div><div class="line"><a name="l00242"></a><span class="lineno"> 242</span>&#160;}</div><div class="ttc" id="classremwharead_1_1URI_html_a8bfe665841ab414e8682b4d1f90a4c13"><div class="ttname"><a href="classremwharead_1_1URI.html#a8bfe665841ab414e8682b4d1f90a4c13">remwharead::URI::unescape_html</a></div><div class="ttdeci">string unescape_html(string html) const</div><div class="ttdoc">Convert HTML entities to UTF-8.</div></div>
2019-11-02 08:13:06 +01:00
<div class="ttc" id="classremwharead_1_1URI_html_ab607928f099bcaa3f50033cf059d7a46"><div class="ttname"><a href="classremwharead_1_1URI.html#ab607928f099bcaa3f50033cf059d7a46">remwharead::URI::remove_newlines</a></div><div class="ttdeci">string remove_newlines(string text) const</div><div class="ttdoc">Replace newlines with spaces.</div><div class="ttdef"><b>Definition:</b> uri.cpp:645</div></div>
2019-07-28 02:30:30 +02:00
</div><!-- fragment -->
</div>
</div>
2019-09-30 16:09:31 +02:00
<a id="a32de53a91487e5cc68550d1479cbd081"></a>
<h2 class="memtitle"><span class="permalink"><a href="#a32de53a91487e5cc68550d1479cbd081">&#9670;&nbsp;</a></span>get()</h2>
2019-07-28 02:30:30 +02:00
<div class="memitem">
<div class="memproto">
<table class="memname">
<tr>
2019-09-30 16:09:31 +02:00
<td class="memname"><a class="el" href="structremwharead_1_1html__extract.html">html_extract</a> remwharead::URI::get </td>
2019-07-28 02:30:30 +02:00
<td>(</td>
<td class="paramname"></td><td>)</td>
<td></td>
</tr>
</table>
</div><div class="memdoc">
<p>Download URI and extract title, description and full text. </p>
2019-08-08 11:28:50 +02:00
<dl class="section since"><dt>Since</dt><dd>0.6.0 </dd></dl>
2019-11-02 08:13:06 +01:00
<div class="fragment"><div class="line"><a name="l00130"></a><span class="lineno"> 130</span>&#160;{</div><div class="line"><a name="l00131"></a><span class="lineno"> 131</span>&#160; <span class="keywordflow">try</span></div><div class="line"><a name="l00132"></a><span class="lineno"> 132</span>&#160; {</div><div class="line"><a name="l00133"></a><span class="lineno"> 133</span>&#160; <span class="keyword">const</span> <span class="keywordtype">string</span> answer = <a class="code" href="classremwharead_1_1URI.html#ad13540201da297653f5a8608e3b0df07">make_request</a>(_uri);</div><div class="line"><a name="l00134"></a><span class="lineno"> 134</span>&#160; <span class="keywordflow">if</span> (!answer.empty())</div><div class="line"><a name="l00135"></a><span class="lineno"> 135</span>&#160; {</div><div class="line"><a name="l00136"></a><span class="lineno"> 136</span>&#160; <span class="keywordflow">return</span></div><div class="line"><a name="l00137"></a><span class="lineno"> 137</span>&#160; {</div><div class="line"><a name="l00138"></a><span class="lineno"> 138</span>&#160; <span class="keyword">true</span>,</div><div class="line"><a name="l00139"></a><span class="lineno"> 139</span>&#160; <span class="stringliteral">&quot;&quot;</span>,</div><div class="line"><a name="l00140"></a><span class="lineno"> 140</span>&#160; <a class="code" href="classremwharead_1_1URI.html#a5de040798613f0c5d57a0671e3138934">extract_title</a>(answer),</div><div class="line"><a name="l00141"></a><span class="lineno"> 141</span>&#160; <a class="code" href="classremwharead_1_1URI.html#ac4d795032889378334b49ca4edc4a772">extract_description</a>(answer),</div><div class="line"><a name="l00142"></a><span class="lineno"> 142</span>&#160; <a class="code" href="classremwharead_1_1URI.html#a8a7bffcca80333bfec92ae4c2e794d36">strip_html</a>(answer)</div><div class="line"><a name="l00143"></a><span class="lineno"> 143</span>&#160; };</div><div class="line"><a name="l00144"></a><span 
class="lineno"> 144</span>&#160; }</div><div class="line"><a name="l00145"></a><span class="lineno"> 145</span>&#160; }</div><div class="line"><a name="l00146"></a><span class="lineno"> 146</span>&#160; <span class="keywordflow">catch</span> (<span class="keyword">const</span> Poco::Exception &amp;e)</div><div class="line"><a name="l00147"></a><span class="lineno"> 147</span>&#160; {</div><div class="line"><a name="l00148"></a><span class="lineno"> 148</span>&#160; <span class="keywordflow">return</span> { <span class="keyword">false</span>, e.displayText(), <span class="stringliteral">&quot;&quot;</span>, <span class="stringliteral">&quot;&quot;</span>, <span class="stringliteral">&quot;&quot;</span> };</div><div class="line"><a name="l00149"></a><span class="lineno"> 149</span>&#160; }</div><div class="line"><a name="l00150"></a><span class="lineno"> 150</span>&#160;</div><div class="line"><a name="l00151"></a><span class="lineno"> 151</span>&#160; <span class="keywordflow">return</span> { <span class="keyword">false</span>, <span class="stringliteral">&quot;Unknown error.&quot;</span>, <span class="stringliteral">&quot;&quot;</span>, <span class="stringliteral">&quot;&quot;</span>, <span class="stringliteral">&quot;&quot;</span> };</div><div class="line"><a name="l00152"></a><span class="lineno"> 152</span>&#160;}</div><div class="ttc" id="classremwharead_1_1URI_html_ad13540201da297653f5a8608e3b0df07"><div class="ttname"><a href="classremwharead_1_1URI.html#ad13540201da297653f5a8608e3b0df07">remwharead::URI::make_request</a></div><div class="ttdeci">string make_request(const string &amp;uri, bool archive=false) const</div><div class="ttdoc">Make an HTTP(S) request.</div><div class="ttdef"><b>Definition:</b> uri.cpp:154</div></div>
<div class="ttc" id="classremwharead_1_1URI_html_a8a7bffcca80333bfec92ae4c2e794d36"><div class="ttname"><a href="classremwharead_1_1URI.html#a8a7bffcca80333bfec92ae4c2e794d36">remwharead::URI::strip_html</a></div><div class="ttdeci">string strip_html(const string &amp;html) const</div><div class="ttdoc">Removes HTML tags and superfluous spaces from an HTML page.</div></div>
<div class="ttc" id="classremwharead_1_1URI_html_a5de040798613f0c5d57a0671e3138934"><div class="ttname"><a href="classremwharead_1_1URI.html#a5de040798613f0c5d57a0671e3138934">remwharead::URI::extract_title</a></div><div class="ttdeci">string extract_title(const string &amp;html) const</div><div class="ttdoc">Extract the title from an HTML page.</div><div class="ttdef"><b>Definition:</b> uri.cpp:227</div></div>
<div class="ttc" id="classremwharead_1_1URI_html_ac4d795032889378334b49ca4edc4a772"><div class="ttname"><a href="classremwharead_1_1URI.html#ac4d795032889378334b49ca4edc4a772">remwharead::URI::extract_description</a></div><div class="ttdeci">string extract_description(const string &amp;html) const</div><div class="ttdoc">Extract the description from an HTML page.</div><div class="ttdef"><b>Definition:</b> uri.cpp:244</div></div>
2019-08-05 22:01:08 +02:00
</div><!-- fragment -->
</div>
</div>
2019-09-30 16:09:31 +02:00
<a id="ad13540201da297653f5a8608e3b0df07"></a>
<h2 class="memtitle"><span class="permalink"><a href="#ad13540201da297653f5a8608e3b0df07">&#9670;&nbsp;</a></span>make_request()</h2>
2019-08-05 22:01:08 +02:00
<div class="memitem">
<div class="memproto">
<table class="mlabels">
<tr>
<td class="mlabels-left">
<table class="memname">
<tr>
2019-09-30 16:09:31 +02:00
<td class="memname">string remwharead::URI::make_request </td>
2019-08-05 22:01:08 +02:00
<td>(</td>
<td class="paramtype">const string &amp;&#160;</td>
2019-08-08 11:28:50 +02:00
<td class="paramname"><em>uri</em>, </td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">bool&#160;</td>
<td class="paramname"><em>archive</em> = <code>false</code>&#160;</td>
</tr>
<tr>
<td></td>
<td>)</td>
<td></td><td> const</td>
2019-08-05 22:01:08 +02:00
</tr>
</table>
</td>
<td class="mlabels-right">
<span class="mlabels"><span class="mlabel">protected</span></span> </td>
</tr>
</table>
</div><div class="memdoc">
<p>Make an HTTP(S) request. </p>
2019-08-08 11:28:50 +02:00
<dl class="section since"><dt>Since</dt><dd>0.6.0 </dd></dl>
2019-11-02 08:13:06 +01:00
<div class="fragment"><div class="line"><a name="l00155"></a><span class="lineno"> 155</span>&#160;{</div><div class="line"><a name="l00156"></a><span class="lineno"> 156</span>&#160; Poco::URI poco_uri(uri);</div><div class="line"><a name="l00157"></a><span class="lineno"> 157</span>&#160; <span class="keywordtype">string</span> method = <a class="code" href="classremwharead_1_1URI.html#ac07375bc85ce0f8583b071fdd9a00b33">archive</a> ? HTTPRequest::HTTP_HEAD : HTTPRequest::HTTP_GET;</div><div class="line"><a name="l00158"></a><span class="lineno"> 158</span>&#160; <span class="keywordtype">string</span> path = poco_uri.getPathAndQuery();</div><div class="line"><a name="l00159"></a><span class="lineno"> 159</span>&#160; <span class="keywordflow">if</span> (path.empty())</div><div class="line"><a name="l00160"></a><span class="lineno"> 160</span>&#160; {</div><div class="line"><a name="l00161"></a><span class="lineno"> 161</span>&#160; path = <span class="stringliteral">&quot;/&quot;</span>;</div><div class="line"><a name="l00162"></a><span class="lineno"> 162</span>&#160; }</div><div class="line"><a name="l00163"></a><span class="lineno"> 163</span>&#160;</div><div class="line"><a name="l00164"></a><span class="lineno"> 164</span>&#160; unique_ptr&lt;HTTPClientSession&gt; session;</div><div class="line"><a name="l00165"></a><span class="lineno"> 165</span>&#160; <span class="keywordflow">if</span> (poco_uri.getScheme() == <span class="stringliteral">&quot;https&quot;</span>)</div><div class="line"><a name="l00166"></a><span class="lineno"> 166</span>&#160; {</div><div class="line"><a name="l00167"></a><span class="lineno"> 167</span>&#160; session = make_unique&lt;HTTPSClientSession&gt;(poco_uri.getHost(),</div><div class="line"><a name="l00168"></a><span class="lineno"> 168</span>&#160; poco_uri.getPort());</div><div class="line"><a name="l00169"></a><span class="lineno"> 169</span>&#160; }</div><div class="line"><a name="l00170"></a><span class="lineno"> 
170</span>&#160; <span class="keywordflow">else</span> <span class="keywordflow">if</span> (poco_uri.getScheme() == <span class="stringliteral">&quot;http&quot;</span>)</div><div class="line"><a name="l00171"></a><span class="lineno"> 171</span>&#160; {</div><div class="line"><a name="l00172"></a><span class="lineno"> 172</span>&#160; session = make_unique&lt;HTTPClientSession&gt;(poco_uri.getHost(),</div><div class="line"><a name="l00173"></a><span class="lineno"> 173</span>&#160; poco_uri.getPort());</div><div class="line"><a name="l00174"></a><span class="lineno"> 174</span>&#160; }</div><div class="line"><a name="l00175"></a><span class="lineno"> 175</span>&#160; <span class="keywordflow">else</span></div><div class="line"><a name="l00176"></a><span class="lineno"> 176</span>&#160; {</div><div class="line"><a name="l00177"></a><span class="lineno"> 177</span>&#160; <span class="comment">// NOLINTNEXTLINE(cert-err60-cpp)</span></div><div class="line"><a name="l00178"></a><span class="lineno"> 178</span>&#160; <span class="keywordflow">throw</span> Poco::Exception(<span class="stringliteral">&quot;Protocol not supported.&quot;</span>);</div><div class="line"><a name="l00179"></a><span class="lineno"> 179</span>&#160; }</div><div class="line"><a name="l00180"></a><span class="lineno"> 180</span>&#160;</div><div class="line"><a name="l00181"></a><span class="lineno"> 181</span>&#160; HTTPRequest request(method, path, HTTPMessage::HTTP_1_1);</div><div class="line"><a name="l00182"></a><span class="lineno"> 182</span>&#160; request.set(<span class="stringliteral">&quot;User-Agent&quot;</span>, <span class="keywordtype">string</span>(<span class="stringliteral">&quot;remwharead/&quot;</span>) + global::version);</div><div class="line"><a name="l00183"></a><span class="lineno"> 183</span>&#160;</div><div class="line"><a name="l00184"></a><span class="lineno"> 1
<div class="ttc" id="classremwharead_1_1URI_html_ac07375bc85ce0f8583b071fdd9a00b33"><div class="ttname"><a href="classremwharead_1_1URI.html#ac07375bc85ce0f8583b071fdd9a00b33">remwharead::URI::archive</a></div><div class="ttdeci">archive_answer archive() const</div><div class="ttdoc">Save URI in archive and return archive-URI.</div></div>
2019-07-28 02:30:30 +02:00
</div><!-- fragment -->
</div>
</div>
2019-11-02 08:13:06 +01:00
<a id="ac44c57dc8925f2601285c8b69574319d"></a>
<h2 class="memtitle"><span class="permalink"><a href="#ac44c57dc8925f2601285c8b69574319d">&#9670;&nbsp;</a></span>remove_html_tags()</h2>
2019-07-28 02:30:30 +02:00
<div class="memitem">
<div class="memproto">
<table class="mlabels">
<tr>
<td class="mlabels-left">
<table class="memname">
<tr>
2019-09-30 16:09:31 +02:00
<td class="memname">string remwharead::URI::remove_html_tags </td>
2019-07-28 02:30:30 +02:00
<td>(</td>
<td class="paramtype">const string &amp;&#160;</td>
<td class="paramname"><em>html</em>, </td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const string &amp;&#160;</td>
<td class="paramname"><em>tag</em> = <code>&quot;&quot;</code>&#160;</td>
</tr>
<tr>
<td></td>
<td>)</td>
2019-11-02 08:13:06 +01:00
<td></td><td> const</td>
2019-07-28 02:30:30 +02:00
</tr>
</table>
</td>
<td class="mlabels-right">
<span class="mlabels"><span class="mlabel">protected</span></span> </td>
</tr>
</table>
</div><div class="memdoc">
<p>Remove HTML tags. </p>
<dl class="params"><dt>Parameters</dt><dd>
<table class="params">
<tr><td class="paramname">html</td><td>HTML page. </td></tr>
2019-08-08 11:28:50 +02:00
<tr><td class="paramname">tag</td><td>If set, only remove this tag.</td></tr>
2019-07-28 02:30:30 +02:00
</table>
</dd>
</dl>
2019-08-08 11:28:50 +02:00
<dl class="section since"><dt>Since</dt><dd>0.6.0 </dd></dl>
2019-09-30 16:09:31 +02:00
2019-07-28 02:30:30 +02:00
</div>
</div>
2019-11-02 08:13:06 +01:00
<a id="ab607928f099bcaa3f50033cf059d7a46"></a>
<h2 class="memtitle"><span class="permalink"><a href="#ab607928f099bcaa3f50033cf059d7a46">&#9670;&nbsp;</a></span>remove_newlines()</h2>
2019-07-28 02:30:30 +02:00
<div class="memitem">
<div class="memproto">
<table class="mlabels">
<tr>
<td class="mlabels-left">
<table class="memname">
<tr>
2019-09-30 16:09:31 +02:00
<td class="memname">string remwharead::URI::remove_newlines </td>
2019-07-28 02:30:30 +02:00
<td>(</td>
<td class="paramtype">string&#160;</td>
<td class="paramname"><em>text</em></td><td>)</td>
2019-11-02 08:13:06 +01:00
<td> const</td>
2019-07-28 02:30:30 +02:00
</tr>
</table>
</td>
<td class="mlabels-right">
<span class="mlabels"><span class="mlabel">protected</span></span> </td>
</tr>
</table>
</div><div class="memdoc">
<p>Replace newlines with spaces. </p>
2019-08-08 11:28:50 +02:00
<dl class="section since"><dt>Since</dt><dd>0.6.0 </dd></dl>
2019-11-02 08:13:06 +01:00
<div class="fragment"><div class="line"><a name="l00646"></a><span class="lineno"> 646</span>&#160;{</div><div class="line"><a name="l00647"></a><span class="lineno"> 647</span>&#160; <span class="keywordtype">size_t</span> posn = 0;</div><div class="line"><a name="l00648"></a><span class="lineno"> 648</span>&#160; <span class="keywordflow">while</span> ((posn = text.find(<span class="charliteral">&#39;\n&#39;</span>, posn)) != std::string::npos)</div><div class="line"><a name="l00649"></a><span class="lineno"> 649</span>&#160; {</div><div class="line"><a name="l00650"></a><span class="lineno"> 650</span>&#160; text.replace(posn, 1, <span class="stringliteral">&quot; &quot;</span>);</div><div class="line"><a name="l00651"></a><span class="lineno"> 651</span>&#160;</div><div class="line"><a name="l00652"></a><span class="lineno"> 652</span>&#160; <span class="keywordtype">size_t</span> posr = posn - 1;</div><div class="line"><a name="l00653"></a><span class="lineno"> 653</span>&#160; <span class="keywordflow">if</span> (text[posr] == <span class="charliteral">&#39;\r&#39;</span>)</div><div class="line"><a name="l00654"></a><span class="lineno"> 654</span>&#160; {</div><div class="line"><a name="l00655"></a><span class="lineno"> 655</span>&#160; text.replace(posr, 1, <span class="stringliteral">&quot; &quot;</span>);</div><div class="line"><a name="l00656"></a><span class="lineno"> 656</span>&#160; }</div><div class="line"><a name="l00657"></a><span class="lineno"> 657</span>&#160; ++posn;</div><div class="line"><a name="l00658"></a><span class="lineno"> 658</span>&#160; }</div><div class="line"><a name="l00659"></a><span class="lineno"> 659</span>&#160;</div><div class="line"><a name="l00660"></a><span class="lineno"> 660</span>&#160; <span class="keywordflow">return</span> text;</div><div class="line"><a name="l00661"></a><span class="lineno"> 661</span>&#160;}</div></div><!-- fragment -->
2019-07-28 02:30:30 +02:00
</div>
</div>
2019-09-30 16:09:31 +02:00
<a id="a754c3e988f7d8890d6e9794bc1e69e2c"></a>
<h2 class="memtitle"><span class="permalink"><a href="#a754c3e988f7d8890d6e9794bc1e69e2c">&#9670;&nbsp;</a></span>set_proxy()</h2>
2019-07-28 02:30:30 +02:00
<div class="memitem">
<div class="memproto">
<table class="mlabels">
<tr>
<td class="mlabels-left">
<table class="memname">
<tr>
2019-09-30 16:09:31 +02:00
<td class="memname">void remwharead::URI::set_proxy </td>
<td>(</td>
<td class="paramname"></td><td>)</td>
<td></td>
</tr>
</table>
</td>
<td class="mlabels-right">
<span class="mlabels"><span class="mlabel">protected</span></span> </td>
</tr>
</table>
</div><div class="memdoc">
<p>Set proxy server. </p>
<dl class="section since"><dt>Since</dt><dd>0.8.5 </dd></dl>
2019-11-02 08:13:06 +01:00
<div class="fragment"><div class="line"><a name="l00077"></a><span class="lineno"> 77</span>&#160;{</div><div class="line"><a name="l00078"></a><span class="lineno"> 78</span>&#160; <span class="keywordflow">try</span></div><div class="line"><a name="l00079"></a><span class="lineno"> 79</span>&#160; {</div><div class="line"><a name="l00080"></a><span class="lineno"> 80</span>&#160; HTTPClientSession::ProxyConfig proxy;</div><div class="line"><a name="l00081"></a><span class="lineno"> 81</span>&#160; <span class="keyword">const</span> <span class="keywordtype">string</span> env_proxy = Environment::get(<span class="stringliteral">&quot;http_proxy&quot;</span>);</div><div class="line"><a name="l00082"></a><span class="lineno"> 82</span>&#160; <span class="keyword">const</span> RegEx re_proxy(<span class="stringliteral">&quot;^(?:https?://)?(?:([^:]+):?([^@]*)@)?&quot;</span> <span class="comment">// user:pw</span></div><div class="line"><a name="l00083"></a><span class="lineno"> 83</span>&#160; <span class="stringliteral">&quot;([^:/]+)(?::([\\d]{1,5}))?/?$&quot;</span>); <span class="comment">// host:port</span></div><div class="line"><a name="l00084"></a><span class="lineno"> 84</span>&#160; vector&lt;string&gt; matches;</div><div class="line"><a name="l00085"></a><span class="lineno"> 85</span>&#160;</div><div class="line"><a name="l00086"></a><span class="lineno"> 86</span>&#160; <span class="keywordflow">if</span> (re_proxy.split(env_proxy, matches) &lt; 4)</div><div class="line"><a name="l00087"></a><span class="lineno"> 87</span>&#160; {</div><div class="line"><a name="l00088"></a><span class="lineno"> 88</span>&#160; <span class="keywordflow">return</span>;</div><div class="line"><a name="l00089"></a><span class="lineno"> 89</span>&#160; }</div><div class="line"><a name="l00090"></a><span class="lineno"> 90</span>&#160;</div><div class="line"><a name="l00091"></a><span class="lineno"> 91</span>&#160; proxy.username = matches[1];</div><div class="line"><a 
name="l00092"></a><span class="lineno"> 92</span>&#160; proxy.password = matches[2];</div><div class="line"><a name="l00093"></a><span class="lineno"> 93</span>&#160; proxy.host = matches[3];</div><div class="line"><a name="l00094"></a><span class="lineno"> 94</span>&#160; <span class="keywordflow">if</span> (!matches[4].empty())</div><div class="line"><a name="l00095"></a><span class="lineno"> 95</span>&#160; {</div><div class="line"><a name="l00096"></a><span class="lineno"> 96</span>&#160; <span class="comment">// NOLINTNEXTLINE(google-runtime-int) - Need to use same as stoul.</span></div><div class="line"><a name="l00097"></a><span class="lineno"> 97</span>&#160; <span class="keyword">const</span> <span class="keywordtype">unsigned</span> <span class="keywordtype">long</span> port = std::stoul(matches[4]);</div><div class="line"><a name="l00098"></a><span class="lineno"> 98</span>&#160; <span class="keywordflow">if</span> (port &gt; 65535)</div><div class="line"><a name="l00099"></a><span class="lineno"> 99</span>&#160; {</div><div class="line"><a name="l00100"></a><span class="lineno"> 100</span>&#160; <span class="keywordflow">throw</span> std::invalid_argument(<span class="stringliteral">&quot;Proxy port number out of range&quot;</span>);</div><div class="line"><a name="l00101"></a><span class="lineno"> 101</span>&#160; }</div><div class="line"><a name="l00102"></a><span class="lineno"> 102</span>&#160; proxy.port = static_cast&lt;uint16_t&gt;(port);</div><div class="line"><a name="l00103"></a><span class="lineno"> 103</span>&#160; }</div><div class="line"><a name="l00104"></a><span class="lineno"> 104</span>&#160; HTTPClientSession::setGlobalProxyConfig(proxy);</div><div class="line"><a name="l00105"></a><span class="lineno"> 105</span>&#160; }</div><div class="line"><a name="l00106
2019-09-30 16:09:31 +02:00
</div>
</div>
2019-11-02 08:13:06 +01:00
<a id="a8a7bffcca80333bfec92ae4c2e794d36"></a>
<h2 class="memtitle"><span class="permalink"><a href="#a8a7bffcca80333bfec92ae4c2e794d36">&#9670;&nbsp;</a></span>strip_html()</h2>
2019-09-30 16:09:31 +02:00
<div class="memitem">
<div class="memproto">
<table class="mlabels">
<tr>
<td class="mlabels-left">
<table class="memname">
<tr>
<td class="memname">string remwharead::URI::strip_html </td>
2019-07-28 02:30:30 +02:00
<td>(</td>
<td class="paramtype">const string &amp;&#160;</td>
<td class="paramname"><em>html</em></td><td>)</td>
2019-11-02 08:13:06 +01:00
<td> const</td>
2019-07-28 02:30:30 +02:00
</tr>
</table>
</td>
<td class="mlabels-right">
<span class="mlabels"><span class="mlabel">protected</span></span> </td>
</tr>
</table>
</div><div class="memdoc">
<p>Removes HTML tags and superfluous spaces from an HTML page. </p>
2019-08-08 11:28:50 +02:00
<dl class="section since"><dt>Since</dt><dd>0.6.0 </dd></dl>
2019-09-30 16:09:31 +02:00
2019-07-28 02:30:30 +02:00
</div>
</div>
2019-11-02 08:13:06 +01:00
<a id="a8bfe665841ab414e8682b4d1f90a4c13"></a>
<h2 class="memtitle"><span class="permalink"><a href="#a8bfe665841ab414e8682b4d1f90a4c13">&#9670;&nbsp;</a></span>unescape_html()</h2>
2019-07-28 02:30:30 +02:00
<div class="memitem">
<div class="memproto">
<table class="mlabels">
<tr>
<td class="mlabels-left">
<table class="memname">
<tr>
2019-09-30 16:09:31 +02:00
<td class="memname">string remwharead::URI::unescape_html </td>
2019-07-28 02:30:30 +02:00
<td>(</td>
2019-09-20 21:35:37 +02:00
<td class="paramtype">string&#160;</td>
2019-07-28 02:30:30 +02:00
<td class="paramname"><em>html</em></td><td>)</td>
2019-11-02 08:13:06 +01:00
<td> const</td>
2019-07-28 02:30:30 +02:00
</tr>
</table>
</td>
<td class="mlabels-right">
<span class="mlabels"><span class="mlabel">protected</span></span> </td>
</tr>
</table>
</div><div class="memdoc">
<p>Convert HTML entities to UTF-8. </p>
2019-08-08 11:28:50 +02:00
<dl class="section since"><dt>Since</dt><dd>0.6.0 </dd></dl>
2019-09-30 16:09:31 +02:00
2019-07-28 02:30:30 +02:00
</div>
</div>
2019-07-27 22:46:58 +02:00
<hr/>The documentation for this class was generated from the following files:<ul>
2019-08-05 22:01:08 +02:00
<li>include/<a class="el" href="uri_8hpp_source.html">uri.hpp</a></li>
2019-07-27 22:46:58 +02:00
<li>src/lib/uri.cpp</li>
</ul>
</div><!-- contents -->
<!-- start footer part -->
<hr class="footer"/><address class="footer"><small>
Generated by &#160;<a href="http://www.doxygen.org/index.html">
<img class="footer" src="doxygen.png" alt="doxygen"/>
2019-08-08 22:13:06 +02:00
</a> 1.8.15
2019-07-27 22:46:58 +02:00
</small></address>
</body>
</html>