remwharead  0.9.1
Public Member Functions | Protected Member Functions | Protected Attributes | List of all members
remwharead::URI Class Reference

Download, archive and process a URI. More...

#include <remwharead/uri.hpp>

Public Member Functions

 URI (string uri)
 Construct object and set URL. More...
 
 URI (const URI &other)=default
 
URI & operator= (const URI &other)=default
 
 URI (URI &&other)=default
 
URI & operator= (URI &&other)=default
 
html_extract get ()
 Download URI and extract title, description and full text. More...
 
archive_answer archive () const
 Save URI in archive and return archive-URI. More...
 

Protected Member Functions

string make_request (const string &uri, bool archive=false) const
 Make an HTTP(S) request. More...
 
string extract_title (const string &html) const
 Extract the title from an HTML page. More...
 
string extract_description (const string &html) const
 Extract the description from an HTML page. More...
 
string strip_html (const string &html) const
 Removes HTML tags and superfluous spaces from an HTML page. More...
 
string remove_html_tags (const string &html, const string &tag="") const
 Remove HTML tags. More...
 
string unescape_html (string html) const
 Convert HTML entities to UTF-8. More...
 
string remove_newlines (string text) const
 Replace newlines with spaces. More...
 
void set_proxy ()
 Set proxy server. More...
 
string cut_text (const string &text, uint16_t n_chars) const
 Limits text to N characters, cuts at space. More...
 

Protected Attributes

string _uri
 

Detailed Description

Download, archive and process a URI.

Since
0.6.0

Constructor & Destructor Documentation

◆ URI()

remwharead::URI::URI ( string  uri)
explicit

Construct object and set URL.

Initializes TLS and sets proxy from the environment variable http_proxy, if possible.

Since
0.6.0
69  :_uri(move(uri))
70 {
71  Poco::Net::initializeSSL();
72 
73  set_proxy();
74 }
void set_proxy()
Set proxy server.
Definition: uri.cpp:76

Member Function Documentation

◆ archive()

archive_answer remwharead::URI::archive ( ) const

Save URI in archive and return archive-URI.

Since
0.6.0

◆ cut_text()

string remwharead::URI::cut_text ( const string &  text,
uint16_t  n_chars 
) const
protected

Limits text to N characters, cuts at space.

Since
0.8.5
664 {
665  if (text.size() > n_chars)
666  {
667  constexpr char suffix[] = " […]";
668  constexpr auto suffix_len = std::end(suffix) - std::begin(suffix) - 1;
669  if (n_chars <= suffix_len)
670  {
671  throw std::invalid_argument("n_chars has to be greater than "
672  + std::to_string(suffix_len));
673  }
674 
675  const size_t pos =
676  text.rfind(' ', static_cast<size_t>(n_chars - suffix_len));
677 
678  return text.substr(0, pos) + suffix;
679  }
680 
681  return text;
682 }

◆ extract_description()

string remwharead::URI::extract_description ( const string &  html) const
protected

Extract the description from an HTML page.

Since
0.6.0
245 {
246  const RegEx re_htmlfile(".*\\.(.?html?|xml|rss)$", RegEx::RE_CASELESS);
247  if (_uri.substr(0, 4) == "http" || re_htmlfile.match(_uri))
248  {
249  const RegEx re_desc(R"(description"[^>]+content="([^"]+))",
250  RegEx::RE_CASELESS);
251  vector<string> matches;
252  re_desc.split(html, matches);
253  if (matches.size() >= 2)
254  {
255  return remove_newlines(cut_text(unescape_html(matches[1]), 500));
256  }
257  }
258 
259  return "";
260 }
261 
262 string URI::strip_html(const string &html) const
263 {
264  string out;
265 
266  out = remove_html_tags(html, "script"); // Remove JavaScript.
267  out = remove_html_tags(out, "style"); // Remove CSS.
268  out = remove_html_tags(out); // Remove tags.
269 
270  size_t pos = 0;
271  while ((pos = out.find('\r', pos)) != std::string::npos) // Remove CR.
272  {
273  out.replace(pos, 1, "");
274  }
275 
276  // Remove whitespace at eol.
277  RegEx("\\s+\n").subst(out, "\n", RegEx::RE_GLOBAL);
278  RegEx("\n{2,}").subst(out, "\n", RegEx::RE_GLOBAL); // Reduce newlines.
279 
280  return unescape_html(out);
281 }
282 
283 string URI::remove_html_tags(const string &html, const string &tag) const
284 {
285  // NOTE: I did this with regex_replace before, but libstdc++ segfaulted.
286  string out;
287  if (tag.empty())
288  {
289  size_t pos = 0;
290  while (pos != std::string::npos)
291  {
292  size_t startpos = html.find('<', pos);
293  size_t endpos = html.find('>', startpos);
294  out += html.substr(pos, startpos - pos);
295  pos = endpos;
296  if (pos != std::string::npos)
297  {
298  ++pos;
299  }
300  }
301  }
302  else
303  {
304  size_t pos = 0;
305  out = html;
306  while ((pos = out.find("<" + tag)) != std::string::npos)
307  {
308  size_t endpos = out.find("</" + tag, pos);
309  if (endpos == std::string::npos)
310  {
311  break;
312  }
313  endpos += 3 + tag.length(); // tag + </ + >
314  out.replace(pos, endpos - pos, "");
315  }
316  }
317 
318  return out;
319 }
320 
321 string URI::unescape_html(string html) const
322 {
323  // Used to convert int to utf-8 char.
324  std::wstring_convert<std::codecvt_utf8<char32_t>, char32_t> u8c;
325  const RegEx re_entity("&#(x)?([[:alnum:]]{1,8});");
326  RegEx::MatchVec matches;
327  string::size_type pos = 0;
328 
329  while (re_entity.match(html, pos, matches) != 0)
330  {
331  char32_t codepoint = 0;
332  const string number = html.substr(matches[2].offset, matches[2].length);
333  // 'x' in front of the number means it's hexadecimal, else decimal.
334  if (matches[1].length != 0)
335  {
336  codepoint = static_cast<char32_t>(std::stoul(number, nullptr, 16));
337  }
338  else
339  {
340  codepoint = static_cast<char32_t>(std::stoi(number, nullptr, 10));
341  }
342  const string unicode = u8c.to_bytes(codepoint);
343  html.replace(matches[0].offset, matches[0].length, unicode);
344  pos = matches[0].offset + unicode.length();
345  }
346 
347  // Source: https://en.wikipedia.org/wiki/List_of_XML_and_HTML_character_
348  // entity_references#Character_entity_references_in_HTML
349  const array<const std::pair<const string, const char32_t>, 258> names =
350  {{
351  { "exclamation", 0x0021 },
352  { "quot", 0x0022 },
353  { "percent", 0x0025 },
354  { "amp", 0x0026 },
355  { "apos", 0x0027 },
356  { "add", 0x002B },
357  { "lt", 0x003C },
358  { "equal", 0x003D },
359  { "gt", 0x003E },
360  { "nbsp", 0x00A0 },
361  { "iexcl", 0x00A1 },
362  { "cent", 0x00A2 },
363  { "pound", 0x00A3 },
364  { "curren", 0x00A4 },
365  { "yen", 0x00A5 },
366  { "brvbar", 0x00A6 },
367  { "sect", 0x00A7 },
368  { "uml", 0x00A8 },
369  { "copy", 0x00A9 },
370  { "ordf", 0x00AA },
371  { "laquo", 0x00AB },
372  { "not", 0x00AC },
373  { "shy", 0x00AD },
374  { "reg", 0x00AE },
375  { "macr", 0x00AF },
376  { "deg", 0x00B0 },
377  { "plusmn", 0x00B1 },
378  { "sup2", 0x00B2 },
379  { "sup3", 0x00B3 },
380  { "acute", 0x00B4 },
381  { "micro", 0x00B5 },
382  { "para", 0x00B6 },
383  { "middot", 0x00B7 },
384  { "cedil", 0x00B8 },
385  { "sup1", 0x00B9 },
386  { "ordm", 0x00BA },
387  { "raquo", 0x00BB },
388  { "frac14", 0x00BC },
389  { "frac12", 0x00BD },
390  { "frac34", 0x00BE },
391  { "iquest", 0x00BF },
392  { "Agrave", 0x00C0 },
393  { "Aacute", 0x00C1 },
394  { "Acirc", 0x00C2 },
395  { "Atilde", 0x00C3 },
396  { "Auml", 0x00C4 },
397  { "Aring", 0x00C5 },
398  { "AElig", 0x00C6 },
399  { "Ccedil", 0x00C7 },
400  { "Egrave", 0x00C8 },
401  { "Eacute", 0x00C9 },
402  { "Ecirc", 0x00CA },
403  { "Euml", 0x00CB },
404  { "Igrave", 0x00CC },
405  { "Iacute", 0x00CD },
406  { "Icirc", 0x00CE },
407  { "Iuml", 0x00CF },
408  { "ETH", 0x00D0 },
409  { "Ntilde", 0x00D1 },
410  { "Ograve", 0x00D2 },
411  { "Oacute", 0x00D3 },
412  { "Ocirc", 0x00D4 },
413  { "Otilde", 0x00D5 },
414  { "Ouml", 0x00D6 },
415  { "times", 0x00D7 },
416  { "Oslash", 0x00D8 },
417  { "Ugrave", 0x00D9 },
418  { "Uacute", 0x00DA },
419  { "Ucirc", 0x00DB },
420  { "Uuml", 0x00DC },
421  { "Yacute", 0x00DD },
422  { "THORN", 0x00DE },
423  { "szlig", 0x00DF },
424  { "agrave", 0x00E0 },
425  { "aacute", 0x00E1 },
426  { "acirc", 0x00E2 },
427  { "atilde", 0x00E3 },
428  { "auml", 0x00E4 },
429  { "aring", 0x00E5 },
430  { "aelig", 0x00E6 },
431  { "ccedil", 0x00E7 },
432  { "egrave", 0x00E8 },
433  { "eacute", 0x00E9 },
434  { "ecirc", 0x00EA },
435  { "euml", 0x00EB },
436  { "igrave", 0x00EC },
437  { "iacute", 0x00ED },
438  { "icirc", 0x00EE },
439  { "iuml", 0x00EF },
440  { "eth", 0x00F0 },
441  { "ntilde", 0x00F1 },
442  { "ograve", 0x00F2 },
443  { "oacute", 0x00F3 },
444  { "ocirc", 0x00F4 },
445  { "otilde", 0x00F5 },
446  { "ouml", 0x00F6 },
447  { "divide", 0x00F7 },
448  { "oslash", 0x00F8 },
449  { "ugrave", 0x00F9 },
450  { "uacute", 0x00FA },
451  { "ucirc", 0x00FB },
452  { "uuml", 0x00FC },
453  { "yacute", 0x00FD },
454  { "thorn", 0x00FE },
455  { "yuml", 0x00FF },
456  { "OElig", 0x0152 },
457  { "oelig", 0x0153 },
458  { "Scaron", 0x0160 },
459  { "scaron", 0x0161 },
460  { "Yuml", 0x0178 },
461  { "fnof", 0x0192 },
462  { "circ", 0x02C6 },
463  { "tilde", 0x02DC },
464  { "Alpha", 0x0391 },
465  { "Beta", 0x0392 },
466  { "Gamma", 0x0393 },
467  { "Delta", 0x0394 },
468  { "Epsilon", 0x0395 },
469  { "Zeta", 0x0396 },
470  { "Eta", 0x0397 },
471  { "Theta", 0x0398 },
472  { "Iota", 0x0399 },
473  { "Kappa", 0x039A },
474  { "Lambda", 0x039B },
475  { "Mu", 0x039C },
476  { "Nu", 0x039D },
477  { "Xi", 0x039E },
478  { "Omicron", 0x039F },
479  { "Pi", 0x03A0 },
480  { "Rho", 0x03A1 },
481  { "Sigma", 0x03A3 },
482  { "Tau", 0x03A4 },
483  { "Upsilon", 0x03A5 },
484  { "Phi", 0x03A6 },
485  { "Chi", 0x03A7 },
486  { "Psi", 0x03A8 },
487  { "Omega", 0x03A9 },
488  { "alpha", 0x03B1 },
489  { "beta", 0x03B2 },
490  { "gamma", 0x03B3 },
491  { "delta", 0x03B4 },
492  { "epsilon", 0x03B5 },
493  { "zeta", 0x03B6 },
494  { "eta", 0x03B7 },
495  { "theta", 0x03B8 },
496  { "iota", 0x03B9 },
497  { "kappa", 0x03BA },
498  { "lambda", 0x03BB },
499  { "mu", 0x03BC },
500  { "nu", 0x03BD },
501  { "xi", 0x03BE },
502  { "omicron", 0x03BF },
503  { "pi", 0x03C0 },
504  { "rho", 0x03C1 },
505  { "sigmaf", 0x03C2 },
506  { "sigma", 0x03C3 },
507  { "tau", 0x03C4 },
508  { "upsilon", 0x03C5 },
509  { "phi", 0x03C6 },
510  { "chi", 0x03C7 },
511  { "psi", 0x03C8 },
512  { "omega", 0x03C9 },
513  { "thetasym", 0x03D1 },
514  { "upsih", 0x03D2 },
515  { "piv", 0x03D6 },
516  { "ensp", 0x2002 },
517  { "emsp", 0x2003 },
518  { "thinsp", 0x2009 },
519  { "zwnj", 0x200C },
520  { "zwj", 0x200D },
521  { "lrm", 0x200E },
522  { "rlm", 0x200F },
523  { "ndash", 0x2013 },
524  { "mdash", 0x2014 },
525  { "horbar", 0x2015 },
526  { "lsquo", 0x2018 },
527  { "rsquo", 0x2019 },
528  { "sbquo", 0x201A },
529  { "ldquo", 0x201C },
530  { "rdquo", 0x201D },
531  { "bdquo", 0x201E },
532  { "dagger", 0x2020 },
533  { "Dagger", 0x2021 },
534  { "bull", 0x2022 },
535  { "hellip", 0x2026 },
536  { "permil", 0x2030 },
537  { "prime", 0x2032 },
538  { "Prime", 0x2033 },
539  { "lsaquo", 0x2039 },
540  { "rsaquo", 0x203A },
541  { "oline", 0x203E },
542  { "frasl", 0x2044 },
543  { "euro", 0x20AC },
544  { "image", 0x2111 },
545  { "weierp", 0x2118 },
546  { "real", 0x211C },
547  { "trade", 0x2122 },
548  { "alefsym", 0x2135 },
549  { "larr", 0x2190 },
550  { "uarr", 0x2191 },
551  { "rarr", 0x2192 },
552  { "darr", 0x2193 },
553  { "harr", 0x2194 },
554  { "crarr", 0x21B5 },
555  { "lArr", 0x21D0 },
556  { "uArr", 0x21D1 },
557  { "rArr", 0x21D2 },
558  { "dArr", 0x21D3 },
559  { "hArr", 0x21D4 },
560  { "forall", 0x2200 },
561  { "part", 0x2202 },
562  { "exist", 0x2203 },
563  { "empty", 0x2205 },
564  { "nabla", 0x2207 },
565  { "isin", 0x2208 },
566  { "notin", 0x2209 },
567  { "ni", 0x220B },
568  { "prod", 0x220F },
569  { "sum", 0x2211 },
570  { "minus", 0x2212 },
571  { "lowast", 0x2217 },
572  { "radic", 0x221A },
573  { "prop", 0x221D },
574  { "infin", 0x221E },
575  { "ang", 0x2220 },
576  { "and", 0x2227 },
577  { "or", 0x2228 },
578  { "cap", 0x2229 },
579  { "cup", 0x222A },
580  { "int", 0x222B },
581  { "there4", 0x2234 },
582  { "sim", 0x223C },
583  { "cong", 0x2245 },
584  { "asymp", 0x2248 },
585  { "ne", 0x2260 },
586  { "equiv", 0x2261 },
587  { "le", 0x2264 },
588  { "ge", 0x2265 },
589  { "sub", 0x2282 },
590  { "sup", 0x2283 },
591  { "nsub", 0x2284 },
592  { "sube", 0x2286 },
593  { "supe", 0x2287 },
594  { "oplus", 0x2295 },
595  { "otimes", 0x2297 },
596  { "perp", 0x22A5 },
597  { "sdot", 0x22C5 },
598  { "lceil", 0x2308 },
599  { "rceil", 0x2309 },
600  { "lfloor", 0x230A },
601  { "rfloor", 0x230B },
602  { "lang", 0x2329 },
603  { "rang", 0x232A },
604  { "loz", 0x25CA },
605  { "spades", 0x2660 },
606  { "clubs", 0x2663 },
607  { "hearts", 0x2665 },
608  { "diams", 0x2666 }
609  }};
610 
611  for (auto &pair : names)
612  {
613  const RegEx re('&' + pair.first + ';');
614  re.subst(html, u8c.to_bytes(pair.second), RegEx::RE_GLOBAL);
615  }
616 
617  return html;
618 }
619 
620 archive_answer URI::archive() const
621 {
622  if (_uri.substr(0, 4) != "http")
623  {
624  return { false, "Only HTTP(S) is archivable.", "" };
625  }
626 
627  try
628  {
629  const string answer = make_request("https://web.archive.org/save/"
630  + _uri, true);
631 
632  if (!answer.empty())
633  {
634  return { true, "", "https://web.archive.org" + answer };
635  }
636  }
637  catch (const Poco::Exception &e)
638  {
639  return { false, e.displayText(), "" };
640  }
641 
642  return { false, "Unknown error.", "" };
643 }
string unescape_html(string html) const
Convert HTML entities to UTF-8.
string make_request(const string &uri, bool archive=false) const
Make an HTTP(S) request.
Definition: uri.cpp:154
string remove_newlines(string text) const
Replace newlines with spaces.
Definition: uri.cpp:645
string remove_html_tags(const string &html, const string &tag="") const
Remove HTML tags.
string strip_html(const string &html) const
Removes HTML tags and superfluous spaces from an HTML page.
string cut_text(const string &text, uint16_t n_chars) const
Limits text to N characters, cuts at space.
Definition: uri.cpp:663
archive_answer archive() const
Save URI in archive and return archive-URI.

◆ extract_title()

string remwharead::URI::extract_title ( const string &  html) const
protected

Extract the title from an HTML page.

Since
0.6.0
228 {
229  const RegEx re_htmlfile(".*\\.(.?html?|xml|rss)$", RegEx::RE_CASELESS);
230  if (_uri.substr(0, 4) == "http" || re_htmlfile.match(_uri))
231  {
232  const RegEx re_title("<title(?: [^>]+)?>([^<]+)", RegEx::RE_CASELESS);
233  vector<string> matches;
234  re_title.split(html, matches);
235  if (matches.size() >= 2)
236  {
237  return remove_newlines(unescape_html(matches[1]));
238  }
239  }
240 
241  return "";
242 }
string unescape_html(string html) const
Convert HTML entities to UTF-8.
string remove_newlines(string text) const
Replace newlines with spaces.
Definition: uri.cpp:645

◆ get()

html_extract remwharead::URI::get ( )

Download URI and extract title, description and full text.

Since
0.6.0
130 {
131  try
132  {
133  const string answer = make_request(_uri);
134  if (!answer.empty())
135  {
136  return
137  {
138  true,
139  "",
140  extract_title(answer),
141  extract_description(answer),
142  strip_html(answer)
143  };
144  }
145  }
146  catch (const Poco::Exception &e)
147  {
148  return { false, e.displayText(), "", "", "" };
149  }
150 
151  return { false, "Unknown error.", "", "", "" };
152 }
string make_request(const string &uri, bool archive=false) const
Make an HTTP(S) request.
Definition: uri.cpp:154
string strip_html(const string &html) const
Removes HTML tags and superfluous spaces from an HTML page.
string extract_title(const string &html) const
Extract the title from an HTML page.
Definition: uri.cpp:227
string extract_description(const string &html) const
Extract the description from an HTML page.
Definition: uri.cpp:244

◆ make_request()

string remwharead::URI::make_request ( const string &  uri,
bool  archive = false 
) const
protected

Make an HTTP(S) request.

Since
0.6.0
155 {
156  Poco::URI poco_uri(uri);
157  string method = archive ? HTTPRequest::HTTP_HEAD : HTTPRequest::HTTP_GET;
158  string path = poco_uri.getPathAndQuery();
159  if (path.empty())
160  {
161  path = "/";
162  }
163 
164  unique_ptr<HTTPClientSession> session;
165  if (poco_uri.getScheme() == "https")
166  {
167  session = make_unique<HTTPSClientSession>(poco_uri.getHost(),
168  poco_uri.getPort());
169  }
170  else if (poco_uri.getScheme() == "http")
171  {
172  session = make_unique<HTTPClientSession>(poco_uri.getHost(),
173  poco_uri.getPort());
174  }
175  else
176  {
177  // NOLINTNEXTLINE(cert-err60-cpp)
178  throw Poco::Exception("Protocol not supported.");
179  }
180 
181  HTTPRequest request(method, path, HTTPMessage::HTTP_1_1);
182  request.set("User-Agent", string("remwharead/") + global::version);
183 
184  HTTPResponse response;
185 
186  session->sendRequest(request);
187  istream &rs = session->receiveResponse(response);
188 
189  // Not using the constants because some are too new for Debian stretch.
190  switch (response.getStatus())
191  {
192  case 301: // HTTPResponse::HTTP_MOVED_PERMANENTLY
193  case 308: // HTTPResponse::HTTP_PERMANENT_REDIRECT
194  case 302: // HTTPResponse::HTTP_FOUND
195  case 303: // HTTPResponse::HTTP_SEE_OTHER
196  case 307: // HTTPResponse::HTTP_TEMPORARY_REDIRECT
197  {
198  string location = response.get("Location");
199  if (location.substr(0, 4) != "http")
200  {
201  location = poco_uri.getScheme() + "://" + poco_uri.getHost()
202  + location;
203  }
204  return make_request(location, archive);
205  }
206  case HTTPResponse::HTTP_OK:
207  {
208  string answer;
209  if (archive)
210  {
211  answer = response.get("Content-Location");
212  }
213  else
214  {
215  StreamCopier::copyToString(rs, answer);
216  }
217  return answer;
218  }
219  default:
220  {
221  throw Poco::Exception(response.getReason()); // NOLINT(cert-err60-cpp)
222  return "";
223  }
224  }
225 }
string make_request(const string &uri, bool archive=false) const
Make an HTTP(S) request.
Definition: uri.cpp:154
archive_answer archive() const
Save URI in archive and return archive-URI.

◆ remove_html_tags()

string remwharead::URI::remove_html_tags ( const string &  html,
const string &  tag = "" 
) const
protected

Remove HTML tags.

Parameters
html: HTML page.
tag: If set, only remove this tag.
Since
0.6.0

◆ remove_newlines()

string remwharead::URI::remove_newlines ( string  text) const
protected

Replace newlines with spaces.

Since
0.6.0
646 {
647  size_t posn = 0;
648  while ((posn = text.find('\n', posn)) != std::string::npos)
649  {
650  text.replace(posn, 1, " ");
651 
652  size_t posr = posn - 1;
653  if (text[posr] == '\r')
654  {
655  text.replace(posr, 1, " ");
656  }
657  ++posn;
658  }
659 
660  return text;
661 }

◆ set_proxy()

void remwharead::URI::set_proxy ( )
protected

Set proxy server.

Since
0.8.5
77 {
78  try
79  {
80  HTTPClientSession::ProxyConfig proxy;
81  const string env_proxy = Environment::get("http_proxy");
82  const RegEx re_proxy("^(?:https?://)?(?:([^:]+):?([^@]*)@)?" // user:pw
83  "([^:/]+)(?::([\\d]{1,5}))?/?$"); // host:port
84  vector<string> matches;
85 
86  if (re_proxy.split(env_proxy, matches) < 4)
87  {
88  return;
89  }
90 
91  proxy.username = matches[1];
92  proxy.password = matches[2];
93  proxy.host = matches[3];
94  if (!matches[4].empty())
95  {
96  // NOLINTNEXTLINE(google-runtime-int) - Need to use same as stoul.
97  const unsigned long port = std::stoul(matches[4]);
98  if (port > 65535)
99  {
100  throw std::invalid_argument("Proxy port number out of range");
101  }
102  proxy.port = static_cast<uint16_t>(port);
103  }
104  HTTPClientSession::setGlobalProxyConfig(proxy);
105  }
106  catch (const Poco::RegularExpressionException &e)
107  {
108  cerr << "Error: Proxy could not be set (" << e.displayText() << ")\n";
109  }
110  catch (const std::invalid_argument &e)
111  {
112  cerr << "Error: " << e.what() << endl;
113  }
114  catch (const Poco::NotFoundException &)
115  {
116  // No proxy found, no problem.
117  }
118  catch (const std::exception &e)
119  {
120  cerr << "Unexpected exception: " << e.what() << endl;
121  }
122 }

◆ strip_html()

string remwharead::URI::strip_html ( const string &  html) const
protected

Removes HTML tags and superfluous spaces from an HTML page.

Since
0.6.0

◆ unescape_html()

string remwharead::URI::unescape_html ( string  html) const
protected

Convert HTML entities to UTF-8.

Since
0.6.0

The documentation for this class was generated from the following files: