4d2632302ca19206f25a683ee36b72788116fe9e/docs/uri_8hpp_source.html

 /*  This file is part of remwharead.
  *  Copyright © 2019 tastytea <tastytea@tastytea.de>
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
  *  the Free Software Foundation, version 3.
  *
  *  This program is distributed in the hope that it will be useful,
  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  *  GNU General Public License for more details.
  *
  *  You should have received a copy of the GNU General Public License
  *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
  */

 #ifndef REMWHAREAD_URI_HPP
 #define REMWHAREAD_URI_HPP

 #include <cstdint>
 #include <string>

 namespace remwharead
 {
 using std::uint16_t;
 using std::string;

 //! A processed HTML page.
 struct html_extract
 {
     //! Success flag; starts out as `false`.
     bool successful{false};
     //! Error text. NOTE(review): presumably only meaningful when
     //! `successful` is false — confirm against uri.cpp.
     string error;
     //! Title of the page.
     string title;
     //! Description of the page.
     string description;
     //! Full text of the page.
     string fulltext;

     //! Explicit conversion to bool.
     //! NOTE(review): presumably yields `successful`; the definition is
     //! not part of this header.
     explicit operator bool();
 };

 //! The result of the call to the archive service.
 struct archive_answer
 {
     //! Success flag; starts out as `false`.
     bool successful{false};
     //! Error text. NOTE(review): presumably only meaningful when
     //! `successful` is false — confirm against uri.cpp.
     string error;
     //! URI string. NOTE(review): presumably the archive-URI returned by
     //! the service — confirm against URI::archive().
     string uri;

     //! Explicit conversion to bool.
     //! NOTE(review): presumably yields `successful`; the definition is
     //! not part of this header.
     explicit operator bool();
 };

 //! Download, archive and process a URI.
 class URI
 {
 public:
     //! Construct object and set URL.
     explicit URI(string uri);

     //! Virtual destructor; defined outside this header.
     virtual ~URI();

     // Copy and move operations are the compiler-generated ones.
     URI(const URI &other) = default;
     URI(URI &&other) = default;
     URI &operator=(const URI &other) = default;
     URI &operator=(URI &&other) = default;

     //! Download URI and extract title, description and full text.
     [[nodiscard]] html_extract get();

     //! Save URI in archive and return archive-URI.
     [[nodiscard]] archive_answer archive() const;

 protected:
     //! The stored URI string.
     //! NOTE(review): presumably set by the constructor — confirm.
     string _uri;

     //! Make an HTTP(S) request.
     //! NOTE(review): the effect of `archive` is not visible in this
     //! header — see the definition in uri.cpp.
     [[nodiscard]] string make_request(const string &uri,
                                       bool archive = false) const;

     //! Extract the title from an HTML page.
     [[nodiscard]] string extract_title(const string &html) const;

     //! Extract the description from an HTML page.
     [[nodiscard]] string extract_description(const string &html) const;

     //! Removes HTML tags and superfluous spaces from an HTML page.
     [[nodiscard]] string strip_html(const string &html) const;

     //! Remove HTML tags.
     //! NOTE(review): presumably an empty `tag` selects all tags — confirm.
     [[nodiscard]] string remove_html_tags(const string &html,
                                           const string &tag = "") const;

     //! Convert HTML entities to UTF-8.
     [[nodiscard]] string unescape_html(string html) const;

     //! Replace newlines with spaces.
     [[nodiscard]] string remove_newlines(string text) const;

     //! Set proxy server.
     void set_proxy();

     //! Limits text to N characters, cuts at space.
     [[nodiscard]] string cut_text(const string &text,
                                   uint16_t n_chars) const;
 };
 } // namespace remwharead

 #endif  // REMWHAREAD_URI_HPP
remwharead::URI::get
html_extract get()
Download URI and extract title, description and full text.
Definition: uri.cpp:129

remwharead::URI::URI
URI(string uri)
Construct object and set URL.
Definition: uri.cpp:68

remwharead::html_extract
A processed HTML page.
Definition: uri.hpp:37

remwharead::URI::unescape_html
string unescape_html(string html) const
Convert HTML entities to UTF-8.

remwharead::URI::set_proxy
void set_proxy()
Set proxy server.
Definition: uri.cpp:76

remwharead::URI::make_request
string make_request(const string &uri, bool archive=false) const
Make an HTTP(S) request.
Definition: uri.cpp:154

remwharead::URI::remove_newlines
string remove_newlines(string text) const
Replace newlines with spaces.
Definition: uri.cpp:645

remwharead::URI::remove_html_tags
string remove_html_tags(const string &html, const string &tag="") const
Remove HTML tags.

remwharead::URI::strip_html
string strip_html(const string &html) const
Removes HTML tags and superfluous spaces from an HTML page.

remwharead::URI::extract_title
string extract_title(const string &html) const
Extract the title from an HTML page.
Definition: uri.cpp:227

remwharead::archive_answer
The result of the call to the archive service.
Definition: uri.hpp:57

remwharead::URI::cut_text
string cut_text(const string &text, uint16_t n_chars) const
Limits text to N characters, cuts at space.
Definition: uri.cpp:663

remwharead::URI
Download, archive and process a URI.
Definition: uri.hpp:73

remwharead::URI::archive
archive_answer archive() const
Save URI in archive and return archive-URI.

remwharead::URI::extract_description
string extract_description(const string &html) const
Extract the description from an HTML page.
Definition: uri.cpp:244