remwharead  0.8.4
uri.hpp
1 /* This file is part of remwharead.
2  * Copyright © 2019 tastytea <tastytea@tastytea.de>
3  *
4  * This program is free software: you can redistribute it and/or modify
5  * it under the terms of the GNU General Public License as published by
6  * the Free Software Foundation, version 3.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11  * GNU General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public License
14  * along with this program. If not, see <http://www.gnu.org/licenses/>.
15  */
16 
17 #ifndef REMWHAREAD_URI_HPP
18 #define REMWHAREAD_URI_HPP
19 
20 #include <string>
21 
22 namespace remwharead
23 {
24 using std::string;
25 
// A processed HTML page.
// NOTE(review): the declaration line naming this struct (listing line 35) is
// absent from this listing; only the body is visible here.
36 {
37  bool successful = false;
38  string error;
39  string title;
40  string description;
41  string fulltext;
42 
// Explicit conversion to bool. Presumably reports `successful` -- the
// definition is not visible here, TODO confirm.
43  explicit operator bool();
44 };
45 
// The result of the call to the archive service.
// NOTE(review): the declaration line naming this struct (listing line 55) is
// absent from this listing; only the body is visible here.
56 {
57  bool successful = false;
58  string error;
59  string uri;
60 
// Explicit conversion to bool. Presumably reports `successful` -- the
// definition is not visible here, TODO confirm.
61  explicit operator bool();
62 };
63 
// Download, archive and process a URI.
71 class URI
72 {
73 public:
// Construct object and set URL.
82  explicit URI(string uri);
83  virtual ~URI();
84 
// Copy and move operations are all compiler-generated.
85  URI(const URI &other) = default;
86  URI &operator=(const URI &other) = default;
87  URI(URI &&other) = default;
88  URI &operator=(URI &&other) = default;
89 
// Download URI and extract title, description and full text.
95  html_extract get();
96 
// NOTE(review): the descriptions accompanying this listing mention
// `archive_answer archive()` (save URI in archive and return archive-URI),
// but its declaration (listing line 102) is not present here -- confirm
// against the original header.
103 
104 protected:
105  string _uri;
106 
// Make an HTTP(S) request.
// NOTE(review): the meaning of the `archive` flag is not visible in this
// header -- see the definition in uri.cpp.
112  string make_request(const string &uri, bool archive = false) const;
113 
// Extract the title from an HTML page.
119  string extract_title(const string &html);
120 
// Extract the description from an HTML page.
126  string extract_description(const string &html);
127 
// Remove HTML tags and superfluous spaces from an HTML page.
133  string strip_html(const string &html);
134 
// Remove HTML tags.
// Presumably an empty `tag` (the default) means "all tags" -- TODO confirm
// against the definition.
143  string remove_html_tags(const string &html, const string &tag = "");
144 
// Convert HTML entities to UTF-8.
150  string unescape_html(string html);
151 
// Replace newlines with spaces.
157  string remove_newlines(string text);
158 
// Set proxy server.
164  void set_proxy();
165 };
166 } // namespace remwharead
167 
168 #endif // REMWHAREAD_URI_HPP
html_extract get()
Download URI and extract title, description and full text.
Definition: uri.cpp:126
URI(string uri)
Construct object and set URL.
Definition: uri.cpp:66
A processed HTML page.
Definition: uri.hpp:35
archive_answer archive()
Save URI in archive and return archive-URI.
void set_proxy()
Set proxy server.
Definition: uri.cpp:74
string make_request(const string &uri, bool archive=false) const
Make an HTTP(S) request.
Definition: uri.cpp:151
string unescape_html(string html)
Convert HTML entities to UTF-8.
string strip_html(const string &html)
Removes HTML tags and superfluous spaces from an HTML page.
string extract_title(const string &html)
Extract the title from an HTML page.
Definition: uri.cpp:223
The result of the call to the archive service.
Definition: uri.hpp:55
string remove_newlines(string text)
Replace newlines with spaces.
Definition: uri.cpp:641
Download, archive and process a URI.
Definition: uri.hpp:71
string remove_html_tags(const string &html, const string &tag="")
Remove HTML tags.
string extract_description(const string &html)
Extract the description from an HTML page.
Definition: uri.cpp:240