Return a structured result from URI::archive().

- Add remwharead::archive_answer (successful, error, uri), convertible to
  bool. The conversion is const-qualified so that it also works on the
  const value returned by URI::archive().
- URI::archive() returns archive_answer instead of a bare string and no
  longer writes to cerr; the CLI prints archive_answer::error itself.
- The default branch of the HTTP status switch throws Poco::Exception
  instead of printing the status to cerr.
- URI::archive() converts Poco exceptions and any other std::exception into
  a failed archive_answer, so nothing that used to be swallowed now escapes.

diff --git a/include/uri.hpp b/include/uri.hpp
index a9ad5d2..5dc2c36 100644
--- a/include/uri.hpp
+++ b/include/uri.hpp
@@ -43,6 +43,24 @@ namespace remwharead
         operator bool();
     } html_extract;
 
+    /*!
+     *  @brief  The result of the call to the archive service.
+     *
+     *  @return true if successful, when cast to bool.
+     *
+     *  @since  0.7.0
+     *
+     *  @headerfile uri.hpp remwharead/uri.hpp
+     */
+    typedef struct archive_answer
+    {
+        bool successful = false;
+        string error;
+        string uri;
+
+        operator bool() const;
+    } archive_answer;
+
     /*!
      *  @brief  Download, archive and process an %URI.
      *
@@ -61,7 +79,7 @@ namespace remwharead
         const html_extract get();
 
         //! Save %URI in archive and return archive-URI.
-        const string archive();
+        const archive_answer archive();
 
     protected:
         string _uri;
diff --git a/src/cli/main.cpp b/src/cli/main.cpp
index 5e7794c..443214d 100644
--- a/src/cli/main.cpp
+++ b/src/cli/main.cpp
@@ -65,12 +65,16 @@ int main(const int argc, const char *argv[])
             cerr << page.error << endl;
             return 4;
         }
-        string archive_uri;
+        archive_answer archive;
         if (opts.archive)
         {
-            archive_uri = uri.archive();
+            archive = uri.archive();
+            if (!archive)
+            {
+                cerr << "Error archiving URL: " << archive.error << endl;
+            }
         }
-        db.store({opts.uri, archive_uri, system_clock::now(), opts.tags,
+        db.store({opts.uri, archive.uri, system_clock::now(), opts.tags,
                   page.title, page.description, page.fulltext});
     }
 
diff --git a/src/lib/uri.cpp b/src/lib/uri.cpp
index 2f908f0..06ea4b8 100644
--- a/src/lib/uri.cpp
+++ b/src/lib/uri.cpp
@@ -34,8 +34,6 @@
 
 namespace remwharead
 {
-    using std::cerr;
-    using std::endl;
     using std::regex;
     using std::regex_replace;
     using std::regex_search;
@@ -58,6 +56,11 @@ namespace remwharead
         return successful;
     }
 
+    archive_answer::operator bool() const
+    {
+        return successful;
+    }
+
     URI::URI(const string &uri)
         :_uri(uri)
     {
@@ -176,7 +179,7 @@ namespace remwharead
         }
         default:
         {
-            cerr << response.getStatus() << " " << response.getReason() << endl;
+            throw Poco::Exception(response.getReason());
             return "";
         }
         }
@@ -228,6 +231,7 @@ namespace remwharead
         return unescape_html(out);
     }
 
+
     const string URI::remove_html_tags(const string &html, const string &tag)
     {
         // NOTE: I did this with regex_replace before, but libstdc++ segfaulted.
@@ -566,11 +570,11 @@ namespace remwharead
         return output;
     }
 
-    const string URI::archive()
+    const archive_answer URI::archive()
     {
         if (_uri.substr(0, 4) != "http")
         {
-            return "";
+            return { false, "Only HTTP(S) is archivable.", "" };
         }
 
         try
@@ -581,19 +585,20 @@ namespace remwharead
             smatch match;
             if (regex_search(answer, match, regex("Content-Location: (.+)\r")))
             {
-                return "https://web.archive.org" + match[1].str();
-            }
-            else
-            {
-                cerr << "Error: Could not archive page.\n";
+                return { true, "", "https://web.archive.org" + match[1].str() };
             }
         }
-        catch (const std::exception &e)
+        catch (const Poco::Exception &e)
         {
-            cerr << "Error in " << __func__ << ": " << e.what() << ".\n";
+            return { false, e.displayText(), "" };
         }
+        catch (const std::exception &e)
+        {
+            // Keep non-Poco failures from escaping, as before this change.
+            return { false, e.what(), "" };
+        }
 
-        return "";
+        return { false, "Unknown error.", "" };
     }
 
     const string URI::remove_newlines(string text)