Added error handling to calls to archive service.
This commit is contained in:
parent
862e90274e
commit
90fff35e0e
|
@ -43,6 +43,24 @@ namespace remwharead
|
|||
operator bool();
|
||||
} html_extract;
|
||||
|
||||
/*!
|
||||
* @brief The result of the call to the archive service.
|
||||
*
|
||||
* @return true if successful, when cast to bool.
|
||||
*
|
||||
* @since 0.7.0
|
||||
*
|
||||
* @headerfile uri.hpp remwharead/uri.hpp
|
||||
*/
|
||||
// NOTE: Member order matters. URI::archive() brace-initializes this struct
// positionally as { successful, error, uri }.
typedef struct archive_answer
|
||||
{
|
||||
//! Whether the archive call succeeded. Defaults to false.
bool successful = false;
|
||||
//! Error description. URI::archive() sets it to "" on success.
string error;
|
||||
//! URI of the archived page. URI::archive() sets it to "" on failure.
string uri;
|
||||
|
||||
//! Returns the value of the successful flag.
// NOTE(review): not const-qualified, so it cannot be invoked on a const
// archive_answer; consider adding const here and on the definition.
operator bool();
|
||||
} archive_answer;
|
||||
|
||||
/*!
|
||||
* @brief Download, archive and process a %URI.
|
||||
*
|
||||
|
@ -61,7 +79,7 @@ namespace remwharead
|
|||
const html_extract get();
|
||||
|
||||
//! Save %URI in archive and return the result, including the archive-URI on success.
|
||||
const string archive();
|
||||
const archive_answer archive();
|
||||
|
||||
protected:
|
||||
string _uri;
|
||||
|
|
|
@ -65,12 +65,16 @@ int main(const int argc, const char *argv[])
|
|||
cerr << page.error << endl;
|
||||
return 4;
|
||||
}
|
||||
string archive_uri;
|
||||
archive_answer archive;
|
||||
if (opts.archive)
|
||||
{
|
||||
archive_uri = uri.archive();
|
||||
archive = uri.archive();
|
||||
if (!archive)
|
||||
{
|
||||
cerr << "Error archiving URL: " << archive.error << endl;
|
||||
}
|
||||
}
|
||||
db.store({opts.uri, archive_uri, system_clock::now(), opts.tags,
|
||||
db.store({opts.uri, archive.uri, system_clock::now(), opts.tags,
|
||||
page.title, page.description, page.fulltext});
|
||||
}
|
||||
|
||||
|
|
|
@ -34,8 +34,6 @@
|
|||
|
||||
namespace remwharead
|
||||
{
|
||||
using std::cerr;
|
||||
using std::endl;
|
||||
using std::regex;
|
||||
using std::regex_replace;
|
||||
using std::regex_search;
|
||||
|
@ -58,6 +56,11 @@ namespace remwharead
|
|||
return successful;
|
||||
}
|
||||
|
||||
//! Conversion to bool: yields the successful flag, so that an archive_answer
//! can be tested directly in a condition such as `if (!archive)`.
archive_answer::operator bool()
|
||||
{
|
||||
return successful;
|
||||
}
|
||||
|
||||
URI::URI(const string &uri)
|
||||
:_uri(uri)
|
||||
{
|
||||
|
@ -176,7 +179,7 @@ namespace remwharead
|
|||
}
|
||||
default:
|
||||
{
|
||||
cerr << response.getStatus() << " " << response.getReason() << endl;
|
||||
throw Poco::Exception(response.getReason());
|
||||
return "";
|
||||
}
|
||||
}
|
||||
|
@ -228,6 +231,7 @@ namespace remwharead
|
|||
|
||||
return unescape_html(out);
|
||||
}
|
||||
|
||||
const string URI::remove_html_tags(const string &html, const string &tag)
|
||||
{
|
||||
// NOTE: I did this with regex_replace before, but libstdc++ segfaulted.
|
||||
|
@ -566,11 +570,11 @@ namespace remwharead
|
|||
return output;
|
||||
}
|
||||
|
||||
const string URI::archive()
|
||||
const archive_answer URI::archive()
|
||||
{
|
||||
if (_uri.substr(0, 4) != "http")
|
||||
{
|
||||
return "";
|
||||
return { false, "Only HTTP(S) is archivable.", "" };
|
||||
}
|
||||
|
||||
try
|
||||
|
@ -581,19 +585,15 @@ namespace remwharead
|
|||
smatch match;
|
||||
if (regex_search(answer, match, regex("Content-Location: (.+)\r")))
|
||||
{
|
||||
return "https://web.archive.org" + match[1].str();
|
||||
}
|
||||
else
|
||||
{
|
||||
cerr << "Error: Could not archive page.\n";
|
||||
return { true, "", "https://web.archive.org" + match[1].str() };
|
||||
}
|
||||
}
|
||||
catch (const std::exception &e)
|
||||
catch (const Poco::Exception &e)
|
||||
{
|
||||
cerr << "Error in " << __func__ << ": " << e.what() << ".\n";
|
||||
return { false, e.displayText(), "" };
|
||||
}
|
||||
|
||||
return "";
|
||||
return { false, "Unknown error.", "" };
|
||||
}
|
||||
|
||||
const string URI::remove_newlines(string text)
|
||||
|
|
Loading…
Reference in New Issue
Block a user