diff --git a/include/uri.hpp b/include/uri.hpp index 8d2c5f1..71b0a57 100644 --- a/include/uri.hpp +++ b/include/uri.hpp @@ -107,6 +107,7 @@ public: protected: string _uri; + string _document; /*! * @brief Make a HTTP(S) request. @@ -122,7 +123,7 @@ protected: * @since 0.6.0 */ [[nodiscard]] - string extract_title(const string &html) const; + string extract_title() const; /*! * @brief Extract the description from an HTML page. @@ -130,7 +131,7 @@ protected: * @since 0.6.0 */ [[nodiscard]] - string extract_description(const string &html) const; + string extract_description() const; /*! * @brief Removes HTML tags and superflous spaces from an HTML page. @@ -138,7 +139,7 @@ protected: * @since 0.6.0 */ [[nodiscard]] - string strip_html(const string &html) const; + string strip_html() const; /*! * @brief Remove HTML tags. @@ -181,6 +182,13 @@ protected: */ [[nodiscard]] string cut_text(const string &text, uint16_t n_chars) const; + + /*! + * @brief Returns true if document is *HTML. + * + * @since 0.9.2 + */ + bool is_html() const; }; } // namespace remwharead diff --git a/src/lib/uri.cpp b/src/lib/uri.cpp index ca11dec..c875a52 100644 --- a/src/lib/uri.cpp +++ b/src/lib/uri.cpp @@ -66,7 +66,7 @@ archive_answer::operator bool() } URI::URI(string uri) - :_uri(move(uri)) + : _uri(move(uri)) { Poco::Net::initializeSSL(); @@ -130,16 +130,16 @@ html_extract URI::get() { try { - const string answer = make_request(_uri); - if (!answer.empty()) + _document = make_request(_uri); + if (!_document.empty()) { return { true, "", - extract_title(answer), - extract_description(answer), - strip_html(answer) + extract_title(), + extract_description(), + strip_html() }; } } @@ -224,14 +224,13 @@ string URI::make_request(const string &uri, bool archive) const } } -string URI::extract_title(const string &html) const +string URI::extract_title() const { - const RegEx re_htmlfile(".*\\.(.?html?|xml|rss)$", RegEx::RE_CASELESS); - if (_uri.substr(0, 4) == "http" || re_htmlfile.match(_uri)) + if (is_html()) { const RegEx re_title("
A short sentence.
" + ""; + } bool test_title() { - if (extract_title(_html) == "title") - { - return true; - } - return false; + return (extract_title() == "title"); } bool test_description() { - if (extract_description(_html) == "description") - { - return true; - } - return false; + return (extract_description() == "description"); } bool test_fulltext() { - if (strip_html(_html) == "titleA short sentence.") - { - return true; - } - return false; + return (strip_html() == "titleA short sentence."); } - - private: - const string _html = - "A short sentence.
" - ""; }; WHEN ("extract_title() is called")