From 6123c7fbb373f447cd4f0906aab1ee7946b6db65 Mon Sep 17 00:00:00 2001 From: tastytea Date: Fri, 13 Nov 2020 22:41:56 +0100 Subject: [PATCH] Use sub-project curl_wrapper instead of custom implementation. Via git subtree from . --- CMakeLists.txt | 1 + include/curl_wrapper.hpp | 175 --------------------------------------- src/lib/CMakeLists.txt | 14 +--- src/lib/curl_wrapper.cpp | 158 ----------------------------------- src/lib/uri.cpp | 27 ++++-- 5 files changed, 23 insertions(+), 352 deletions(-) delete mode 100644 include/curl_wrapper.hpp delete mode 100644 src/lib/curl_wrapper.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 2f2b147..521bde2 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -39,6 +39,7 @@ if(WITH_CLANG-TIDY) endif() add_subdirectory(src) +add_subdirectory(src/curl_wrapper) add_subdirectory(src/lib) add_subdirectory(include) add_subdirectory(src/cli) diff --git a/include/curl_wrapper.hpp b/include/curl_wrapper.hpp deleted file mode 100644 index 2666618..0000000 --- a/include/curl_wrapper.hpp +++ /dev/null @@ -1,175 +0,0 @@ -#ifndef REMWHAREAD_CURL_WRAPPER_HPP -#define REMWHAREAD_CURL_WRAPPER_HPP - -#include "curl/curl.h" - -#include -#include - -namespace remwharead -{ - -using std::string; -using std::string_view; - -class CURLWrapper -{ -public: - /*! - * @brief Initializes curl and sets up connection. - * - * The first time an instance of CURLWrapper is created, it calls - * `curl_global_init`, which is not thread-safe. For more information - * consult [curl_global_init(3)] - * (https://curl.haxx.se/libcurl/c/curl_global_init.html). - * - * @since 0.11.0 - */ - CURLWrapper(); - - /*! - * @brief Copy constructor. Does the same as the Constructor. - * - * @since 0.11.0 - */ - CURLWrapper(const CURLWrapper &); - - //! Move constructor - CURLWrapper(CURLWrapper &&other) noexcept = delete; - - /*! - * @brief Cleans up curl and connection. - * - * May call `curl_global_cleanup`, which is not thread-safe. For more - * information consult [curl_global_cleanup(3)] - * (https://curl.haxx.se/libcurl/c/curl_global_cleanup.html). - * - * @since 0.11.0 - */ - virtual ~CURLWrapper() noexcept; - - //! Copy assignment operator - CURLWrapper &operator=(const CURLWrapper &other) = delete; - - //! Move assignment operator - CURLWrapper &operator=(CURLWrapper &&other) noexcept = delete; - - /*! - * @brief Returns pointer to the CURL easy handle. - * - * You can use this handle to set or modify curl options. For more - * information consult [curl_easy_setopt(3)] - * (https://curl.haxx.se/libcurl/c/curl_easy_setopt.html). - * - * @since 0.11.0 - */ - inline CURL *get_curl_easy_handle() - { - return _connection; - } - - /*! - * @brief URL encodes the given string. - * - * For more information consult [curl_easy_escape(3)] - * (https://curl.haxx.se/libcurl/c/curl_easy_escape.html). - * - * @param url String to escape. - * - * @return The escaped string or {} if it failed. - * - * @since 0.11.0 - */ - [[nodiscard]] inline string escape_url(const string_view url) const - { - char *cbuf{curl_easy_escape(_connection, url.data(), - static_cast(url.size()))}; - string sbuf{cbuf}; - curl_free(cbuf); - return sbuf; - } - - /*! - * @brief URL decodes the given string. - * - * For more information consult [curl_easy_unescape(3)] - * (https://curl.haxx.se/libcurl/c/curl_easy_unescape.html). - * - * @param url String to unescape. - * - * @return The unescaped string or {} if it failed. - * - * @since 0.11.0 - */ - [[nodiscard]] inline string unescape_url(const string_view url) const - { - char *cbuf{curl_easy_unescape(_connection, url.data(), - static_cast(url.size()), nullptr)}; - string sbuf{cbuf}; - curl_free(cbuf); - return sbuf; - } - - /*! - * @brief Make a HTTP request. - * - * @param uri The full URI. - * @param archive Archive URI instead of fetching the body. - * - * @return The body of the page or the URI of the archived page. - * - * @since 0.11.0 - */ - [[nodiscard]] string make_request(string uri, bool archive); - - /*! - * @brief Returns a reference to the buffer libcurl writes into. - * - * @since 0.11.0 - */ - [[nodiscard]] inline string &get_buffer() - { - return _buffer_body; - } - -private: - CURL *_connection; - char _buffer_error[CURL_ERROR_SIZE]{}; - string _buffer_headers; - string _buffer_body; - - /*! - * @brief libcurl write callback function. - * - * @since 0.11.0 - */ - size_t writer_body(char *data, size_t size, size_t nmemb); - - /*! - * @brief Wrapper for curl, because it can only call static member - * functions. - * - * - * - * @since 0.11.0 - */ - static inline size_t writer_body_wrapper(char *data, size_t sz, - size_t nmemb, void *f) - { - return static_cast(f)->writer_body(data, sz, nmemb); - } - - //! @copydoc writer_body - size_t writer_headers(char *data, size_t size, size_t nmemb); - - //! @copydoc writer_body_wrapper - static inline size_t writer_headers_wrapper(char *data, size_t sz, - size_t nmemb, void *f) - { - return static_cast(f)->writer_headers(data, sz, nmemb); - } -}; - -} // namespace remwharead - -#endif // REMWHAREAD_CURL_WRAPPER_HPP diff --git a/src/lib/CMakeLists.txt b/src/lib/CMakeLists.txt index 31241aa..fac7a2f 100644 --- a/src/lib/CMakeLists.txt +++ b/src/lib/CMakeLists.txt @@ -4,7 +4,6 @@ include(GNUInstallDirs) find_package(Poco CONFIG COMPONENTS Foundation Net Data DataSQLite JSON XML) find_package(Boost 1.48.0 REQUIRED COMPONENTS locale) -find_package(CURL 7.52 REQUIRED) file(GLOB_RECURSE sources_lib *.cpp) file(GLOB_RECURSE headers_lib ../../include/*.hpp) @@ -23,20 +22,9 @@ target_include_directories(${PROJECT_NAME} "$") target_link_libraries(${PROJECT_NAME} - PRIVATE pthread Boost::locale + PRIVATE pthread Boost::locale curl_wrapper PUBLIC stdc++fs) -# FindCURL provides an IMPORTED target since CMake 3.12. -if(NOT ${CMAKE_VERSION} VERSION_LESS 3.12) - target_link_libraries(${PROJECT_NAME} - PUBLIC CURL::libcurl) -else() - target_include_directories(${PROJECT_NAME} - PUBLIC ${CURL_INCLUDE_DIRS}) - target_link_libraries(${PROJECT_NAME} - PUBLIC ${CURL_LIBRARIES}) -endif() - # If no Poco*Config.cmake recipes are found, look for headers in standard dirs. if(Poco_FOUND) target_link_libraries(${PROJECT_NAME} diff --git a/src/lib/curl_wrapper.cpp b/src/lib/curl_wrapper.cpp deleted file mode 100644 index 6ac5584..0000000 --- a/src/lib/curl_wrapper.cpp +++ /dev/null @@ -1,158 +0,0 @@ -#include "curl_wrapper.hpp" - -#include "version.hpp" - -#include -#include - -#include -#include -#include -#include -#include - -namespace remwharead -{ - -using std::runtime_error; -using std::to_string; -using std::vector; -using RegEx = Poco::RegularExpression; - -static std::atomic initialized{false}; - -CURLWrapper::CURLWrapper() -{ - CURLcode code{CURLE_OK}; - if (!initialized) - { - // NOLINTNEXTLINE(hicpp-signed-bitwise) - code = curl_global_init(CURL_GLOBAL_ALL); - initialized = true; - } - _connection = curl_easy_init(); - if (_connection == nullptr || code != CURLE_OK) - { - throw runtime_error{"Failed to initialize curl. libcurl code: " - + to_string(code)}; - } - - // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg) - curl_easy_setopt(_connection, CURLOPT_ERRORBUFFER, _buffer_error); - - // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg) - curl_easy_setopt(_connection, CURLOPT_WRITEFUNCTION, writer_body_wrapper); - // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg) - curl_easy_setopt(_connection, CURLOPT_WRITEDATA, this); - - // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg) - curl_easy_setopt(_connection, CURLOPT_HEADERFUNCTION, - writer_headers_wrapper); - // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg) - curl_easy_setopt(_connection, CURLOPT_HEADERDATA, this); - - // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg) - code = curl_easy_setopt(_connection, CURLOPT_FOLLOWLOCATION, 1L); - if (code != CURLE_OK) - { - throw runtime_error{"HTTP is not supported."}; - } - // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg) - curl_easy_setopt(_connection, CURLOPT_MAXREDIRS, 5L); - - // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg) - code = curl_easy_setopt(_connection, CURLOPT_USERAGENT, - (string("remwharead/") += version).c_str()); - if (code != CURLE_OK) - { - throw runtime_error{"Failed to set User-Agent."}; - } -} - -CURLWrapper::~CURLWrapper() noexcept -{ - curl_easy_cleanup(_connection); -} - -string CURLWrapper::make_request(string uri, bool archive) -{ - // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg) - CURLcode code{curl_easy_setopt(_connection, CURLOPT_URL, uri.c_str())}; - if (code != CURLE_OK) - { - throw runtime_error{"Couldn't set URL: " + to_string(code)}; - } - - if (archive) - { - // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg) - curl_easy_setopt(_connection, CURLOPT_CUSTOMREQUEST, "HEAD"); - } - else - { - // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg) - curl_easy_setopt(_connection, CURLOPT_HTTPGET, 1L); - } - - code = curl_easy_perform(_connection); - if (code != CURLE_OK) - { - // I think PARTIAL_FILE is normal for HEAD requests? - if (archive && code != CURLE_PARTIAL_FILE) - { - throw runtime_error{"libcurl error: " + to_string(code)}; - } - } - - long http_status{0}; // NOLINT(google-runtime-int) - // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg) - curl_easy_getinfo(_connection, CURLINFO_RESPONSE_CODE, &http_status); - - if (http_status == 200) - { - if (archive) - { - const RegEx re_location(R"(^Content-Location:\s*(.+)$)", - RegEx::RE_CASELESS); - vector matches; - re_location.split(_buffer_headers, matches); - if (matches.size() >= 2) - { - if (!matches[1].empty()) - { - return matches[1]; - } - } - return uri; - } - return _buffer_body; - } - - throw runtime_error{"HTTP error: " + to_string(http_status)}; -} - -size_t CURLWrapper::writer_body(char *data, size_t size, size_t nmemb) -{ - if (data == nullptr) - { - return 0; - } - - _buffer_body.append(data, size * nmemb); - - return size * nmemb; -} - -size_t CURLWrapper::writer_headers(char *data, size_t size, size_t nmemb) -{ - if (data == nullptr) - { - return 0; - } - - _buffer_headers.append(data, size * nmemb); - - return size * nmemb; -} - -} // namespace remwharead diff --git a/src/lib/uri.cpp b/src/lib/uri.cpp index 7d66c49..02eb40e 100644 --- a/src/lib/uri.cpp +++ b/src/lib/uri.cpp @@ -67,10 +67,13 @@ URI::URI(string uri) html_extract URI::get() { + using namespace curl_wrapper; + try { CURLWrapper curl; - _document = to_utf8(curl.make_request(_uri, false)); + _document = to_utf8( + curl.make_http_request(http_method::GET, _uri).body); if (!_document.empty()) { @@ -305,6 +308,8 @@ string URI::unescape_html(string html) archive_answer URI::archive() const { + using namespace curl_wrapper; + if (_uri.substr(0, 4) != "http") { return {false, "Only HTTP(S) is archivable.", ""}; @@ -313,13 +318,23 @@ archive_answer URI::archive() const try { CURLWrapper curl; - const string answer = curl.make_request("https://web.archive.org/save/" - + _uri, - true); + const auto answer = + curl.make_http_request(http_method::HEAD, + "https://web.archive.org/save/" + _uri); - if (!answer.empty()) + if (answer) { - return {true, "", "https://web.archive.org" + answer}; + string location{answer.get_header("location")}; + if (location.empty()) + { + location = answer.get_header("content-location"); + } + if (!location.empty()) + { + return {true, "", location}; + } + + return {false, "Could not extract location.", ""}; } } catch (const exception &e)