Use sub-project curl_wrapper instead of custom implementation.

Via git subtree from <https://schlomp.space/tastytea/curl_wrapper>.
2020-11-13 22:41:56 +01:00 · 2020-11-13 22:41:56 +01:00 · 6123c7fbb3
parent 5b56ad00b3
commit 6123c7fbb3
5 changed files with 23 additions and 352 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -39,6 +39,7 @@ if(WITH_CLANG-TIDY)
 endif()

 add_subdirectory(src)
+add_subdirectory(src/curl_wrapper)
 add_subdirectory(src/lib)
 add_subdirectory(include)
 add_subdirectory(src/cli)
--- a/include/curl_wrapper.hpp
+++ b/include/curl_wrapper.hpp
@@ -1,175 +0,0 @@
-#ifndef REMWHAREAD_CURL_WRAPPER_HPP
-#define REMWHAREAD_CURL_WRAPPER_HPP
-
-#include "curl/curl.h"
-
-#include <string>
-#include <string_view>
-
-namespace remwharead
-{
-
-using std::string;
-using std::string_view;
-
-class CURLWrapper
-{
-public:
-    /*!
-     *  @brief  Initializes curl and sets up connection.
-     *
-     *  The first time an instance of CURLWrapper is created, it calls
-     *  `curl_global_init`, which is not thread-safe. For more information
-     *  consult [curl_global_init(3)]
-     *  (https://curl.haxx.se/libcurl/c/curl_global_init.html).
-     *
-     *  @since  0.11.0
-     */
-    CURLWrapper();
-
-    /*!
-     *  @brief  Copy constructor. Does the same as the Constructor.
-     *
-     *  @since  0.11.0
-     */
-    CURLWrapper(const CURLWrapper &);
-
-    //! Move constructor
-    CURLWrapper(CURLWrapper &&other) noexcept = delete;
-
-    /*!
-     *  @brief  Cleans up curl and connection.
-     *
-     *  May call `curl_global_cleanup`, which is not thread-safe. For more
-     *  information consult [curl_global_cleanup(3)]
-     *  (https://curl.haxx.se/libcurl/c/curl_global_cleanup.html).
-     *
-     *  @since  0.11.0
-     */
-    virtual ~CURLWrapper() noexcept;
-
-    //! Copy assignment operator
-    CURLWrapper &operator=(const CURLWrapper &other) = delete;
-
-    //! Move assignment operator
-    CURLWrapper &operator=(CURLWrapper &&other) noexcept = delete;
-
-    /*!
-     *  @brief  Returns pointer to the CURL easy handle.
-     *
-     *  You can use this handle to set or modify curl options. For more
-     *  information consult [curl_easy_setopt(3)]
-     *  (https://curl.haxx.se/libcurl/c/curl_easy_setopt.html).
-     *
-     *  @since  0.11.0
-     */
-    inline CURL *get_curl_easy_handle()
-    {
-        return _connection;
-    }
-
-    /*!
-     *  @brief  URL encodes the given string.
-     *
-     *  For more information consult [curl_easy_escape(3)]
-     *  (https://curl.haxx.se/libcurl/c/curl_easy_escape.html).
-     *
-     *  @param  url String to escape.
-     *
-     *  @return The escaped string or {} if it failed.
-     *
-     *  @since  0.11.0
-     */
-    [[nodiscard]] inline string escape_url(const string_view url) const
-    {
-        char *cbuf{curl_easy_escape(_connection, url.data(),
-                                    static_cast<int>(url.size()))};
-        string sbuf{cbuf};
-        curl_free(cbuf);
-        return sbuf;
-    }
-
-    /*!
-     *  @brief  URL decodes the given string.
-     *
-     *  For more information consult [curl_easy_unescape(3)]
-     *  (https://curl.haxx.se/libcurl/c/curl_easy_unescape.html).
-     *
-     *  @param  url String to unescape.
-     *
-     *  @return The unescaped string or {} if it failed.
-     *
-     *  @since  0.11.0
-     */
-    [[nodiscard]] inline string unescape_url(const string_view url) const
-    {
-        char *cbuf{curl_easy_unescape(_connection, url.data(),
-                                      static_cast<int>(url.size()), nullptr)};
-        string sbuf{cbuf};
-        curl_free(cbuf);
-        return sbuf;
-    }
-
-    /*!
-     *  @brief  Make a HTTP request.
-     *
-     *  @param  uri     The full URI.
-     *  @param  archive Archive URI instead of fetching the body.
-     *
-     *  @return The body of the page or the URI of the archived page.
-     *
-     *  @since  0.11.0
-     */
-    [[nodiscard]] string make_request(string uri, bool archive);
-
-    /*!
-     *  @brief  Returns a reference to the buffer libcurl writes into.
-     *
-     *  @since  0.11.0
-     */
-    [[nodiscard]] inline string &get_buffer()
-    {
-        return _buffer_body;
-    }
-
-private:
-    CURL *_connection;
-    char _buffer_error[CURL_ERROR_SIZE]{};
-    string _buffer_headers;
-    string _buffer_body;
-
-    /*!
-     *  @brief  libcurl write callback function.
-     *
-     *  @since  0.11.0
-     */
-    size_t writer_body(char *data, size_t size, size_t nmemb);
-
-    /*!
-     *  @brief  Wrapper for curl, because it can only call static member
-     *          functions.
-     *
-     *  <https://curl.haxx.se/docs/faq.html#Using_C_non_static_functions_f>
-     *
-     *  @since  0.11.0
-     */
-    static inline size_t writer_body_wrapper(char *data, size_t sz,
-                                             size_t nmemb, void *f)
-    {
-        return static_cast<CURLWrapper *>(f)->writer_body(data, sz, nmemb);
-    }
-
-    //! @copydoc writer_body
-    size_t writer_headers(char *data, size_t size, size_t nmemb);
-
-    //! @copydoc writer_body_wrapper
-    static inline size_t writer_headers_wrapper(char *data, size_t sz,
-                                                size_t nmemb, void *f)
-    {
-        return static_cast<CURLWrapper *>(f)->writer_headers(data, sz, nmemb);
-    }
-};
-
-} // namespace remwharead
-
-#endif // REMWHAREAD_CURL_WRAPPER_HPP
--- a/src/lib/CMakeLists.txt
+++ b/src/lib/CMakeLists.txt
@@ -4,7 +4,6 @@ include(GNUInstallDirs)
 find_package(Poco CONFIG
  COMPONENTS Foundation Net Data DataSQLite JSON XML)
 find_package(Boost 1.48.0 REQUIRED COMPONENTS locale)
-find_package(CURL 7.52 REQUIRED)

 file(GLOB_RECURSE sources_lib *.cpp)
 file(GLOB_RECURSE headers_lib ../../include/*.hpp)
@@ -23,20 +22,9 @@ target_include_directories(${PROJECT_NAME}
  "$<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>")

 target_link_libraries(${PROJECT_NAME}
-  PRIVATE pthread Boost::locale
+  PRIVATE pthread Boost::locale curl_wrapper
  PUBLIC stdc++fs)

-# FindCURL provides an IMPORTED target since CMake 3.12.
-if(NOT ${CMAKE_VERSION} VERSION_LESS 3.12)
-  target_link_libraries(${PROJECT_NAME}
-    PUBLIC CURL::libcurl)
-else()
-  target_include_directories(${PROJECT_NAME}
-    PUBLIC ${CURL_INCLUDE_DIRS})
-  target_link_libraries(${PROJECT_NAME}
-    PUBLIC ${CURL_LIBRARIES})
-endif()
-
 # If no Poco*Config.cmake recipes are found, look for headers in standard dirs.
 if(Poco_FOUND)
  target_link_libraries(${PROJECT_NAME}
--- a/src/lib/curl_wrapper.cpp
+++ b/src/lib/curl_wrapper.cpp
@@ -1,158 +0,0 @@
-#include "curl_wrapper.hpp"
-
-#include "version.hpp"
-
-#include <Poco/RegularExpression.h>
-#include <curl/curl.h>
-
-#include <atomic>
-#include <iostream>
-#include <stdexcept>
-#include <string>
-#include <vector>
-
-namespace remwharead
-{
-
-using std::runtime_error;
-using std::to_string;
-using std::vector;
-using RegEx = Poco::RegularExpression;
-
-static std::atomic<bool> initialized{false};
-
-CURLWrapper::CURLWrapper()
-{
-    CURLcode code{CURLE_OK};
-    if (!initialized)
-    {
-        // NOLINTNEXTLINE(hicpp-signed-bitwise)
-        code = curl_global_init(CURL_GLOBAL_ALL);
-        initialized = true;
-    }
-    _connection = curl_easy_init();
-    if (_connection == nullptr || code != CURLE_OK)
-    {
-        throw runtime_error{"Failed to initialize curl. libcurl code: "
-                            + to_string(code)};
-    }
-
-    // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
-    curl_easy_setopt(_connection, CURLOPT_ERRORBUFFER, _buffer_error);
-
-    // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
-    curl_easy_setopt(_connection, CURLOPT_WRITEFUNCTION, writer_body_wrapper);
-    // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
-    curl_easy_setopt(_connection, CURLOPT_WRITEDATA, this);
-
-    // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
-    curl_easy_setopt(_connection, CURLOPT_HEADERFUNCTION,
-                     writer_headers_wrapper);
-    // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
-    curl_easy_setopt(_connection, CURLOPT_HEADERDATA, this);
-
-    // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
-    code = curl_easy_setopt(_connection, CURLOPT_FOLLOWLOCATION, 1L);
-    if (code != CURLE_OK)
-    {
-        throw runtime_error{"HTTP is not supported."};
-    }
-    // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
-    curl_easy_setopt(_connection, CURLOPT_MAXREDIRS, 5L);
-
-    // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
-    code = curl_easy_setopt(_connection, CURLOPT_USERAGENT,
-                            (string("remwharead/") += version).c_str());
-    if (code != CURLE_OK)
-    {
-        throw runtime_error{"Failed to set User-Agent."};
-    }
-}
-
-CURLWrapper::~CURLWrapper() noexcept
-{
-    curl_easy_cleanup(_connection);
-}
-
-string CURLWrapper::make_request(string uri, bool archive)
-{
-    // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
-    CURLcode code{curl_easy_setopt(_connection, CURLOPT_URL, uri.c_str())};
-    if (code != CURLE_OK)
-    {
-        throw runtime_error{"Couldn't set URL: " + to_string(code)};
-    }
-
-    if (archive)
-    {
-        // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
-        curl_easy_setopt(_connection, CURLOPT_CUSTOMREQUEST, "HEAD");
-    }
-    else
-    {
-        // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
-        curl_easy_setopt(_connection, CURLOPT_HTTPGET, 1L);
-    }
-
-    code = curl_easy_perform(_connection);
-    if (code != CURLE_OK)
-    {
-        // I think PARTIAL_FILE is normal for HEAD requests?
-        if (archive && code != CURLE_PARTIAL_FILE)
-        {
-            throw runtime_error{"libcurl error: " + to_string(code)};
-        }
-    }
-
-    long http_status{0}; // NOLINT(google-runtime-int)
-    // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
-    curl_easy_getinfo(_connection, CURLINFO_RESPONSE_CODE, &http_status);
-
-    if (http_status == 200)
-    {
-        if (archive)
-        {
-            const RegEx re_location(R"(^Content-Location:\s*(.+)$)",
-                                    RegEx::RE_CASELESS);
-            vector<string> matches;
-            re_location.split(_buffer_headers, matches);
-            if (matches.size() >= 2)
-            {
-                if (!matches[1].empty())
-                {
-                    return matches[1];
-                }
-            }
-            return uri;
-        }
-        return _buffer_body;
-    }
-
-    throw runtime_error{"HTTP error: " + to_string(http_status)};
-}
-
-size_t CURLWrapper::writer_body(char *data, size_t size, size_t nmemb)
-{
-    if (data == nullptr)
-    {
-        return 0;
-    }
-
-    _buffer_body.append(data, size * nmemb);
-
-    return size * nmemb;
-}
-
-size_t CURLWrapper::writer_headers(char *data, size_t size, size_t nmemb)
-{
-    if (data == nullptr)
-    {
-        return 0;
-    }
-
-    _buffer_headers.append(data, size * nmemb);
-
-    return size * nmemb;
-}
-
-} // namespace remwharead
--- a/src/lib/uri.cpp
+++ b/src/lib/uri.cpp
@@ -67,10 +67,13 @@ URI::URI(string uri)

 html_extract URI::get()
 {
+    using namespace curl_wrapper;
+
    try
    {
        CURLWrapper curl;
-        _document = to_utf8(curl.make_request(_uri, false));
+        _document = to_utf8(
+            curl.make_http_request(http_method::GET, _uri).body);

        if (!_document.empty())
        {
@@ -305,6 +308,8 @@ string URI::unescape_html(string html)

 archive_answer URI::archive() const
 {
+    using namespace curl_wrapper;
+
    if (_uri.substr(0, 4) != "http")
    {
        return {false, "Only HTTP(S) is archivable.", ""};
@@ -313,13 +318,23 @@ archive_answer URI::archive() const
    try
    {
        CURLWrapper curl;
-        const string answer = curl.make_request("https://web.archive.org/save/"
-                                                    + _uri,
-                                                true);
+        const auto answer =
+            curl.make_http_request(http_method::HEAD,
+                                   "https://web.archive.org/save/" + _uri);

-        if (!answer.empty())
+        if (answer)
        {
-            return {true, "", "https://web.archive.org" + answer};
+            string location{answer.get_header("location")};
+            if (location.empty())
+            {
+                location = answer.get_header("content-location");
+            }
+            if (!location.empty())
+            {
+                return {true, "", location};
+            }
+
+            return {false, "Could not extract location.", ""};
        }
    }
    catch (const exception &e)