Use sub-project curl_wrapper instead of custom implementation.
continuous-integration/drone/push Build is passing Details

Via git subtree from <https://schlomp.space/tastytea/curl_wrapper>.
This commit is contained in:
tastytea 2020-11-13 22:41:56 +01:00
parent 5b56ad00b3
commit 6123c7fbb3
Signed by: tastytea
GPG Key ID: CFC39497F1B26E07
5 changed files with 23 additions and 352 deletions

View File

@ -39,6 +39,7 @@ if(WITH_CLANG-TIDY)
endif()
add_subdirectory(src)
add_subdirectory(src/curl_wrapper)
add_subdirectory(src/lib)
add_subdirectory(include)
add_subdirectory(src/cli)

View File

@ -1,175 +0,0 @@
#ifndef REMWHAREAD_CURL_WRAPPER_HPP
#define REMWHAREAD_CURL_WRAPPER_HPP
#include "curl/curl.h"
#include <string>
#include <string_view>
namespace remwharead
{
using std::string;
using std::string_view;
/*!
 * @brief Thin wrapper around a libcurl easy handle.
 *
 * Owns one easy handle plus the buffers libcurl writes response headers,
 * response body and error text into.
 *
 * @since 0.11.0
 */
class CURLWrapper
{
public:
    /*!
     * @brief Initializes curl and sets up connection.
     *
     * The first time an instance of CURLWrapper is created, it calls
     * `curl_global_init`, which is not thread-safe. For more information
     * consult [curl_global_init(3)]
     * (https://curl.haxx.se/libcurl/c/curl_global_init.html).
     *
     * @since 0.11.0
     */
    CURLWrapper();

    /*!
     * @brief Copy constructor. Does the same as the Constructor.
     *
     * NOTE(review): only the declaration is visible here; confirm that a
     * definition exists before copying instances.
     *
     * @since 0.11.0
     */
    CURLWrapper(const CURLWrapper &);

    //! Move constructor
    CURLWrapper(CURLWrapper &&other) noexcept = delete;

    /*!
     * @brief Cleans up curl and connection.
     *
     * May call `curl_global_cleanup`, which is not thread-safe. For more
     * information consult [curl_global_cleanup(3)]
     * (https://curl.haxx.se/libcurl/c/curl_global_cleanup.html).
     *
     * @since 0.11.0
     */
    virtual ~CURLWrapper() noexcept;

    //! Copy assignment operator
    CURLWrapper &operator=(const CURLWrapper &other) = delete;

    //! Move assignment operator
    CURLWrapper &operator=(CURLWrapper &&other) noexcept = delete;

    /*!
     * @brief Returns pointer to the CURL easy handle.
     *
     * You can use this handle to set or modify curl options. For more
     * information consult [curl_easy_setopt(3)]
     * (https://curl.haxx.se/libcurl/c/curl_easy_setopt.html).
     *
     * @since 0.11.0
     */
    inline CURL *get_curl_easy_handle()
    {
        return _connection;
    }

    /*!
     * @brief URL encodes the given string.
     *
     * For more information consult [curl_easy_escape(3)]
     * (https://curl.haxx.se/libcurl/c/curl_easy_escape.html).
     *
     * @param url String to escape.
     *
     * @return The escaped string or {} if it failed.
     *
     * @since 0.11.0
     */
    [[nodiscard]] inline string escape_url(const string_view url) const
    {
        // A length of 0 makes libcurl call strlen() on the input, but a
        // string_view is not guaranteed to be NUL-terminated.
        if (url.empty())
        {
            return {};
        }

        char *cbuf{curl_easy_escape(_connection, url.data(),
                                    static_cast<int>(url.size()))};
        // libcurl reports failure with a null pointer; constructing a
        // std::string from it would be undefined behaviour.
        if (cbuf == nullptr)
        {
            return {};
        }

        string sbuf{cbuf};
        curl_free(cbuf);
        return sbuf;
    }

    /*!
     * @brief URL decodes the given string.
     *
     * For more information consult [curl_easy_unescape(3)]
     * (https://curl.haxx.se/libcurl/c/curl_easy_unescape.html).
     *
     * @param url String to unescape.
     *
     * @return The unescaped string or {} if it failed.
     *
     * @since 0.11.0
     */
    [[nodiscard]] inline string unescape_url(const string_view url) const
    {
        // A length of 0 makes libcurl call strlen() on the input, but a
        // string_view is not guaranteed to be NUL-terminated.
        if (url.empty())
        {
            return {};
        }

        // Ask for the decoded length so that a decoded %00 does not cut the
        // result short.
        int decoded_length{0};
        char *cbuf{curl_easy_unescape(_connection, url.data(),
                                      static_cast<int>(url.size()),
                                      &decoded_length)};
        // libcurl reports failure with a null pointer.
        if (cbuf == nullptr)
        {
            return {};
        }

        string sbuf(cbuf, static_cast<size_t>(decoded_length));
        curl_free(cbuf);
        return sbuf;
    }

    /*!
     * @brief Make a HTTP request.
     *
     * @param uri     The full URI.
     * @param archive Archive URI instead of fetching the body.
     *
     * @return The body of the page or the URI of the archived page.
     *
     * @since 0.11.0
     */
    [[nodiscard]] string make_request(string uri, bool archive);

    /*!
     * @brief Returns a reference to the buffer libcurl writes into.
     *
     * @since 0.11.0
     */
    [[nodiscard]] inline string &get_buffer()
    {
        return _buffer_body;
    }

private:
    //! The libcurl easy handle used for all requests of this instance.
    CURL *_connection;
    //! Storage for libcurl error text (sized CURL_ERROR_SIZE).
    char _buffer_error[CURL_ERROR_SIZE]{};
    //! Collected response headers.
    string _buffer_headers;
    //! Collected response body.
    string _buffer_body;

    /*!
     * @brief libcurl write callback function.
     *
     * @since 0.11.0
     */
    size_t writer_body(char *data, size_t size, size_t nmemb);

    /*!
     * @brief Wrapper for curl, because it can only call static member
     *        functions.
     *
     * <https://curl.haxx.se/docs/faq.html#Using_C_non_static_functions_f>
     *
     * @since 0.11.0
     */
    static inline size_t writer_body_wrapper(char *data, size_t sz,
                                             size_t nmemb, void *f)
    {
        return static_cast<CURLWrapper *>(f)->writer_body(data, sz, nmemb);
    }

    //! @copydoc writer_body
    size_t writer_headers(char *data, size_t size, size_t nmemb);

    //! @copydoc writer_body_wrapper
    static inline size_t writer_headers_wrapper(char *data, size_t sz,
                                                size_t nmemb, void *f)
    {
        return static_cast<CURLWrapper *>(f)->writer_headers(data, sz, nmemb);
    }
};
} // namespace remwharead
#endif // REMWHAREAD_CURL_WRAPPER_HPP

View File

@ -4,7 +4,6 @@ include(GNUInstallDirs)
find_package(Poco CONFIG
COMPONENTS Foundation Net Data DataSQLite JSON XML)
find_package(Boost 1.48.0 REQUIRED COMPONENTS locale)
find_package(CURL 7.52 REQUIRED)
file(GLOB_RECURSE sources_lib *.cpp)
file(GLOB_RECURSE headers_lib ../../include/*.hpp)
@ -23,20 +22,9 @@ target_include_directories(${PROJECT_NAME}
"$<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>")
target_link_libraries(${PROJECT_NAME}
PRIVATE pthread Boost::locale
PRIVATE pthread Boost::locale curl_wrapper
PUBLIC stdc++fs)
# FindCURL provides an IMPORTED target since CMake 3.12.
if(NOT ${CMAKE_VERSION} VERSION_LESS 3.12)
target_link_libraries(${PROJECT_NAME}
PUBLIC CURL::libcurl)
else()
target_include_directories(${PROJECT_NAME}
PUBLIC ${CURL_INCLUDE_DIRS})
target_link_libraries(${PROJECT_NAME}
PUBLIC ${CURL_LIBRARIES})
endif()
# If no Poco*Config.cmake recipes are found, look for headers in standard dirs.
if(Poco_FOUND)
target_link_libraries(${PROJECT_NAME}

View File

@ -1,158 +0,0 @@
#include "curl_wrapper.hpp"
#include "version.hpp"
#include <Poco/RegularExpression.h>
#include <curl/curl.h>
#include <atomic>
#include <iostream>
#include <stdexcept>
#include <string>
#include <vector>
namespace remwharead
{
using std::runtime_error;
using std::to_string;
using std::vector;
using RegEx = Poco::RegularExpression;
static std::atomic<bool> initialized{false};
/*!
 * Creates the easy handle and applies the default options: error buffer,
 * body and header write callbacks, redirect following (at most 5 redirects)
 * and the User-Agent.
 *
 * Throws std::runtime_error if libcurl could not be initialized or a checked
 * option could not be set. The easy handle is released before throwing,
 * because the destructor does not run for a partially constructed object.
 */
CURLWrapper::CURLWrapper()
{
    CURLcode code{CURLE_OK};
    if (!initialized)
    {
        // NOLINTNEXTLINE(hicpp-signed-bitwise)
        code = curl_global_init(CURL_GLOBAL_ALL);
        // Only remember a successful initialization, so that a later instance
        // can try again after a failure.
        initialized = (code == CURLE_OK);
    }

    _connection = curl_easy_init();

    // Releases the handle and reports the failure. curl_easy_cleanup()
    // accepts a null handle, so this is safe on every error path.
    const auto fail{[this](const string &message)
                    {
                        curl_easy_cleanup(_connection);
                        _connection = nullptr;
                        throw runtime_error{message};
                    }};

    if (_connection == nullptr || code != CURLE_OK)
    {
        fail("Failed to initialize curl. libcurl code: " + to_string(code));
    }

    // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
    curl_easy_setopt(_connection, CURLOPT_ERRORBUFFER, _buffer_error);

    // Route body and header data into the member callbacks of this instance.
    // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
    curl_easy_setopt(_connection, CURLOPT_WRITEFUNCTION, writer_body_wrapper);
    // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
    curl_easy_setopt(_connection, CURLOPT_WRITEDATA, this);
    // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
    curl_easy_setopt(_connection, CURLOPT_HEADERFUNCTION,
                     writer_headers_wrapper);
    // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
    curl_easy_setopt(_connection, CURLOPT_HEADERDATA, this);

    // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
    code = curl_easy_setopt(_connection, CURLOPT_FOLLOWLOCATION, 1L);
    if (code != CURLE_OK)
    {
        fail("HTTP is not supported.");
    }
    // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
    curl_easy_setopt(_connection, CURLOPT_MAXREDIRS, 5L);

    // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
    code = curl_easy_setopt(_connection, CURLOPT_USERAGENT,
                            (string("remwharead/") += version).c_str());
    if (code != CURLE_OK)
    {
        fail("Failed to set User-Agent.");
    }
}
/*!
 * Hands the easy handle of this instance back to libcurl.
 */
CURLWrapper::~CURLWrapper() noexcept
{
    CURL *const handle{_connection};
    curl_easy_cleanup(handle);
}
/*!
 * Performs a HTTP request on the easy handle of this instance.
 *
 * With `archive == false` a GET is sent and the response body is returned.
 * With `archive == true` a HEAD is sent and the value of the first
 * `Content-Location` response header is returned; if there is none, `uri`
 * is returned unchanged.
 *
 * Throws std::runtime_error if the URL can not be set, if the transfer
 * fails, or if the HTTP status is not 200.
 */
string CURLWrapper::make_request(string uri, bool archive)
{
    // Start from empty buffers so that a reused instance does not return data
    // from an earlier request.
    _buffer_headers.clear();
    _buffer_body.clear();

    // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
    CURLcode code{curl_easy_setopt(_connection, CURLOPT_URL, uri.c_str())};
    if (code != CURLE_OK)
    {
        throw runtime_error{"Couldn't set URL: " + to_string(code)};
    }

    if (archive)
    {
        // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
        curl_easy_setopt(_connection, CURLOPT_CUSTOMREQUEST, "HEAD");
    }
    else
    {
        // CURLOPT_HTTPGET does not undo a custom request method, so drop a
        // "HEAD" left over from an earlier archive request first.
        // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
        curl_easy_setopt(_connection, CURLOPT_CUSTOMREQUEST,
                         static_cast<char *>(nullptr));
        // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
        curl_easy_setopt(_connection, CURLOPT_HTTPGET, 1L);
    }

    code = curl_easy_perform(_connection);
    // I think PARTIAL_FILE is normal for HEAD requests? Every other transfer
    // error is reported, for both request kinds.
    const bool tolerated{archive && code == CURLE_PARTIAL_FILE};
    if (code != CURLE_OK && !tolerated)
    {
        throw runtime_error{"libcurl error: " + to_string(code)};
    }

    long http_status{0}; // NOLINT(google-runtime-int)
    // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
    curl_easy_getinfo(_connection, CURLINFO_RESPONSE_CODE, &http_status);
    if (http_status != 200)
    {
        throw runtime_error{"HTTP error: " + to_string(http_status)};
    }

    if (!archive)
    {
        return _buffer_body;
    }

    // The header buffer holds all header lines, so `^` and `$` have to match
    // at line boundaries (RE_MULTILINE). The value stops before the CR of the
    // CRLF line ending.
    // NOLINTNEXTLINE(hicpp-signed-bitwise)
    const RegEx re_location(R"(^Content-Location:[ \t]*([^\r\n]+)\r?$)",
                            RegEx::RE_CASELESS | RegEx::RE_MULTILINE);
    vector<string> matches;
    re_location.split(_buffer_headers, matches);
    // matches[0] is the whole header line, matches[1] the captured value.
    if (matches.size() >= 2 && !matches[1].empty())
    {
        return matches[1];
    }

    return uri;
}
/*!
 * Appends a chunk of response body data to the body buffer.
 *
 * Returns the number of bytes appended (size * nmemb), or 0 if data is a
 * null pointer.
 */
size_t CURLWrapper::writer_body(char *data, size_t size, size_t nmemb)
{
    size_t consumed{0};
    if (data != nullptr)
    {
        consumed = size * nmemb;
        _buffer_body.append(data, consumed);
    }
    return consumed;
}
/*!
 * Appends a chunk of response header data to the header buffer.
 *
 * Returns the number of bytes appended (size * nmemb), or 0 if data is a
 * null pointer.
 */
size_t CURLWrapper::writer_headers(char *data, size_t size, size_t nmemb)
{
    size_t consumed{0};
    if (data != nullptr)
    {
        consumed = size * nmemb;
        _buffer_headers.append(data, consumed);
    }
    return consumed;
}
} // namespace remwharead

View File

@ -67,10 +67,13 @@ URI::URI(string uri)
html_extract URI::get()
{
using namespace curl_wrapper;
try
{
CURLWrapper curl;
_document = to_utf8(curl.make_request(_uri, false));
_document = to_utf8(
curl.make_http_request(http_method::GET, _uri).body);
if (!_document.empty())
{
@ -305,6 +308,8 @@ string URI::unescape_html(string html)
archive_answer URI::archive() const
{
using namespace curl_wrapper;
if (_uri.substr(0, 4) != "http")
{
return {false, "Only HTTP(S) is archivable.", ""};
@ -313,13 +318,23 @@ archive_answer URI::archive() const
try
{
CURLWrapper curl;
const string answer = curl.make_request("https://web.archive.org/save/"
+ _uri,
true);
const auto answer =
curl.make_http_request(http_method::HEAD,
"https://web.archive.org/save/" + _uri);
if (!answer.empty())
if (answer)
{
return {true, "", "https://web.archive.org" + answer};
string location{answer.get_header("location")};
if (location.empty())
{
location = answer.get_header("content-location");
}
if (!location.empty())
{
return {true, "", location};
}
return {false, "Could not extract location.", ""};
}
}
catch (const exception &e)