Use sub-project curl_wrapper instead of custom implementation.
continuous-integration/drone/push Build is passing
Details
continuous-integration/drone/push Build is passing
Details
Via git subtree from <https://schlomp.space/tastytea/curl_wrapper>.
This commit is contained in:
parent
5b56ad00b3
commit
6123c7fbb3
|
@ -39,6 +39,7 @@ if(WITH_CLANG-TIDY)
|
|||
endif()
|
||||
|
||||
add_subdirectory(src)
|
||||
add_subdirectory(src/curl_wrapper)
|
||||
add_subdirectory(src/lib)
|
||||
add_subdirectory(include)
|
||||
add_subdirectory(src/cli)
|
||||
|
|
|
@ -1,175 +0,0 @@
|
|||
#ifndef REMWHAREAD_CURL_WRAPPER_HPP
|
||||
#define REMWHAREAD_CURL_WRAPPER_HPP
|
||||
|
||||
#include "curl/curl.h"
|
||||
|
||||
#include <string>
|
||||
#include <string_view>
|
||||
|
||||
namespace remwharead
|
||||
{
|
||||
|
||||
using std::string;
|
||||
using std::string_view;
|
||||
|
||||
class CURLWrapper
|
||||
{
|
||||
public:
|
||||
/*!
|
||||
* @brief Initializes curl and sets up connection.
|
||||
*
|
||||
* The first time an instance of CURLWrapper is created, it calls
|
||||
* `curl_global_init`, which is not thread-safe. For more information
|
||||
* consult [curl_global_init(3)]
|
||||
* (https://curl.haxx.se/libcurl/c/curl_global_init.html).
|
||||
*
|
||||
* @since 0.11.0
|
||||
*/
|
||||
CURLWrapper();
|
||||
|
||||
/*!
|
||||
* @brief Copy constructor. Does the same as the Constructor.
|
||||
*
|
||||
* @since 0.11.0
|
||||
*/
|
||||
CURLWrapper(const CURLWrapper &);
|
||||
|
||||
//! Move constructor
|
||||
CURLWrapper(CURLWrapper &&other) noexcept = delete;
|
||||
|
||||
/*!
|
||||
* @brief Cleans up curl and connection.
|
||||
*
|
||||
* May call `curl_global_cleanup`, which is not thread-safe. For more
|
||||
* information consult [curl_global_cleanup(3)]
|
||||
* (https://curl.haxx.se/libcurl/c/curl_global_cleanup.html).
|
||||
*
|
||||
* @since 0.11.0
|
||||
*/
|
||||
virtual ~CURLWrapper() noexcept;
|
||||
|
||||
//! Copy assignment operator
|
||||
CURLWrapper &operator=(const CURLWrapper &other) = delete;
|
||||
|
||||
//! Move assignment operator
|
||||
CURLWrapper &operator=(CURLWrapper &&other) noexcept = delete;
|
||||
|
||||
/*!
|
||||
* @brief Returns pointer to the CURL easy handle.
|
||||
*
|
||||
* You can use this handle to set or modify curl options. For more
|
||||
* information consult [curl_easy_setopt(3)]
|
||||
* (https://curl.haxx.se/libcurl/c/curl_easy_setopt.html).
|
||||
*
|
||||
* @since 0.11.0
|
||||
*/
|
||||
inline CURL *get_curl_easy_handle()
|
||||
{
|
||||
return _connection;
|
||||
}
|
||||
|
||||
/*!
|
||||
* @brief URL encodes the given string.
|
||||
*
|
||||
* For more information consult [curl_easy_escape(3)]
|
||||
* (https://curl.haxx.se/libcurl/c/curl_easy_escape.html).
|
||||
*
|
||||
* @param url String to escape.
|
||||
*
|
||||
* @return The escaped string or {} if it failed.
|
||||
*
|
||||
* @since 0.11.0
|
||||
*/
|
||||
[[nodiscard]] inline string escape_url(const string_view url) const
|
||||
{
|
||||
char *cbuf{curl_easy_escape(_connection, url.data(),
|
||||
static_cast<int>(url.size()))};
|
||||
string sbuf{cbuf};
|
||||
curl_free(cbuf);
|
||||
return sbuf;
|
||||
}
|
||||
|
||||
/*!
|
||||
* @brief URL decodes the given string.
|
||||
*
|
||||
* For more information consult [curl_easy_unescape(3)]
|
||||
* (https://curl.haxx.se/libcurl/c/curl_easy_unescape.html).
|
||||
*
|
||||
* @param url String to unescape.
|
||||
*
|
||||
* @return The unescaped string or {} if it failed.
|
||||
*
|
||||
* @since 0.11.0
|
||||
*/
|
||||
[[nodiscard]] inline string unescape_url(const string_view url) const
|
||||
{
|
||||
char *cbuf{curl_easy_unescape(_connection, url.data(),
|
||||
static_cast<int>(url.size()), nullptr)};
|
||||
string sbuf{cbuf};
|
||||
curl_free(cbuf);
|
||||
return sbuf;
|
||||
}
|
||||
|
||||
/*!
|
||||
* @brief Make a HTTP request.
|
||||
*
|
||||
* @param uri The full URI.
|
||||
* @param archive Archive URI instead of fetching the body.
|
||||
*
|
||||
* @return The body of the page or the URI of the archived page.
|
||||
*
|
||||
* @since 0.11.0
|
||||
*/
|
||||
[[nodiscard]] string make_request(string uri, bool archive);
|
||||
|
||||
/*!
|
||||
* @brief Returns a reference to the buffer libcurl writes into.
|
||||
*
|
||||
* @since 0.11.0
|
||||
*/
|
||||
[[nodiscard]] inline string &get_buffer()
|
||||
{
|
||||
return _buffer_body;
|
||||
}
|
||||
|
||||
private:
|
||||
CURL *_connection;
|
||||
char _buffer_error[CURL_ERROR_SIZE]{};
|
||||
string _buffer_headers;
|
||||
string _buffer_body;
|
||||
|
||||
/*!
|
||||
* @brief libcurl write callback function.
|
||||
*
|
||||
* @since 0.11.0
|
||||
*/
|
||||
size_t writer_body(char *data, size_t size, size_t nmemb);
|
||||
|
||||
/*!
|
||||
* @brief Wrapper for curl, because it can only call static member
|
||||
* functions.
|
||||
*
|
||||
* <https://curl.haxx.se/docs/faq.html#Using_C_non_static_functions_f>
|
||||
*
|
||||
* @since 0.11.0
|
||||
*/
|
||||
static inline size_t writer_body_wrapper(char *data, size_t sz,
|
||||
size_t nmemb, void *f)
|
||||
{
|
||||
return static_cast<CURLWrapper *>(f)->writer_body(data, sz, nmemb);
|
||||
}
|
||||
|
||||
//! @copydoc writer_body
|
||||
size_t writer_headers(char *data, size_t size, size_t nmemb);
|
||||
|
||||
//! @copydoc writer_body_wrapper
|
||||
static inline size_t writer_headers_wrapper(char *data, size_t sz,
|
||||
size_t nmemb, void *f)
|
||||
{
|
||||
return static_cast<CURLWrapper *>(f)->writer_headers(data, sz, nmemb);
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace remwharead
|
||||
|
||||
#endif // REMWHAREAD_CURL_WRAPPER_HPP
|
|
@ -4,7 +4,6 @@ include(GNUInstallDirs)
|
|||
find_package(Poco CONFIG
|
||||
COMPONENTS Foundation Net Data DataSQLite JSON XML)
|
||||
find_package(Boost 1.48.0 REQUIRED COMPONENTS locale)
|
||||
find_package(CURL 7.52 REQUIRED)
|
||||
|
||||
file(GLOB_RECURSE sources_lib *.cpp)
|
||||
file(GLOB_RECURSE headers_lib ../../include/*.hpp)
|
||||
|
@ -23,20 +22,9 @@ target_include_directories(${PROJECT_NAME}
|
|||
"$<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>")
|
||||
|
||||
target_link_libraries(${PROJECT_NAME}
|
||||
PRIVATE pthread Boost::locale
|
||||
PRIVATE pthread Boost::locale curl_wrapper
|
||||
PUBLIC stdc++fs)
|
||||
|
||||
# FindCURL provides an IMPORTED target since CMake 3.12.
|
||||
if(NOT ${CMAKE_VERSION} VERSION_LESS 3.12)
|
||||
target_link_libraries(${PROJECT_NAME}
|
||||
PUBLIC CURL::libcurl)
|
||||
else()
|
||||
target_include_directories(${PROJECT_NAME}
|
||||
PUBLIC ${CURL_INCLUDE_DIRS})
|
||||
target_link_libraries(${PROJECT_NAME}
|
||||
PUBLIC ${CURL_LIBRARIES})
|
||||
endif()
|
||||
|
||||
# If no Poco*Config.cmake recipes are found, look for headers in standard dirs.
|
||||
if(Poco_FOUND)
|
||||
target_link_libraries(${PROJECT_NAME}
|
||||
|
|
|
@ -1,158 +0,0 @@
|
|||
#include "curl_wrapper.hpp"
|
||||
|
||||
#include "version.hpp"
|
||||
|
||||
#include <Poco/RegularExpression.h>
|
||||
#include <curl/curl.h>
|
||||
|
||||
#include <atomic>
|
||||
#include <iostream>
|
||||
#include <stdexcept>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
namespace remwharead
|
||||
{
|
||||
|
||||
using std::runtime_error;
|
||||
using std::to_string;
|
||||
using std::vector;
|
||||
using RegEx = Poco::RegularExpression;
|
||||
|
||||
static std::atomic<bool> initialized{false};
|
||||
|
||||
/*!
 *  Runs `curl_global_init` if no earlier instance did so successfully,
 *  creates the easy handle and installs the error buffer, the write and
 *  header callbacks, redirect following (at most 5 redirects) and the
 *  User-Agent.
 *
 *  Throws std::runtime_error if initialization fails or if the redirect or
 *  User-Agent option can not be set. The easy handle is released before
 *  throwing, because the destructor does not run for a constructor that
 *  throws.
 */
CURLWrapper::CURLWrapper()
{
    CURLcode code{CURLE_OK};
    if (!initialized)
    {
        // NOLINTNEXTLINE(hicpp-signed-bitwise)
        code = curl_global_init(CURL_GLOBAL_ALL);
        // Only remember a successful initialization, so that a later
        // instance retries after a failure.
        initialized = (code == CURLE_OK);
    }

    // Do not create a handle that would be leaked by the throw below.
    _connection = (code == CURLE_OK) ? curl_easy_init() : nullptr;
    if (_connection == nullptr)
    {
        throw runtime_error{"Failed to initialize curl. libcurl code: "
                            + to_string(code)};
    }

    // Releases the handle, then reports the error.
    const auto fail = [this](const char *message)
    {
        curl_easy_cleanup(_connection);
        _connection = nullptr;
        throw runtime_error{message};
    };

    // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
    curl_easy_setopt(_connection, CURLOPT_ERRORBUFFER, _buffer_error);

    // The static wrappers receive `this` and forward to the member callbacks.
    // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
    curl_easy_setopt(_connection, CURLOPT_WRITEFUNCTION, writer_body_wrapper);
    // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
    curl_easy_setopt(_connection, CURLOPT_WRITEDATA, this);

    // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
    curl_easy_setopt(_connection, CURLOPT_HEADERFUNCTION,
                     writer_headers_wrapper);
    // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
    curl_easy_setopt(_connection, CURLOPT_HEADERDATA, this);

    // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
    code = curl_easy_setopt(_connection, CURLOPT_FOLLOWLOCATION, 1L);
    if (code != CURLE_OK)
    {
        fail("HTTP is not supported.");
    }
    // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
    curl_easy_setopt(_connection, CURLOPT_MAXREDIRS, 5L);

    string user_agent{"remwharead/"};
    user_agent += version;
    // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
    code = curl_easy_setopt(_connection, CURLOPT_USERAGENT,
                            user_agent.c_str());
    if (code != CURLE_OK)
    {
        fail("Failed to set User-Agent.");
    }
}
|
||||
|
||||
//! Releases the easy handle of this instance.
CURLWrapper::~CURLWrapper() noexcept
{
    CURL *const handle{_connection};
    curl_easy_cleanup(handle);
}
|
||||
|
||||
/*!
 *  Performs a GET request, or a HEAD request if `archive` is true.
 *
 *  On HTTP status 200 the body is returned for GET. For HEAD the captured
 *  `Content-Location` value is returned if there is a match, `uri`
 *  otherwise.
 *
 *  Throws std::runtime_error if the URL can not be set, if the transfer
 *  fails, or if the HTTP status is not 200.
 */
string CURLWrapper::make_request(string uri, bool archive)
{
    // Start with empty buffers, otherwise a second request on this instance
    // would return the data of both requests.
    _buffer_headers.clear();
    _buffer_body.clear();

    // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
    CURLcode code{curl_easy_setopt(_connection, CURLOPT_URL, uri.c_str())};
    if (code != CURLE_OK)
    {
        throw runtime_error{"Couldn't set URL: " + to_string(code)};
    }

    if (archive)
    {
        // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
        curl_easy_setopt(_connection, CURLOPT_CUSTOMREQUEST, "HEAD");
    }
    else
    {
        // Restore the default method, a custom "HEAD" from an earlier
        // request would otherwise stay in effect.
        // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
        curl_easy_setopt(_connection, CURLOPT_CUSTOMREQUEST, nullptr);
        // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
        curl_easy_setopt(_connection, CURLOPT_HTTPGET, 1L);
    }

    code = curl_easy_perform(_connection);
    // I think PARTIAL_FILE is normal for HEAD requests?
    const bool tolerated{archive && code == CURLE_PARTIAL_FILE};
    if (code != CURLE_OK && !tolerated)
    {
        throw runtime_error{"libcurl error: " + to_string(code)};
    }

    long http_status{0}; // NOLINT(google-runtime-int)
    // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
    curl_easy_getinfo(_connection, CURLINFO_RESPONSE_CODE, &http_status);

    if (http_status != 200)
    {
        throw runtime_error{"HTTP error: " + to_string(http_status)};
    }

    if (!archive)
    {
        return _buffer_body;
    }

    // NOTE(review): no multi-line option is set, so `^` presumably only
    // matches at the start of the header buffer — confirm that this can
    // match a header that is not on the first line.
    const RegEx re_location(R"(^Content-Location:\s*(.+)$)",
                            RegEx::RE_CASELESS);
    vector<string> matches;
    re_location.split(_buffer_headers, matches);
    // matches[1] is the captured group, if there was a match.
    if (matches.size() >= 2 && !matches[1].empty())
    {
        return matches[1];
    }

    return uri;
}
|
||||
|
||||
size_t CURLWrapper::writer_body(char *data, size_t size, size_t nmemb)
|
||||
{
|
||||
if (data == nullptr)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
_buffer_body.append(data, size * nmemb);
|
||||
|
||||
return size * nmemb;
|
||||
}
|
||||
|
||||
size_t CURLWrapper::writer_headers(char *data, size_t size, size_t nmemb)
|
||||
{
|
||||
if (data == nullptr)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
_buffer_headers.append(data, size * nmemb);
|
||||
|
||||
return size * nmemb;
|
||||
}
|
||||
|
||||
} // namespace remwharead
|
|
@ -67,10 +67,13 @@ URI::URI(string uri)
|
|||
|
||||
html_extract URI::get()
|
||||
{
|
||||
using namespace curl_wrapper;
|
||||
|
||||
try
|
||||
{
|
||||
CURLWrapper curl;
|
||||
_document = to_utf8(curl.make_request(_uri, false));
|
||||
_document = to_utf8(
|
||||
curl.make_http_request(http_method::GET, _uri).body);
|
||||
|
||||
if (!_document.empty())
|
||||
{
|
||||
|
@ -305,6 +308,8 @@ string URI::unescape_html(string html)
|
|||
|
||||
archive_answer URI::archive() const
|
||||
{
|
||||
using namespace curl_wrapper;
|
||||
|
||||
if (_uri.substr(0, 4) != "http")
|
||||
{
|
||||
return {false, "Only HTTP(S) is archivable.", ""};
|
||||
|
@ -313,13 +318,23 @@ archive_answer URI::archive() const
|
|||
try
|
||||
{
|
||||
CURLWrapper curl;
|
||||
const string answer = curl.make_request("https://web.archive.org/save/"
|
||||
+ _uri,
|
||||
true);
|
||||
const auto answer =
|
||||
curl.make_http_request(http_method::HEAD,
|
||||
"https://web.archive.org/save/" + _uri);
|
||||
|
||||
if (!answer.empty())
|
||||
if (answer)
|
||||
{
|
||||
return {true, "", "https://web.archive.org" + answer};
|
||||
string location{answer.get_header("location")};
|
||||
if (location.empty())
|
||||
{
|
||||
location = answer.get_header("content-location");
|
||||
}
|
||||
if (!location.empty())
|
||||
{
|
||||
return {true, "", location};
|
||||
}
|
||||
|
||||
return {false, "Could not extract location.", ""};
|
||||
}
|
||||
}
|
||||
catch (const exception &e)
|
||||
|
|
Loading…
Reference in New Issue