2018-05-11 02:21:44 +02:00
|
|
|
/* This file is part of expandurl-mastodon.
|
|
|
|
* Copyright © 2018 tastytea <tastytea@tastytea.de>
|
|
|
|
*
|
|
|
|
* This program is free software: you can redistribute it and/or modify
|
|
|
|
* it under the terms of the GNU General Public License as published by
|
|
|
|
* the Free Software Foundation, version 3.
|
|
|
|
*
|
|
|
|
* This program is distributed in the hope that it will be useful,
|
|
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
* GNU General Public License for more details.
|
|
|
|
*
|
|
|
|
* You should have received a copy of the GNU General Public License
|
|
|
|
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include <iostream>
|
2018-05-11 06:57:41 +02:00
|
|
|
#include <sstream>
|
2018-05-11 02:36:49 +02:00
|
|
|
#include <regex>
|
2018-05-18 13:27:12 +02:00
|
|
|
#include <array>
|
|
|
|
#include <utility>
|
2018-05-22 13:44:41 +02:00
|
|
|
#include <syslog.h>
|
2018-05-11 02:21:44 +02:00
|
|
|
#include <curlpp/cURLpp.hpp>
|
|
|
|
#include <curlpp/Options.hpp>
|
|
|
|
#include <curlpp/Infos.hpp>
|
|
|
|
#include "version.hpp"
|
|
|
|
#include "expandurl-mastodon.hpp"
|
|
|
|
|
|
|
|
using std::string;
|
|
|
|
namespace curlopts = curlpp::options;
|
|
|
|
|
2018-05-14 21:44:44 +02:00
|
|
|
const std::vector<string> get_urls(const string &html)
|
|
|
|
{
|
2018-06-28 13:34:51 +02:00
|
|
|
const std::regex re_url("href=\\\\?\"([^\"\\\\]+)\\\\?\"([^>]+)");
|
2018-05-14 21:44:44 +02:00
|
|
|
std::smatch match;
|
|
|
|
string buffer = html;
|
|
|
|
std::vector<string> v;
|
|
|
|
|
|
|
|
while (std::regex_search(buffer, match, re_url))
|
|
|
|
{
|
2018-06-28 13:34:51 +02:00
|
|
|
// Add URL to vector if it is not a mention.#
|
|
|
|
if (match[2].str().find("mention") == std::string::npos)
|
|
|
|
{
|
|
|
|
string url = Easy::unescape_html(match[1].str());
|
|
|
|
v.push_back(strip(expand(url)));
|
|
|
|
buffer = match.suffix().str();
|
|
|
|
}
|
2018-05-14 21:44:44 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
return v;
|
|
|
|
}
|
|
|
|
|
2018-05-11 02:21:44 +02:00
|
|
|
// Requests `url` with the HTTP method HEAD, following redirects, and returns
// the effective URL curlpp reports for the request afterwards.
//
// Exceptions thrown while performing the request are written to syslog and
// otherwise ignored; the effective URL is queried and returned regardless.
//
// NOTE(review): the HEAD method is selected via CustomRequest only. libcurl
// documents CURLOPT_NOBODY as the way to issue a real HEAD request; confirm
// that the current behaviour is intended.
const string expand(const string &url)
{
    curlpp::Easy req;
    std::stringstream sink;     // Anything the server sends ends up here.
    const string user_agent = string("expandurl-mastodon/") + global::version;

    req.setOpt(curlopts::WriteStream(&sink));
    req.setOpt<curlopts::CustomRequest>("HEAD");
    req.setOpt<curlopts::Url>(url);
    req.setOpt<curlopts::UserAgent>(user_agent);
    req.setOpt<curlopts::HttpHeader>({ "Connection: close" });
    req.setOpt<curlopts::FollowLocation>(true);

    try
    {
        req.perform();
    }
    catch (const std::exception &e)
    {
        // TODO: Do something when: "Couldn't resolve host …"
        syslog(LOG_ERR, "%s", e.what());
        syslog(LOG_NOTICE, "The previous error is ignored.");
    }

    return curlpp::infos::EffectiveUrl::get(req);
}
|
2018-05-11 02:36:49 +02:00
|
|
|
|
|
|
|
// Applies every entry of the "replace" object in the configuration to `url`
// and returns the result.
//
// Each member name is used as a case-insensitive regular expression, the
// member's value as the replacement text. Afterwards, if the result contains
// a '&' but no '?', the first '&' is turned into a '?'.
const string strip(const string &url)
{
    using namespace std::regex_constants;

    Json::Value &rules = configfile.get_json()["replace"];
    string stripped = url;

    for (auto rule = rules.begin(); rule != rules.end(); ++rule)
    {
        const std::regex pattern(rule.name(), icase);
        stripped = std::regex_replace(stripped, pattern, (*rule).asString());
    }

    // If '&' is found in the new URL, but no '?'
    const size_t first_amp = stripped.find('&');
    const bool has_question_mark = stripped.find('?') != std::string::npos;
    if (first_amp != std::string::npos && !has_question_mark)
    {
        stripped.replace(first_amp, 1, "?");
    }

    return stripped;
}
|
2018-05-29 14:56:39 +02:00
|
|
|
|
|
|
|
const void init_replacements()
|
|
|
|
{
|
|
|
|
using replace_pair = std::pair<const std::string, const std::string>;
|
|
|
|
Json::Value &config = configfile.get_json();
|
|
|
|
if (config["replace"].isNull())
|
|
|
|
{
|
|
|
|
const std::array<const replace_pair, 5> replace_array =
|
|
|
|
{{
|
|
|
|
{ "[\\?&]utm_[^&]+", "" }, // Google
|
2018-06-11 05:44:06 +02:00
|
|
|
{ "[\\?&]wt_?[^&]+", "" }, // Twitter?
|
2018-05-29 14:56:39 +02:00
|
|
|
{ "[\\?&]__twitter_impression=[^&]+", "" }, // Twitter?
|
|
|
|
{ "//amp\\.", "//" } // AMP
|
|
|
|
}};
|
|
|
|
|
|
|
|
for (const replace_pair &pair : replace_array)
|
|
|
|
{
|
|
|
|
config["replace"][pair.first] = pair.second;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|