266 lines
6.5 KiB
C++
266 lines
6.5 KiB
C++
/* This file is part of FediBlock-backend.
|
|
* Copyright © 2020, 2021 tastytea <tastytea@tastytea.de>
|
|
*
|
|
* This program is free software: you can redistribute it and/or modify
|
|
* it under the terms of the GNU Affero General Public License as published by
|
|
* the Free Software Foundation, version 3.
|
|
*
|
|
* This program is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
* GNU Affero General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU Affero General Public License
|
|
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
*/
|
|
|
|
#include "cgi.hpp"
|
|
|
|
#include "files.hpp"
|
|
#include "fs-compat.hpp"
|
|
#include "git.hpp"
|
|
#include "time.hpp"
|
|
|
|
#include <cgicc/Cgicc.h>
|
|
#include <curl/curl.h>
|
|
#include <fmt/format.h>
|
|
#include <unicode/unistr.h>
|
|
|
|
#include <algorithm>
|
|
#include <array>
|
|
#include <chrono>
|
|
#include <cstdint>
|
|
#include <fstream>
|
|
#include <ios>
|
|
#include <iostream>
|
|
#include <iterator>
|
|
#include <map>
|
|
#include <regex>
|
|
#include <sstream>
|
|
#include <stdexcept>
|
|
#include <string>
|
|
#include <string_view>
|
|
#include <vector>
|
|
|
|
namespace FediBlock::cgi
|
|
{
|
|
|
|
using fmt::format;
|
|
using std::getline;
|
|
using std::ios;
|
|
using std::map;
|
|
using std::ofstream;
|
|
using std::runtime_error;
|
|
using std::string;
|
|
using std::string_view;
|
|
using std::stringstream;
|
|
using std::transform;
|
|
using std::vector;
|
|
using std::chrono::system_clock;
|
|
|
|
/**
 * Build an entry from the CGI form data of the current request.
 *
 * Steps, in order: reject the request if the `url` field is filled, check the
 * captcha, copy instance / tags / receipts / description, stamp the report
 * time, run the spam filter and finally write each uploaded file to the
 * temporary directory.
 *
 * @return The populated entry, including the paths of the stored screenshots.
 *
 * @throws SpamException     if the `url` field is not empty or is_spam()
 *                           matches.
 * @throws CaptchaException  if captcha_valid() rejects the answer.
 * @throws std::runtime_error if a screenshot exceeds the size limit or its
 *                           temporary file can not be opened.
 *
 * Exceptions from std::stoul (non-numeric `captcha_id`) are not caught here.
 */
entry_type parse_formdata()
{
    entry_type entry;

    cgicc::Cgicc cgi;

    // Catch non-targeted spam.
    // NOTE(review): `url` is presumably a honeypot field that humans never
    // fill in — confirm against the HTML form.
    const bool url_filled{!cgi("url").empty()};
    if (url_filled)
    {
        throw SpamException{};
    }

    const auto captcha_id{
        static_cast<std::uint8_t>(std::stoul(cgi("captcha_id")))};
    if (!captcha_valid(captcha_id, cgi("captcha_answer")))
    {
        throw CaptchaException{};
    }

    entry.instance = cgi("instance");

    // The old form sends one comma-separated `tags` field, the new one sends
    // repeated `tags[]` fields.
    const string legacy_tags{cgi("tags")};
    entry.tags = legacy_tags.empty() ? get_array("tags[]")
                                     : string_to_vector(legacy_tags);
    for (auto &tag : entry.tags)
    {
        tag = tolower(tag);
    }
    add_tags(entry);

    // Same old form / new form distinction for the receipts.
    const string legacy_receipts{cgi("receipts")};
    entry.receipts = legacy_receipts.empty()
                         ? get_array("receipts[]")
                         : string_to_vector(legacy_receipts);

    entry.description = cgi("description");
    entry.report_time = time::to_string(system_clock::now());

    if (is_spam(entry))
    {
        throw SpamException{};
    }

    // Store every uploaded file as <branch name>-<counter><extension>.
    constexpr size_t size_limit{1024 * 1024};
    std::uint8_t screenshot_counter{1};
    for (const auto &upload : cgi.getFiles())
    {
        if (upload.getDataLength() > size_limit)
        {
            throw runtime_error{format("The screenshot “{0:s}” is too big. "
                                       "The limit is {1:.1f} kilobyte (KiB).",
                                       upload.getFilename(),
                                       size_limit / 1024.0)};
        }

        // Only the extension of the client-supplied file name is reused.
        const string extension{
            fs::path(upload.getFilename()).extension().string()};
        const string filepath{files::get_tmpdir()
                              / format("{:s}-{:d}{:s}", git::get_branch_name(),
                                       screenshot_counter, extension)};

        ofstream target{filepath, ios::binary};
        if (!target.good())
        {
            throw runtime_error{"Could not open temporary file: " + filepath};
        }
        upload.writeToStream(target);

        entry.screenshot_filepaths.push_back(filepath);
        ++screenshot_counter;
    }

    return entry;
}
|
|
|
|
/**
 * Split a comma-separated list into its elements.
 *
 * Leading and trailing spaces are stripped from every element. Elements that
 * are empty or consist only of spaces are dropped.
 *
 * @param str The list, e.g. "a, b ,c".
 *
 * @return The trimmed, non-empty elements in input order.
 */
vector<string> string_to_vector(const string_view str)
{
    vector<string> vec;

    // Copy the view into an owning string first: a string_view is not
    // guaranteed to be null-terminated, so feeding str.data() to the stream
    // could read past the end of the view.
    stringstream input{string{str}};
    string element;

    while (getline(input, element, ','))
    {
        const auto startpos{element.find_first_not_of(' ')};
        if (startpos == string::npos)
        {
            // Nothing but spaces (or nothing at all) between two commas.
            continue;
        }

        // A non-space character exists, so endpos is valid and >= startpos.
        const auto endpos{element.find_last_not_of(' ')};
        vec.push_back(element.substr(startpos, endpos - startpos + 1));
    }

    return vec;
}
|
|
|
|
vector<string> get_array(const string &name)
|
|
{
|
|
cgicc::Cgicc cgi;
|
|
vector<cgicc::FormEntry> form;
|
|
vector<string> values;
|
|
|
|
cgi.getElement(name, form);
|
|
for (const auto &element : form)
|
|
{
|
|
const string value{element.getValue()};
|
|
if (!value.empty())
|
|
{
|
|
const auto new_values{string_to_vector(tolower(value))};
|
|
values.insert(values.end(),
|
|
std::make_move_iterator(new_values.begin()),
|
|
std::make_move_iterator(new_values.end()));
|
|
}
|
|
}
|
|
|
|
return values;
|
|
}
|
|
|
|
string tolower(const string_view str)
|
|
{
|
|
string result;
|
|
|
|
const auto unistr{icu::UnicodeString(str.data(), "UTF-8").toLower()};
|
|
unistr.toUTF8String(result);
|
|
|
|
return result;
|
|
}
|
|
|
|
/**
 * Turn plain text into HTML.
 *
 * `<` and `>` are replaced by the entities `&lt;` and `&gt;`, afterwards
 * every CRLF line break is replaced by `<br>`.
 *
 * NOTE(review): `&` is left as it is, so entities already present in the
 * input are passed through — confirm whether that is wanted.
 *
 * @param text The plain text.
 *
 * @return The text with the replacements applied.
 */
string text2html(string text)
{
    static const map<string_view, string_view> entities{{"<", "&lt;"},
                                                        {">", "&gt;"}};
    static const map<string_view, string_view> html{{"\r\n", "<br>"}};

    // Order matters: escape first, otherwise the freshly inserted <br> tags
    // would be escaped as well. Pointers avoid copying the maps on each call.
    for (const auto *replacements : {&entities, &html})
    {
        for (const auto &[needle, substitute] : *replacements)
        {
            string::size_type pos{0};
            while ((pos = text.find(needle, pos)) != string::npos)
            {
                text.replace(pos, needle.size(), substitute);
                // Continue after the inserted text so it is never rescanned.
                pos += substitute.size();
            }
        }
    }

    return text;
}
|
|
|
|
/**
 * Check the answer to a captcha.
 *
 * @param id     Index of the captcha question.
 * @param answer The answer that was submitted.
 *
 * @return true if `id` names a known captcha and `answer` is exactly its
 *         expected answer. Unknown ids yield false instead of throwing, since
 *         the id comes from untrusted form input.
 */
bool captcha_valid(std::uint8_t id, const string_view answer)
{
    // Expected answers, indexed by captcha id.
    static constexpr std::array<string_view, 6> answers{"2",  "6", "17",
                                                        "12", "4", "1"};
    return id < answers.size() && answers[id] == answer;
}
|
|
|
|
bool is_spam(const entry_type &entry)
|
|
{
|
|
using std::regex;
|
|
using std::regex_search;
|
|
|
|
std::ifstream file(files::get_datadir() / "spamfilter.lst");
|
|
if (file.good())
|
|
{
|
|
string line;
|
|
while (getline(file, line))
|
|
{
|
|
if (!line.empty() && regex_search(entry.description, regex{line}))
|
|
{
|
|
return true;
|
|
}
|
|
}
|
|
}
|
|
return false;
|
|
}
|
|
|
|
/**
 * Add tags that are implied by the tags already present.
 *
 * Currently "nazism" implies "fascism". The implied tag is appended at most
 * once and only if it is not present already.
 *
 * @param entry The entry whose tags are extended in place.
 */
void add_tags(entry_type &entry)
{
    auto &tags{entry.tags};
    const auto has_tag{[&tags](const char *wanted)
                       {
                           return std::find(tags.begin(), tags.end(), wanted)
                                  != tags.end();
                       }};

    // Decide first, modify afterwards: appending while iterating over the
    // same vector may reallocate it and invalidate the iterators in use.
    if (has_tag("nazism") && !has_tag("fascism"))
    {
        tags.emplace_back("fascism");
    }
}
|
|
|
|
} // namespace FediBlock::cgi
|
|
|
|
const char *SpamException::what() const noexcept
|
|
{
|
|
return "Spam detected.";
|
|
}
|
|
|
|
const char *CaptchaException::what() const noexcept
|
|
{
|
|
return "The solution to the captcha is not correct.";
|
|
}
|