This repository has been archived on 2021-03-22. You can view files and clone it, but cannot push or open issues or pull requests.
backend/src/cgi.cpp

266 lines
6.5 KiB
C++

/* This file is part of FediBlock-backend.
* Copyright © 2020, 2021 tastytea <tastytea@tastytea.de>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, version 3.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "cgi.hpp"
#include "files.hpp"
#include "fs-compat.hpp"
#include "git.hpp"
#include "time.hpp"
#include <cgicc/Cgicc.h>
#include <curl/curl.h>
#include <fmt/format.h>
#include <unicode/unistr.h>
#include <algorithm>
#include <array>
#include <chrono>
#include <cstdint>
#include <fstream>
#include <ios>
#include <iostream>
#include <iterator>
#include <map>
#include <regex>
#include <sstream>
#include <stdexcept>
#include <string>
#include <string_view>
#include <vector>
namespace FediBlock::cgi
{
using fmt::format;
using std::getline;
using std::ios;
using std::map;
using std::ofstream;
using std::runtime_error;
using std::string;
using std::string_view;
using std::stringstream;
using std::transform;
using std::vector;
using std::chrono::system_clock;
entry_type parse_formdata()
{
entry_type entry;
cgicc::Cgicc cgi;
// Catch non-targeted spam.
if (!cgi("url").empty())
{
throw SpamException{};
}
if (!captcha_valid(static_cast<std::uint8_t>(std::stoul(cgi("captcha_id"))),
cgi("captcha_answer")))
{
throw CaptchaException{};
}
entry.instance = cgi("instance");
if (!cgi("tags").empty()) // Old form.
{
entry.tags = string_to_vector(cgi("tags"));
}
else
{
entry.tags = get_array("tags[]");
}
transform(entry.tags.begin(), entry.tags.end(), entry.tags.begin(),
[](const auto &tag) { return tolower(tag); });
add_tags(entry);
if (!cgi("receipts").empty()) // Old form.
{
entry.receipts = string_to_vector(cgi("receipts"));
}
else
{
entry.receipts = get_array("receipts[]");
}
entry.description = cgi("description");
entry.report_time = time::to_string(system_clock::now());
if (is_spam(entry))
{
throw SpamException{};
}
std::uint8_t screenshot_counter{1};
for (const auto &screenshot : cgi.getFiles())
{
constexpr size_t size_limit{1024 * 1024};
if (screenshot.getDataLength() > size_limit)
{
throw runtime_error{format("The screenshot “{0:s}” is too big. "
"The limit is {1:.1f} kilobyte (KiB).",
screenshot.getFilename(),
size_limit / 1024.0)};
}
const string filepath{
files::get_tmpdir()
/ format("{:s}-{:d}{:s}", git::get_branch_name(),
screenshot_counter,
fs::path(screenshot.getFilename()).extension().string())};
ofstream file{filepath, ios::binary};
if (!file.good())
{
throw runtime_error{"Could not open temporary file: " + filepath};
}
screenshot.writeToStream(file);
entry.screenshot_filepaths.push_back(filepath);
++screenshot_counter;
}
return entry;
}
// Splits a comma separated list into its elements.
//
// Leading and trailing spaces (' ' only) are stripped from every element;
// elements that are empty or consist only of spaces are dropped.
//
// Works on the view itself instead of str.data(): a string_view is not
// guaranteed to be NUL-terminated, so treating data() as a C string could read
// past the end of the view. Whitespace-only elements are detected by checking
// for npos instead of indexing the string with it.
vector<string> string_to_vector(const string_view str)
{
    vector<string> vec;

    size_t begin{0};
    while (begin < str.size())
    {
        const size_t comma{str.find(',', begin)};
        const size_t end{comma == string_view::npos ? str.size() : comma};
        const string_view element{str.substr(begin, end - begin)};

        const size_t first{element.find_first_not_of(' ')};
        if (first != string_view::npos) // Has at least one non-space char.
        {
            const size_t last{element.find_last_not_of(' ')};
            vec.emplace_back(element.substr(first, last - first + 1));
        }

        begin = end + 1; // Continue after the comma (or leave the loop).
    }

    return vec;
}
// Collects the values of all form fields called `name` (e.g. “tags[]”).
//
// Every non-empty field value is lower-cased, split with string_to_vector()
// and appended to the result, in the order cgicc returns the fields.
//
// NOTE(review): the lower-casing is applied to whatever field is requested,
// including “receipts[]” — confirm that this is intended for receipts.
vector<string> get_array(const string &name)
{
    cgicc::Cgicc cgi;
    vector<cgicc::FormEntry> form;
    cgi.getElement(name, form);

    vector<string> values;
    for (const auto &element : form)
    {
        const string value{element.getValue()};
        if (value.empty())
        {
            continue;
        }

        // Intentionally not const: move iterators over a const vector would
        // silently copy every string instead of moving it.
        auto new_values = string_to_vector(tolower(value));
        values.insert(values.end(),
                      std::make_move_iterator(new_values.begin()),
                      std::make_move_iterator(new_values.end()));
    }

    return values;
}
// Returns a lower-cased copy of the UTF-8 encoded `str`, using ICU.
//
// The length of the view is passed to ICU explicitly: a string_view is not
// guaranteed to be NUL-terminated, so handing str.data() over as a C string
// could read past the end of the view.
//
// NOTE(review): toLower() without arguments follows ICU's default locale —
// confirm that locale dependent case mappings are acceptable here.
string tolower(const string_view str)
{
    string result;
    if (str.empty())
    {
        return result;
    }

    icu::UnicodeString unistr(str.data(),
                              static_cast<std::int32_t>(str.size()), "UTF-8");
    unistr.toLower();
    unistr.toUTF8String(result);
    return result;
}
// Converts plain text into HTML that displays the same text.
//
// - ‘&’, ‘<’ and ‘>’ are replaced by their entities. ‘&’ has to be escaped as
//   well, otherwise input such as “&lt;” would be rendered as ‘<’ instead of
//   being shown literally.
// - Every CRLF line break becomes “<br>”. A lone ‘\r’ or ‘\n’ is kept as is.
//
// The text is processed in a single pass, so text inserted by one replacement
// is never looked at by another one.
string text2html(string text)
{
    string html;
    html.reserve(text.size());

    for (size_t pos{0}; pos < text.size(); ++pos)
    {
        switch (text[pos])
        {
        case '&':
            html += "&amp;";
            break;
        case '<':
            html += "&lt;";
            break;
        case '>':
            html += "&gt;";
            break;
        case '\r':
            if (pos + 1 < text.size() && text[pos + 1] == '\n')
            {
                html += "<br>";
                ++pos; // Skip the ‘\n’ that belongs to this line break.
            }
            else
            {
                html += '\r';
            }
            break;
        default:
            html += text[pos];
            break;
        }
    }

    return html;
}
// Checks the answer to the captcha with the given id.
//
// `id` is an index into the table of expected answers below. Returns true only
// if `id` refers to an existing captcha and `answer` is exactly the expected
// string. An unknown id is an invalid captcha, not an error: it returns false
// instead of throwing std::out_of_range.
bool captcha_valid(std::uint8_t id, const string_view answer)
{
    // Built at compile time; no strings are allocated per call.
    static constexpr std::array<string_view, 6> answers{"2",  "6", "17",
                                                        "12", "4", "1"};

    return id < answers.size() && answers[id] == answer;
}
bool is_spam(const entry_type &entry)
{
using std::regex;
using std::regex_search;
std::ifstream file(files::get_datadir() / "spamfilter.lst");
if (file.good())
{
string line;
while (getline(file, line))
{
if (!line.empty() && regex_search(entry.description, regex{line}))
{
return true;
}
}
}
return false;
}
// Adds tags that are implied by other tags: “nazism” implies “fascism”.
//
// The tag list is searched first and modified afterwards. Appending to
// entry.tags while iterating over it invalidates the iterators of the loop
// when the container reallocates. “fascism” is added once, and only if
// it is not in the list already.
void add_tags(entry_type &entry)
{
    auto &tags = entry.tags;

    const bool has_nazism{std::find(tags.begin(), tags.end(), "nazism")
                          != tags.end()};
    const bool has_fascism{std::find(tags.begin(), tags.end(), "fascism")
                           != tags.end()};

    if (has_nazism && !has_fascism)
    {
        tags.emplace_back("fascism");
    }
}
} // namespace FediBlock::cgi
// Message reported when a submission was classified as spam.
const char *SpamException::what() const noexcept
{
    static constexpr const char *message{"Spam detected."};
    return message;
}
// Message reported when the captcha was not solved correctly.
const char *CaptchaException::what() const noexcept
{
    static constexpr const char *message{
        "The solution to the captcha is not correct."};
    return message;
}