This repository has been archived on 2021-03-22. You can view files and clone it, but cannot push or open issues or pull requests.
backend/src/cgi.cpp

266 lines
6.5 KiB
C++
Raw Normal View History

2020-06-29 06:10:40 +02:00
/* This file is part of FediBlock-backend.
* Copyright © 2020, 2021 tastytea <tastytea@tastytea.de>
2020-06-29 06:10:40 +02:00
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, version 3.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
2020-06-29 05:21:23 +02:00
#include "cgi.hpp"
#include "files.hpp"
#include "fs-compat.hpp"
#include "git.hpp"
2020-07-05 08:01:30 +02:00
#include "time.hpp"
2020-06-29 05:21:23 +02:00
#include <cgicc/Cgicc.h>
#include <curl/curl.h>
#include <fmt/format.h>
#include <unicode/unistr.h>
2020-06-29 05:21:23 +02:00
#include <algorithm>
#include <array>
2020-07-05 08:01:30 +02:00
#include <chrono>
#include <cstdint>
2020-06-29 22:37:09 +02:00
#include <fstream>
#include <ios>
#include <iostream>
#include <iterator>
#include <map>
2021-01-18 08:19:22 +01:00
#include <regex>
2020-06-29 05:21:23 +02:00
#include <sstream>
2020-06-29 22:37:09 +02:00
#include <stdexcept>
2020-06-29 05:21:23 +02:00
#include <string>
#include <string_view>
2020-06-29 05:21:23 +02:00
#include <vector>
2020-07-01 20:51:35 +02:00
namespace FediBlock::cgi
2020-06-29 05:21:23 +02:00
{
using fmt::format;
2020-06-29 05:21:23 +02:00
using std::getline;
2020-06-29 22:37:09 +02:00
using std::ios;
using std::map;
2020-06-29 22:37:09 +02:00
using std::ofstream;
using std::runtime_error;
2020-06-29 05:21:23 +02:00
using std::string;
using std::string_view;
using std::stringstream;
using std::transform;
2020-06-29 05:21:23 +02:00
using std::vector;
2020-07-05 08:01:30 +02:00
using std::chrono::system_clock;
2020-06-29 05:21:23 +02:00
// Build an entry from the submitted CGI form data.
//
// Reads the fields “instance”, “tags” / “tags[]”, “receipts” / “receipts[]”
// and “description”, lower-cases the tags, stamps the entry with the current
// time and stores every uploaded file in the temporary directory.
//
// Throws SpamException if the honeypot field “url” is filled in or the spam
// filter matches, CaptchaException if the captcha id is malformed, out of
// range or answered incorrectly, and std::runtime_error if a screenshot is
// bigger than 1 MiB or its temporary file can not be opened.
entry_type parse_formdata()
{
    entry_type entry;
    cgicc::Cgicc cgi;

    // Catch non-targeted spam.
    if (!cgi("url").empty())
    {
        throw SpamException{};
    }

    // The captcha id is untrusted input. std::stoul() throws on non-numeric
    // or overlong input and a plain cast to std::uint8_t would silently wrap
    // (256 → 0), so parse and range-check it before narrowing.
    bool captcha_ok{false};
    try
    {
        const unsigned long captcha_id{std::stoul(cgi("captcha_id"))};
        captcha_ok = captcha_id <= UINT8_MAX
                     && captcha_valid(static_cast<std::uint8_t>(captcha_id),
                                      cgi("captcha_answer"));
    }
    catch (const std::logic_error &)
    {
        // std::invalid_argument / std::out_of_range from parsing or from an
        // out-of-range lookup: treat it like a wrong answer.
    }
    if (!captcha_ok)
    {
        throw CaptchaException{};
    }

    entry.instance = cgi("instance");

    if (!cgi("tags").empty()) // Old form.
    {
        entry.tags = string_to_vector(cgi("tags"));
    }
    else
    {
        entry.tags = get_array("tags[]");
    }
    transform(entry.tags.begin(), entry.tags.end(), entry.tags.begin(),
              [](const auto &tag) { return tolower(tag); });
    add_tags(entry);

    if (!cgi("receipts").empty()) // Old form.
    {
        entry.receipts = string_to_vector(cgi("receipts"));
    }
    else
    {
        entry.receipts = get_array("receipts[]");
    }

    entry.description = cgi("description");
    entry.report_time = time::to_string(system_clock::now());

    if (is_spam(entry))
    {
        throw SpamException{};
    }

    // Screenshots are stored as <branch name>-<counter><original extension>.
    std::uint8_t screenshot_counter{1};
    for (const auto &screenshot : cgi.getFiles())
    {
        constexpr size_t size_limit{1024 * 1024};
        if (screenshot.getDataLength() > size_limit)
        {
            throw runtime_error{format("The screenshot “{0:s}” is too big. "
                                       "The limit is {1:.1f} kilobyte (KiB).",
                                       screenshot.getFilename(),
                                       size_limit / 1024.0)};
        }

        const string filepath{
            files::get_tmpdir()
            / format("{:s}-{:d}{:s}", git::get_branch_name(),
                     screenshot_counter,
                     fs::path(screenshot.getFilename()).extension().string())};
        ofstream file{filepath, ios::binary};
        if (!file.good())
        {
            throw runtime_error{"Could not open temporary file: " + filepath};
        }
        screenshot.writeToStream(file);

        entry.screenshot_filepaths.push_back(filepath);
        ++screenshot_counter;
    }

    return entry;
}
2020-06-29 07:05:04 +02:00
// Split a comma-separated list into its elements.
//
// Leading and trailing spaces are stripped from every element. Elements that
// are empty or consist only of spaces are dropped.
vector<string> string_to_vector(const string_view str)
{
    vector<string> vec;

    // Copy by size, a string_view is not guaranteed to be null-terminated.
    stringstream input{string{str}};
    string element;

    while (getline(input, element, ','))
    {
        const size_t startpos{element.find_first_not_of(' ')};
        if (startpos == string::npos)
        {
            // Nothing but spaces (or nothing at all) between two commas.
            continue;
        }

        // startpos is valid, so find_last_not_of() finds a character too.
        const size_t length{element.find_last_not_of(' ') - startpos + 1};
        vec.push_back(element.substr(startpos, length));
    }

    return vec;
}
vector<string> get_array(const string &name)
{
cgicc::Cgicc cgi;
vector<cgicc::FormEntry> form;
vector<string> values;
cgi.getElement(name, form);
for (const auto &element : form)
2020-10-16 23:52:08 +02:00
{
const string value{element.getValue()};
if (!value.empty())
2020-10-16 23:52:08 +02:00
{
const auto new_values{string_to_vector(tolower(value))};
values.insert(values.end(),
std::make_move_iterator(new_values.begin()),
std::make_move_iterator(new_values.end()));
2020-10-16 23:52:08 +02:00
}
}
return values;
}
string tolower(const string_view str)
{
string result;
const auto unistr{icu::UnicodeString(str.data(), "UTF-8").toLower()};
unistr.toUTF8String(result);
return result;
}
// Convert plain text into an HTML fragment.
//
// “&”, “<” and “>” are replaced by their entities and every CRLF line break
// becomes “<br>”. A lone “\r” or “\n” is passed through unchanged.
string text2html(string text)
{
    string html;
    html.reserve(text.size());

    // A single pass guarantees that the “&” of an entity and the “<br>” we
    // insert ourselves are never escaped a second time.
    for (size_t pos{0}; pos < text.size(); ++pos)
    {
        const char character{text[pos]};
        switch (character)
        {
        case '&':
            html += "&amp;";
            break;
        case '<':
            html += "&lt;";
            break;
        case '>':
            html += "&gt;";
            break;
        case '\r':
            if (pos + 1 < text.size() && text[pos + 1] == '\n')
            {
                html += "<br>";
                ++pos; // Skip the “\n” that belongs to this line break.
            }
            else
            {
                html += character;
            }
            break;
        default:
            html += character;
            break;
        }
    }

    return html;
}
2021-01-17 17:01:46 +01:00
// Check the answer to a captcha.
//
// `id` is the index of the question, `answer` the submitted solution.
// Returns true if `answer` is exactly the expected solution, false if it is
// not or if there is no question with that `id`.
bool captcha_valid(std::uint8_t id, const string_view answer)
{
    static constexpr std::array<string_view, 6> answers{"2",  "6", "17",
                                                        "12", "4", "1"};

    // The id comes from the client, an unknown id must not throw.
    return id < answers.size() && answers[id] == answer;
}
2021-01-18 08:19:22 +01:00
bool is_spam(const entry_type &entry)
{
using std::regex;
using std::regex_search;
std::ifstream file(files::get_datadir() / "spamfilter.lst");
if (file.good())
{
string line;
while (getline(file, line))
{
2021-01-29 04:07:00 +01:00
if (!line.empty() && regex_search(entry.description, regex{line}))
2021-01-18 08:19:22 +01:00
{
return true;
}
}
}
return false;
}
// Add tags that are implied by other tags.
//
// Currently “nazism” implies “fascism”. The implied tag is added at most
// once and only if it is not already present.
void add_tags(entry_type &entry)
{
    const auto has_tag{[&entry](const string_view tag) {
        return std::find(entry.tags.begin(), entry.tags.end(), tag)
               != entry.tags.end();
    }};

    // Decide first, modify afterwards: appending to the vector while
    // iterating over it would invalidate the iterators.
    if (has_tag("nazism") && !has_tag("fascism"))
    {
        entry.tags.emplace_back("fascism");
    }
}
2020-07-01 20:51:35 +02:00
} // namespace FediBlock::cgi
2021-01-17 17:01:46 +01:00
// Return the error message for submissions that were classified as spam.
const char *SpamException::what() const noexcept
{
    constexpr auto message{"Spam detected."};
    return message;
}
// Return the error message for submissions with a wrong captcha solution.
const char *CaptchaException::what() const noexcept
{
    constexpr auto message{"The solution to the captcha is not correct."};
    return message;
}