mastorss/src/rss2mastodon.cpp

264 lines
7.7 KiB
C++
Raw Normal View History

2018-01-26 02:33:58 +01:00
/* This file is part of rss2mastodon.
* Copyright © 2018 tastytea <tastytea@tastytea.de>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, version 3.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <iostream>
#include <vector>
#include <string>
#include <cstdint>
#include <cstdlib>
#include <random>
#include <regex>
#include <sstream>
#include <thread>
#include <chrono>
2018-01-26 02:33:58 +01:00
#include <boost/property_tree/ptree.hpp>
#include <boost/property_tree/json_parser.hpp>
#include <boost/property_tree/xml_parser.hpp>
#include <boost/filesystem.hpp>
#include <mastodon-cpp.hpp>
#include "rss2mastodon.hpp"
namespace pt = boost::property_tree;
using Mastodon::API;
using std::cout;
using std::cerr;
using std::string;
2018-02-01 12:03:16 +01:00
// Default upper bound for the size of a generated post; parse_website()
// truncates entries so that they fit into it.
uint16_t max_size = 500;

// Directory that holds the per-profile config files and watchwords.json.
// std::getenv() returns a null pointer when HOME is unset, and constructing a
// std::string from a null pointer is undefined behaviour, so that case is
// reported and the program exits instead of crashing during static
// initialisation.
const std::string filepath = []
{
    const char *home = std::getenv("HOME");
    if (home == nullptr)
    {
        std::cerr << "ERROR: HOME is not set, cannot locate the config directory.\n";
        std::exit(1);
    }
    return std::string(home) + "/.config/rss2mastodon/";
}();
2018-01-26 03:35:52 +01:00
void read_config(pt::ptree &config, const string &profile, string &instance, string &access_token, string &feedurl)
2018-01-26 02:33:58 +01:00
{
bool config_changed = false;
// Read config file, get access token
try {
pt::read_json(filepath + "config-" + profile + ".json", config);
2018-01-26 02:33:58 +01:00
instance = config.get(profile + ".instance", "");
access_token = config.get(profile + ".access_token", "");
2018-01-26 03:35:52 +01:00
feedurl = config.get(profile + ".feedurl", "");
2018-01-26 02:33:58 +01:00
}
catch (std::exception &e)
{
// most likely no config file found
2018-02-01 12:03:16 +01:00
cout << "Config file not readable. Building new one.\n";
2018-01-26 02:33:58 +01:00
const boost::filesystem::path path(filepath);
boost::filesystem::create_directory(filepath);
}
if (instance.empty())
{
cout << "Instance: ";
std::cin >> instance;
config.put(profile + ".instance", instance);
config_changed = true;
}
if (access_token.empty())
{
cout << "access_token: ";
std::cin >> access_token;
config.put(profile + ".access_token", access_token);
config_changed = true;
}
2018-01-26 03:35:52 +01:00
if (feedurl.empty())
{
cout << "feedurl: ";
std::cin >> feedurl;
config.put(profile + ".feedurl", feedurl);
config_changed = true;
}
2018-01-26 02:33:58 +01:00
if (config_changed)
{
pt::write_json(filepath + "config-" + profile + ".json", config);
2018-01-26 02:33:58 +01:00
}
}
std::vector<string> parse_website(const string &profile, const string &xml)
{
pt::ptree json;
std::vector<string> watchwords;
try
{
pt::read_json(filepath + "watchwords.json", json);
}
catch (std::exception &e)
{
// most likely file not found
std::cerr << "ERROR: " << filepath << "watchwords.json not found or not readable.\n";
std::cerr << e.what() << '\n';
2018-01-26 02:33:58 +01:00
return {};
}
try
{
for (const pt::ptree::value_type &value : json.get_child(profile + ".tags"))
{
watchwords.push_back(value.second.data());
}
}
catch (const std::exception &e)
{
// Node not found, no problem
}
try
{
for (const pt::ptree::value_type &value : json.get_child("global.tags"))
{
watchwords.push_back(value.second.data());
}
}
catch (const std::exception &e)
{
// Node not found, no problem
}
pt::ptree rss;
std::istringstream iss(xml);
pt::read_xml(iss, rss);
std::vector<string> ret;
for (const pt::ptree::value_type &v : rss.get_child("rss.channel"))
{
if (v.second.size() > 0)
{
if (string(v.first.data()).compare("item") == 0)
{
string title = v.second.get_child("title").data();
string link = v.second.get_child("link").data();
string desc = v.second.get_child("description").data();
string str = title + "\n\n" + desc;
2018-02-01 12:03:16 +01:00
// Some feeds contain encoded xhtml-tags >:|
std::regex relt("&lt;");
std::regex regt("&gt;");
std::regex reparagraph("</p><p>");
std::regex recdata1("<!\\[CDATA\\[");
std::regex recdata2("\\]\\]>");
std::regex restrip("<[^>]*>");
std::regex reindyfuckup("\\/\\* Style Definitions \\*\\/[.[:space:]]*$");
str = std::regex_replace(str, relt, "<");
str = std::regex_replace(str, regt, ">");
str = std::regex_replace(str, reparagraph, "\n\n");
str = std::regex_replace(str, recdata1, "");
str = std::regex_replace(str, recdata2, "");
str = std::regex_replace(str, restrip, "");
str = std::regex_replace(str, reindyfuckup, "");
2018-01-26 02:33:58 +01:00
for (const string &hashtag : watchwords)
{
2018-02-01 12:03:16 +01:00
std::regex rehashtag("([[:space:][:punct:]^])(" + hashtag + ")([[:space:][:punct:]$])",
std::regex_constants::icase);
2018-02-01 12:03:16 +01:00
str = std::regex_replace(str, rehashtag, "$1#$2$3",
std::regex_constants::format_first_only);
2018-01-26 02:33:58 +01:00
}
2018-02-01 12:03:16 +01:00
if ((str.size() + link.size()) > (max_size - 15))
{
str.resize((max_size - link.size() - 15));
str += " […]";
}
2018-01-26 02:33:58 +01:00
str += "\n\n" + link + "\n\n#bot";
ret.push_back(str);
}
}
}
return ret;
}
int main(int argc, char *argv[])
{
if (argc < 2)
{
2018-02-01 12:03:16 +01:00
cerr << "usage: " << argv[0] << " <profile> [max size]\n";
2018-01-26 02:33:58 +01:00
return 32;
}
2018-02-01 12:03:16 +01:00
if (argc == 3)
{
max_size == std::stoi(argv[2]);
}
2018-01-26 02:33:58 +01:00
pt::ptree config;
string instance = "";
string access_token = "";
2018-01-26 03:35:52 +01:00
string feedurl = "";
2018-01-26 02:33:58 +01:00
const string profile = argv[1];
2018-02-01 12:03:16 +01:00
std::uint16_t ret;
2018-01-26 02:33:58 +01:00
2018-01-26 03:35:52 +01:00
read_config(config, profile, instance, access_token, feedurl);
std::size_t pos = 0;
pos = feedurl.find("//") + 2;
const string hostname = feedurl.substr(pos, feedurl.find('/', pos) - pos);
const string path = feedurl.substr(pos + hostname.size());
2018-01-26 02:33:58 +01:00
string answer;
string last_entry = config.get(profile + ".last_entry", "");
std::vector<string> entries;
2018-02-01 12:03:16 +01:00
ret = http_get(hostname, path, answer);
if (ret != 0)
{
return ret;
}
2018-01-26 02:33:58 +01:00
entries = parse_website(profile, answer);
2018-01-26 20:59:01 +01:00
if (last_entry.empty())
{
last_entry = entries.at(1);
}
2018-02-01 12:03:16 +01:00
config.put(profile + ".last_entry", entries.front());
2018-01-26 02:33:58 +01:00
bool new_content = false;
for (auto rit = entries.rbegin(); rit != entries.rend(); ++rit)
{
if (!new_content && (*rit).compare(last_entry) == 0)
{
new_content = true;
continue;
}
else if (!new_content)
{
continue;
}
2018-01-26 03:35:52 +01:00
string answer;
Mastodon::API masto(instance, access_token);
API::parametermap parameters =
{
{ "status", { *rit } },
{ "visibility", { "public" } }
};
ret = masto.post(API::v1::statuses, parameters, answer);
if (ret == 0)
{
2018-02-01 12:03:16 +01:00
pt::write_json(filepath + "config-" + profile + ".json", config);
2018-01-26 03:35:52 +01:00
}
else
{
std::cerr << "Error code: " << ret << '\n';
2018-02-01 12:03:16 +01:00
std::cerr << answer << '\n';
2018-01-26 03:35:52 +01:00
return ret;
}
std::this_thread::sleep_for(std::chrono::seconds(2));
2018-01-26 02:33:58 +01:00
}
return 0;
}