refactoring

This commit is contained in:
tastytea 2018-03-15 13:20:26 +01:00
parent dd47092bbb
commit 7ef6c5fad7
Signed by: tastytea
GPG Key ID: 59346E0EA35C67E5
6 changed files with 72 additions and 46 deletions

View File

@ -1,6 +1,6 @@
cmake_minimum_required (VERSION 3.7) cmake_minimum_required (VERSION 3.7)
project (mastorss project (mastorss
VERSION 0.3.7 VERSION 0.4.0
LANGUAGES CXX LANGUAGES CXX
) )

View File

@ -1,7 +1,9 @@
**mastorss** dumps RSS feeds into a mastodon account. **mastorss** dumps RSS feeds into a mastodon account.
It is hacked together and generally only extended/fixed when it fails. Supports RSS 2.0 but not RSS 0.92. Does not support Atom at the moment.
Do NOT assume it follows any standards.
Use at your own risk. <item>s in feeds must have <link>, <title> and <description>.
The documentation is far from complete, sorry.
# Install # Install
@ -13,7 +15,7 @@ Use at your own risk.
* [boost](http://www.boost.org/) (tested: 1.63.0) * [boost](http://www.boost.org/) (tested: 1.63.0)
* [libcurl](https://curl.haxx.se/) (tested: 7.58.0) * [libcurl](https://curl.haxx.se/) (tested: 7.58.0)
* [curlpp](http://www.curlpp.org/) (tested: 0.8.1) * [curlpp](http://www.curlpp.org/) (tested: 0.8.1)
* [mastodon-cpp](https://github.com/tastytea/mastodon-cpp) (at least: 0.2.13) * [mastodon-cpp](https://github.com/tastytea/mastodon-cpp) (at least: 0.6.4)
## Get source code ## Get source code
@ -28,9 +30,7 @@ Use at your own risk.
cmake .. cmake ..
make make
cmake options: ## Install
* `-DCMAKE_BUILD_TYPE=Debug` for a debug build
Install with `make install`. Install with `make install`.
@ -53,7 +53,16 @@ ${HOME}/.config/mastorss/config-example.json
"feedurl": "https:\/\/example.com\/feed.rss", "feedurl": "https:\/\/example.com\/feed.rss",
"access_token": "123abc", "access_token": "123abc",
"max_size": "400", "max_size": "400",
"last_entry": "Example\n\nThis is an example.\n\nhttps:\/\/example.com\/12345.html\n\n#bot" "skip":
[
"If the entry starts with this, skip it",
"Skip me too!"
],
"fixes":
[
"delete this",
"[Rr]ead more(\.{3}|…)"
]
} }
} }

View File

@ -31,7 +31,7 @@ using std::cerr;
using std::cin; using std::cin;
using std::string; using std::string;
std::uint16_t read_config(pt::ptree &config, const string &profile, string &instance, string &access_token, string &feedurl) std::uint16_t read_config(string &instance, string &access_token, string &feedurl)
{ {
bool config_changed = false; bool config_changed = false;
@ -63,8 +63,7 @@ std::uint16_t read_config(pt::ptree &config, const string &profile, string &inst
cout << "No access token found.\n"; cout << "No access token found.\n";
string client_id, client_secret, url; string client_id, client_secret, url;
Mastodon::API masto(instance, ""); Mastodon::API masto(instance, "");
std::uint16_t ret = masto.register_app1(instance, std::uint16_t ret = masto.register_app1("mastorss",
"mastorss",
"urn:ietf:wg:oauth:2.0:oob", "urn:ietf:wg:oauth:2.0:oob",
"write", "write",
"https://github.com/tastytea/mastorss", "https://github.com/tastytea/mastorss",
@ -78,8 +77,7 @@ std::uint16_t read_config(pt::ptree &config, const string &profile, string &inst
cout << "Insert code: "; cout << "Insert code: ";
cin >> code; cin >> code;
masto.register_app2(instance, masto.register_app2(client_id,
client_id,
client_secret, client_secret,
"urn:ietf:wg:oauth:2.0:oob", "urn:ietf:wg:oauth:2.0:oob",
code, code,

View File

@ -39,6 +39,8 @@ using std::string;
// Initialize global variables // Initialize global variables
std::uint16_t max_size = 500; std::uint16_t max_size = 500;
const string filepath = string(getenv("HOME")) + "/.config/mastorss/"; const string filepath = string(getenv("HOME")) + "/.config/mastorss/";
pt::ptree config;
std::string profile;
int main(int argc, char *argv[]) int main(int argc, char *argv[])
{ {
@ -53,29 +55,29 @@ int main(int argc, char *argv[])
max_size = std::stoi(argv[2]); max_size = std::stoi(argv[2]);
} }
pt::ptree config;
string instance = ""; string instance = "";
string access_token = ""; string access_token = "";
string feedurl = ""; string feedurl = "";
const string profile = argv[1]; profile = argv[1];
std::uint16_t ret; std::uint16_t ret;
read_config(config, profile, instance, access_token, feedurl);
curlpp_init();
string answer; string answer;
string last_entry = config.get(profile + ".last_entry", "");
std::vector<string> entries; std::vector<string> entries;
read_config(instance, access_token, feedurl);
curlpp_init();
ret = http_get(feedurl, answer, "mastorss/" + (string)global::version); ret = http_get(feedurl, answer, "mastorss/" + (string)global::version);
if (ret != 0) if (ret != 0)
{ {
return ret; return ret;
} }
entries = parse_website(profile, answer); entries = parse_website(answer);
string last_entry = config.get(profile + ".last_entry", "");
if (last_entry.empty()) if (last_entry.empty())
{ {
// If no last_entry is stored in the config file,
// make last_entry the second-newest entry.
last_entry = entries.at(1); last_entry = entries.at(1);
} }
config.put(profile + ".last_entry", entries.front()); config.put(profile + ".last_entry", entries.front());
@ -85,6 +87,8 @@ int main(int argc, char *argv[])
{ {
if (!new_content && (*rit).compare(last_entry) == 0) if (!new_content && (*rit).compare(last_entry) == 0)
{ {
// If the last entry is found in entries,
// start tooting in the next loop.
new_content = true; new_content = true;
continue; continue;
} }
@ -113,14 +117,7 @@ int main(int argc, char *argv[])
std::this_thread::sleep_for(std::chrono::seconds(2)); std::this_thread::sleep_for(std::chrono::seconds(2));
} }
// If the last entry is not in the current feed, set the oldest item as last entry // Write the new last_entry only if no error happened.
// Could lead to spamming if an item gets deleted or changed.
// Update: It DID cause spamming :-(
// TODO: Think of something better
// if (!new_content)
// {
// config.put(profile + ".last_entry", entries.at(entries.size() - 1));
// }
pt::write_json(filepath + "config-" + profile + ".json", config); pt::write_json(filepath + "config-" + profile + ".json", config);
return 0; return 0;

View File

@ -11,10 +11,12 @@ using std::string;
extern std::uint16_t max_size; extern std::uint16_t max_size;
extern const string filepath; extern const string filepath;
extern pt::ptree config;
extern std::string profile;
std::uint16_t read_config(pt::ptree &config, const string &profile, string &instance, string &access_token, string &feedurl); std::uint16_t read_config(string &instance, string &access_token, string &feedurl);
std::vector<string> parse_website(const string &profile, const string &xml); std::vector<string> parse_website(const string &xml);
void unescape_html(const string &str); void unescape_html(const string &str);
void individual_fixes(string &str); void individual_fixes(string &str);

View File

@ -64,7 +64,7 @@ void unescape_html(string &str)
str = std::regex_replace(str, reapos, "\'"); str = std::regex_replace(str, reapos, "\'");
} }
std::vector<string> parse_website(const string &profile, const string &xml) std::vector<string> parse_website(const string &xml)
{ {
pt::ptree json; pt::ptree json;
std::vector<string> watchwords; std::vector<string> watchwords;
@ -83,6 +83,7 @@ std::vector<string> parse_website(const string &profile, const string &xml)
try try
{ {
// Read profile-specific hashtags or fail silently
for (const pt::ptree::value_type &value : json.get_child(profile + ".tags")) for (const pt::ptree::value_type &value : json.get_child(profile + ".tags"))
{ {
watchwords.push_back(value.second.data()); watchwords.push_back(value.second.data());
@ -94,6 +95,7 @@ std::vector<string> parse_website(const string &profile, const string &xml)
} }
try try
{ {
// Read global hashtags or fail silently
for (const pt::ptree::value_type &value : json.get_child("global.tags")) for (const pt::ptree::value_type &value : json.get_child("global.tags"))
{ {
watchwords.push_back(value.second.data()); watchwords.push_back(value.second.data());
@ -120,14 +122,29 @@ std::vector<string> parse_website(const string &profile, const string &xml)
string desc = v.second.get_child("description").data(); string desc = v.second.get_child("description").data();
string str = title + "\n\n" + desc; string str = title + "\n\n" + desc;
// ANF News puts this always on top, causing us to think it's new try
if (title.compare(0, 35, "Newsticker zu den Angriffen auf Efr") == 0)
{ {
continue; // Skip entries beginning with this text
for (const pt::ptree::value_type &v : config.get_child(profile + ".skip"))
{
const string skip = v.second.data();
if (!skip.empty())
{
if (title.compare(0, skip.length(), skip) == 0)
{
continue;
}
}
}
}
catch (const std::exception &e)
{
// Node not found, no problem
} }
unescape_html(str); unescape_html(str);
// Try to turn the HTML into human-readable text
std::regex reparagraph("</p><p>"); std::regex reparagraph("</p><p>");
std::regex recdata1("<!\\[CDATA\\["); std::regex recdata1("<!\\[CDATA\\[");
std::regex recdata2("\\]\\]>"); std::regex recdata2("\\]\\]>");
@ -166,16 +183,19 @@ std::vector<string> parse_website(const string &profile, const string &xml)
return ret; return ret;
} }
// Read regular expressions from the config file and delete all matches.
void individual_fixes(string &str) void individual_fixes(string &str)
{ {
// de.indymedia.org articles sometimes have CSS in the description try
std::regex reindyfuckup("\\/\\* Style Definitions \\*\\/[.[:space:]]*$"); {
// Direkte Action closing for (const pt::ptree::value_type &v : config.get_child(profile + ".fixes"))
std::regex redaclosing("Der Beitrag .* erschien zuerst auf Direkte Aktion."); {
// GG/BO closing std::regex refix(v.second.data());
std::regex reggboclosing("Die von den einzelnen AutorInnen .*$"); str = std::regex_replace(str, refix, "");
}
str = std::regex_replace(str, reindyfuckup, ""); }
str = std::regex_replace(str, redaclosing, ""); catch (const std::exception &e)
str = std::regex_replace(str, reggboclosing, ""); {
// Node not found, no problem
}
} }