From 7ef6c5fad75bbfaa24ae4049a6463d601ec04afe Mon Sep 17 00:00:00 2001 From: tastytea Date: Thu, 15 Mar 2018 13:20:26 +0100 Subject: [PATCH] refactoring --- CMakeLists.txt | 2 +- README.md | 25 +++++++++++++++++-------- src/config.cpp | 8 +++----- src/mastorss.cpp | 29 +++++++++++++---------------- src/mastorss.hpp | 6 ++++-- src/parse.cpp | 48 ++++++++++++++++++++++++++++++++++-------------- 6 files changed, 72 insertions(+), 46 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 28e2832..1a13762 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,6 +1,6 @@ cmake_minimum_required (VERSION 3.7) project (mastorss - VERSION 0.3.7 + VERSION 0.4.0 LANGUAGES CXX ) diff --git a/README.md b/README.md index 9e6061e..ace9bd2 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,9 @@ **mastorss** dumps RSS feeds into a mastodon account. -It is hacked together and generally only extended/fixed when it fails. -Do NOT assume it follows any standards. -Use at your own risk. +Supports RSS 2.0 but not RSS 0.92. Does not support Atom at the moment. + +`<item>`s in feeds must have `<link>`, `<title>` and `<description>`. + +The documentation is far from complete, sorry. # Install @@ -13,7 +15,7 @@ Use at your own risk. * [boost](http://www.boost.org/) (tested: 1.63.0) * [libcurl](https://curl.haxx.se/) (tested: 7.58.0) * [curlpp](http://www.curlpp.org/) (tested: 0.8.1) - * [mastodon-cpp](https://github.com/tastytea/mastodon-cpp) (at least: 0.2.13) + * [mastodon-cpp](https://github.com/tastytea/mastodon-cpp) (at least: 0.6.4) ## Get sourcecode @@ -28,9 +30,7 @@ Use at your own risk. cmake .. make -cmake options: - - * `-DCMAKE_BUILD_TYPE=Debug` for a debug build +## Install Install with `make install`.
@@ -53,7 +53,16 @@ ${HOME}/.config/mastorss/config-example.json "feedurl": "https:\/\/example.com\/feed.rss", "access_token": "123abc", "max_size": "400", - "last_entry": "Example\n\nThis is an example.\n\nhttps:\/\/example.com\/12345.html\n\n#bot" + "skip": + [ + "If the entry starts with this, skip it", + "Skip me too!" + ], + "fixes": + [ + "delete this", + "[Rr]ead more(\\.{3}|…)" + ] } } diff --git a/src/config.cpp b/src/config.cpp index 5ef16af..85a6bae 100644 --- a/src/config.cpp +++ b/src/config.cpp @@ -31,7 +31,7 @@ using std::cerr; using std::cin; using std::string; -std::uint16_t read_config(pt::ptree &config, const string &profile, string &instance, string &access_token, string &feedurl) +std::uint16_t read_config(string &instance, string &access_token, string &feedurl) { bool config_changed = false; @@ -63,8 +63,7 @@ std::uint16_t read_config(pt::ptree &config, const string &profile, string &inst cout << "No access token found.\n"; string client_id, client_secret, url; Mastodon::API masto(instance, ""); - std::uint16_t ret = masto.register_app1(instance, - "mastorss", + std::uint16_t ret = masto.register_app1("mastorss", "urn:ietf:wg:oauth:2.0:oob", "write", "https://github.com/tastytea/mastorss", @@ -78,8 +77,7 @@ std::uint16_t read_config(pt::ptree &config, const string &profile, string &inst cout << "Insert code: "; cin >> code; - masto.register_app2(instance, - client_id, + masto.register_app2(client_id, client_secret, "urn:ietf:wg:oauth:2.0:oob", code, diff --git a/src/mastorss.cpp b/src/mastorss.cpp index ef39eab..8ebbc53 100644 --- a/src/mastorss.cpp +++ b/src/mastorss.cpp @@ -39,6 +39,8 @@ using std::string; // Initialize global variables std::uint16_t max_size = 500; const string filepath = string(getenv("HOME")) + "/.config/mastorss/"; +pt::ptree config; +std::string profile; int main(int argc, char *argv[]) { @@ -53,29 +55,29 @@ int main(int argc, char *argv[]) max_size = std::stoi(argv[2]); } - pt::ptree config; string instance = ""; string 
access_token = ""; string feedurl = ""; - const string profile = argv[1]; + profile = argv[1]; std::uint16_t ret; - - read_config(config, profile, instance, access_token, feedurl); - curlpp_init(); - string answer; - string last_entry = config.get(profile + ".last_entry", ""); std::vector<string> entries; + read_config(instance, access_token, feedurl); + curlpp_init(); + ret = http_get(feedurl, answer, "mastorss/" + (string)global::version); if (ret != 0) { return ret; } - entries = parse_website(profile, answer); + entries = parse_website(answer); + string last_entry = config.get(profile + ".last_entry", ""); if (last_entry.empty()) { + // If no last_entry is stored in the config file, + // make last_entry the second-newest entry. last_entry = entries.at(1); } config.put(profile + ".last_entry", entries.front()); @@ -85,6 +87,8 @@ int main(int argc, char *argv[]) { if (!new_content && (*rit).compare(last_entry) == 0) { + // If the last entry is found in entries, + // start tooting in the next loop. new_content = true; continue; } @@ -113,14 +117,7 @@ int main(int argc, char *argv[]) std::this_thread::sleep_for(std::chrono::seconds(2)); } - // If the last entry is not in the current feed, set the oldest item as last entry - // Could lead to spamming if an item gets deleted or changed. - // Update: It DID couse spamming :-( - // TODO: Think of something better - // if (!new_content) - // { - // config.put(profile + ".last_entry", entries.at(entries.size() - 1)); - // } + // Write the new last_entry only if no error happened. 
pt::write_json(filepath + "config-" + profile + ".json", config); return 0; diff --git a/src/mastorss.hpp b/src/mastorss.hpp index ea77c8b..349860d 100644 --- a/src/mastorss.hpp +++ b/src/mastorss.hpp @@ -11,10 +11,12 @@ using std::string; extern std::uint16_t max_size; extern const string filepath; +extern pt::ptree config; +extern std::string profile; -std::uint16_t read_config(pt::ptree &config, const string &profile, string &instance, string &access_token, string &feedurl); +std::uint16_t read_config(string &instance, string &access_token, string &feedurl); -std::vector<string> parse_website(const string &profile, const string &xml); +std::vector<string> parse_website(const string &xml); -void unescape_html(const string &str); +void unescape_html(string &str); void individual_fixes(string &str); diff --git a/src/parse.cpp b/src/parse.cpp index 893f66f..298456d 100644 --- a/src/parse.cpp +++ b/src/parse.cpp @@ -64,7 +64,7 @@ void unescape_html(string &str) str = std::regex_replace(str, reapos, "\'"); } -std::vector<string> parse_website(const string &profile, const string &xml) +std::vector<string> parse_website(const string &xml) { pt::ptree json; std::vector<string> watchwords; @@ -83,6 +83,7 @@ std::vector<string> parse_website(const string &profile, const string &xml) try { + // Read profile-specific hashtags or fail silently for (const pt::ptree::value_type &value : json.get_child(profile + ".tags")) { watchwords.push_back(value.second.data()); @@ -94,6 +95,7 @@ std::vector<string> parse_website(const string &profile, const string &xml) } try { + // Read global hashtags or fail silently for (const pt::ptree::value_type &value : json.get_child("global.tags")) { watchwords.push_back(value.second.data()); @@ -120,14 +122,34 @@ std::vector<string> parse_website(const string &profile, const string &xml) string desc = v.second.get_child("description").data(); string str = title + "\n\n" + desc; - // ANF News puts this always on top, causing us to think it's new - if (title.compare(0, 35, 
"Newsticker zu den Angriffen auf Efr") == 0) + // Set when the title starts with one of the profile's "skip" texts + bool skipthis = false; + try { - continue; + // Skip entries beginning with this text + for (const pt::ptree::value_type &skipnode : config.get_child(profile + ".skip")) + { + const string skip = skipnode.second.data(); + if (!skip.empty() && title.compare(0, skip.length(), skip) == 0) + { + // Leave only this inner loop; a continue here would not reach the loop over the feed items + skipthis = true; + break; + } + } + } + catch (const std::exception &e) + { + // Node not found, no problem } + if (skipthis) + { + continue; + } unescape_html(str); + // Try to turn the HTML into human-readable text std::regex reparagraph("</p><p>"); std::regex recdata1("<!\\[CDATA\\["); std::regex recdata2("\\]\\]>"); @@ -166,16 +188,19 @@ std::vector<string> parse_website(const string &profile, const string &xml) return ret; } +// Read regular expressions from the config file and delete all matches. void individual_fixes(string &str) { - // de.indymedia.org articles sometimes have CSS in the description - std::regex reindyfuckup("\\/\\* Style Definitions \\*\\/[.[:space:]]*$"); - // Direkte Action closing - std::regex redaclosing("Der Beitrag .* erschien zuerst auf Direkte Aktion."); - // GG/BO closing - std::regex reggboclosing("Die von den einzelnen AutorInnen .*$"); - - str = std::regex_replace(str, reindyfuckup, ""); - str = std::regex_replace(str, redaclosing, ""); - str = std::regex_replace(str, reggboclosing, ""); + try + { + for (const pt::ptree::value_type &v : config.get_child(profile + ".fixes")) + { + std::regex refix(v.second.data()); + str = std::regex_replace(str, refix, ""); + } + } + catch (const std::exception &e) + { + // Node not found, no problem + } }