From 7ef6c5fad75bbfaa24ae4049a6463d601ec04afe Mon Sep 17 00:00:00 2001
From: tastytea
Date: Thu, 15 Mar 2018 13:20:26 +0100
Subject: [PATCH] refactoring
---
CMakeLists.txt | 2 +-
README.md | 25 +++++++++++++++++--------
src/config.cpp | 8 +++-----
src/mastorss.cpp | 29 +++++++++++++----------------
src/mastorss.hpp | 6 ++++--
src/parse.cpp | 48 ++++++++++++++++++++++++++++++++++--------------
6 files changed, 72 insertions(+), 46 deletions(-)
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 28e2832..1a13762 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,6 +1,6 @@
cmake_minimum_required (VERSION 3.7)
project (mastorss
- VERSION 0.3.7
+ VERSION 0.4.0
LANGUAGES CXX
)
diff --git a/README.md b/README.md
index 9e6061e..ace9bd2 100644
--- a/README.md
+++ b/README.md
@@ -1,7 +1,9 @@
**mastorss** dumps RSS feeds into a mastodon account.
-It is hacked together and generally only extended/fixed when it fails.
-Do NOT assume it follows any standards.
-Use at your own risk.
+Supports RSS 2.0 but not RSS 0.92. Does not support Atom at the moment.
+
+- `<item>`s in feeds must have `<title>`, `<description>` and `<link>`.
+
+The documentation is far from complete, sorry.
# Install
@@ -13,7 +15,7 @@ Use at your own risk.
* [boost](http://www.boost.org/) (tested: 1.63.0)
* [libcurl](https://curl.haxx.se/) (tested: 7.58.0)
* [curlpp](http://www.curlpp.org/) (tested: 0.8.1)
- * [mastodon-cpp](https://github.com/tastytea/mastodon-cpp) (at least: 0.2.13)
+ * [mastodon-cpp](https://github.com/tastytea/mastodon-cpp) (at least: 0.6.4)
## Get sourcecode
@@ -28,9 +30,7 @@ Use at your own risk.
cmake ..
make
-cmake options:
-
- * `-DCMAKE_BUILD_TYPE=Debug` for a debug build
+## Install
Install with `make install`.
@@ -53,7 +53,16 @@ ${HOME}/.config/mastorss/config-example.json
"feedurl": "https:\/\/example.com\/feed.rss",
"access_token": "123abc",
"max_size": "400",
- "last_entry": "Example\n\nThis is an example.\n\nhttps:\/\/example.com\/12345.html\n\n#bot"
+ "skip":
+ [
+ "If the entry starts with this, skip it",
+ "Skip me too!"
+ ],
+ "fixes":
+ [
+ "delete this",
+ "[Rr]ead more(\\.{3}|…)"
+ ]
}
}
diff --git a/src/config.cpp b/src/config.cpp
index 5ef16af..85a6bae 100644
--- a/src/config.cpp
+++ b/src/config.cpp
@@ -31,7 +31,7 @@ using std::cerr;
using std::cin;
using std::string;
-std::uint16_t read_config(pt::ptree &config, const string &profile, string &instance, string &access_token, string &feedurl)
+std::uint16_t read_config(string &instance, string &access_token, string &feedurl)
{
bool config_changed = false;
@@ -63,8 +63,7 @@ std::uint16_t read_config(pt::ptree &config, const string &profile, string &inst
cout << "No access token found.\n";
string client_id, client_secret, url;
Mastodon::API masto(instance, "");
- std::uint16_t ret = masto.register_app1(instance,
- "mastorss",
+ std::uint16_t ret = masto.register_app1("mastorss",
"urn:ietf:wg:oauth:2.0:oob",
"write",
"https://github.com/tastytea/mastorss",
@@ -78,8 +77,7 @@ std::uint16_t read_config(pt::ptree &config, const string &profile, string &inst
cout << "Insert code: ";
cin >> code;
- masto.register_app2(instance,
- client_id,
+ masto.register_app2(client_id,
client_secret,
"urn:ietf:wg:oauth:2.0:oob",
code,
diff --git a/src/mastorss.cpp b/src/mastorss.cpp
index ef39eab..8ebbc53 100644
--- a/src/mastorss.cpp
+++ b/src/mastorss.cpp
@@ -39,6 +39,8 @@ using std::string;
// Initialize global variables
std::uint16_t max_size = 500;
const string filepath = string(getenv("HOME")) + "/.config/mastorss/";
+pt::ptree config;
+std::string profile;
int main(int argc, char *argv[])
{
@@ -53,29 +55,29 @@ int main(int argc, char *argv[])
max_size = std::stoi(argv[2]);
}
- pt::ptree config;
string instance = "";
string access_token = "";
string feedurl = "";
- const string profile = argv[1];
+ profile = argv[1];
std::uint16_t ret;
-
- read_config(config, profile, instance, access_token, feedurl);
- curlpp_init();
-
string answer;
- string last_entry = config.get(profile + ".last_entry", "");
std::vector entries;
+ read_config(instance, access_token, feedurl);
+ curlpp_init();
+
ret = http_get(feedurl, answer, "mastorss/" + (string)global::version);
if (ret != 0)
{
return ret;
}
- entries = parse_website(profile, answer);
+ entries = parse_website(answer);
+ string last_entry = config.get(profile + ".last_entry", "");
if (last_entry.empty())
{
+ // If no last_entry is stored in the config file,
+ // make last_entry the second-newest entry.
last_entry = entries.at(1);
}
config.put(profile + ".last_entry", entries.front());
@@ -85,6 +87,8 @@ int main(int argc, char *argv[])
{
if (!new_content && (*rit).compare(last_entry) == 0)
{
+ // If the last entry is found in entries,
+ // start tooting in the next loop.
new_content = true;
continue;
}
@@ -113,14 +117,7 @@ int main(int argc, char *argv[])
std::this_thread::sleep_for(std::chrono::seconds(2));
}
- // If the last entry is not in the current feed, set the oldest item as last entry
- // Could lead to spamming if an item gets deleted or changed.
- // Update: It DID couse spamming :-(
- // TODO: Think of something better
- // if (!new_content)
- // {
- // config.put(profile + ".last_entry", entries.at(entries.size() - 1));
- // }
+ // Write the new last_entry only if no error happened.
pt::write_json(filepath + "config-" + profile + ".json", config);
return 0;
diff --git a/src/mastorss.hpp b/src/mastorss.hpp
index ea77c8b..349860d 100644
--- a/src/mastorss.hpp
+++ b/src/mastorss.hpp
@@ -11,10 +11,12 @@ using std::string;
extern std::uint16_t max_size;
extern const string filepath;
+extern pt::ptree config;
+extern std::string profile;
-std::uint16_t read_config(pt::ptree &config, const string &profile, string &instance, string &access_token, string &feedurl);
+std::uint16_t read_config(string &instance, string &access_token, string &feedurl);
-std::vector parse_website(const string &profile, const string &xml);
+std::vector parse_website(const string &xml);
void unescape_html(const string &str);
void individual_fixes(string &str);
diff --git a/src/parse.cpp b/src/parse.cpp
index 893f66f..298456d 100644
--- a/src/parse.cpp
+++ b/src/parse.cpp
@@ -64,7 +64,7 @@ void unescape_html(string &str)
str = std::regex_replace(str, reapos, "\'");
}
-std::vector parse_website(const string &profile, const string &xml)
+std::vector parse_website(const string &xml)
{
pt::ptree json;
std::vector watchwords;
@@ -83,6 +83,7 @@ std::vector parse_website(const string &profile, const string &xml)
try
{
+ // Read profile-specific hashtags or fail silently
for (const pt::ptree::value_type &value : json.get_child(profile + ".tags"))
{
watchwords.push_back(value.second.data());
@@ -94,6 +95,7 @@ std::vector parse_website(const string &profile, const string &xml)
}
try
{
+ // Read global hashtags or fail silently
for (const pt::ptree::value_type &value : json.get_child("global.tags"))
{
watchwords.push_back(value.second.data());
@@ -120,14 +122,29 @@ std::vector parse_website(const string &profile, const string &xml)
string desc = v.second.get_child("description").data();
string str = title + "\n\n" + desc;
- // ANF News puts this always on top, causing us to think it's new
- if (title.compare(0, 35, "Newsticker zu den Angriffen auf Efr") == 0)
+ try
{
- continue;
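+ // Skip entries beginning with this text. NOTE(review): the `continue` below only advances this inner loop over the skip list, so the entry itself is not skipped.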
+ for (const pt::ptree::value_type &v : config.get_child(profile + ".skip"))
+ {
+ const string skip = v.second.data();
+ if (!skip.empty())
+ {
+ if (title.compare(0, skip.length(), skip) == 0)
+ {
+ continue;
+ }
+ }
+ }
+ }
+ catch (const std::exception &e)
+ {
+ // Node not found, no problem
}
unescape_html(str);
+ // Try to turn the HTML into human-readable text
std::regex reparagraph("
");
std::regex recdata1("");
@@ -166,16 +183,19 @@ std::vector parse_website(const string &profile, const string &xml)
return ret;
}
+// Read regular expressions from the config file and delete all matches.
void individual_fixes(string &str)
{
- // de.indymedia.org articles sometimes have CSS in the description
- std::regex reindyfuckup("\\/\\* Style Definitions \\*\\/[.[:space:]]*$");
- // Direkte Action closing
- std::regex redaclosing("Der Beitrag .* erschien zuerst auf Direkte Aktion.");
- // GG/BO closing
- std::regex reggboclosing("Die von den einzelnen AutorInnen .*$");
-
- str = std::regex_replace(str, reindyfuckup, "");
- str = std::regex_replace(str, redaclosing, "");
- str = std::regex_replace(str, reggboclosing, "");
+ try
+ {
+ for (const pt::ptree::value_type &v : config.get_child(profile + ".fixes"))
+ {
+ std::regex refix(v.second.data());
+ str = std::regex_replace(str, refix, "");
+ }
+ }
+ catch (const std::exception &e)
+ {
+ // Node not found or a configured regex is invalid (std::regex_error); remaining fixes are not applied
+ }
}