refactoring

This commit is contained in:
tastytea 2018-03-15 13:20:26 +01:00
parent dd47092bbb
commit 7ef6c5fad7
Signed by: tastytea
GPG Key ID: 59346E0EA35C67E5
6 changed files with 72 additions and 46 deletions

View File

@ -1,6 +1,6 @@
cmake_minimum_required (VERSION 3.7)
project (mastorss
VERSION 0.3.7
VERSION 0.4.0
LANGUAGES CXX
)

View File

@ -1,7 +1,9 @@
**mastorss** dumps RSS feeds into a mastodon account.
It is hacked together and generally only extended/fixed when it fails.
Do NOT assume it follows any standards.
Use at your own risk.
Supports RSS 2.0 but not RSS 0.92. Does not support Atom at the moment.
`<item>`s in feeds must have `<link>`, `<title>` and `<description>`.
The documentation is far from complete, sorry.
# Install
@ -13,7 +15,7 @@ Use at your own risk.
* [boost](http://www.boost.org/) (tested: 1.63.0)
* [libcurl](https://curl.haxx.se/) (tested: 7.58.0)
* [curlpp](http://www.curlpp.org/) (tested: 0.8.1)
* [mastodon-cpp](https://github.com/tastytea/mastodon-cpp) (at least: 0.2.13)
* [mastodon-cpp](https://github.com/tastytea/mastodon-cpp) (at least: 0.6.4)
## Get sourcecode
@ -28,9 +30,7 @@ Use at your own risk.
cmake ..
make
cmake options:
* `-DCMAKE_BUILD_TYPE=Debug` for a debug build
## Install
Install with `make install`.
@ -53,7 +53,16 @@ ${HOME}/.config/mastorss/config-example.json
"feedurl": "https:\/\/example.com\/feed.rss",
"access_token": "123abc",
"max_size": "400",
"last_entry": "Example\n\nThis is an example.\n\nhttps:\/\/example.com\/12345.html\n\n#bot",
"skip":
[
"If the entry starts with this, skip it",
"Skip me too!"
],
"fixes":
[
"delete this",
"[Rr]ead more(\\.{3}|…)"
]
}
}

View File

@ -31,7 +31,7 @@ using std::cerr;
using std::cin;
using std::string;
std::uint16_t read_config(pt::ptree &config, const string &profile, string &instance, string &access_token, string &feedurl)
std::uint16_t read_config(string &instance, string &access_token, string &feedurl)
{
bool config_changed = false;
@ -63,8 +63,7 @@ std::uint16_t read_config(pt::ptree &config, const string &profile, string &inst
cout << "No access token found.\n";
string client_id, client_secret, url;
Mastodon::API masto(instance, "");
std::uint16_t ret = masto.register_app1(instance,
"mastorss",
std::uint16_t ret = masto.register_app1("mastorss",
"urn:ietf:wg:oauth:2.0:oob",
"write",
"https://github.com/tastytea/mastorss",
@ -78,8 +77,7 @@ std::uint16_t read_config(pt::ptree &config, const string &profile, string &inst
cout << "Insert code: ";
cin >> code;
masto.register_app2(instance,
client_id,
masto.register_app2(client_id,
client_secret,
"urn:ietf:wg:oauth:2.0:oob",
code,

View File

@ -39,6 +39,8 @@ using std::string;
// Initialize global variables
std::uint16_t max_size = 500;
const string filepath = string(getenv("HOME")) + "/.config/mastorss/";
pt::ptree config;
std::string profile;
int main(int argc, char *argv[])
{
@ -53,29 +55,29 @@ int main(int argc, char *argv[])
max_size = std::stoi(argv[2]);
}
pt::ptree config;
string instance = "";
string access_token = "";
string feedurl = "";
const string profile = argv[1];
profile = argv[1];
std::uint16_t ret;
read_config(config, profile, instance, access_token, feedurl);
curlpp_init();
string answer;
string last_entry = config.get(profile + ".last_entry", "");
std::vector<string> entries;
read_config(instance, access_token, feedurl);
curlpp_init();
ret = http_get(feedurl, answer, "mastorss/" + (string)global::version);
if (ret != 0)
{
return ret;
}
entries = parse_website(profile, answer);
entries = parse_website(answer);
string last_entry = config.get(profile + ".last_entry", "");
if (last_entry.empty())
{
// If no last_entry is stored in the config file,
// make last_entry the second-newest entry.
last_entry = entries.at(1);
}
config.put(profile + ".last_entry", entries.front());
@ -85,6 +87,8 @@ int main(int argc, char *argv[])
{
if (!new_content && (*rit).compare(last_entry) == 0)
{
// If the last entry is found in entries,
// start tooting in the next loop.
new_content = true;
continue;
}
@ -113,14 +117,7 @@ int main(int argc, char *argv[])
std::this_thread::sleep_for(std::chrono::seconds(2));
}
// If the last entry is not in the current feed, set the oldest item as last entry
// Could lead to spamming if an item gets deleted or changed.
// Update: It DID couse spamming :-(
// TODO: Think of something better
// if (!new_content)
// {
// config.put(profile + ".last_entry", entries.at(entries.size() - 1));
// }
// Write the new last_entry only if no error happened.
pt::write_json(filepath + "config-" + profile + ".json", config);
return 0;

View File

@ -11,10 +11,12 @@ using std::string;
extern std::uint16_t max_size;
extern const string filepath;
extern pt::ptree config;
extern std::string profile;
std::uint16_t read_config(pt::ptree &config, const string &profile, string &instance, string &access_token, string &feedurl);
std::uint16_t read_config(string &instance, string &access_token, string &feedurl);
std::vector<string> parse_website(const string &profile, const string &xml);
std::vector<string> parse_website(const string &xml);
// Unescapes HTML in str in place. The parameter is a mutable reference
// because the definition assigns the result back to str; a const reference
// here would declare a different overload that is never defined.
void unescape_html(string &str);
void individual_fixes(string &str);

View File

@ -64,7 +64,7 @@ void unescape_html(string &str)
str = std::regex_replace(str, reapos, "\'");
}
std::vector<string> parse_website(const string &profile, const string &xml)
std::vector<string> parse_website(const string &xml)
{
pt::ptree json;
std::vector<string> watchwords;
@ -83,6 +83,7 @@ std::vector<string> parse_website(const string &profile, const string &xml)
try
{
// Read profile-specific hashtags or fail silently
for (const pt::ptree::value_type &value : json.get_child(profile + ".tags"))
{
watchwords.push_back(value.second.data());
@ -94,6 +95,7 @@ std::vector<string> parse_website(const string &profile, const string &xml)
}
try
{
// Read global hashtags or fail silently
for (const pt::ptree::value_type &value : json.get_child("global.tags"))
{
watchwords.push_back(value.second.data());
@ -120,14 +122,29 @@ std::vector<string> parse_website(const string &profile, const string &xml)
string desc = v.second.get_child("description").data();
string str = title + "\n\n" + desc;
// ANF News puts this always on top, causing us to think it's new
if (title.compare(0, 35, "Newsticker zu den Angriffen auf Efr") == 0)
try
{
continue;
// Skip entries beginning with this text
for (const pt::ptree::value_type &v : config.get_child(profile + ".skip"))
{
const string skip = v.second.data();
if (!skip.empty())
{
if (title.compare(0, skip.length(), skip) == 0)
{
continue;
}
}
}
}
catch (const std::exception &e)
{
// Node not found, no problem
}
unescape_html(str);
// Try to turn the HTML into human-readable text
std::regex reparagraph("</p><p>");
std::regex recdata1("<!\\[CDATA\\[");
std::regex recdata2("\\]\\]>");
@ -166,16 +183,19 @@ std::vector<string> parse_website(const string &profile, const string &xml)
return ret;
}
// Delete every match of a set of regular expressions from str, in place.
// As listed here, three hard-coded, feed-specific patterns are removed first,
// then every pattern found under "<profile>.fixes" in the global config tree.
// NOTE(review): this listing appears to come from a rendered diff in which
// removed and added lines are shown together; the hard-coded patterns may
// belong to the pre-change version only - confirm against the real file.
void individual_fixes(string &str)
{
// de.indymedia.org articles sometimes have CSS in the description
// NOTE(review): inside a bracket expression '.' is a literal dot, so
// "[.[:space:]]*" matches only dots and whitespace up to the end of the
// input, not arbitrary characters - confirm this is intended.
std::regex reindyfuckup("\\/\\* Style Definitions \\*\\/[.[:space:]]*$");
// Direkte Aktion closing
std::regex redaclosing("Der Beitrag .* erschien zuerst auf Direkte Aktion.");
// GG/BO closing
std::regex reggboclosing("Die von den einzelnen AutorInnen .*$");
// Replace each match with the empty string, i.e. delete it.
str = std::regex_replace(str, reindyfuckup, "");
str = std::regex_replace(str, redaclosing, "");
str = std::regex_replace(str, reggboclosing, "");
try
{
// Each child of "<profile>.fixes" is treated as one regular expression.
for (const pt::ptree::value_type &v : config.get_child(profile + ".fixes"))
{
std::regex refix(v.second.data());
str = std::regex_replace(str, refix, "");
}
}
catch (const std::exception &e)
{
// Node not found, no problem
// NOTE(review): this handler also catches std::regex_error thrown when a
// configured pattern is invalid; in that case the remaining fixes are
// skipped without any message.
}
}