refactoring
This commit is contained in:
parent
dd47092bbb
commit
7ef6c5fad7
|
@ -1,6 +1,6 @@
|
|||
cmake_minimum_required (VERSION 3.7)
|
||||
project (mastorss
|
||||
VERSION 0.3.7
|
||||
VERSION 0.4.0
|
||||
LANGUAGES CXX
|
||||
)
|
||||
|
||||
|
|
25
README.md
25
README.md
|
@ -1,7 +1,9 @@
|
|||
**mastorss** dumps RSS feeds into a mastodon account.
|
||||
It is hacked together and generally only extended/fixed when it fails.
|
||||
Do NOT assume it follows any standards.
|
||||
Use at your own risk.
|
||||
Supports RSS 2.0 but not RSS 0.92. Does not support Atom at the moment.
|
||||
|
||||
`<item>`s in feeds must have `<link>`, `<title>` and `<description>`.
|
||||
|
||||
The documentation is far from complete, sorry.
|
||||
|
||||
# Install
|
||||
|
||||
|
@ -13,7 +15,7 @@ Use at your own risk.
|
|||
* [boost](http://www.boost.org/) (tested: 1.63.0)
|
||||
* [libcurl](https://curl.haxx.se/) (tested: 7.58.0)
|
||||
* [curlpp](http://www.curlpp.org/) (tested: 0.8.1)
|
||||
* [mastodon-cpp](https://github.com/tastytea/mastodon-cpp) (at least: 0.2.13)
|
||||
* [mastodon-cpp](https://github.com/tastytea/mastodon-cpp) (at least: 0.6.4)
|
||||
|
||||
## Get sourcecode
|
||||
|
||||
|
@ -28,9 +30,7 @@ Use at your own risk.
|
|||
cmake ..
|
||||
make
|
||||
|
||||
cmake options:
|
||||
|
||||
* `-DCMAKE_BUILD_TYPE=Debug` for a debug build
|
||||
## Install
|
||||
|
||||
Install with `make install`.
|
||||
|
||||
|
@ -53,7 +53,16 @@ ${HOME}/.config/mastorss/config-example.json
|
|||
"feedurl": "https:\/\/example.com\/feed.rss",
|
||||
"access_token": "123abc",
|
||||
"max_size": "400",
|
||||
"last_entry": "Example\n\nThis is an example.\n\nhttps:\/\/example.com\/12345.html\n\n#bot",
|
||||
"skip":
|
||||
[
|
||||
"If the entry starts with this, skip it",
|
||||
"Skip me too!"
|
||||
],
|
||||
"fixes":
|
||||
[
|
||||
"delete this",
|
||||
"[Rr]ead more(\\.{3}|…)"
|
||||
]
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -31,7 +31,7 @@ using std::cerr;
|
|||
using std::cin;
|
||||
using std::string;
|
||||
|
||||
std::uint16_t read_config(pt::ptree &config, const string &profile, string &instance, string &access_token, string &feedurl)
|
||||
std::uint16_t read_config(string &instance, string &access_token, string &feedurl)
|
||||
{
|
||||
bool config_changed = false;
|
||||
|
||||
|
@ -63,8 +63,7 @@ std::uint16_t read_config(pt::ptree &config, const string &profile, string &inst
|
|||
cout << "No access token found.\n";
|
||||
string client_id, client_secret, url;
|
||||
Mastodon::API masto(instance, "");
|
||||
std::uint16_t ret = masto.register_app1(instance,
|
||||
"mastorss",
|
||||
std::uint16_t ret = masto.register_app1("mastorss",
|
||||
"urn:ietf:wg:oauth:2.0:oob",
|
||||
"write",
|
||||
"https://github.com/tastytea/mastorss",
|
||||
|
@ -78,8 +77,7 @@ std::uint16_t read_config(pt::ptree &config, const string &profile, string &inst
|
|||
cout << "Insert code: ";
|
||||
cin >> code;
|
||||
|
||||
masto.register_app2(instance,
|
||||
client_id,
|
||||
masto.register_app2(client_id,
|
||||
client_secret,
|
||||
"urn:ietf:wg:oauth:2.0:oob",
|
||||
code,
|
||||
|
|
|
@ -39,6 +39,8 @@ using std::string;
|
|||
// Initialize global variables
|
||||
std::uint16_t max_size = 500;
|
||||
const string filepath = string(getenv("HOME")) + "/.config/mastorss/";
|
||||
pt::ptree config;
|
||||
std::string profile;
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
|
@ -53,29 +55,29 @@ int main(int argc, char *argv[])
|
|||
max_size = std::stoi(argv[2]);
|
||||
}
|
||||
|
||||
pt::ptree config;
|
||||
string instance = "";
|
||||
string access_token = "";
|
||||
string feedurl = "";
|
||||
const string profile = argv[1];
|
||||
profile = argv[1];
|
||||
std::uint16_t ret;
|
||||
|
||||
read_config(config, profile, instance, access_token, feedurl);
|
||||
curlpp_init();
|
||||
|
||||
string answer;
|
||||
string last_entry = config.get(profile + ".last_entry", "");
|
||||
std::vector<string> entries;
|
||||
|
||||
read_config(instance, access_token, feedurl);
|
||||
curlpp_init();
|
||||
|
||||
ret = http_get(feedurl, answer, "mastorss/" + (string)global::version);
|
||||
if (ret != 0)
|
||||
{
|
||||
return ret;
|
||||
}
|
||||
entries = parse_website(profile, answer);
|
||||
entries = parse_website(answer);
|
||||
|
||||
string last_entry = config.get(profile + ".last_entry", "");
|
||||
if (last_entry.empty())
|
||||
{
|
||||
// If no last_entry is stored in the config file,
|
||||
// make last_entry the second-newest entry.
|
||||
last_entry = entries.at(1);
|
||||
}
|
||||
config.put(profile + ".last_entry", entries.front());
|
||||
|
@ -85,6 +87,8 @@ int main(int argc, char *argv[])
|
|||
{
|
||||
if (!new_content && (*rit).compare(last_entry) == 0)
|
||||
{
|
||||
// If the last entry is found in entries,
|
||||
// start tooting in the next loop.
|
||||
new_content = true;
|
||||
continue;
|
||||
}
|
||||
|
@ -113,14 +117,7 @@ int main(int argc, char *argv[])
|
|||
std::this_thread::sleep_for(std::chrono::seconds(2));
|
||||
}
|
||||
|
||||
// If the last entry is not in the current feed, set the oldest item as last entry
|
||||
// Could lead to spamming if an item gets deleted or changed.
|
||||
// Update: It DID cause spamming :-(
|
||||
// TODO: Think of something better
|
||||
// if (!new_content)
|
||||
// {
|
||||
// config.put(profile + ".last_entry", entries.at(entries.size() - 1));
|
||||
// }
|
||||
// Write the new last_entry only if no error happened.
|
||||
pt::write_json(filepath + "config-" + profile + ".json", config);
|
||||
|
||||
return 0;
|
||||
|
|
|
@ -11,10 +11,12 @@ using std::string;
|
|||
|
||||
extern std::uint16_t max_size;
|
||||
extern const string filepath;
|
||||
extern pt::ptree config;
|
||||
extern std::string profile;
|
||||
|
||||
std::uint16_t read_config(pt::ptree &config, const string &profile, string &instance, string &access_token, string &feedurl);
|
||||
std::uint16_t read_config(string &instance, string &access_token, string &feedurl);
|
||||
|
||||
std::vector<string> parse_website(const string &profile, const string &xml);
|
||||
std::vector<string> parse_website(const string &xml);
|
||||
void unescape_html(const string &str);
|
||||
void individual_fixes(string &str);
|
||||
|
||||
|
|
|
@ -64,7 +64,7 @@ void unescape_html(string &str)
|
|||
str = std::regex_replace(str, reapos, "\'");
|
||||
}
|
||||
|
||||
std::vector<string> parse_website(const string &profile, const string &xml)
|
||||
std::vector<string> parse_website(const string &xml)
|
||||
{
|
||||
pt::ptree json;
|
||||
std::vector<string> watchwords;
|
||||
|
@ -83,6 +83,7 @@ std::vector<string> parse_website(const string &profile, const string &xml)
|
|||
|
||||
try
|
||||
{
|
||||
// Read profile-specific hashtags or fail silently
|
||||
for (const pt::ptree::value_type &value : json.get_child(profile + ".tags"))
|
||||
{
|
||||
watchwords.push_back(value.second.data());
|
||||
|
@ -94,6 +95,7 @@ std::vector<string> parse_website(const string &profile, const string &xml)
|
|||
}
|
||||
try
|
||||
{
|
||||
// Read global hashtags or fail silently
|
||||
for (const pt::ptree::value_type &value : json.get_child("global.tags"))
|
||||
{
|
||||
watchwords.push_back(value.second.data());
|
||||
|
@ -120,14 +122,29 @@ std::vector<string> parse_website(const string &profile, const string &xml)
|
|||
string desc = v.second.get_child("description").data();
|
||||
string str = title + "\n\n" + desc;
|
||||
|
||||
// ANF News puts this always on top, causing us to think it's new
|
||||
if (title.compare(0, 35, "Newsticker zu den Angriffen auf Efr") == 0)
|
||||
try
|
||||
{
|
||||
continue;
|
||||
// Skip entries beginning with this text
|
||||
for (const pt::ptree::value_type &v : config.get_child(profile + ".skip"))
|
||||
{
|
||||
const string skip = v.second.data();
|
||||
if (!skip.empty())
|
||||
{
|
||||
if (title.compare(0, skip.length(), skip) == 0)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
catch (const std::exception &e)
|
||||
{
|
||||
// Node not found, no problem
|
||||
}
|
||||
|
||||
unescape_html(str);
|
||||
|
||||
// Try to turn the HTML into human-readable text
|
||||
std::regex reparagraph("</p><p>");
|
||||
std::regex recdata1("<!\\[CDATA\\[");
|
||||
std::regex recdata2("\\]\\]>");
|
||||
|
@ -166,16 +183,19 @@ std::vector<string> parse_website(const string &profile, const string &xml)
|
|||
return ret;
|
||||
}
|
||||
|
||||
// Read regular expressions from the config file and delete all matches.
|
||||
void individual_fixes(string &str)
|
||||
{
|
||||
// de.indymedia.org articles sometimes have CSS in the description
|
||||
std::regex reindyfuckup("\\/\\* Style Definitions \\*\\/[.[:space:]]*$");
|
||||
// Direkte Aktion closing
|
||||
std::regex redaclosing("Der Beitrag .* erschien zuerst auf Direkte Aktion.");
|
||||
// GG/BO closing
|
||||
std::regex reggboclosing("Die von den einzelnen AutorInnen .*$");
|
||||
|
||||
str = std::regex_replace(str, reindyfuckup, "");
|
||||
str = std::regex_replace(str, redaclosing, "");
|
||||
str = std::regex_replace(str, reggboclosing, "");
|
||||
try
|
||||
{
|
||||
for (const pt::ptree::value_type &v : config.get_child(profile + ".fixes"))
|
||||
{
|
||||
std::regex refix(v.second.data());
|
||||
str = std::regex_replace(str, refix, "");
|
||||
}
|
||||
}
|
||||
catch (const std::exception &e)
|
||||
{
|
||||
// Node not found, no problem
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue
Block a user