started to refactor a bit

This commit is contained in:
tastytea 2018-03-11 15:40:25 +01:00
parent 696ea1ff57
commit 8b0ceaf180
Signed by: tastytea
GPG Key ID: 59346E0EA35C67E5
3 changed files with 17 additions and 10 deletions

View File

@ -1,6 +1,6 @@
cmake_minimum_required (VERSION 3.7)
project (mastorss
VERSION 0.3.5
VERSION 0.3.6
LANGUAGES CXX
)

View File

@ -16,6 +16,7 @@ std::uint16_t read_config(pt::ptree &config, const string &profile, string &inst
// Produces a list of strings from the document text `xml` for the given
// `profile`. The definition strips markup from the text with regular
// expressions and runs individual_fixes() on it.
std::vector<string> parse_website(const string &profile, const string &xml);
// NOTE(review): `str` is taken by const reference and the function returns
// void, so this signature offers no way to hand an unescaped result back to
// the caller -- confirm against the definition whether that is intended.
void unescape_html(const string &str);
// Applies site-specific clean-ups (de.indymedia.org, Direkte Aktion, GG/BO)
// to `str` in place.
void individual_fixes(string &str);
// Presumably requests `feedurl`, stores the response body in `answer` and
// returns a status/error code -- TODO confirm against the definition.
// `useragent` defaults to an empty string.
// NOTE(review): the top-level `const` on the by-value return type has no
// effect for callers.
const std::uint16_t http_get(const string &feedurl,
string &answer, const string &useragent = "");

View File

@ -133,20 +133,12 @@ std::vector<string> parse_website(const string &profile, const string &xml)
std::regex recdata2("\\]\\]>");
std::regex restrip("<[^>]*>");
// de.indymedia.org articles sometimes have CSS in the description
std::regex reindyfuckup("\\/\\* Style Definitions \\*\\/[.[:space:]]*$");
// Direkte Action closing
std::regex redaclosing("Der Beitrag .* erschien zuerst auf Direkte Aktion.");
// GG/BO closing
std::regex reggboclosing("Die von den einzelnen AutorInnen .*$");
individual_fixes(str);
str = std::regex_replace(str, reparagraph, "\n\n");
str = std::regex_replace(str, recdata1, "");
str = std::regex_replace(str, recdata2, "");
str = std::regex_replace(str, restrip, "");
str = std::regex_replace(str, reindyfuckup, "");
str = std::regex_replace(str, redaclosing, "");
str = std::regex_replace(str, reggboclosing, "");
str = std::regex_replace(str, std::regex("[\\r\\n] +[\\r\\n]"), "\n\n"); // remove space between newlines
str = std::regex_replace(str, std::regex("[\\r\\n]{3,}"), "\n"); // remove excess newlines
@ -173,3 +165,17 @@ std::vector<string> parse_website(const string &profile, const string &xml)
return ret;
}
// Applies feed-specific clean-ups to an article description, in place.
//
// str: the text to clean; it is modified directly.
//
// Three independent fixes are applied, in this order:
//   1. de.indymedia.org: everything from the "/* Style Definitions */"
//      marker to the end of the text is dropped (stray CSS).
//   2. Direkte Aktion: the closing sentence
//      "Der Beitrag ... erschien zuerst auf Direkte Aktion." is removed.
//   3. GG/BO: the closing text matching
//      "Die von den einzelnen AutorInnen .*$" is removed.
void individual_fixes(string &str)
{
    // Compiling a std::regex is expensive, so each pattern is built only once.
    // Direkte Aktion closing; the final '.' is escaped so that it matches
    // only the literal full stop instead of any character.
    static const std::regex redaclosing("Der Beitrag .* erschien zuerst auf Direkte Aktion\\.");
    // GG/BO closing
    static const std::regex reggboclosing("Die von den einzelnen AutorInnen .*$");

    // de.indymedia.org articles sometimes have CSS in the description.
    // The former pattern ended in "[.[:space:]]*$"; inside a bracket
    // expression '.' is a literal dot, so it matched only dots and whitespace
    // and never removed actual CSS. Cutting at the marker needs no regex.
    const auto csspos = str.find("/* Style Definitions */");
    if (csspos != string::npos)
    {
        str.erase(csspos);
    }

    str = std::regex_replace(str, redaclosing, "");
    str = std::regex_replace(str, reggboclosing, "");
}