diff --git a/CMakeLists.txt b/CMakeLists.txt index 0594b4d..a3c9a1f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,6 +1,6 @@ cmake_minimum_required (VERSION 3.7) project (mastorss - VERSION 0.3.5 + VERSION 0.3.6 LANGUAGES CXX ) diff --git a/src/mastorss.hpp b/src/mastorss.hpp index 584d95d..ea77c8b 100644 --- a/src/mastorss.hpp +++ b/src/mastorss.hpp @@ -16,6 +16,7 @@ std::uint16_t read_config(pt::ptree &config, const string &profile, string &inst std::vector parse_website(const string &profile, const string &xml); void unescape_html(const string &str); +void individual_fixes(string &str); const std::uint16_t http_get(const string &feedurl, string &answer, const string &useragent = ""); diff --git a/src/parse.cpp b/src/parse.cpp index 438ce4f..893f66f 100644 --- a/src/parse.cpp +++ b/src/parse.cpp @@ -133,20 +133,12 @@ std::vector parse_website(const string &profile, const string &xml) std::regex recdata2("\\]\\]>"); std::regex restrip("<[^>]*>"); - // de.indymedia.org articles sometimes have CSS in the description - std::regex reindyfuckup("\\/\\* Style Definitions \\*\\/[.[:space:]]*$"); - // Direkte Action closing - std::regex redaclosing("Der Beitrag .* erschien zuerst auf Direkte Aktion."); - // GG/BO closing - std::regex reggboclosing("Die von den einzelnen AutorInnen .*$"); + individual_fixes(str); str = std::regex_replace(str, reparagraph, "\n\n"); str = std::regex_replace(str, recdata1, ""); str = std::regex_replace(str, recdata2, ""); str = std::regex_replace(str, restrip, ""); - str = std::regex_replace(str, reindyfuckup, ""); - str = std::regex_replace(str, redaclosing, ""); - str = std::regex_replace(str, reggboclosing, ""); str = std::regex_replace(str, std::regex("[\\r\\n] +[\\r\\n]"), "\n\n"); // remove space between newlines str = std::regex_replace(str, std::regex("[\\r\\n]{3,}"), "\n"); // remove excess newlines @@ -173,3 +165,17 @@ std::vector parse_website(const string &profile, const string &xml) return ret; } + +void 
// Applies feed-specific clean-ups to an item description, in place.
//
// Three fixes are applied, in this order:
//   1. Everything from the marker "/* Style Definitions */" to the end of
//      the string is removed (CSS leaking into de.indymedia.org descriptions).
//   2. The closing line "Der Beitrag ... erschien zuerst auf Direkte Aktion."
//      is removed.
//   3. The closing text starting with "Die von den einzelnen AutorInnen " is
//      removed, when the pattern below matches.
//
// str: text to clean; modified in place. Text without any of the markers is
//      left unchanged.
void individual_fixes(std::string &str)
{
    // de.indymedia.org articles sometimes have CSS in the description.
    // The former regex used "[.[:space:]]*$" after the marker; inside a
    // bracket expression '.' is a literal dot, so it only matched dots and
    // whitespace and never removed real CSS. A plain search + erase does what
    // was intended and avoids running a regex over a long tail.
    const std::string::size_type csspos = str.find("/* Style Definitions */");
    if (csspos != std::string::npos)
    {
        str.erase(csspos);
    }

    // Compiled once instead of on every call; std::regex construction is
    // expensive.
    // Direkte Aktion closing
    static const std::regex redaclosing("Der Beitrag .* erschien zuerst auf Direkte Aktion.");
    // GG/BO closing
    static const std::regex reggboclosing("Die von den einzelnen AutorInnen .*$");

    str = std::regex_replace(str, redaclosing, "");
    str = std::regex_replace(str, reggboclosing, "");
}