started to refactor a bit
This commit is contained in:
parent
696ea1ff57
commit
8b0ceaf180
|
@ -1,6 +1,6 @@
|
|||
cmake_minimum_required (VERSION 3.7)
|
||||
project (mastorss
|
||||
VERSION 0.3.5
|
||||
VERSION 0.3.6
|
||||
LANGUAGES CXX
|
||||
)
|
||||
|
||||
|
|
|
@ -16,6 +16,7 @@ std::uint16_t read_config(pt::ptree &config, const string &profile, string &inst
|
|||
|
||||
// Takes a profile and an XML document as strings and returns a vector of
// strings. NOTE(review): presumably one entry per feed item -- confirm against
// the definition, which is only partly visible here.
std::vector<string> parse_website(const string &profile, const string &xml);
|
||||
// NOTE(review): the argument is a const reference and nothing is returned, so
// as declared this cannot hand an unescaped string back to the caller --
// confirm the intended signature.
void unescape_html(const string &str);
|
||||
// Applies source-specific clean-ups to `str` in place: removes the
// de.indymedia.org CSS remnant and the "Direkte Aktion" and GG/BO closing
// lines.
void individual_fixes(string &str);
|
||||
|
||||
// Fetches `feedurl` and stores the response in `answer`; `useragent` defaults
// to an empty string. NOTE(review): the return value is presumably a status or
// error code -- confirm. The top-level `const` on a by-value return type has
// no effect for callers.
const std::uint16_t http_get(const string &feedurl,
|
||||
string &answer, const string &useragent = "");
|
||||
|
|
|
@ -133,20 +133,12 @@ std::vector<string> parse_website(const string &profile, const string &xml)
|
|||
std::regex recdata2("\\]\\]>");
|
||||
std::regex restrip("<[^>]*>");
|
||||
|
||||
// de.indymedia.org articles sometimes have CSS in the description
|
||||
std::regex reindyfuckup("\\/\\* Style Definitions \\*\\/[.[:space:]]*$");
|
||||
// Direkte Action closing
|
||||
std::regex redaclosing("Der Beitrag .* erschien zuerst auf Direkte Aktion.");
|
||||
// GG/BO closing
|
||||
std::regex reggboclosing("Die von den einzelnen AutorInnen .*$");
|
||||
individual_fixes(str);
|
||||
|
||||
str = std::regex_replace(str, reparagraph, "\n\n");
|
||||
str = std::regex_replace(str, recdata1, "");
|
||||
str = std::regex_replace(str, recdata2, "");
|
||||
str = std::regex_replace(str, restrip, "");
|
||||
str = std::regex_replace(str, reindyfuckup, "");
|
||||
str = std::regex_replace(str, redaclosing, "");
|
||||
str = std::regex_replace(str, reggboclosing, "");
|
||||
str = std::regex_replace(str, std::regex("[\\r\\n] +[\\r\\n]"), "\n\n"); // remove space between newlines
|
||||
str = std::regex_replace(str, std::regex("[\\r\\n]{3,}"), "\n"); // remove excess newlines
|
||||
|
||||
|
@ -173,3 +165,17 @@ std::vector<string> parse_website(const string &profile, const string &xml)
|
|||
|
||||
return ret;
|
||||
}
|
||||
|
||||
// Applies source-specific clean-ups to `str` in place.
//
// Three patterns are removed:
//   * the CSS block that de.indymedia.org sometimes leaves in a description
//     (everything from the "Style Definitions" marker to the end of the text),
//   * the "Der Beitrag ... erschien zuerst auf Direkte Aktion." closing line,
//   * the GG/BO closing that starts with "Die von den einzelnen AutorInnen".
//
// @param str  Text to clean; modified in place.
void individual_fixes(std::string &str)
{
    // The patterns never change, so compile them once instead of on every
    // call; constructing a std::regex is expensive.

    // de.indymedia.org articles sometimes have CSS in the description.
    // `[\s\S]` is used to consume the rest of the text: `.` does not match
    // line breaks, and inside a bracket expression it is only a literal dot.
    static const std::regex reindyfuckup("\\/\\* Style Definitions \\*\\/[\\s\\S]*$");
    // Direkte Aktion closing
    static const std::regex redaclosing("Der Beitrag .* erschien zuerst auf Direkte Aktion.");
    // GG/BO closing
    static const std::regex reggboclosing("Die von den einzelnen AutorInnen .*$");

    str = std::regex_replace(str, reindyfuckup, "");
    str = std::regex_replace(str, redaclosing, "");
    str = std::regex_replace(str, reggboclosing, "");
}
|
||||
|
|
Loading…
Reference in New Issue
Block a user