started to refactor a bit
This commit is contained in:
parent
696ea1ff57
commit
8b0ceaf180
|
@ -1,6 +1,6 @@
|
||||||
cmake_minimum_required (VERSION 3.7)
|
cmake_minimum_required (VERSION 3.7)
|
||||||
project (mastorss
|
project (mastorss
|
||||||
VERSION 0.3.5
|
VERSION 0.3.6
|
||||||
LANGUAGES CXX
|
LANGUAGES CXX
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
|
@ -16,6 +16,7 @@ std::uint16_t read_config(pt::ptree &config, const string &profile, string &inst
|
||||||
|
|
||||||
std::vector<string> parse_website(const string &profile, const string &xml);
|
std::vector<string> parse_website(const string &profile, const string &xml);
|
||||||
void unescape_html(const string &str);
|
void unescape_html(const string &str);
|
||||||
|
void individual_fixes(string &str);
|
||||||
|
|
||||||
const std::uint16_t http_get(const string &feedurl,
|
const std::uint16_t http_get(const string &feedurl,
|
||||||
string &answer, const string &useragent = "");
|
string &answer, const string &useragent = "");
|
||||||
|
|
|
@ -133,20 +133,12 @@ std::vector<string> parse_website(const string &profile, const string &xml)
|
||||||
std::regex recdata2("\\]\\]>");
|
std::regex recdata2("\\]\\]>");
|
||||||
std::regex restrip("<[^>]*>");
|
std::regex restrip("<[^>]*>");
|
||||||
|
|
||||||
// de.indymedia.org articles sometimes have CSS in the description
|
individual_fixes(str);
|
||||||
std::regex reindyfuckup("\\/\\* Style Definitions \\*\\/[.[:space:]]*$");
|
|
||||||
// Direkte Action closing
|
|
||||||
std::regex redaclosing("Der Beitrag .* erschien zuerst auf Direkte Aktion.");
|
|
||||||
// GG/BO closing
|
|
||||||
std::regex reggboclosing("Die von den einzelnen AutorInnen .*$");
|
|
||||||
|
|
||||||
str = std::regex_replace(str, reparagraph, "\n\n");
|
str = std::regex_replace(str, reparagraph, "\n\n");
|
||||||
str = std::regex_replace(str, recdata1, "");
|
str = std::regex_replace(str, recdata1, "");
|
||||||
str = std::regex_replace(str, recdata2, "");
|
str = std::regex_replace(str, recdata2, "");
|
||||||
str = std::regex_replace(str, restrip, "");
|
str = std::regex_replace(str, restrip, "");
|
||||||
str = std::regex_replace(str, reindyfuckup, "");
|
|
||||||
str = std::regex_replace(str, redaclosing, "");
|
|
||||||
str = std::regex_replace(str, reggboclosing, "");
|
|
||||||
str = std::regex_replace(str, std::regex("[\\r\\n] +[\\r\\n]"), "\n\n"); // remove space between newlines
|
str = std::regex_replace(str, std::regex("[\\r\\n] +[\\r\\n]"), "\n\n"); // remove space between newlines
|
||||||
str = std::regex_replace(str, std::regex("[\\r\\n]{3,}"), "\n"); // remove excess newlines
|
str = std::regex_replace(str, std::regex("[\\r\\n]{3,}"), "\n"); // remove excess newlines
|
||||||
|
|
||||||
|
@ -173,3 +165,17 @@ std::vector<string> parse_website(const string &profile, const string &xml)
|
||||||
|
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Applies source-specific clean-ups to a piece of feed text, in place.
//
// Three fixes are applied, in this order:
//   1. Everything from the marker "/* Style Definitions */" to the end of the
//      text is dropped.
//   2. The "Der Beitrag ... erschien zuerst auf Direkte Aktion." closing is
//      removed.
//   3. The closing that starts with "Die von den einzelnen AutorInnen " is
//      removed up to the end of the text.
//
// str: the text to clean; it is modified in place. Text that contains none of
//      the patterns is left unchanged.
void individual_fixes(string &str)
{
    // de.indymedia.org articles sometimes have CSS in the description.
    // Cut the text at the marker. The earlier regex "[.[:space:]]*$" did not
    // do this: inside a bracket expression '.' is a literal dot, so it only
    // matched when nothing but dots and whitespace followed the marker.
    static const string indymarker("/* Style Definitions */");
    const string::size_type csspos = str.find(indymarker);
    if (csspos != string::npos)
    {
        str.erase(csspos);
    }

    // The patterns never change, so compile them once instead of per call.
    // Direkte Aktion closing
    static const std::regex redaclosing("Der Beitrag .* erschien zuerst auf Direkte Aktion.");
    // GG/BO closing
    static const std::regex reggboclosing("Die von den einzelnen AutorInnen .*$");

    str = std::regex_replace(str, redaclosing, "");
    str = std::regex_replace(str, reggboclosing, "");
}
|
||||||
|
|
Loading…
Reference in New Issue
Block a user