better regular expressions for cleaning
This commit is contained in:
parent
b7616e3f44
commit
e887cbbf4b
|
@@ -1,6 +1,6 @@
|
|||
cmake_minimum_required (VERSION 3.7)
|
||||
project (mastorss
|
||||
VERSION 0.5.7
|
||||
VERSION 0.5.8
|
||||
LANGUAGES CXX
|
||||
)
|
||||
|
||||
|
|
|
@@ -155,9 +155,10 @@ std::vector<string> parse_website(const string &xml)
|
|||
str = std::regex_replace(str, recdata1, "");
|
||||
str = std::regex_replace(str, recdata2, "");
|
||||
str = std::regex_replace(str, restrip, "");
|
||||
str = std::regex_replace(str, std::regex("[\\r\\n] +[\\r\\n]"), "\n\n"); // remove space between newlines
|
||||
str = std::regex_replace(str, std::regex("[\\r\\n]{3,}"), "\n"); // remove excess newlines
|
||||
str = std::regex_replace(str, std::regex("^[ \t]*$"), ""); // remove nothing
|
||||
str = std::regex_replace(str, std::regex("\\r"), ""); // remove \r
|
||||
str = std::regex_replace(str, std::regex("\\n +\\n"), "\n\n"); // remove space between newlines
|
||||
str = std::regex_replace(str, std::regex("\\n{3,}"), "\n\n"); // remove excess newlines
|
||||
str = std::regex_replace(str, std::regex("\\n[ \t]*\\n"), ""); // remove nothing
|
||||
|
||||
for (const string &hashtag : watchwords)
|
||||
{
|
||||
|
|
Loading…
Reference in New Issue
Block a user