Added special case: ANF News

This commit is contained in:
tastytea 2018-02-20 23:29:55 +01:00
parent 7943ea5b9a
commit 52acbd90f6
Signed by: tastytea
GPG Key ID: 59346E0EA35C67E5
2 changed files with 15 additions and 4 deletions

View File

@ -1,8 +1,10 @@
cmake_minimum_required (VERSION 3.7)
include(GNUInstallDirs)
include(FindCURL)
include(FindBoost)
project (mastorss
VERSION 0.2.5
VERSION 0.2.6
LANGUAGES CXX
)
@ -18,9 +20,10 @@ configure_file (
"${PROJECT_BINARY_DIR}/version.hpp"
)
include(FindCURL)
find_package(CURL REQUIRED)
find_package(Boost REQUIRED COMPONENTS system filesystem)
add_executable(mastorss src/mastorss.cpp src/http.cpp src/config.cpp src/parse.cpp)
target_link_libraries(mastorss mastodon-cpp boost_system boost_filesystem ssl crypto ${CURL_LIBRARIES} curlpp)
file(GLOB sources src/*.cpp)
add_executable(mastorss ${sources})
target_link_libraries(mastorss mastodon-cpp ${Boost_LIBRARIES} ssl crypto ${CURL_LIBRARIES} curlpp)
install(TARGETS mastorss DESTINATION ${CMAKE_INSTALL_BINDIR})

View File

@ -88,6 +88,12 @@ std::vector<string> parse_website(const string &profile, const string &xml)
string desc = v.second.get_child("description").data();
string str = title + "\n\n" + desc;
// ANF News always puts this on top, causing us to think it's new
if (title.compare("Newsticker zu den Angriffen auf Efrîn") == 0)
{
continue;
}
// Some feeds contain encoded xhtml-tags >:|
std::regex relt("&lt;");
std::regex regt("&gt;");
@ -95,6 +101,8 @@ std::vector<string> parse_website(const string &profile, const string &xml)
std::regex recdata1("<!\\[CDATA\\[");
std::regex recdata2("\\]\\]>");
std::regex restrip("<[^>]*>");
// de.indymedia.org articles sometimes have CSS in the description
std::regex reindyfuckup("\\/\\* Style Definitions \\*\\/[.[:space:]]*$");
// Direkte Aktion closing
std::regex redaclosing("Der Beitrag .* erschien zuerst auf Direkte Aktion.");