From b25a09608f07065b4541f5403c88d085a1515495 Mon Sep 17 00:00:00 2001 From: tastytea Date: Wed, 2 May 2018 20:17:55 +0200 Subject: [PATCH] bugfix: NO-BREAK SPACE: confused unicode representation with UTF-8 hex representation --- CMakeLists.txt | 2 +- src/parse.cpp | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index e8b23a3..ea41774 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,6 +1,6 @@ cmake_minimum_required (VERSION 3.7) project (mastorss - VERSION 0.5.13 + VERSION 0.5.14 LANGUAGES CXX ) diff --git a/src/parse.cpp b/src/parse.cpp index 0ef7567..a7c27cb 100644 --- a/src/parse.cpp +++ b/src/parse.cpp @@ -166,7 +166,8 @@ std::vector parse_website(const string &xml) str = std::regex_replace(str, recdata2, ""); str = std::regex_replace(str, restrip, ""); str = std::regex_replace(str, std::regex("\\r"), ""); // remove \r - str = std::regex_replace(str, std::regex("\uc2a0"), " "); // replace NO-BREAK SPACE with space + // replace NO-BREAK SPACE with space (UTF-8: 0xc2a0) + str = std::regex_replace(str, std::regex("\u00a0"), " "); str = std::regex_replace(str, std::regex("\\n[ \t]+\\n"), ""); // remove whitespace between newlines str = std::regex_replace(str, std::regex("\\n{3,}"), "\n\n"); // remove excess newlines