From f9563cddcdb33432c8c5445e7f43108becba3728 Mon Sep 17 00:00:00 2001 From: tastytea Date: Fri, 17 May 2019 05:43:17 +0200 Subject: [PATCH] Replaced regular expressions with find & replace, where possible. --- CMakeLists.txt | 2 +- src/uri.cpp | 50 +++++++++++++++++++++++++++++++++++++------------- src/uri.hpp | 3 ++- 3 files changed, 40 insertions(+), 15 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 6f5e72b..2973673 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,6 +1,6 @@ cmake_minimum_required (VERSION 3.2) project(remwharead - VERSION 0.1.1 + VERSION 0.1.2 LANGUAGES CXX ) diff --git a/src/uri.cpp b/src/uri.cpp index 71eea6c..db7d5c1 100644 --- a/src/uri.cpp +++ b/src/uri.cpp @@ -98,29 +98,53 @@ const string URI::strip_html(const string &html) { string out; - out = regex_replace(html, regex("', startpos); - out += html.substr(pos, startpos - pos); - pos = endpos; - if (pos != std::string::npos) + size_t pos = 0; + while (pos != std::string::npos) { - ++pos; + size_t startpos = html.find('<', pos); + size_t endpos = html.find('>', startpos); + out += html.substr(pos, startpos - pos); + pos = endpos; + if (pos != std::string::npos) + { + ++pos; + } + } + } + else + { + size_t pos = 0; + out = html; + while ((pos = out.find("<" + tag)) != std::string::npos) + { + size_t endpos = out.find(" + out.replace(pos, endpos - pos, ""); } } diff --git a/src/uri.hpp b/src/uri.hpp index 04caec6..3217013 100644 --- a/src/uri.hpp +++ b/src/uri.hpp @@ -44,7 +44,8 @@ protected: const string extract_title(const string &html); const string extract_description(const string &html); const string strip_html(const string &html); - const string remove_html_tags(const string &html); + //! Remove all HTML tags. If tag is not empty, remove tag and its content. + const string remove_html_tags(const string &html, const string &tag = ""); const string unescape_html(const string &html); const string remove_newlines(const string &text); };