From 3514103229cd995c629edfcc43caa96a3a902c3c Mon Sep 17 00:00:00 2001 From: tastytea <tastytea@tastytea.de> Date: Wed, 18 Sep 2019 19:00:00 +0200 Subject: [PATCH 1/4] Include WebExtension in each release. --- .drone.yml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/.drone.yml b/.drone.yml index bf96efc..9c358f0 100644 --- a/.drone.yml +++ b/.drone.yml @@ -141,11 +141,14 @@ steps: - alias apt-get='rm -f /var/cache/apt/archives/lock && apt-get' - apt-get update -q - apt-get install -qy g++ cmake pkg-config libpoco-dev libxdg-basedir-dev asciidoc catch - - apt-get install -qy build-essential file + - apt-get install -qy build-essential file zip - rm -rf build && mkdir -p build && cd build - cmake -DCMAKE_INSTALL_PREFIX=/usr -DWITH_MOZILLA=YES -DMOZILLA_NMH_DIR="lib/mozilla/native-messaging-hosts" -DWITH_DEB=YES .. - make package - cp -v remwharead_${DRONE_TAG}-0_amd64.deb .. + - cd ../browser-plugins/webextension + - ./build_xpi.sh + - cp -v ../remwharead.xpi ../../ volumes: - name: debian-package-cache path: /var/cache/apt/archives @@ -162,6 +165,7 @@ steps: files: - remwharead_${DRONE_TAG}-0_amd64.deb # - remwharead-${DRONE_TAG}-0.x86_64.rpm + - remwharead.xpi checksum: - sha512 From bb8be8e47e81477e0eea9b88d0c67fa63a6ca249 Mon Sep 17 00:00:00 2001 From: tastytea <tastytea@tastytea.de> Date: Fri, 20 Sep 2019 06:29:27 +0200 Subject: [PATCH 2/4] Replaced std::regex with Poco::RegularExpression in URI. --- include/uri.hpp | 2 +- src/lib/uri.cpp | 77 +++++++++++++++++++++++++++---------------------- 2 files changed, 43 insertions(+), 36 deletions(-) diff --git a/include/uri.hpp b/include/uri.hpp index 1cc13ef..c2df27e 100644 --- a/include/uri.hpp +++ b/include/uri.hpp @@ -144,7 +144,7 @@ namespace remwharead * * @since 0.6.0 */ - const string unescape_html(const string &html); + const string unescape_html(string html); /*! * @brief Replace newlines with spaces. 
diff --git a/src/lib/uri.cpp b/src/lib/uri.cpp index 4b06a9c..3953400 100644 --- a/src/lib/uri.cpp +++ b/src/lib/uri.cpp @@ -17,10 +17,10 @@ #include #include #include -#include #include #include #include +#include #include #include #include @@ -29,20 +29,17 @@ #include #include #include +#include #include "version.hpp" #include "uri.hpp" namespace remwharead { - using std::regex; - using std::regex_replace; - using std::regex_search; - using std::smatch; - using std::regex_constants::icase; using std::array; using std::istream; using std::unique_ptr; using std::make_unique; + using std::vector; using Poco::Net::HTTPClientSession; using Poco::Net::HTTPSClientSession; using Poco::Net::HTTPRequest; @@ -50,6 +47,7 @@ namespace remwharead using Poco::Net::HTTPMessage; using Poco::StreamCopier; using Poco::Environment; + using RegEx = Poco::RegularExpression; html_extract::operator bool() { @@ -205,12 +203,16 @@ namespace remwharead const string URI::extract_title(const string &html) { - const regex re_htmlfile("\\.(.?html?|xml|rss)$"); - if (_uri.substr(0, 4) == "http" || regex_search(_uri, re_htmlfile)) + const RegEx re_htmlfile(".*\\.(.?html?|xml|rss)$", RegEx::RE_CASELESS); + if (_uri.substr(0, 4) == "http" || re_htmlfile.match(_uri)) { - smatch match; - regex_search(html, match, regex("([^<]+)", icase)); - return remove_newlines(unescape_html(match[1].str())); + const RegEx re_title("<title>([^<]+)", RegEx::RE_CASELESS); + vector<string> matches; + re_title.split(html, matches); + if (matches.size() >= 2) + { + return remove_newlines(unescape_html(matches[1])); + } } return ""; @@ -218,13 +220,17 @@ namespace remwharead const string URI::extract_description(const string &html) { - const regex re_htmlfile("\\.(.?html?|xml|rss)$"); - if (_uri.substr(0, 4) == "http" || regex_search(_uri, re_htmlfile)) + const RegEx re_htmlfile(".*\\.(.?html?|xml|rss)$", RegEx::RE_CASELESS); + if (_uri.substr(0, 4) == "http" || re_htmlfile.match(_uri)) { - smatch match; - const regex 
re("description\"[^>]+content=\"([^\"]+)", icase); - regex_search(html, match, re); - return remove_newlines(strip_html(match[1].str())); + const RegEx re_desc("description\"[^>]+content=\"([^\"]+)", + RegEx::RE_CASELESS); + vector<string> matches; + re_desc.split(html, matches); + if (matches.size() >= 2) + { + return remove_newlines(unescape_html(matches[1])); + } } return ""; @@ -244,8 +250,9 @@ namespace remwharead out.replace(pos, 1, ""); } - out = regex_replace(out, regex("\\s+\n"), "\n"); // Remove space at eol. - out = regex_replace(out, regex("\n{2,}"), "\n"); // Reduce newlines. + // Remove whitespace at eol. + RegEx("\\s+\n").subst(out, "\n", RegEx::RE_GLOBAL); + RegEx("\n{2,}").subst(out, "\n", RegEx::RE_GLOBAL); // Reduce newlines. return unescape_html(out); } @@ -288,32 +295,32 @@ namespace remwharead return out; } - const string URI::unescape_html(const string &html) + const string URI::unescape_html(string html) { - string buffer = html; - string output; - // Used to convert int to utf-8 char. std::wstring_convert<std::codecvt_utf8<char32_t>, char32_t> u8c; - regex re_entity("&#(x)?([[:alnum:]]{1,8});"); - smatch match; + const RegEx re_entity("&#(x)?([[:alnum:]]{1,8});"); + RegEx::MatchVec matches; + string::size_type pos = 0; - while (regex_search(buffer, match, re_entity)) + while (re_entity.match(html, pos, matches) != 0) { char32_t codepoint = 0; + const string number = html.substr(matches[2].offset, + matches[2].length); // 'x' in front of the number means it's hexadecimal, else decimal. 
- if (match[1].length() == 1) + if (matches[1].length != 0) { - codepoint = std::stoi(match[2].str(), nullptr, 16); + codepoint = std::stoi(number, nullptr, 16); } else { - codepoint = std::stoi(match[2].str(), nullptr, 10); + codepoint = std::stoi(number, nullptr, 10); } - output += match.prefix().str() + u8c.to_bytes(codepoint); - buffer = match.suffix().str(); + const string unicode = u8c.to_bytes(codepoint); + html.replace(matches[0].offset, matches[0].length, unicode); + pos = matches[0].offset + unicode.length(); } - output += buffer; // Source: https://en.wikipedia.org/wiki/List_of_XML_and_HTML_character_ // entity_references#Character_entity_references_in_HTML @@ -581,11 +588,11 @@ namespace remwharead for (auto &pair : names) { - const regex re('&' + pair.first + ';'); - output = regex_replace(output, re, u8c.to_bytes(pair.second)); + const RegEx re('&' + pair.first + ';'); + re.subst(html, u8c.to_bytes(pair.second), RegEx::RE_GLOBAL); } - return output; + return html; } const archive_answer URI::archive() From 247f49296ed2700c2513c221ca06fc88bbcef14d Mon Sep 17 00:00:00 2001 From: tastytea <tastytea@tastytea.de> Date: Fri, 20 Sep 2019 17:49:03 +0200 Subject: [PATCH 3/4] Replaced std::regex with Poco::RegularExpression in searches. --- src/lib/search.cpp | 47 +++++++++++++++++++++++++--------------------- 1 file changed, 26 insertions(+), 21 deletions(-) diff --git a/src/lib/search.cpp b/src/lib/search.cpp index 3999781..e620c07 100644 --- a/src/lib/search.cpp +++ b/src/lib/search.cpp @@ -14,7 +14,6 @@ * along with this program. If not, see <http://www.gnu.org/licenses/>. 
*/ -#include <regex> #include <algorithm> #include <locale> #include <list> @@ -22,18 +21,17 @@ #include <utility> #include <iterator> #include <Poco/UTF8String.h> +#include <Poco/RegularExpression.h> #include "search.hpp" namespace remwharead { using std::list; - using std::regex; - using std::regex_search; - using std::smatch; using std::find; using std::find_if; using std::thread; using std::move; + using RegEx = Poco::RegularExpression; Search::Search(const list<Database::entry> &entries) :_entries(entries) @@ -43,30 +41,37 @@ namespace remwharead const { vector<vector<string>> searchlist; - const regex re_or("(.+?) (OR|\\|\\|) "); - const regex re_and("(.+?) (AND|&&) "); - smatch match; + const RegEx re_or("(.+?) (OR|\\|\\|) "); + const RegEx re_and("(.+?) (AND|&&) "); + RegEx::MatchVec matches; + string::size_type pos = 0; vector<string> subexpressions; { // Split expression at OR. - while (regex_search(expression, match, re_or)) + while (re_or.match(expression, pos, matches) != 0) { - subexpressions.push_back(match[1].str()); - expression = match.suffix().str(); + const string &subexpr = expression.substr(matches[1].offset, + matches[1].length); + subexpressions.push_back(subexpr); + pos = matches[0].offset + matches[0].length; } - subexpressions.push_back(expression); + subexpressions.push_back(expression.substr(pos)); } { for (string sub : subexpressions) { // Split each OR-slice at AND. 
vector<string> terms; - while (regex_search(sub, match, re_and)) + pos = 0; + + while (re_and.match(sub, pos, matches) != 0) { - terms.push_back(to_lowercase(match[1].str())); - sub = match.suffix().str(); + const string &term = sub.substr(matches[1].offset, + matches[1].length); + terms.push_back(to_lowercase(term)); + pos = matches[0].offset + matches[0].length; } - terms.push_back(to_lowercase(sub)); + terms.push_back(to_lowercase(sub.substr(pos))); searchlist.push_back(terms); } } @@ -100,8 +105,8 @@ namespace remwharead s = to_lowercase(s); if (is_re) { - const regex re("^" + tag + "$"); - return regex_search(s, re); + const RegEx re("^" + tag + "$"); + return (re == s); } else { @@ -154,19 +159,19 @@ namespace remwharead // Set matched_* to false if term is not found. if (is_re) { - const regex re(term); + const RegEx re(term); - if(!regex_search(title, re)) + if (!(re == title)) { matched_title = false; } - if(!regex_search(description, re)) + if (!(re == description)) { matched_description = false; } - if(!regex_search(fulltext, re)) + if (!(re == fulltext)) { matched_fulltext = false; } From 3424a9009ff609b4d2ecd05f94e0146e36210fb1 Mon Sep 17 00:00:00 2001 From: tastytea <tastytea@tastytea.de> Date: Fri, 20 Sep 2019 17:52:54 +0200 Subject: [PATCH 4/4] Removed unused regex include from asciidoc export. --- src/lib/export/adoc.cpp | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/lib/export/adoc.cpp b/src/lib/export/adoc.cpp index 317d8ea..43f7178 100644 --- a/src/lib/export/adoc.cpp +++ b/src/lib/export/adoc.cpp @@ -16,7 +16,6 @@ #include <iostream> #include <string> -#include <regex> #include <algorithm> #include <utility> #include <locale> @@ -29,8 +28,6 @@ namespace remwharead using std::string; using std::cerr; using std::endl; - using std::regex; - using std::regex_replace; using tagpair = std::pair<string,list<Database::entry>>; void Export::AsciiDoc::print() const