From 21fe64b59cd30ae4e3263a2c7731b39b15703c5f Mon Sep 17 00:00:00 2001 From: tastytea Date: Thu, 25 Jul 2019 02:57:10 +0200 Subject: [PATCH 1/2] Fixed to_lowercase(). Converting some text to lowercase caused range errors. I don't really know what I'm doing here, so I replaced the code with an answer from StackOverflow. It works now, let's hope it stays that way. --- src/search.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/search.cpp b/src/search.cpp index a067427..4c0c8a7 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -62,12 +62,12 @@ const vector> parse_expression(string expression) const string to_lowercase(const string &str) { - std::wstring_convert> converter; - std::wstring in = converter.from_bytes(str); - std::wstring out; - - std::transform(in.begin(), in.end(), std::back_inserter(out), ::towlower); - return converter.to_bytes(out); + string out; + std::locale loc(""); + const std::ctype& ct = std::use_facet>(loc); + std::transform(str.begin(), str.end(), std::back_inserter(out), + std::bind1st(std::mem_fun(&std::ctype::tolower), &ct)); + return out; } const vector From d6dd8d4e165efadb6f6784287f9e8736de0bc85d Mon Sep 17 00:00:00 2001 From: tastytea Date: Thu, 25 Jul 2019 03:38:26 +0200 Subject: [PATCH 2/2] Added support for regular expressions. 
--- remwharead.1.adoc | 13 ++++++-- src/main.cpp | 4 +-- src/parse_options.cpp | 4 ++- src/parse_options.hpp | 1 + src/search.cpp | 70 ++++++++++++++++++++++++++++++++++--------- src/search.hpp | 6 ++-- 6 files changed, 77 insertions(+), 21 deletions(-) diff --git a/remwharead.1.adoc b/remwharead.1.adoc index b0bdb41..17b6e8b 100644 --- a/remwharead.1.adoc +++ b/remwharead.1.adoc @@ -2,7 +2,7 @@ :doctype: manpage :Author: tastytea :Email: tastytea@tastytea.de -:Date: 2019-07-21 +:Date: 2019-07-25 :Revision: 0.0.0 :man source: remwharead :man manual: General Commands Manual @@ -15,7 +15,7 @@ remwharead - Saves URIs of things you want to remember in a database *remwharead* [*-t* _tags_] [*-N*] _URI_ -*remwharead* *-e* _format_ [*-f* _file_] [*-T* _start_,_end_] [[*-s*|*-S*] _expression_] +*remwharead* *-e* _format_ [*-f* _file_] [*-T* _start_,_end_] [[*-s*|*-S*] _expression_] [*-r*] == DESCRIPTION @@ -55,6 +55,10 @@ insensitive. Search in tags, title, description and full text. See _SEARCH EXPRESSIONS_. Case insensitive. +*-r*, *--regex*:: +Use regular expressions for search, case insensitive. With *--search-tags*, +every tag is enclosed by _^_ and _$_. + *-N*, *--no-archive*:: Do not archive URI. @@ -86,6 +90,11 @@ Print version, copyright and license. `remwharead -e csv -s "grub AND boot"` ==== +.Output all articles by Jan Müller, considering different spellings. +==== +`remwharead -e simple -S 'Jan[[:space:]]+M(ü|ue)ller' -r` +==== + === Display database *remwharead* does not provide an interface to display the database. 
However, you diff --git a/src/main.cpp b/src/main.cpp index c22d3f0..0ac6782 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -88,11 +88,11 @@ int main(const int argc, const char *argv[]) db.retrieve(opts.span[0], opts.span[1]); if (!opts.search_tags.empty()) { - entries = search_tags(entries, opts.search_tags); + entries = search_tags(entries, opts.search_tags, opts.regex); } else if (!opts.search_all.empty()) { - entries = search_all(entries, opts.search_all); + entries = search_all(entries, opts.search_all, opts.regex); } switch (opts.format) diff --git a/src/parse_options.cpp b/src/parse_options.cpp index 0f2beb2..8910276 100644 --- a/src/parse_options.cpp +++ b/src/parse_options.cpp @@ -58,6 +58,8 @@ const options parse_options(const int argc, const char *argv[]) ("S", "search-all", "Search in tags, title, description and full text.", "", &opts.search_all); + op.add + ("r", "regex", "Use regular expression for search.", &opts.regex); auto option_noarchive = op.add ("N", "no-archive", "Do not archive URI."); auto option_help = op.add @@ -71,7 +73,7 @@ const options parse_options(const int argc, const char *argv[]) cout << "Usage: " << argv[0] << " [-t tags] [-N] URI\n" << " " << argv[0] << " -e format [-f file] [-T start,end] " - << "[[-s|-S] expression]\n"; + << "[[-s|-S] expression] [-r]\n"; cout << op; return options(0); } diff --git a/src/parse_options.hpp b/src/parse_options.hpp index 653ed35..2438d3e 100644 --- a/src/parse_options.hpp +++ b/src/parse_options.hpp @@ -41,6 +41,7 @@ typedef struct options string uri; string search_tags; string search_all; + bool regex = false; bool archive = true; uint8_t status_code = 0; diff --git a/src/search.cpp b/src/search.cpp index 4c0c8a7..564896e 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -21,6 +21,7 @@ #include "search.hpp" using std::regex; +using std::regex_constants::icase; using std::regex_search; using std::smatch; using std::find; @@ -71,7 +72,8 @@ const string to_lowercase(const string &str) } const 
vector -search_tags(const vector &entries, string expression) +search_tags(const vector &entries, string expression, + const bool is_re) { vector> searchlist = parse_expression(expression); vector result; @@ -81,11 +83,23 @@ search_tags(const vector &entries, string expression) for (const Database::entry &entry : entries) { // Add entry to result if all tags in an OR-slice match. bool matched = true; + for (const string &tag : tags_or) { const auto it = find_if(entry.tags.begin(), entry.tags.end(), - [&tag](const string &s) - { return to_lowercase(s) == tag; }); + [&tag, is_re](const string &s) + { + if (is_re) + { + const regex re("^" + tag + "$", + icase); + return regex_search(s, re); + } + else + { + return to_lowercase(s) == tag; + } + }); if (it == entry.tags.end()) { matched = false; @@ -102,10 +116,11 @@ search_tags(const vector &entries, string expression) } const vector -search_all(const vector &entries, string expression) +search_all(const vector &entries, string expression, + const bool is_re) { vector> searchlist = parse_expression(expression); - vector result = search_tags(entries, expression); + vector result = search_tags(entries, expression, is_re); for (const vector &terms_or : searchlist) { @@ -125,19 +140,46 @@ search_all(const vector &entries, string expression) for (const string &term : terms_or) { - if (to_lowercase(entry.title).find(term) == string::npos) - { - matched_title = false; - } + const string title = to_lowercase(entry.title); + const string description = to_lowercase(entry.description); + const string fulltext = to_lowercase(entry.fulltext); - if (to_lowercase(entry.description).find(term) == string::npos) + // Set matched_* to false if term is not found. 
+ if (is_re) { - matched_description = false; - } + const regex re(term, icase); - if (to_lowercase(entry.fulltext).find(term) == string::npos) + if(!regex_search(title, re)) + { + matched_title = false; + } + + if(!regex_search(description, re)) + { + matched_description = false; + } + + if(!regex_search(fulltext, re)) + { + matched_fulltext = false; + } + } + else { - matched_fulltext = false; + if (title.find(term) == string::npos) + { + matched_title = false; + } + + if (description.find(term) == string::npos) + { + matched_description = false; + } + + if (fulltext.find(term) == string::npos) + { + matched_fulltext = false; + } } } if (matched_title == true diff --git a/src/search.hpp b/src/search.hpp index c027f3c..620e94e 100644 --- a/src/search.hpp +++ b/src/search.hpp @@ -29,10 +29,12 @@ const string to_lowercase(const string &str); //! Seach database entries for tags. const vector -search_tags(const vector &entries, string expression); +search_tags(const vector &entries, string expression, + const bool is_re); //! Search tags, title, description and full text. const vector -search_all(const vector &entries, string expression); +search_all(const vector &entries, string expression, + const bool is_re); #endif // REMWHAREAD_SEARCH_HPP