Added support for regular expressions.
This commit is contained in:
parent
21fe64b59c
commit
d6dd8d4e16
|
@ -2,7 +2,7 @@
|
||||||
:doctype: manpage
|
:doctype: manpage
|
||||||
:Author: tastytea
|
:Author: tastytea
|
||||||
:Email: tastytea@tastytea.de
|
:Email: tastytea@tastytea.de
|
||||||
:Date: 2019-07-21
|
:Date: 2019-07-25
|
||||||
:Revision: 0.0.0
|
:Revision: 0.0.0
|
||||||
:man source: remwharead
|
:man source: remwharead
|
||||||
:man manual: General Commands Manual
|
:man manual: General Commands Manual
|
||||||
|
@ -15,7 +15,7 @@ remwharead - Saves URIs of things you want to remember in a database
|
||||||
|
|
||||||
*remwharead* [*-t* _tags_] [*-N*] _URI_
|
*remwharead* [*-t* _tags_] [*-N*] _URI_
|
||||||
|
|
||||||
*remwharead* *-e* _format_ [*-f* _file_] [*-T* _start_,_end_] [[*-s*|*-S*] _expression_]
|
*remwharead* *-e* _format_ [*-f* _file_] [*-T* _start_,_end_] [[*-s*|*-S*] _expression_] [*-r*]
|
||||||
|
|
||||||
== DESCRIPTION
|
== DESCRIPTION
|
||||||
|
|
||||||
|
@ -55,6 +55,10 @@ insensitive.
|
||||||
Search in tags, title, description and full text. See _SEARCH EXPRESSIONS_. Case
|
Search in tags, title, description and full text. See _SEARCH EXPRESSIONS_. Case
|
||||||
insensitive.
|
insensitive.
|
||||||
|
|
||||||
|
*-r*, *--regex*::
|
||||||
|
Use regular expressions for search, case insensitive. With *--search-tags*,
|
||||||
|
every tag is enclosed by _^_ and _$_.
|
||||||
|
|
||||||
*-N*, *--no-archive*::
|
*-N*, *--no-archive*::
|
||||||
Do not archive URI.
|
Do not archive URI.
|
||||||
|
|
||||||
|
@ -86,6 +90,11 @@ Print version, copyright and license.
|
||||||
`remwharead -e csv -s "grub AND boot"`
|
`remwharead -e csv -s "grub AND boot"`
|
||||||
====
|
====
|
||||||
|
|
||||||
|
.Output all articles by Jan Müller, considering different spellings.
|
||||||
|
====
|
||||||
|
`remwharead -e simple -S 'Jan[[:space:]]+M(ü|ue)ller' -r`
|
||||||
|
====
|
||||||
|
|
||||||
=== Display database
|
=== Display database
|
||||||
|
|
||||||
*remwharead* does not provide an interface to display the database. However, you
|
*remwharead* does not provide an interface to display the database. However, you
|
||||||
|
|
|
@ -88,11 +88,11 @@ int main(const int argc, const char *argv[])
|
||||||
db.retrieve(opts.span[0], opts.span[1]);
|
db.retrieve(opts.span[0], opts.span[1]);
|
||||||
if (!opts.search_tags.empty())
|
if (!opts.search_tags.empty())
|
||||||
{
|
{
|
||||||
entries = search_tags(entries, opts.search_tags);
|
entries = search_tags(entries, opts.search_tags, opts.regex);
|
||||||
}
|
}
|
||||||
else if (!opts.search_all.empty())
|
else if (!opts.search_all.empty())
|
||||||
{
|
{
|
||||||
entries = search_all(entries, opts.search_all);
|
entries = search_all(entries, opts.search_all, opts.regex);
|
||||||
}
|
}
|
||||||
|
|
||||||
switch (opts.format)
|
switch (opts.format)
|
||||||
|
|
|
@ -58,6 +58,8 @@ const options parse_options(const int argc, const char *argv[])
|
||||||
("S", "search-all",
|
("S", "search-all",
|
||||||
"Search in tags, title, description and full text.",
|
"Search in tags, title, description and full text.",
|
||||||
"", &opts.search_all);
|
"", &opts.search_all);
|
||||||
|
op.add<popl::Switch>
|
||||||
|
("r", "regex", "Use regular expression for search.", &opts.regex);
|
||||||
auto option_noarchive = op.add<popl::Switch>
|
auto option_noarchive = op.add<popl::Switch>
|
||||||
("N", "no-archive", "Do not archive URI.");
|
("N", "no-archive", "Do not archive URI.");
|
||||||
auto option_help = op.add<popl::Switch>
|
auto option_help = op.add<popl::Switch>
|
||||||
|
@ -71,7 +73,7 @@ const options parse_options(const int argc, const char *argv[])
|
||||||
cout << "Usage: " << argv[0] << " [-t tags] [-N] URI\n"
|
cout << "Usage: " << argv[0] << " [-t tags] [-N] URI\n"
|
||||||
<< " " << argv[0]
|
<< " " << argv[0]
|
||||||
<< " -e format [-f file] [-T start,end] "
|
<< " -e format [-f file] [-T start,end] "
|
||||||
<< "[[-s|-S] expression]\n";
|
<< "[[-s|-S] expression] [-r]\n";
|
||||||
cout << op;
|
cout << op;
|
||||||
return options(0);
|
return options(0);
|
||||||
}
|
}
|
||||||
|
|
|
@ -41,6 +41,7 @@ typedef struct options
|
||||||
string uri;
|
string uri;
|
||||||
string search_tags;
|
string search_tags;
|
||||||
string search_all;
|
string search_all;
|
||||||
|
bool regex = false;
|
||||||
bool archive = true;
|
bool archive = true;
|
||||||
uint8_t status_code = 0;
|
uint8_t status_code = 0;
|
||||||
|
|
||||||
|
|
|
@ -21,6 +21,7 @@
|
||||||
#include "search.hpp"
|
#include "search.hpp"
|
||||||
|
|
||||||
using std::regex;
|
using std::regex;
|
||||||
|
using std::regex_constants::icase;
|
||||||
using std::regex_search;
|
using std::regex_search;
|
||||||
using std::smatch;
|
using std::smatch;
|
||||||
using std::find;
|
using std::find;
|
||||||
|
@ -71,7 +72,8 @@ const string to_lowercase(const string &str)
|
||||||
}
|
}
|
||||||
|
|
||||||
const vector<Database::entry>
|
const vector<Database::entry>
|
||||||
search_tags(const vector<Database::entry> &entries, string expression)
|
search_tags(const vector<Database::entry> &entries, string expression,
|
||||||
|
const bool is_re)
|
||||||
{
|
{
|
||||||
vector<vector<string>> searchlist = parse_expression(expression);
|
vector<vector<string>> searchlist = parse_expression(expression);
|
||||||
vector<Database::entry> result;
|
vector<Database::entry> result;
|
||||||
|
@ -81,11 +83,23 @@ search_tags(const vector<Database::entry> &entries, string expression)
|
||||||
for (const Database::entry &entry : entries)
|
for (const Database::entry &entry : entries)
|
||||||
{ // Add entry to result if all tags in an OR-slice match.
|
{ // Add entry to result if all tags in an OR-slice match.
|
||||||
bool matched = true;
|
bool matched = true;
|
||||||
|
|
||||||
for (const string &tag : tags_or)
|
for (const string &tag : tags_or)
|
||||||
{
|
{
|
||||||
const auto it = find_if(entry.tags.begin(), entry.tags.end(),
|
const auto it = find_if(entry.tags.begin(), entry.tags.end(),
|
||||||
[&tag](const string &s)
|
[&tag, is_re](const string &s)
|
||||||
{ return to_lowercase(s) == tag; });
|
{
|
||||||
|
if (is_re)
|
||||||
|
{
|
||||||
|
const regex re("^" + tag + "$",
|
||||||
|
icase);
|
||||||
|
return regex_search(s, re);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
return to_lowercase(s) == tag;
|
||||||
|
}
|
||||||
|
});
|
||||||
if (it == entry.tags.end())
|
if (it == entry.tags.end())
|
||||||
{
|
{
|
||||||
matched = false;
|
matched = false;
|
||||||
|
@ -102,10 +116,11 @@ search_tags(const vector<Database::entry> &entries, string expression)
|
||||||
}
|
}
|
||||||
|
|
||||||
const vector<Database::entry>
|
const vector<Database::entry>
|
||||||
search_all(const vector<Database::entry> &entries, string expression)
|
search_all(const vector<Database::entry> &entries, string expression,
|
||||||
|
const bool is_re)
|
||||||
{
|
{
|
||||||
vector<vector<string>> searchlist = parse_expression(expression);
|
vector<vector<string>> searchlist = parse_expression(expression);
|
||||||
vector<Database::entry> result = search_tags(entries, expression);
|
vector<Database::entry> result = search_tags(entries, expression, is_re);
|
||||||
|
|
||||||
for (const vector<string> &terms_or : searchlist)
|
for (const vector<string> &terms_or : searchlist)
|
||||||
{
|
{
|
||||||
|
@ -125,21 +140,48 @@ search_all(const vector<Database::entry> &entries, string expression)
|
||||||
|
|
||||||
for (const string &term : terms_or)
|
for (const string &term : terms_or)
|
||||||
{
|
{
|
||||||
if (to_lowercase(entry.title).find(term) == string::npos)
|
const string title = to_lowercase(entry.title);
|
||||||
|
const string description = to_lowercase(entry.description);
|
||||||
|
const string fulltext = to_lowercase(entry.fulltext);
|
||||||
|
|
||||||
|
// Set matched_* to false if term is not found.
|
||||||
|
if (is_re)
|
||||||
|
{
|
||||||
|
const regex re(term, icase);
|
||||||
|
|
||||||
|
if(!regex_search(title, re))
|
||||||
{
|
{
|
||||||
matched_title = false;
|
matched_title = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (to_lowercase(entry.description).find(term) == string::npos)
|
if(!regex_search(description, re))
|
||||||
{
|
{
|
||||||
matched_description = false;
|
matched_description = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (to_lowercase(entry.fulltext).find(term) == string::npos)
|
if(!regex_search(fulltext, re))
|
||||||
{
|
{
|
||||||
matched_fulltext = false;
|
matched_fulltext = false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
if (title.find(term) == string::npos)
|
||||||
|
{
|
||||||
|
matched_title = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (description.find(term) == string::npos)
|
||||||
|
{
|
||||||
|
matched_description = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (fulltext.find(term) == string::npos)
|
||||||
|
{
|
||||||
|
matched_fulltext = false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
if (matched_title == true
|
if (matched_title == true
|
||||||
|| matched_description == true
|
|| matched_description == true
|
||||||
|| matched_fulltext == true)
|
|| matched_fulltext == true)
|
||||||
|
|
|
@ -29,10 +29,12 @@ const string to_lowercase(const string &str);
|
||||||
|
|
||||||
//! Search database entries for tags.
|
//! Search database entries for tags.
|
||||||
const vector<Database::entry>
|
const vector<Database::entry>
|
||||||
search_tags(const vector<Database::entry> &entries, string expression);
|
search_tags(const vector<Database::entry> &entries, string expression,
|
||||||
|
const bool is_re);
|
||||||
|
|
||||||
//! Search tags, title, description and full text.
|
//! Search tags, title, description and full text.
|
||||||
const vector<Database::entry>
|
const vector<Database::entry>
|
||||||
search_all(const vector<Database::entry> &entries, string expression);
|
search_all(const vector<Database::entry> &entries, string expression,
|
||||||
|
const bool is_re);
|
||||||
|
|
||||||
#endif // REMWHAREAD_SEARCH_HPP
|
#endif // REMWHAREAD_SEARCH_HPP
|
||||||
|
|
Loading…
Reference in New Issue