Only attempt to extract title and description from HTML files.
Some checks failed
continuous-integration/drone/push Build is failing

This commit is contained in:
tastytea 2019-05-18 01:47:10 +02:00
parent ea3a545769
commit 28503cc3bd
Signed by: tastytea
GPG Key ID: CFC39497F1B26E07

View File

@ -82,16 +82,28 @@ const html_extract URI::get()
// Extracts the text of the first <title> element found in `html`.
//
// Extraction is only attempted when `_uri` starts with "http" or ends in an
// HTML-like file extension (".html", ".htm", the same optionally preceded by
// one extra character such as ".xhtml"/".shtml", ".xml" or ".rss"). For every
// other URI an empty string is returned without looking at `html`.
//
// @param html Document text to search.
// @return The captured title passed through remove_newlines(); "" if the URI
//         is not considered an HTML document.
const string URI::extract_title(const string &html)
{
    const regex re_htmlfile("\\.(.?html?|xml|rss)$");
    if (_uri.substr(0, 4) == "http" || regex_search(_uri, re_htmlfile))
    {
        smatch match;
        regex_search(html, match, regex("<title>([^<]+)"));
        // If nothing matched, match[1] is an unmatched sub-match whose str()
        // is empty, so no explicit check of the search result is needed.
        return remove_newlines(match[1].str());
    }

    // Not an HTML-like resource: do not try to interpret the content.
    return "";
}
// Extracts the `content` attribute value that follows the first occurrence of
// `description"` inside a tag in `html` (e.g. a <meta name="description" ...>
// element).
//
// Extraction is only attempted when `_uri` starts with "http" or ends in an
// HTML-like file extension (".html", ".htm", the same optionally preceded by
// one extra character such as ".xhtml"/".shtml", ".xml" or ".rss"). For every
// other URI an empty string is returned without looking at `html`.
//
// @param html Document text to search.
// @return The captured description passed through remove_newlines(); "" if
//         the URI is not considered an HTML document.
const string URI::extract_description(const string &html)
{
    const regex re_htmlfile("\\.(.?html?|xml|rss)$");
    if (_uri.substr(0, 4) == "http" || regex_search(_uri, re_htmlfile))
    {
        smatch match;
        regex_search(html, match, regex("description\"[^>]+content=\"([^\"]+)"));
        // If nothing matched, match[1] is an unmatched sub-match whose str()
        // is empty, so no explicit check of the search result is needed.
        return remove_newlines(match[1].str());
    }

    // Not an HTML-like resource: do not try to interpret the content.
    return "";
}
const string URI::strip_html(const string &html)