Remove HTML-encoded newlines in descriptions.
continuous-integration/drone/push Build is passing Details

We didn't catch newlines encoded as HTML entities before.
This commit is contained in:
tastytea 2019-07-13 18:20:37 +02:00
parent ff708e9403
commit a83a2548c3
Signed by: tastytea
GPG Key ID: CFC39497F1B26E07
2 changed files with 14 additions and 7 deletions

View File

@@ -1,6 +1,6 @@
cmake_minimum_required (VERSION 3.2) cmake_minimum_required (VERSION 3.2)
project(remwharead project(remwharead
VERSION 0.4.2 VERSION 0.4.3
LANGUAGES CXX LANGUAGES CXX
) )

View File

@@ -64,7 +64,7 @@ const html_extract URI::get()
return return
{ {
extract_title(answer), extract_title(answer),
strip_html(extract_description(answer)), extract_description(answer),
strip_html(answer) strip_html(answer)
}; };
} }
@@ -106,7 +106,7 @@ const string URI::extract_description(const string &html)
smatch match; smatch match;
const regex re("description\"[^>]+content=\"([^\"]+)", icase); const regex re("description\"[^>]+content=\"([^\"]+)", icase);
regex_search(html, match, re); regex_search(html, match, re);
return remove_newlines(match[1].str()); return remove_newlines(strip_html(match[1].str()));
} }
return ""; return "";
@@ -509,11 +509,18 @@ const string URI::archive()
const string URI::remove_newlines(string text) const string URI::remove_newlines(string text)
{ {
size_t pos = 0; size_t posn = 0;
while ((pos = text.find("\n", pos)) != std::string::npos) while ((posn = text.find('\n', posn)) != std::string::npos)
{ {
text.replace(pos, 1, " "); text.replace(posn, 1, " ");
++pos;
size_t posr = posn - 1;
if (text[posr] == '\r')
{
text.replace(posr, 1, " ");
}
++posn;
} }
return text; return text;
} }