Remove HTML-encoded newlines in descriptions.
continuous-integration/drone/push Build is passing
Details
continuous-integration/drone/push Build is passing
Details
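Previously, newlines encoded as HTML entities in descriptions were not caught, because remove_newlines() ran on the extracted description before strip_html() was applied. strip_html() is now applied inside extract_description(), before remove_newlines(), and remove_newlines() also replaces a carriage return that directly precedes a newline.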
This commit is contained in:
parent
ff708e9403
commit
a83a2548c3
|
@ -1,6 +1,6 @@
|
||||||
cmake_minimum_required (VERSION 3.2)
|
cmake_minimum_required (VERSION 3.2)
|
||||||
project(remwharead
|
project(remwharead
|
||||||
VERSION 0.4.2
|
VERSION 0.4.3
|
||||||
LANGUAGES CXX
|
LANGUAGES CXX
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
19
src/uri.cpp
19
src/uri.cpp
|
@ -64,7 +64,7 @@ const html_extract URI::get()
|
||||||
return
|
return
|
||||||
{
|
{
|
||||||
extract_title(answer),
|
extract_title(answer),
|
||||||
strip_html(extract_description(answer)),
|
extract_description(answer),
|
||||||
strip_html(answer)
|
strip_html(answer)
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
@ -106,7 +106,7 @@ const string URI::extract_description(const string &html)
|
||||||
smatch match;
|
smatch match;
|
||||||
const regex re("description\"[^>]+content=\"([^\"]+)", icase);
|
const regex re("description\"[^>]+content=\"([^\"]+)", icase);
|
||||||
regex_search(html, match, re);
|
regex_search(html, match, re);
|
||||||
return remove_newlines(match[1].str());
|
return remove_newlines(strip_html(match[1].str()));
|
||||||
}
|
}
|
||||||
|
|
||||||
return "";
|
return "";
|
||||||
|
@ -509,11 +509,18 @@ const string URI::archive()
|
||||||
|
|
||||||
const string URI::remove_newlines(string text)
|
const string URI::remove_newlines(string text)
|
||||||
{
|
{
|
||||||
size_t pos = 0;
|
size_t posn = 0;
|
||||||
while ((pos = text.find("\n", pos)) != std::string::npos)
|
while ((posn = text.find('\n', posn)) != std::string::npos)
|
||||||
{
|
{
|
||||||
text.replace(pos, 1, " ");
|
text.replace(posn, 1, " ");
|
||||||
++pos;
|
|
||||||
|
size_t posr = posn - 1;
|
||||||
|
if (text[posr] == '\r')
|
||||||
|
{
|
||||||
|
text.replace(posr, 1, " ");
|
||||||
|
}
|
||||||
|
++posn;
|
||||||
}
|
}
|
||||||
|
|
||||||
return text;
|
return text;
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue