Fix the pagebreak regex and the range in which pagebreaks are searched.
This commit is contained in:
parent
f1a0015f28
commit
bdf9a86651
|
@ -129,8 +129,9 @@ void cleanup_text(string &text)
|
||||||
{
|
{
|
||||||
static const boost::regex re_header_start{"<[hH][1-6]"};
|
static const boost::regex re_header_start{"<[hH][1-6]"};
|
||||||
static const boost::regex re_header_end{"</[hH][1-6]"};
|
static const boost::regex re_header_end{"</[hH][1-6]"};
|
||||||
static const boost::regex re_pagebreak{".+pagebreak.+(title|aria-label)"
|
static const boost::regex re_pagebreak{"[^>]+pagebreak[^>]+"
|
||||||
"=\"([[:alnum:]]+)\".*"};
|
"(title|aria-label)"
|
||||||
|
"=\"([[:alnum:]]+)\""};
|
||||||
|
|
||||||
size_t pos{};
|
size_t pos{};
|
||||||
while ((pos = text.find('<', pos)) != string::npos)
|
while ((pos = text.find('<', pos)) != string::npos)
|
||||||
|
@ -149,8 +150,8 @@ void cleanup_text(string &text)
|
||||||
{
|
{
|
||||||
auto endpos{text.find('>')};
|
auto endpos{text.find('>')};
|
||||||
boost::match_results<const char *> match;
|
boost::match_results<const char *> match;
|
||||||
if (boost::regex_search(text.substr(pos, endpos).data(), match,
|
if (boost::regex_search(text.substr(pos, endpos - pos).data(),
|
||||||
re_pagebreak))
|
match, re_pagebreak))
|
||||||
{
|
{
|
||||||
// FIXME: -fsanitize=address is complaining about this. ↓ 🤷
|
// FIXME: -fsanitize=address is complaining about this. ↓ 🤷
|
||||||
// Could not reproduce it.
|
// Could not reproduce it.
|
||||||
|
|
Loading…
Reference in New Issue
Block a user