Fix the pagebreak regex and the substring range in which pagebreaks are searched.
This commit is contained in:
parent
f1a0015f28
commit
bdf9a86651
|
@ -129,8 +129,9 @@ void cleanup_text(string &text)
|
|||
{
|
||||
static const boost::regex re_header_start{"<[hH][1-6]"};
|
||||
static const boost::regex re_header_end{"</[hH][1-6]"};
|
||||
static const boost::regex re_pagebreak{".+pagebreak.+(title|aria-label)"
|
||||
"=\"([[:alnum:]]+)\".*"};
|
||||
static const boost::regex re_pagebreak{"[^>]+pagebreak[^>]+"
|
||||
"(title|aria-label)"
|
||||
"=\"([[:alnum:]]+)\""};
|
||||
|
||||
size_t pos{};
|
||||
while ((pos = text.find('<', pos)) != string::npos)
|
||||
|
@ -149,8 +150,8 @@ void cleanup_text(string &text)
|
|||
{
|
||||
auto endpos{text.find('>')};
|
||||
boost::match_results<const char *> match;
|
||||
if (boost::regex_search(text.substr(pos, endpos).data(), match,
|
||||
re_pagebreak))
|
||||
if (boost::regex_search(text.substr(pos, endpos - pos).data(),
|
||||
match, re_pagebreak))
|
||||
{
|
||||
// FIXME: -fsanitize=address is complaining about this. ↓ 🤷
|
||||
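// Could not reproduce it.
// NOTE(review): `match` stores const char* positions into the temporary
// returned by text.substr(...); that temporary is destroyed at the end of
// the if-condition, so reading `match` inside this block is likely the
// use-after-free being reported. Keeping the substring in a named variable
// that outlives `match` should resolve it (to be confirmed).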
|
||||
|
|
Loading…
Reference in New Issue
Block a user