Move is_whitespace() and urldecode() to helpers.

This commit is contained in:
tastytea 2021-05-30 21:52:52 +02:00
parent af9563e669
commit 7ddfe32e30
Signed by: tastytea
GPG Key ID: CFC39497F1B26E07
6 changed files with 63 additions and 38 deletions

38
src/helpers.cpp Normal file
View File

@ -0,0 +1,38 @@
#include "helpers.hpp"
#include <boost/regex.hpp>
#include <codecvt>
#include <locale>
#include <map>
#include <string_view>
namespace epubgrep::helpers
{
// Tell whether `check` is one of the four characters treated as whitespace:
// space, line feed, carriage return or horizontal tab.
bool is_whitespace(const char check)
{
    switch (check)
    {
    case ' ':
    case '\n':
    case '\r':
    case '\t':
        return true;
    default:
        // Everything else, including '\v', '\f' and '\0', is not whitespace.
        return false;
    }
}
// Decode percent-encoded text as described in RFC 3986, section 2.1.
//
// Every '%' that is followed by exactly two hexadecimal digits is replaced by
// the byte with that value. Anything else is copied to the result unchanged,
// including a '%' that is not followed by two hexadecimal digits (for example
// a trailing "%" or "%4", or "%zz"). Malformed input therefore never throws.
std::string urldecode(const std::string_view url)
{ // RFC 3986, section 2.1.
    // Numeric value of a hexadecimal digit, or -1 if c is not one.
    const auto hex_value = [](const char c) -> int
    {
        if (c >= '0' && c <= '9')
        {
            return c - '0';
        }
        if (c >= 'a' && c <= 'f')
        {
            return c - 'a' + 10;
        }
        if (c >= 'A' && c <= 'F')
        {
            return c - 'A' + 10;
        }
        return -1;
    };

    std::string decoded;
    decoded.reserve(url.size()); // The result is never longer than the input.

    size_t pos{0};
    while (pos < url.size())
    {
        // Only look at the two following characters if they exist.
        if (url[pos] == '%' && url.size() - pos >= 3)
        {
            const int high{hex_value(url[pos + 1])};
            const int low{hex_value(url[pos + 2])};
            if (high >= 0 && low >= 0)
            {
                decoded += static_cast<char>(high * 16 + low);
                pos += 3;
                continue;
            }
        }

        // Ordinary character, or a '%' that does not start a valid escape.
        decoded += url[pos];
        ++pos;
    }

    return decoded;
}
} // namespace epubgrep::helpers

16
src/helpers.hpp Normal file
View File

@ -0,0 +1,16 @@
#ifndef EPUBGREP_HELPERS_HPP
#define EPUBGREP_HELPERS_HPP
#include <string>
namespace epubgrep::helpers
{
//! Return true if check is whitespace, that is a space, line feed, carriage
//! return or horizontal tab. Every other character yields false.
[[nodiscard]] bool is_whitespace(char check);
//! Decode percent-encoded text (RFC 3986, section 2.1). Used for restricted
//! characters in URLs. Each "%XX", where XX are two hexadecimal digits, is
//! replaced by the byte with that value; the text around it is copied as is.
[[nodiscard]] std::string urldecode(std::string_view url);
} // namespace epubgrep::helpers
#endif // EPUBGREP_HELPERS_HPP

View File

@ -17,6 +17,7 @@
#include "search.hpp"
#include "fs-compat.hpp"
#include "helpers.hpp"
#include "zip.hpp"
#include <boost/regex.hpp>
@ -204,7 +205,7 @@ match_context context(const boost::match_results<string::const_iterator> &match,
whitespace.end());
if (pos_before != rend_before)
{
while (is_whitespace(*pos_before))
while (helpers::is_whitespace(*pos_before))
{
++pos_before;
}
@ -218,7 +219,7 @@ match_context context(const boost::match_results<string::const_iterator> &match,
whitespace.end());
if (pos_after != end_after)
{
while (is_whitespace(*pos_after))
while (helpers::is_whitespace(*pos_after))
{
++pos_after;
}
@ -230,11 +231,11 @@ match_context context(const boost::match_results<string::const_iterator> &match,
const std::string before_reversed(rbegin_before, pos_before);
string before(before_reversed.rbegin(), before_reversed.rend());
std::string after(begin_after, pos_after);
while (is_whitespace(*before.begin()))
while (helpers::is_whitespace(*before.begin()))
{
before.erase(0, 1);
}
while (is_whitespace(*after.rbegin()))
while (helpers::is_whitespace(*after.rbegin()))
{
after.erase(after.size() - 1);
}
@ -250,11 +251,11 @@ string headline(const std::string_view prefix)
pos += 3;
string result{prefix.substr(pos, prefix.find('<', pos) - pos)};
while (is_whitespace(*result.begin()))
while (helpers::is_whitespace(*result.begin()))
{
result.erase(0, 1);
}
while (is_whitespace(*result.rbegin()))
while (helpers::is_whitespace(*result.rbegin()))
{
result.erase(result.size() - 1);
}
@ -277,11 +278,4 @@ string page(const std::string_view prefix)
return {};
}
bool is_whitespace(const char check)
{
const std::array<char, 4> whitespace{' ', '\n', '\r', '\t'};
return std::any_of(whitespace.begin(), whitespace.end(),
[&check](const char ws) { return check == ws; });
}
} // namespace epubgrep::search

View File

@ -71,9 +71,6 @@ context(const boost::match_results<std::string::const_iterator> &match,
//! Return current page if possible.
[[nodiscard]] std::string page(std::string_view prefix);
//! Return true if check is whitespace.
[[nodiscard]] bool is_whitespace(char check);
} // namespace epubgrep::search
#endif // EPUBGREP_SEARCH_HPP

View File

@ -17,6 +17,7 @@
#include "zip.hpp"
#include "fs-compat.hpp"
#include "helpers.hpp"
#include <archive.h>
#include <archive_entry.h>
@ -210,7 +211,7 @@ std::vector<std::string> list_spine(const fs::path &filepath)
const auto &idref{itemref.attribute("idref").value()};
const auto &item{manifest.find_child_by_attribute("id", idref)};
const std::string href{
urldecode(item.attribute("href").value())};
helpers::urldecode(item.attribute("href").value())};
if (href[0] != '/')
{
spine_filepaths.emplace_back(
@ -241,22 +242,4 @@ std::vector<std::string> list_spine(const fs::path &filepath)
return spine_filepaths;
}
std::string urldecode(const std::string_view url)
{ // RFC 3986, section 2.1.
size_t pos{0};
size_t lastpos{0};
std::string decoded;
while ((pos = url.find('%', pos)) != std::string_view::npos)
{
decoded += url.substr(lastpos, pos - lastpos);
decoded += static_cast<char>(
std::stoul(std::string(url.substr(pos + 1, 2)), nullptr, 16));
pos += 3;
lastpos = pos;
}
decoded += url.substr(lastpos);
return decoded;
}
} // namespace epubgrep::zip

View File

@ -46,9 +46,6 @@ void close_file(struct archive *zipfile, const fs::path &filepath);
//! Returns the files in the EPUB “spine” (all pages that are actually text).
[[nodiscard]] std::vector<std::string> list_spine(const fs::path &filepath);
//! Decode percent-encoding. Used for restricted characters in URLs.
[[nodiscard]] std::string urldecode(std::string_view url);
//! It's std::runtime_error, but with another name.
class exception : public std::runtime_error
{