Move is_whitespace() and urldecode() to helpers.
This commit is contained in:
parent
af9563e669
commit
7ddfe32e30
|
@ -0,0 +1,38 @@
|
|||
#include "helpers.hpp"
|
||||
|
||||
#include <boost/regex.hpp>
|
||||
|
||||
#include <codecvt>
|
||||
#include <locale>
|
||||
#include <map>
|
||||
#include <string_view>
|
||||
|
||||
namespace epubgrep::helpers
|
||||
{
|
||||
|
||||
//! Return true if check is a space, line feed, carriage return or
//! horizontal tab; false for every other character.
//!
//! Implemented as a plain switch so that it does not depend on <array> or
//! <algorithm> being pulled in transitively by some other header.
bool is_whitespace(const char check)
{
    switch (check)
    {
    case ' ':
    case '\n':
    case '\r':
    case '\t':
        return true;
    default:
        return false;
    }
}
|
||||
|
||||
//! Decode percent-encoded text (RFC 3986, section 2.1).
//!
//! Every "%XY", where X and Y are both hexadecimal digits (upper or lower
//! case), is replaced by the single byte with that value. All other text is
//! copied unchanged.
//!
//! A '%' that is NOT followed by two hexadecimal digits (truncated input such
//! as "abc%4", or garbage such as "%zz") is not a valid escape. It is copied
//! to the output literally instead of throwing or consuming the characters
//! that follow it.
std::string urldecode(const std::string_view url)
{
    // Numeric value of a hexadecimal digit, or -1 if c is not one.
    const auto hex_value{[](const char c) -> int
                         {
                             if (c >= '0' && c <= '9')
                             {
                                 return c - '0';
                             }
                             if (c >= 'a' && c <= 'f')
                             {
                                 return c - 'a' + 10;
                             }
                             if (c >= 'A' && c <= 'F')
                             {
                                 return c - 'A' + 10;
                             }
                             return -1;
                         }};

    std::string decoded;
    decoded.reserve(url.size()); // The result is never longer than the input.

    std::string_view::size_type pos{0};     // Position of the current '%'.
    std::string_view::size_type lastpos{0}; // Start of text not yet copied.
    while ((pos = url.find('%', pos)) != std::string_view::npos)
    {
        // Bounds are checked before indexing, so a '%' near the end of the
        // input can never read (or later slice) past url.size().
        const int high{pos + 1 < url.size() ? hex_value(url[pos + 1]) : -1};
        const int low{pos + 2 < url.size() ? hex_value(url[pos + 2]) : -1};
        if (high < 0 || low < 0)
        {
            // Not a valid escape: leave the '%' in the pending literal text
            // and resume the search right behind it.
            ++pos;
            continue;
        }

        decoded += url.substr(lastpos, pos - lastpos);
        decoded += static_cast<char>(high * 16 + low);
        pos += 3;
        lastpos = pos;
    }
    decoded += url.substr(lastpos);

    return decoded;
}
|
||||
|
||||
} // namespace epubgrep::helpers
|
|
@ -0,0 +1,16 @@
|
|||
#ifndef EPUBGREP_HELPERS_HPP
#define EPUBGREP_HELPERS_HPP

#include <string>
#include <string_view> // std::string_view is used in urldecode()'s signature.

namespace epubgrep::helpers
{

//! Return true if check is whitespace (space, line feed, carriage return or
//! horizontal tab).
[[nodiscard]] bool is_whitespace(char check);

//! Decode percent-encoded text (RFC 3986, section 2.1) and return the
//! decoded copy. Used for reserved characters in URLs.
[[nodiscard]] std::string urldecode(std::string_view url);

} // namespace epubgrep::helpers

#endif // EPUBGREP_HELPERS_HPP
|
|
@ -17,6 +17,7 @@
|
|||
#include "search.hpp"
|
||||
|
||||
#include "fs-compat.hpp"
|
||||
#include "helpers.hpp"
|
||||
#include "zip.hpp"
|
||||
|
||||
#include <boost/regex.hpp>
|
||||
|
@ -204,7 +205,7 @@ match_context context(const boost::match_results<string::const_iterator> &match,
|
|||
whitespace.end());
|
||||
if (pos_before != rend_before)
|
||||
{
|
||||
while (is_whitespace(*pos_before))
|
||||
while (helpers::is_whitespace(*pos_before))
|
||||
{
|
||||
++pos_before;
|
||||
}
|
||||
|
@ -218,7 +219,7 @@ match_context context(const boost::match_results<string::const_iterator> &match,
|
|||
whitespace.end());
|
||||
if (pos_after != end_after)
|
||||
{
|
||||
while (is_whitespace(*pos_after))
|
||||
while (helpers::is_whitespace(*pos_after))
|
||||
{
|
||||
++pos_after;
|
||||
}
|
||||
|
@ -230,11 +231,11 @@ match_context context(const boost::match_results<string::const_iterator> &match,
|
|||
const std::string before_reversed(rbegin_before, pos_before);
|
||||
string before(before_reversed.rbegin(), before_reversed.rend());
|
||||
std::string after(begin_after, pos_after);
|
||||
while (is_whitespace(*before.begin()))
|
||||
while (helpers::is_whitespace(*before.begin()))
|
||||
{
|
||||
before.erase(0, 1);
|
||||
}
|
||||
while (is_whitespace(*after.rbegin()))
|
||||
while (helpers::is_whitespace(*after.rbegin()))
|
||||
{
|
||||
after.erase(after.size() - 1);
|
||||
}
|
||||
|
@ -250,11 +251,11 @@ string headline(const std::string_view prefix)
|
|||
pos += 3;
|
||||
string result{prefix.substr(pos, prefix.find('<', pos) - pos)};
|
||||
|
||||
while (is_whitespace(*result.begin()))
|
||||
while (helpers::is_whitespace(*result.begin()))
|
||||
{
|
||||
result.erase(0, 1);
|
||||
}
|
||||
while (is_whitespace(*result.rbegin()))
|
||||
while (helpers::is_whitespace(*result.rbegin()))
|
||||
{
|
||||
result.erase(result.size() - 1);
|
||||
}
|
||||
|
@ -277,11 +278,4 @@ string page(const std::string_view prefix)
|
|||
return {};
|
||||
}
|
||||
|
||||
bool is_whitespace(const char check)
|
||||
{
|
||||
const std::array<char, 4> whitespace{' ', '\n', '\r', '\t'};
|
||||
return std::any_of(whitespace.begin(), whitespace.end(),
|
||||
[&check](const char ws) { return check == ws; });
|
||||
}
|
||||
|
||||
} // namespace epubgrep::search
|
||||
|
|
|
@ -71,9 +71,6 @@ context(const boost::match_results<std::string::const_iterator> &match,
|
|||
//! Return current page if possible.
|
||||
[[nodiscard]] std::string page(std::string_view prefix);
|
||||
|
||||
//! Return true if check is whitespace.
|
||||
[[nodiscard]] bool is_whitespace(char check);
|
||||
|
||||
} // namespace epubgrep::search
|
||||
|
||||
#endif // EPUBGREP_SEARCH_HPP
|
||||
|
|
21
src/zip.cpp
21
src/zip.cpp
|
@ -17,6 +17,7 @@
|
|||
#include "zip.hpp"
|
||||
|
||||
#include "fs-compat.hpp"
|
||||
#include "helpers.hpp"
|
||||
|
||||
#include <archive.h>
|
||||
#include <archive_entry.h>
|
||||
|
@ -210,7 +211,7 @@ std::vector<std::string> list_spine(const fs::path &filepath)
|
|||
const auto &idref{itemref.attribute("idref").value()};
|
||||
const auto &item{manifest.find_child_by_attribute("id", idref)};
|
||||
const std::string href{
|
||||
urldecode(item.attribute("href").value())};
|
||||
helpers::urldecode(item.attribute("href").value())};
|
||||
if (href[0] != '/')
|
||||
{
|
||||
spine_filepaths.emplace_back(
|
||||
|
@ -241,22 +242,4 @@ std::vector<std::string> list_spine(const fs::path &filepath)
|
|||
return spine_filepaths;
|
||||
}
|
||||
|
||||
std::string urldecode(const std::string_view url)
|
||||
{ // RFC 3986, section 2.1.
|
||||
size_t pos{0};
|
||||
size_t lastpos{0};
|
||||
std::string decoded;
|
||||
while ((pos = url.find('%', pos)) != std::string_view::npos)
|
||||
{
|
||||
decoded += url.substr(lastpos, pos - lastpos);
|
||||
decoded += static_cast<char>(
|
||||
std::stoul(std::string(url.substr(pos + 1, 2)), nullptr, 16));
|
||||
pos += 3;
|
||||
lastpos = pos;
|
||||
}
|
||||
decoded += url.substr(lastpos);
|
||||
|
||||
return decoded;
|
||||
}
|
||||
|
||||
} // namespace epubgrep::zip
|
||||
|
|
|
@ -46,9 +46,6 @@ void close_file(struct archive *zipfile, const fs::path &filepath);
|
|||
//! Returns the files in the EPUB “spine” (all pages that are actually text).
|
||||
[[nodiscard]] std::vector<std::string> list_spine(const fs::path &filepath);
|
||||
|
||||
//! Decode percent-encoding. Used for restricted characters in URLs.
|
||||
[[nodiscard]] std::string urldecode(std::string_view url);
|
||||
|
||||
//! It's std::runtime_error, but with another name.
|
||||
class exception : public std::runtime_error
|
||||
{
|
||||
|
|
Loading…
Reference in New Issue