Move is_whitespace() and urldecode() to helpers.
This commit is contained in:
parent
af9563e669
commit
7ddfe32e30
|
@ -0,0 +1,38 @@
|
||||||
|
#include "helpers.hpp"
|
||||||
|
|
||||||
|
#include <boost/regex.hpp>
|
||||||
|
|
||||||
|
#include <codecvt>
|
||||||
|
#include <locale>
|
||||||
|
#include <map>
|
||||||
|
#include <string_view>
|
||||||
|
|
||||||
|
namespace epubgrep::helpers
|
||||||
|
{
|
||||||
|
|
||||||
|
//! Return true if check is a space, line feed, carriage return or tab.
bool is_whitespace(const char check)
{
    // Only these four characters count; other blanks such as '\v' or '\f'
    // are deliberately not treated as whitespace here.
    switch (check)
    {
    case ' ':
    case '\n':
    case '\r':
    case '\t':
        return true;
    default:
        return false;
    }
}
|
||||||
|
|
||||||
|
//! Decode percent-encoded text (RFC 3986, section 2.1).
//!
//! Every "%XY", where X and Y are hexadecimal digits, is replaced by the byte
//! with that value. A '%' that is not followed by two hexadecimal digits
//! (including one at or near the end of the input) is copied through
//! unchanged, so malformed input never raises an exception.
std::string urldecode(const std::string_view url)
{
    // Value of a hexadecimal digit, or -1 if c is not one.
    const auto hex_value{[](const char c) -> int
    {
        if (c >= '0' && c <= '9')
        {
            return c - '0';
        }
        if (c >= 'a' && c <= 'f')
        {
            return c - 'a' + 10;
        }
        if (c >= 'A' && c <= 'F')
        {
            return c - 'A' + 10;
        }
        return -1;
    }};

    std::string decoded;
    decoded.reserve(url.size()); // The result is never longer than the input.

    size_t lastpos{0}; // Start of the text that has not been copied yet.
    size_t pos{0};
    while ((pos = url.find('%', pos)) != std::string_view::npos)
    {
        // Bounds are checked before indexing, a truncated escape yields -1.
        const int high{pos + 1 < url.size() ? hex_value(url[pos + 1]) : -1};
        const int low{pos + 2 < url.size() ? hex_value(url[pos + 2]) : -1};
        if (high < 0 || low < 0)
        {
            // Malformed escape: keep the '%' literally (it stays part of the
            // pending text) and continue searching behind it.
            ++pos;
            continue;
        }

        decoded += url.substr(lastpos, pos - lastpos);
        decoded += static_cast<char>(high * 16 + low);
        pos += 3;
        lastpos = pos;
    }
    decoded += url.substr(lastpos);

    return decoded;
}
|
||||||
|
|
||||||
|
} // namespace epubgrep::helpers
|
|
@ -0,0 +1,16 @@
|
||||||
|
#ifndef EPUBGREP_HELPERS_HPP
|
||||||
|
#define EPUBGREP_HELPERS_HPP
|
||||||
|
|
||||||
|
#include <string>
|
||||||
|
namespace epubgrep::helpers
|
||||||
|
{
|
||||||
|
|
||||||
|
//! Return true if check is a space, line feed, carriage return or tab.
|
||||||
|
[[nodiscard]] bool is_whitespace(char check);
|
||||||
|
|
||||||
|
//! Decode percent-encoded text. Used for restricted characters in URLs.
|
||||||
|
[[nodiscard]] std::string urldecode(std::string_view url);
|
||||||
|
|
||||||
|
} // namespace epubgrep::helpers
|
||||||
|
|
||||||
|
#endif // EPUBGREP_HELPERS_HPP
|
|
@ -17,6 +17,7 @@
|
||||||
#include "search.hpp"
|
#include "search.hpp"
|
||||||
|
|
||||||
#include "fs-compat.hpp"
|
#include "fs-compat.hpp"
|
||||||
|
#include "helpers.hpp"
|
||||||
#include "zip.hpp"
|
#include "zip.hpp"
|
||||||
|
|
||||||
#include <boost/regex.hpp>
|
#include <boost/regex.hpp>
|
||||||
|
@ -204,7 +205,7 @@ match_context context(const boost::match_results<string::const_iterator> &match,
|
||||||
whitespace.end());
|
whitespace.end());
|
||||||
if (pos_before != rend_before)
|
if (pos_before != rend_before)
|
||||||
{
|
{
|
||||||
while (is_whitespace(*pos_before))
|
while (helpers::is_whitespace(*pos_before))
|
||||||
{
|
{
|
||||||
++pos_before;
|
++pos_before;
|
||||||
}
|
}
|
||||||
|
@ -218,7 +219,7 @@ match_context context(const boost::match_results<string::const_iterator> &match,
|
||||||
whitespace.end());
|
whitespace.end());
|
||||||
if (pos_after != end_after)
|
if (pos_after != end_after)
|
||||||
{
|
{
|
||||||
while (is_whitespace(*pos_after))
|
while (helpers::is_whitespace(*pos_after))
|
||||||
{
|
{
|
||||||
++pos_after;
|
++pos_after;
|
||||||
}
|
}
|
||||||
|
@ -230,11 +231,11 @@ match_context context(const boost::match_results<string::const_iterator> &match,
|
||||||
const std::string before_reversed(rbegin_before, pos_before);
|
const std::string before_reversed(rbegin_before, pos_before);
|
||||||
string before(before_reversed.rbegin(), before_reversed.rend());
|
string before(before_reversed.rbegin(), before_reversed.rend());
|
||||||
std::string after(begin_after, pos_after);
|
std::string after(begin_after, pos_after);
|
||||||
while (is_whitespace(*before.begin()))
|
while (helpers::is_whitespace(*before.begin()))
|
||||||
{
|
{
|
||||||
before.erase(0, 1);
|
before.erase(0, 1);
|
||||||
}
|
}
|
||||||
while (is_whitespace(*after.rbegin()))
|
while (helpers::is_whitespace(*after.rbegin()))
|
||||||
{
|
{
|
||||||
after.erase(after.size() - 1);
|
after.erase(after.size() - 1);
|
||||||
}
|
}
|
||||||
|
@ -250,11 +251,11 @@ string headline(const std::string_view prefix)
|
||||||
pos += 3;
|
pos += 3;
|
||||||
string result{prefix.substr(pos, prefix.find('<', pos) - pos)};
|
string result{prefix.substr(pos, prefix.find('<', pos) - pos)};
|
||||||
|
|
||||||
while (is_whitespace(*result.begin()))
|
while (helpers::is_whitespace(*result.begin()))
|
||||||
{
|
{
|
||||||
result.erase(0, 1);
|
result.erase(0, 1);
|
||||||
}
|
}
|
||||||
while (is_whitespace(*result.rbegin()))
|
while (helpers::is_whitespace(*result.rbegin()))
|
||||||
{
|
{
|
||||||
result.erase(result.size() - 1);
|
result.erase(result.size() - 1);
|
||||||
}
|
}
|
||||||
|
@ -277,11 +278,4 @@ string page(const std::string_view prefix)
|
||||||
return {};
|
return {};
|
||||||
}
|
}
|
||||||
|
|
||||||
bool is_whitespace(const char check)
|
|
||||||
{
|
|
||||||
const std::array<char, 4> whitespace{' ', '\n', '\r', '\t'};
|
|
||||||
return std::any_of(whitespace.begin(), whitespace.end(),
|
|
||||||
[&check](const char ws) { return check == ws; });
|
|
||||||
}
|
|
||||||
|
|
||||||
} // namespace epubgrep::search
|
} // namespace epubgrep::search
|
||||||
|
|
|
@ -71,9 +71,6 @@ context(const boost::match_results<std::string::const_iterator> &match,
|
||||||
//! Return current page if possible.
|
//! Return current page if possible.
|
||||||
[[nodiscard]] std::string page(std::string_view prefix);
|
[[nodiscard]] std::string page(std::string_view prefix);
|
||||||
|
|
||||||
//! Return true if check is whitespace.
|
|
||||||
[[nodiscard]] bool is_whitespace(char check);
|
|
||||||
|
|
||||||
} // namespace epubgrep::search
|
} // namespace epubgrep::search
|
||||||
|
|
||||||
#endif // EPUBGREP_SEARCH_HPP
|
#endif // EPUBGREP_SEARCH_HPP
|
||||||
|
|
21
src/zip.cpp
21
src/zip.cpp
|
@ -17,6 +17,7 @@
|
||||||
#include "zip.hpp"
|
#include "zip.hpp"
|
||||||
|
|
||||||
#include "fs-compat.hpp"
|
#include "fs-compat.hpp"
|
||||||
|
#include "helpers.hpp"
|
||||||
|
|
||||||
#include <archive.h>
|
#include <archive.h>
|
||||||
#include <archive_entry.h>
|
#include <archive_entry.h>
|
||||||
|
@ -210,7 +211,7 @@ std::vector<std::string> list_spine(const fs::path &filepath)
|
||||||
const auto &idref{itemref.attribute("idref").value()};
|
const auto &idref{itemref.attribute("idref").value()};
|
||||||
const auto &item{manifest.find_child_by_attribute("id", idref)};
|
const auto &item{manifest.find_child_by_attribute("id", idref)};
|
||||||
const std::string href{
|
const std::string href{
|
||||||
urldecode(item.attribute("href").value())};
|
helpers::urldecode(item.attribute("href").value())};
|
||||||
if (href[0] != '/')
|
if (href[0] != '/')
|
||||||
{
|
{
|
||||||
spine_filepaths.emplace_back(
|
spine_filepaths.emplace_back(
|
||||||
|
@ -241,22 +242,4 @@ std::vector<std::string> list_spine(const fs::path &filepath)
|
||||||
return spine_filepaths;
|
return spine_filepaths;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::string urldecode(const std::string_view url)
|
|
||||||
{ // RFC 3986, section 2.1.
|
|
||||||
size_t pos{0};
|
|
||||||
size_t lastpos{0};
|
|
||||||
std::string decoded;
|
|
||||||
while ((pos = url.find('%', pos)) != std::string_view::npos)
|
|
||||||
{
|
|
||||||
decoded += url.substr(lastpos, pos - lastpos);
|
|
||||||
decoded += static_cast<char>(
|
|
||||||
std::stoul(std::string(url.substr(pos + 1, 2)), nullptr, 16));
|
|
||||||
pos += 3;
|
|
||||||
lastpos = pos;
|
|
||||||
}
|
|
||||||
decoded += url.substr(lastpos);
|
|
||||||
|
|
||||||
return decoded;
|
|
||||||
}
|
|
||||||
|
|
||||||
} // namespace epubgrep::zip
|
} // namespace epubgrep::zip
|
||||||
|
|
|
@ -46,9 +46,6 @@ void close_file(struct archive *zipfile, const fs::path &filepath);
|
||||||
//! Returns the files in the EPUB “spine” (all pages that are actually text).
|
//! Returns the files in the EPUB “spine” (all pages that are actually text).
|
||||||
[[nodiscard]] std::vector<std::string> list_spine(const fs::path &filepath);
|
[[nodiscard]] std::vector<std::string> list_spine(const fs::path &filepath);
|
||||||
|
|
||||||
//! Decode percent-encoding. Used for restricted characters in URLs.
|
|
||||||
[[nodiscard]] std::string urldecode(std::string_view url);
|
|
||||||
|
|
||||||
//! It's std::runtime_error, but with another name.
|
//! It's std::runtime_error, but with another name.
|
||||||
class exception : public std::runtime_error
|
class exception : public std::runtime_error
|
||||||
{
|
{
|
||||||
|
|
Loading…
Reference in New Issue