replaced own unescape_html() with the one from mastodon-cpp
This commit is contained in:
parent
a184b9f231
commit
03d3e0e179
|
@ -1,6 +1,6 @@
|
||||||
cmake_minimum_required (VERSION 3.7)
|
cmake_minimum_required (VERSION 3.7)
|
||||||
project (mastorss
|
project (mastorss
|
||||||
VERSION 0.5.15
|
VERSION 0.5.16
|
||||||
LANGUAGES CXX
|
LANGUAGES CXX
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
|
@ -14,7 +14,7 @@ The documentation is far from complete, sorry.
|
||||||
* [cmake](https://cmake.org/) (tested: 3.9.6)
|
* [cmake](https://cmake.org/) (tested: 3.9.6)
|
||||||
* [boost](http://www.boost.org/) (tested: 1.63.0)
|
* [boost](http://www.boost.org/) (tested: 1.63.0)
|
||||||
* [curlpp](http://www.curlpp.org/) (tested: 0.8.1)
|
* [curlpp](http://www.curlpp.org/) (tested: 0.8.1)
|
||||||
* [mastodon-cpp](https://github.com/tastytea/mastodon-cpp) (at least: 0.8.6)
|
* [mastodon-cpp](https://github.com/tastytea/mastodon-cpp) (at least: 0.12.0)
|
||||||
* [jsoncpp](https://github.com/open-source-parsers/jsoncpp) (tested: 1.8.4)
|
* [jsoncpp](https://github.com/open-source-parsers/jsoncpp) (tested: 1.8.4)
|
||||||
|
|
||||||
## Get sourcecode
|
## Get sourcecode
|
||||||
|
|
|
@ -33,46 +33,6 @@ using std::cerr;
|
||||||
using std::string;
|
using std::string;
|
||||||
namespace pt = boost::property_tree;
|
namespace pt = boost::property_tree;
|
||||||
|
|
||||||
// Appends the UTF-8 encoding of codepoint to out. Returns false and leaves out
// untouched if codepoint is not a Unicode scalar value (a surrogate or a value
// above U+10FFFF).
static bool append_utf8(std::string &out, const char32_t codepoint)
{
    if (codepoint > 0x10FFFF || (codepoint >= 0xD800 && codepoint <= 0xDFFF))
    {
        return false;
    }

    if (codepoint < 0x80)
    {
        out += static_cast<char>(codepoint);
    }
    else if (codepoint < 0x800)
    {
        out += static_cast<char>(0xC0 | (codepoint >> 6));
        out += static_cast<char>(0x80 | (codepoint & 0x3F));
    }
    else if (codepoint < 0x10000)
    {
        out += static_cast<char>(0xE0 | (codepoint >> 12));
        out += static_cast<char>(0x80 | ((codepoint >> 6) & 0x3F));
        out += static_cast<char>(0x80 | (codepoint & 0x3F));
    }
    else
    {
        out += static_cast<char>(0xF0 | (codepoint >> 18));
        out += static_cast<char>(0x80 | ((codepoint >> 12) & 0x3F));
        out += static_cast<char>(0x80 | ((codepoint >> 6) & 0x3F));
        out += static_cast<char>(0x80 | (codepoint & 0x3F));
    }
    return true;
}

// Replaces HTML character references in str, in place, with the characters
// they stand for (encoded as UTF-8):
//   * decimal references such as &#123;
//   * hexadecimal references such as &#x7B; (hex digits a-f / A-F included)
//   * the named entities &lt; &gt; &amp; &quot; &apos;
//
// The input is scanned exactly once and decoded output is never re-scanned,
// so "&amp;lt;" becomes "&lt;" and not "<". Numeric references that do not
// name a valid Unicode scalar value are left in the text unchanged instead of
// throwing.
void unescape_html(std::string &str)
{
    // Group 1: hex digits, group 2: decimal digits, group 3: entity name.
    static const std::regex re_entity(
        "&(?:#[xX]([0-9a-fA-F]{1,8})|#([0-9]{1,8})|(lt|gt|amp|quot|apos));");

    std::string result;
    result.reserve(str.size());
    auto tail = str.cbegin();   // First character not yet copied to result.

    const std::sregex_iterator end;
    for (std::sregex_iterator it(str.cbegin(), str.cend(), re_entity);
         it != end; ++it)
    {
        const std::smatch &match = *it;
        result.append(tail, match[0].first);
        tail = match[0].second;

        if (match[3].matched)
        {
            const std::string name = match[3].str();
            if (name == "lt")
            {
                result += '<';
            }
            else if (name == "gt")
            {
                result += '>';
            }
            else if (name == "amp")
            {
                result += '&';
            }
            else if (name == "quot")
            {
                result += '"';
            }
            else    // apos
            {
                result += '\'';
            }
            continue;
        }

        // At most 8 digits, so the value always fits into unsigned long and
        // std::stoul can not throw std::out_of_range here.
        const bool is_hex = match[1].matched;
        const std::string digits = is_hex ? match[1].str() : match[2].str();
        const char32_t codepoint =
            static_cast<char32_t>(std::stoul(digits, nullptr, is_hex ? 16 : 10));

        if (!append_utf8(result, codepoint))
        {
            // Not a valid code point: keep the reference as it was written.
            result.append(match[0].first, match[0].second);
        }
    }
    result.append(tail, str.cend());

    str.swap(result);
}
|
|
||||||
|
|
||||||
std::vector<string> parse_website(const string &xml)
|
std::vector<string> parse_website(const string &xml)
|
||||||
{
|
{
|
||||||
Json::Value list;
|
Json::Value list;
|
||||||
|
@ -157,7 +117,7 @@ std::vector<string> parse_website(const string &xml)
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
unescape_html(str);
|
Mastodon::API::unescape_html(str);
|
||||||
|
|
||||||
// Try to turn the HTML into human-readable text
|
// Try to turn the HTML into human-readable text
|
||||||
std::regex reparagraph("<p>");
|
std::regex reparagraph("<p>");
|
||||||
|
|
Loading…
Reference in New Issue
Block a user