rewrote parse_website() to return vector of Mastodon::Easy::Status
parent 473442f612
commit 929cb540d4
@@ -5,6 +5,7 @@
 #include <string>
 #include <vector>
 #include <jsoncpp/json/json.h>
+#include <mastodon-cpp/easy/easy.hpp>

 using std::string;

@@ -16,7 +17,7 @@ extern std::string profile;
 std::uint16_t read_config(string &instance, string &access_token, string &feedurl);
 const bool write_config();

-std::vector<string> parse_website(const string &xml);
+std::vector<Mastodon::Easy::Status> parse_website(const string &xml);
 void individual_fixes(string &str);

 const std::uint16_t http_get(const string &feedurl,
@@ -27,13 +27,14 @@
 #include <boost/property_tree/ptree.hpp>
 #include <boost/property_tree/xml_parser.hpp>
 #include <mastodon-cpp/mastodon-cpp.hpp>
+#include <mastodon-cpp/easy/all.hpp>
 #include "mastorss.hpp"

 using std::cerr;
 using std::string;
 namespace pt = boost::property_tree;

-std::vector<string> parse_website(const string &xml)
+std::vector<Mastodon::Easy::Status> parse_website(const string &xml)
 {
     Json::Value list;
     std::vector<string> watchwords;
@@ -67,7 +68,7 @@ std::vector<string> parse_website(const string &xml)
     pt::ptree rss;
     std::istringstream iss(xml);
     pt::read_xml(iss, rss);
-    std::vector<string> ret;
+    std::vector<Mastodon::Easy::Status> ret;

     for (const pt::ptree::value_type &v : rss.get_child("rss.channel"))
     {
@@ -79,15 +80,28 @@ std::vector<string> parse_website(const string &xml)
         string link = v.second.get_child("link").data();
         string desc = v.second.get_child("description").data();

-        string str = title;
+        Mastodon::Easy::Status status;
+        string content = "";
+        if (config[profile]["titles_as_cw"].asBool())
+        {
+            status.spoiler_text(title);
+        }
+        else
+        {
+            content = title;
+        }
         if (!config[profile]["titles_only"].asBool())
         {
-            str += "\n\n" + desc;
+            if (!content.empty())
+            {
+                content += "\n\n";
+            }
+            content += desc;

             // Shrink overly long texts, to speed up replace operations
-            if (str.length() > 2000)
+            if (content.length() > 2000)
             {
-                str.resize(2000);
+                content.resize(2000);
             }
         }

@@ -117,7 +131,7 @@ std::vector<string> parse_website(const string &xml)
             continue;
         }

-        str = Mastodon::API::unescape_html(str);
+        content = Mastodon::API::unescape_html(content);

         // Try to turn the HTML into human-readable text
         std::regex reparagraph("<p>");
@@ -125,49 +139,52 @@ std::vector<string> parse_website(const string &xml)
         std::regex recdata2("\\]\\]>");
         std::regex restrip("<[^>]*>");

-        individual_fixes(str);
+        individual_fixes(content);

-        str = std::regex_replace(str, reparagraph, "\n\n");
-        str = std::regex_replace(str, recdata1, "");
-        str = std::regex_replace(str, recdata2, "");
-        str = std::regex_replace(str, restrip, "");
-        str = std::regex_replace(str, std::regex("\\r"), ""); // remove \r
+        content = std::regex_replace(content, reparagraph, "\n\n");
+        content = std::regex_replace(content, recdata1, "");
+        content = std::regex_replace(content, recdata2, "");
+        content = std::regex_replace(content, restrip, "");
+        // remove \r
+        content = std::regex_replace(content, std::regex("\\r"), "");
         // replace NO-BREAK SPACE with space (UTF-8: 0xc2a0)
-        str = std::regex_replace(str, std::regex("\u00a0"), " ");
-        str = std::regex_replace(str, std::regex("\\n[ \t]+\\n"), ""); // remove whitespace between newlines
-        str = std::regex_replace(str, std::regex("\\n{3,}"), "\n\n"); // remove excess newlines
+        content = std::regex_replace(content, std::regex("\u00a0"), " ");
+        // remove whitespace between newlines
+        content = std::regex_replace(content, std::regex("\\n[ \t]+\\n"), "");
+        // remove excess newlines
+        content = std::regex_replace(content, std::regex("\\n{3,}"), "\n\n");

         for (const string &hashtag : watchwords)
         {
-            std::regex rehashtag("([[:space:][:punct:]]|^)(" + hashtag + ")([[:space:][:punct:]]|$)",
-                                 std::regex_constants::icase);
-            str = std::regex_replace(str, rehashtag, "$1#$2$3",
+            std::regex rehashtag("([[:space:][:punct:]]|^)(" + hashtag +
+                                 ")([[:space:][:punct:]]|$)", std::regex_constants::icase);
+            content = std::regex_replace(content, rehashtag, "$1#$2$3",
                                      std::regex_constants::format_first_only);
         }
         // Why is this necessary? Why does ##hashtag happen?
-        str = std::regex_replace(str, std::regex("##"), "#");
-        if ((str.size() + link.size()) > static_cast<std::uint16_t>(max_size - 15))
+        content = std::regex_replace(content, std::regex("##"), "#");
+        if ((content.size() + link.size()) > static_cast<std::uint16_t>(max_size - 15))
         {
-            str.resize((max_size - link.size() -
-                        config[profile]["append"].asString().length()
-                        - 4));
-            str.resize(str.rfind(' ')); // Cut at word boundary
-            str += " […]";
+            content.resize((max_size - link.size() -
+                            config[profile]["append"].asString().length() - 4));
+            content.resize(content.rfind(' ')); // Cut at word boundary
+            content += " […]";
         }
         // Remove trailing newlines
-        while (str.back() == '\n' ||
-               str.back() == '\r')
+        while (content.back() == '\n' ||
+               content.back() == '\r')
         {
-            str.resize(str.length() - 1);
+            content.resize(content.length() - 1);
         }

-        str += "\n\n" + link;
+        content += "\n\n" + link;

         if (!config[profile]["append"].empty())
         {
-            str += "\n\n" + config[profile]["append"].asString();
+            content += "\n\n" + config[profile]["append"].asString();
         }
-        ret.push_back(str);
+        status.content(content);
+        ret.push_back(status);
         }
     }
 }
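For context (not part of the commit): a minimal sketch of how a caller might consume the new return type. It assumes the usual getter overloads content() and spoiler_text() on Mastodon::Easy::Status, that the profile configuration has already been loaded (e.g. via read_config()), and that the raw feed XML is simply piped in on stdin instead of being fetched with http_get(); the main() below is purely illustrative and links against mastorss's parse code and mastodon-cpp.

// Usage sketch only -- not part of this commit.
// Assumptions: content()/spoiler_text() getters on Mastodon::Easy::Status,
// profile config loaded beforehand, XML provided on stdin.
#include <iostream>
#include <sstream>
#include <string>
#include <vector>

#include <mastodon-cpp/easy/all.hpp>
#include "mastorss.hpp"

int main()
{
    // Read the raw RSS XML from stdin instead of fetching it over HTTP.
    std::ostringstream buffer;
    buffer << std::cin.rdbuf();
    const std::string xml = buffer.str();

    // parse_website() now returns ready-made Status entities, so the
    // content warning (spoiler text) and the body travel together.
    const std::vector<Mastodon::Easy::Status> statuses = parse_website(xml);

    for (const Mastodon::Easy::Status &status : statuses)
    {
        if (!status.spoiler_text().empty())
        {
            std::cout << "CW: " << status.spoiler_text() << '\n';
        }
        std::cout << status.content() << "\n----\n";
    }

    return 0;
}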