From af1993c71f4c35345f8bb5f88ccd7e7976f8f1dc Mon Sep 17 00:00:00 2001
From: tastytea <tastytea@tastytea.de>
Date: Sun, 2 Feb 2020 15:01:50 +0100
Subject: [PATCH 1/2] Add hyperlinks to git commands in contributing
 guidelines.

---
 CONTRIBUTING.adoc | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/CONTRIBUTING.adoc b/CONTRIBUTING.adoc
index eab82a8..720981f 100644
--- a/CONTRIBUTING.adoc
+++ b/CONTRIBUTING.adoc
@@ -4,6 +4,8 @@
 :contact-email: tastytea@tastytea.de
 :contact-xmpp: {contact-email}
 :contact-fediverse: https://likeable.space/users/tastytea
+:uri-git-format-patch: https://git-scm.com/docs/git-format-patch
+:uri-git-send-email: https://git-scm.com/docs/git-send-email
 
 == How to contribute
 
@@ -26,4 +28,5 @@ Please use similar coding conventions as the rest of the project. The basic rule
 to remember is to write code in the same style as the existing/surrounding code.
 
 You can also send me your patches via mailto:{contact-email}[E-Mail], ideally
-using `git format-patch` or `git send-email`.
+using link:{uri-git-format-patch}[git format-patch] or
+link:{uri-git-send-email}[git send-email].

From 3bbc24ba577e5caab57cb9e5abb934483af9e3c8 Mon Sep 17 00:00:00 2001
From: tastytea <tastytea@tastytea.de>
Date: Sun, 2 Feb 2020 15:02:19 +0100
Subject: [PATCH 2/2] Decrease loops drastically in unescape_html().

---
 src/helpers.cpp | 73 ++++++++++++++++++++++++++++---------------------
 1 file changed, 42 insertions(+), 31 deletions(-)

diff --git a/src/helpers.cpp b/src/helpers.cpp
index fedcf81..ce5f679 100644
--- a/src/helpers.cpp
+++ b/src/helpers.cpp
@@ -16,57 +16,34 @@
 
 #include "helpers.hpp"
 
-#include <array>
 #include <codecvt>
 #include <locale>
+#include <map>
 #include <regex>
+#include <stdexcept>
 #include <string_view>
-#include <utility>
 
 namespace mastodonpp
 {
 
-using std::array;
-using std::stol;
+using std::stoul;
 using std::codecvt_utf8;
 using std::wstring_convert;
+using std::map;
 using std::regex;
 using std::regex_search;
 using std::smatch;
 using std::string_view;
 using std::move;
-using std::pair;
 
 string unescape_html(string html)
 {
     string buffer{move(html)};
     string output;
 
-    // Used to convert int to utf-8 char.
-    wstring_convert<codecvt_utf8<char32_t>, char32_t> u8c;
-    // Matches numbered entities between 1 and 8 digits, decimal or hexadecimal.
-    const regex re_entity{"&#(x)?([[:alnum:]]{1,8});"};
-    smatch match;
-
-    while (regex_search(buffer, match, re_entity))
-    {
-        const char32_t codepoint{[&match]
-        {
-            // 'x' in front of the number means it's hexadecimal, else decimal.
-            if (match[1].length() == 1)
-            {
-                return static_cast<char32_t>(stol(match[2].str(), nullptr, 16));
-            }
-            return static_cast<char32_t>(stol(match[2].str(), nullptr, 10));
-        }()};
-        output += match.prefix().str() + u8c.to_bytes(codepoint);
-        buffer = match.suffix().str();
-    }
-    output += buffer;
-
     // Source: https://en.wikipedia.org/wiki/List_of_XML_and_HTML_character_
     //         entity_references#Character_entity_references_in_HTML
-    constexpr array<const pair<const string_view, const char32_t>, 258> names
+    const map<string_view, char32_t> names
         {{
             { "exclamation", 0x0021 },
             { "quot", 0x0022 },
@@ -328,11 +305,45 @@ string unescape_html(string html)
             { "diams", 0x2666 }
         }};
 
-    for (const auto &pair : names)
+    // Converts a Unicode code point to its UTF-8 encoded byte sequence.
+    wstring_convert<codecvt_utf8<char32_t>, char32_t> u8c;
+    // Matches numeric entities (1 to 8 alphanumeric characters, hexadecimal if
+    // prefixed with 'x', decimal otherwise) or named entities.
+    const regex re_entity{"&(#(x)?([[:alnum:]]{1,8})"
+        "|[^;[:space:][:punct:]]+);"};
+    smatch match;
+
+    while (regex_search(buffer, match, re_entity))
     {
-        const regex re((string("&") += pair.first) += ';');
-        output = regex_replace(output, re, u8c.to_bytes(pair.second));
+        output += match.prefix().str();
+        try
+        {
+            const char32_t codepoint{[&match, &names]
+            {
+                // If it doesn't start with a '#' it is a named entity.
+                if (match[1].str()[0] != '#')
+                {
+                    return names.at(match[1].str());
+                }
+                // 'x' after '#' means the number is hexadecimal.
+                if (match[2].length() == 1)
+                {
+                    return static_cast<char32_t>(stoul(match[3].str(),
+                                                      nullptr, 16));
+                }
+                // '#' without 'x' means the number is decimal.
+                return static_cast<char32_t>(stoul(match[3].str(),
+                                                   nullptr, 10));
+            }()};
+            output += u8c.to_bytes(codepoint);
+        }
+        catch (const std::out_of_range &) // Named entity could not be found.
+        {
+            output += match.str();
+        }
+        buffer = match.suffix().str();
     }
+    output += buffer;
 
     return output;
 }