From 28503cc3bdc54565dcfe27b719078c788b9576ba Mon Sep 17 00:00:00 2001
From: tastytea <tastytea@tastytea.de>
Date: Sat, 18 May 2019 01:47:10 +0200
Subject: [PATCH] Only attempt to extract title and description from HTML
 files.

---
 src/uri.cpp | 24 ++++++++++++++++++------
 1 file changed, 18 insertions(+), 6 deletions(-)
diff --git a/src/uri.cpp b/src/uri.cpp
index c5444b6..ca5763e 100644
--- a/src/uri.cpp
+++ b/src/uri.cpp
@@ -82,16 +82,28 @@ const html_extract URI::get()
 
 const string URI::extract_title(const string &html)
 {
-    smatch match;
-    regex_search(html, match, regex("<title>([^<]+)"));
-    return remove_newlines(match[1].str());
+    // Parse only if _uri starts with "http" or ends in .html/.htm/.xml/.rss etc.
+    const regex re_htmlfile("\\.(.?html?|xml|rss)$");
+    if (_uri.substr(0, 4) != "http" && !regex_search(_uri, re_htmlfile))
+    {
+        return "";
+    }
+    smatch title_match;
+    regex_search(html, title_match, regex("<title>([^<]+)"));
+    return remove_newlines(title_match[1].str());
 }
 
 const string URI::extract_description(const string &html)
 {
-    smatch match;
-    regex_search(html, match, regex("description\"[^>]+content=\"([^\"]+)"));
-    return remove_newlines(match[1].str());
+    // Parse only if _uri starts with "http" or ends in .html/.htm/.xml/.rss etc.
+    const regex re_htmlfile("\\.(.?html?|xml|rss)$");
+    if (_uri.substr(0, 4) != "http" && !regex_search(_uri, re_htmlfile))
+    {
+        return "";
+    }
+    smatch desc_match;
+    regex_search(html, desc_match, regex("description\"[^>]+content=\"([^\"]+)"));
+    return remove_newlines(desc_match[1].str());
 }
 
 const string URI::strip_html(const string &html)