From: Brion Vibber <brion@pobox.com>
Date: Thu, 30 Sep 2010 18:29:31 +0000 (-0700)
Subject: Always specify UTF-8 target charset for html_entity_decode(); default is 8-bit ISO... 
X-Git-Url: https://git.mxchange.org/?a=commitdiff_plain;h=1acc7d66c63e0b9f794791e3a2e2b0f60bc2ebb1;p=quix0rs-gnu-social.git

Always specify UTF-8 target charset for html_entity_decode(); the default is 8-bit ISO-8859-1, which causes things to break when we later pass the decoded strings through code that expects to work with UTF-8. For instance, running them through preg_replace() with the /u option results in NULL, leading to problems with OStatus and SubMirror generating their plaintext versions and doing length-cropping.
---

diff --git a/lib/activityobject.php b/lib/activityobject.php
index 95615d581c..c957933d04 100644
--- a/lib/activityobject.php
+++ b/lib/activityobject.php
@@ -203,7 +203,7 @@ class ActivityObject
 
         $title = ActivityUtils::childHtmlContent($element, self::TITLE);
 
-        $this->title = html_entity_decode(strip_tags($title));
+        $this->title = html_entity_decode(strip_tags($title), ENT_QUOTES, 'UTF-8');
 
         $this->source  = $this->_getSource($element);
 
diff --git a/plugins/OStatus/classes/Ostatus_profile.php b/plugins/OStatus/classes/Ostatus_profile.php
index 047435f668..10cee917e1 100644
--- a/plugins/OStatus/classes/Ostatus_profile.php
+++ b/plugins/OStatus/classes/Ostatus_profile.php
@@ -558,7 +558,7 @@ class Ostatus_profile extends Memcached_DataObject
         // Get (safe!) HTML and text versions of the content
 
         $rendered = $this->purify($sourceContent);
-        $content = html_entity_decode(strip_tags($rendered));
+        $content = html_entity_decode(strip_tags($rendered), ENT_QUOTES, 'UTF-8');
 
         $shortened = common_shorten_links($content);
 
@@ -569,7 +569,7 @@ class Ostatus_profile extends Memcached_DataObject
 
         if (Notice::contentTooLong($shortened)) {
             $attachment = $this->saveHTMLFile($note->title, $rendered);
-            $summary = html_entity_decode(strip_tags($note->summary));
+            $summary = html_entity_decode(strip_tags($note->summary), ENT_QUOTES, 'UTF-8');
             if (empty($summary)) {
                 $summary = $content;
             }
diff --git a/plugins/TwitterBridge/daemons/twitterstatusfetcher.php b/plugins/TwitterBridge/daemons/twitterstatusfetcher.php
index 590fa2954d..cef67b1806 100755
--- a/plugins/TwitterBridge/daemons/twitterstatusfetcher.php
+++ b/plugins/TwitterBridge/daemons/twitterstatusfetcher.php
@@ -321,7 +321,7 @@ class TwitterStatusFetcher extends ParallelizingDaemon
 
         $notice->is_local   = Notice::GATEWAY;
 
-        $notice->content  = html_entity_decode($status->text);
+        $notice->content  = html_entity_decode($status->text, ENT_QUOTES, 'UTF-8');
         $notice->rendered = $this->linkify($status);
 
         if (Event::handle('StartNoticeSave', array(&$notice))) {
diff --git a/scripts/importtwitteratom.php b/scripts/importtwitteratom.php
index 261dfb1d02..a29526f27e 100644
--- a/scripts/importtwitteratom.php
+++ b/scripts/importtwitteratom.php
@@ -89,7 +89,7 @@ function importActivityStream($user, $doc)
 
         $html = htmLawed($html, $config);
 
-        $content = html_entity_decode(strip_tags($html));
+        $content = html_entity_decode(strip_tags($html), ENT_QUOTES, 'UTF-8');
 
         $notice = Notice::saveNew($user->id,
                                   $content,
diff --git a/scripts/install_cli.php b/scripts/install_cli.php
index 61fbe18ef6..dadbcf66f0 100755
--- a/scripts/install_cli.php
+++ b/scripts/install_cli.php
@@ -208,7 +208,7 @@ END_HELP;
         $breakout = preg_replace('/<a[^>+]\bhref="(.*)"[^>]*>(.*)<\/a>/',
                                  '\2 &lt;\1&gt;',
                                  $html);
-        return html_entity_decode(strip_tags($breakout));
+        return html_entity_decode(strip_tags($breakout), ENT_QUOTES, 'UTF-8');
     }
 }
 
diff --git a/scripts/restoreuser.php b/scripts/restoreuser.php
index de3816dd53..82eb9bbaa1 100644
--- a/scripts/restoreuser.php
+++ b/scripts/restoreuser.php
@@ -213,7 +213,7 @@ function postNote($user, $activity)
     // Get (safe!) HTML and text versions of the content
 
     $rendered = purify($sourceContent);
-    $content = html_entity_decode(strip_tags($rendered));
+    $content = html_entity_decode(strip_tags($rendered), ENT_QUOTES, 'UTF-8');
 
     $shortened = common_shorten_links($content);