git.mxchange.org Git - quix0rs-gnu-social.git/commitdiff
Display scrubbed HTML attachments inline on attachment view page.
author Brion Vibber <brion@pobox.com>
Wed, 17 Mar 2010 20:58:25 +0000 (13:58 -0700)
committer Brion Vibber <brion@pobox.com>
Wed, 17 Mar 2010 20:58:25 +0000 (13:58 -0700)
lib/attachmentlist.php

index dc6709d6780b68634c2570f1d8f77bde5fde1b94..22ae8ba07af1abd14171d43bc769e718296ce66c 100644 (file)
@@ -330,6 +330,13 @@ class Attachment extends AttachmentListItem
                     $this->out->element('param', array('name' => 'autoStart', 'value' => 1));
                     $this->out->elementEnd('object');
                     break;
+
+                case 'text/html':
+                    if ($this->attachment->filename) {
+                        // Locally-uploaded HTML. Scrub and display inline.
+                        $this->showHtmlFile($this->attachment);
+                    }
+                    break;
                 }
             }
         } else {
@@ -356,5 +363,60 @@ class Attachment extends AttachmentListItem
             }
         }
     }
+
+    /**
+     * Output a scrubbed copy of a local HTML attachment inline.
+     *
+     * The attachment is passed through scrubHtmlFile(); when that yields a
+     * falsy result nothing is emitted. Otherwise the fragment is written
+     * unescaped inside a <div class="inline-attachment"> wrapper.
+     *
+     * @param File $attachment attachment to display
+     *
+     * @return void
+     */
+    protected function showHtmlFile(File $attachment)
+    {
+        $fragment = $this->scrubHtmlFile($attachment);
+        if (!$fragment) {
+            return;
+        }

+        $wrapperAttrs = array('class' => 'inline-attachment');
+        $this->out->elementStart('div', $wrapperAttrs);
+        $this->out->raw($fragment);
+        $this->out->elementEnd('div');
+    }
+
+    /**
+     * Load a locally-stored HTML attachment and reduce it to a scrubbed
+     * fragment for inline display.
+     *
+     * The file is parsed with DOMDocument to normalize the markup, every
+     * <script> element is removed, everything outside <body> is trimmed
+     * off, and the remainder is filtered through htmLawed in 'safe' mode
+     * with id, style and on* attributes denied.
+     *
+     * @param File $attachment attachment whose filename is resolved via File::path()
+     *
+     * @return mixed false on failure, HTML fragment string on success
+     */
+    protected function scrubHtmlFile(File $attachment)
+    {
+        $path = File::path($attachment->filename);
+        if (!file_exists($path) || !is_readable($path)) {
+            common_log(LOG_ERR, "Missing local HTML attachment $path");
+            return false;
+        }

+        $raw = file_get_contents($path);
+        if ($raw === false) {
+            // The file can vanish or become unreadable between the checks
+            // above and the actual read.
+            common_log(LOG_ERR, "Could not read local HTML attachment $path");
+            return false;
+        }
+        if ($raw === '') {
+            // DOMDocument::loadHTML() rejects empty input (a warning on
+            // older PHP, a ValueError on PHP 8), so bail out before parsing.
+            common_log(LOG_ERR, "Bad HTML in local HTML attachment $path");
+            return false;
+        }

+        // Normalize...
+        $dom = new DOMDocument();
+        if (!$dom->loadHTML($raw)) {
+            common_log(LOG_ERR, "Bad HTML in local HTML attachment $path");
+            return false;
+        }

+        // Remove <script>s or htmlawed will dump their contents into output!
+        // Note: removing child nodes while iterating seems to mess things up,
+        // hence the double loop.
+        $scripts = array();
+        foreach ($dom->getElementsByTagName('script') as $script) {
+            $scripts[] = $script;
+        }
+        foreach ($scripts as $script) {
+            common_log(LOG_DEBUG, $script->textContent);
+            $script->parentNode->removeChild($script);
+        }

+        // Trim out everything outside the body...
+        $body = $dom->saveHTML();
+        $body = preg_replace('/^.*<body[^>]*>/is', '', $body);
+        $body = preg_replace('/<\/body[^>]*>.*$/is', '', $body);

+        require_once INSTALLDIR.'/extlib/htmLawed/htmLawed.php';
+        $config = array('safe' => 1,
+                        'deny_attribute' => 'id,style,on*',
+                        'comment' => 1); // remove comments
+        $scrubbed = htmLawed($body, $config);

+        return $scrubbed;
+    }
 }