Merge remote branch 'upstream/master'

author Michael Vogel <icarus@dabo.de>
Fri, 13 Jul 2012 21:39:51 +0000 (23:39 +0200)
committer Michael Vogel <icarus@dabo.de>
Fri, 13 Jul 2012 21:39:51 +0000 (23:39 +0200)
diff --cc include/bb2diaspora.php

index 4a82635e57f065246adc702dc8d9d49b042ede47,b5feacea8b4ae5a70fbd54dff2729cc554de0b45..436412dbd487bc700465314ab812d5670f417df8
--- 1/include/bb2diaspora.php
--- 2/include/bb2diaspora.php
+++ b/include/bb2diaspora.php
@@@ -104,45 -192,62 +192,29 @@@ function diaspora_ol($s) 
   
   function bb2diaspora($Text,$preserve_nl = false) {
   
- -//////////////////////
- -// An attempt was made to convert bbcode to html and then to markdown
- -// consisting of the following lines.
- -// I'm undoing this as we have a lot of bbcode constructs which
- -// were simply getting lost, for instance bookmark, vimeo, video, youtube, events, etc.
- -// We can try this again, but need a very good test sequence to verify
- -// all the major bbcode constructs that we use are getting through.
- -//////////////////////
- -/*
--      // bbcode() will convert "[*]" into "<li>" with no closing "</li>"
--      // Markdownify() is unable to handle these, as it makes each new
--      // "<li>" into a deeper nested element until it crashes. So pre-format
--      // the lists as Diaspora lists before sending the $Text to bbcode()
--      //
--      // Note that to get nested lists to work for Diaspora, we would need
--      // to define the closing tag for the list elements. So nested lists
--      // are going to be flattened out in Diaspora for now
- /*    $endlessloop = 0;
- -
- -      $endlessloop = 0;
--      while ((((strpos($Text, "[/list]") !== false) && (strpos($Text, "[list") !== false)) ||
--             ((strpos($Text, "[/ol]") !== false) && (strpos($Text, "[ol]") !== false)) || 
--             ((strpos($Text, "[/ul]") !== false) && (strpos($Text, "[ul]") !== false))) && (++$endlessloop < 20)) {
--              $Text = preg_replace_callback("/\[list\](.*?)\[\/list\]/is", 'diaspora_ul', $Text);
--              $Text = preg_replace_callback("/\[list=1\](.*?)\[\/list\]/is", 'diaspora_ol', $Text);
--              $Text = preg_replace_callback("/\[list=i\](.*?)\[\/list\]/s",'diaspora_ol', $Text);
--              $Text = preg_replace_callback("/\[list=I\](.*?)\[\/list\]/s", 'diaspora_ol', $Text);
--              $Text = preg_replace_callback("/\[list=a\](.*?)\[\/list\]/s", 'diaspora_ol', $Text);
--              $Text = preg_replace_callback("/\[list=A\](.*?)\[\/list\]/s", 'diaspora_ol', $Text);
--              $Text = preg_replace_callback("/\[ul\](.*?)\[\/ul\]/is", 'diaspora_ul', $Text);
--              $Text = preg_replace_callback("/\[ol\](.*?)\[\/ol\]/is", 'diaspora_ol', $Text);
--      }
- -
--*/
++      // Re-enabling the converter again.
++      // The bbcode parser now handles youtube-links (and the other stuff) correctly.
++      // Additionally the html code is now fixed so that lists are now working.
+ 
         // Convert it to HTML - don't try oembed
- -//    $Text = bbcode($Text, $preserve_nl, false);
+ +      $Text = bbcode($Text, $preserve_nl, false);
   
         // Now convert HTML to Markdown
- -//    $md = new Markdownify(false, false, false);
- -//    $Text = $md->parseString($Text);
+ +      $md = new Markdownify(false, false, false);
+ +      $Text = $md->parseString($Text);
   
         // If the text going into bbcode() has a plain URL in it, i.e.
         // with no [url] tags around it, it will come out of parseString()
         // looking like: <http://url.com>, which gets removed by strip_tags().
         // So take off the angle brackets of any such URL
- -//    $Text = preg_replace("/<http(.*?)>/is", "http$1", $Text);
+ +      $Text = preg_replace("/<http(.*?)>/is", "http$1", $Text);
   
         // Remove all unconverted tags
- -//    $Text = strip_tags($Text);
- -
- -////// 
- -// end of bb->html->md conversion attempt
- -//////
+ +      $Text = strip_tags($Text);
   
- /*
+ 
++/* Old routine
+ 
         $ev = bbtoevent($Text);
   
         // Replace any html brackets with HTML Entities to prevent executing HTML or script
@@@ -309,7 -421,7 +388,8 @@@
         $Text = preg_replace("/\<(.*?)(src|href)=(.*?)\&amp\;(.*?)\>/ism",'<$1$2=$3&$4>',$Text);
   
         $Text = preg_replace_callback('/\[(.*?)\]\((.*?)\)/ism','unescape_underscores_in_links',$Text);
+ 
+ +*/
   
         // Remove any leading or trailing whitespace, as this will mess up
         // the Diaspora signature verification and cause the item to disappear
diff --cc include/bbcode.php

index e212ec4aed0f1016282d43a0c4e702cca9f2f203,63dd9695e76f1bad88a19d922a9098eec53360b3..4aac33f112e9badf10953d260baabae11ebece90
--- 1/include/bbcode.php
--- 2/include/bbcode.php
+++ b/include/bbcode.php
@@@ -335,28 -386,10 +397,29 @@@ function bbcode($Text,$preserve_nl = fa
   
         // fix any escaped ampersands that may have been converted into links
         $Text = preg_replace("/\<(.*?)(src|href)=(.*?)\&amp\;(.*?)\>/ism",'<$1$2=$3&$4>',$Text);
-       if(strlen($saved_image))
-               $Text = str_replace('[$#saved_image#$]','<img src="' . $saved_image .'" alt="' . t('Image/photo') . '" />',$Text);
+ 
+       if($saved_image)
+               $Text = bb_replace_images($Text, $saved_image);
   
+ +      // Clean up the HTML by loading and saving the HTML with the DOM
+ +      // Only do it when it has to be done - for performance reasons
+ +      if (!$tryoembed) {
+ +              $doc = new DOMDocument();
+ +              $doc->preserveWhiteSpace = false;
+ +
+ +              $Text = mb_convert_encoding($Text, 'HTML-ENTITIES', "UTF-8");
+ +
+ +              $doctype = '<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">';
+ +              @$doc->loadHTML($doctype."<html><body>".$Text."</body></html>");
+ +
+ +              $Text = $doc->saveHTML();
+ +              $Text = str_replace(array("<html><body>", "</body></html>", $doctype), array("", "", ""), $Text);
+ +
+ +              $Text = str_replace('<br></li>','</li>', $Text);
+ +
+ +              $Text = mb_convert_encoding($Text, "UTF-8", 'HTML-ENTITIES');
+ +      }
+ +
         call_hooks('bbcode',$Text);
   
         return $Text;
diff --cc include/dba.php
Simple merge
diff --cc include/items.php

index d888f314de1a366ac9e4ffddb600bebd8f7d94a6,6d58bd182aab30a91939f7bd4fa1faf141773fab..b81208f3666bda612614011667979fb2fa119084
--- 1/include/items.php
--- 2/include/items.php
+++ b/include/items.php
@@@ -278,31 -280,100 +280,123 @@@ function construct_activity_target($ite
         }
   
         return '';
- } 
+ }
+ 
+ /* limit_body_size()
+  *
+  *            The purpose of this function is to apply system message length limits to
+  *            imported messages without including any embedded photos in the length
+  */
+ if(! function_exists('limit_body_size')) {
+ function limit_body_size($body) {
+ 
+       logger('limit_body_size: start', LOGGER_DEBUG);
+ 
+       $maxlen = get_max_import_size();
+ 
+       // If the length of the body, including the embedded images, is smaller
+       // than the maximum, then don't waste time looking for the images
+       if($maxlen && (strlen($body) > $maxlen)) {
+ 
+               logger('limit_body_size: the total body length exceeds the limit', LOGGER_DEBUG);
+ 
+               $orig_body = $body;
+               $new_body = '';
+               $textlen = 0;
+               $max_found = false;
+ 
+               $img_start = strpos($orig_body, '[img');
+               $img_st_close = ($img_start !== false ? strpos(substr($orig_body, $img_start), ']') : false);
+               $img_end = ($img_start !== false ? strpos(substr($orig_body, $img_start), '[/img]') : false);
+               while(($img_st_close !== false) && ($img_end !== false)) {
+ 
+                       $img_st_close++; // make it point to AFTER the closing bracket
+                       $img_end += $img_start;
+                       $img_end += strlen('[/img]');
+ 
+                       if(! strcmp(substr($orig_body, $img_start + $img_st_close, 5), 'data:')) {
+                               // This is an embedded image
+ 
+                               if( ($textlen + $img_start) > $maxlen ) {
+                                       if($textlen < $maxlen) {
+                                               logger('limit_body_size: the limit happens before an embedded image', LOGGER_DEBUG);
+                                               $new_body = $new_body . substr($orig_body, 0, $maxlen - $textlen);
+                                               $textlen = $maxlen;
+                                       }
+                               }
+                               else {
+                                       $new_body = $new_body . substr($orig_body, 0, $img_start);
+                                       $textlen += $img_start;
+                               }
+ 
+                               $new_body = $new_body . substr($orig_body, $img_start, $img_end - $img_start);
+                       }
+                       else {
+ 
+                               if( ($textlen + $img_end) > $maxlen ) {
+                                       if($textlen < $maxlen) {
+                                               logger('limit_body_size: the limit happens before the end of a non-embedded image', LOGGER_DEBUG);
+                                               $new_body = $new_body . substr($orig_body, 0, $maxlen - $textlen);
+                                               $textlen = $maxlen;
+                                       }
+                               }
+                               else {
+                                       $new_body = $new_body . substr($orig_body, 0, $img_end);
+                                       $textlen += $img_end;
+                               }
+                       }
+                       $orig_body = substr($orig_body, $img_end);
+ 
+                       if($orig_body === false) // in case the body ends on a closing image tag
+                               $orig_body = '';
+ 
+                       $img_start = strpos($orig_body, '[img');
+                       $img_st_close = ($img_start !== false ? strpos(substr($orig_body, $img_start), ']') : false);
+                       $img_end = ($img_start !== false ? strpos(substr($orig_body, $img_start), '[/img]') : false);
+               }
+ 
+               if( ($textlen + strlen($orig_body)) > $maxlen) {
+                       if($textlen < $maxlen) {
+                               logger('limit_body_size: the limit happens after the end of the last image', LOGGER_DEBUG);
+                               $new_body = $new_body . substr($orig_body, 0, $maxlen - $textlen);
+                               $textlen = $maxlen;
+                       }
+               }
+               else {
+                       logger('limit_body_size: the text size with embedded images extracted did not violate the limit', LOGGER_DEBUG);
+                       $new_body = $new_body . $orig_body;
+                       $textlen += strlen($orig_body);
+               }
+ 
+               return $new_body;
+       }
+       else
+               return $body;
+ }}
   
+ +function title_is_body($title, $body) {
+ +
+ +      $title = strip_tags($title);
+ +      $title = trim($title);
+ +      $title = str_replace(array("\n", "\r", "\t", " "), array("","","",""), $title);
+ +
+ +      $body = strip_tags($body);
+ +      $body = trim($body);
+ +      $body = str_replace(array("\n", "\r", "\t", " "), array("","","",""), $body);
+ +
+ +      if (strlen($title) < strlen($body))
+ +              $body = substr($body, 0, strlen($title));
+ +
+ +      if (($title != $body) and (substr($title, -3) == "...")) {
+ +              $pos = strrpos($title, "...");
+ +              if ($pos > 0) {
+ +                      $title = substr($title, 0, $pos);
+ +                      $body = substr($body, 0, $pos);
+ +              }
+ +      }
+ +
+ +      return($title == $body);
+ +}
   
   
   
diff --cc include/network.php
Simple merge
author	Michael Vogel <icarus@dabo.de>
	Fri, 13 Jul 2012 21:39:51 +0000 (23:39 +0200)
committer	Michael Vogel <icarus@dabo.de>
	Fri, 13 Jul 2012 21:39:51 +0000 (23:39 +0200)
		1	2
include/bb2diaspora.php	patch |	diff1 |	diff2 |	blob | history
include/bbcode.php	patch |	diff1 |	diff2 |	blob | history
include/dba.php	patch |	diff1 |	diff2 |	blob | history
include/items.php	patch |	diff1 |	diff2 |	blob | history
include/network.php	patch |	diff1 |	diff2 |	blob | history