}
// Move all the entities into order so we can
- // replace them in reverse order and thus
- // not mess up their indices
+ // replace them and escape surrounding plaintext
+ // in order
$toReplace = array();
}
}
- // sort in reverse order by key
+ // sort in forward order by key
- krsort($toReplace);
+ ksort($toReplace);
+
+ $result = '';
+ $cursor = 0;
foreach ($toReplace as $part) {
list($type, $object) = $part;
- $orig = mb_substr($text, $object->indices[0], $object->indices[1] - $object->indices[0]);
+ $start = $object->indices[0];
+ $end = $object->indices[1];
+ if ($cursor < $start) {
+ // Copy in the preceding plaintext
+ $result .= $this->twitEscape(mb_substr($text, $cursor, $start - $cursor));
+ $cursor = $start;
+ }
+ $orig = $this->twitEscape(mb_substr($text, $start, $end - $start));
switch($type) {
case self::URL:
$linkText = $this->makeUrlLink($object, $orig);
$linkText = $this->makeMentionLink($object, $orig);
break;
default:
+ $linkText = $orig;
continue;
}
- $text = mb_substr($text, 0, $object->indices[0]) . $linkText . mb_substr($text, $object->indices[1]);
+ $result .= $linkText;
+ $cursor = $end;
}
- return $text;
+ $last = $this->twitEscape(mb_substr($text, $cursor));
+ $result .= $last;
+
+ return $result;
+ }
+
+ function twitEscape($str)
+ {
+ // Twitter seems to preemptive turn < and > into < and >
+ // but doesn't for &, so while you may have some magic protection
+ // against XSS by not bothing to escape manually, you still get
+ // invalid XHTML. Thanks!
+ //
+ // Looks like their web interface pretty much sends anything
+ // through intact, so.... to do equivalent, decode all entities
+ // and then re-encode the special ones.
+ return htmlspecialchars(html_entity_decode($str, ENT_COMPAT, 'UTF-8'));
}
function makeUrlLink($object, $orig)