// Then clean up what the regex left behind
$offset = 0;
- foreach($matches[0] as $url) {
- $url = htmlspecialchars_decode($url);
+ foreach($matches[0] as $orig_url) {
+ $url = htmlspecialchars_decode($orig_url);
// Make sure we didn't pick up an email address
if (preg_match('#^[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,4}$#i', $url)) continue;
- // Remove trailing punctuation
- $url = rtrim($url, '.?!,;:\'"`');
+ // Remove surrounding punctuation
+ $url = trim($url, '.?!,;:\'"`([<');
// Remove surrounding parens and the like
preg_match('/[)\]>]+$/', $url, $trailing);
// If the first part wasn't cap'd but the last part was, we captured too much
if ((!$prev_part && $last_part)) {
- $url = substr_replace($url, '', mb_strpos($url, '.'.$url_parts[2], 0));
+ $url = mb_substr($url, 0 , mb_strpos($url, '.'.$url_parts['2'], 0));
}
// Capture the new TLD
if (!in_array($url_parts[2], $tlds)) continue;
+ // Put the url back the way we found it.
+ $url = (mb_strpos($orig_url, htmlspecialchars($url)) === FALSE) ? $url:htmlspecialchars($url);
+
// Call user specified func
$modified_url = $callback($url);