/**
 * Turn one matched URI into an HTML link, keeping surrounding punctuation
 * outside the link.
 *
 * Registered as the preg_replace_callback() handler for the pattern
 * '@https?://[^\]>\s]+@' in common_render_text(), which runs
 * htmlspecialchars() over the text first; the URI is therefore inserted
 * into the markup as-is and is NOT escaped again here.
 *
 * Heuristics, repeated until nothing more can be peeled off the end:
 *  - trailing , . : " ' characters are treated as sentence punctuation;
 *  - a trailing ) or ] with no matching opener inside the URI is assumed
 *    to close a bracket that was opened in the surrounding text.
 *
 * @param array $matches preg match array; only $matches[0] (the full match) is read
 * @return string anchor markup for the trimmed URI followed by the stripped
 *                trailing characters, or the match unchanged when nothing
 *                linkable remains
 */
function common_render_uri_thingy($matches) {
    $uri = $matches[0];
    $trailer = '';

    $pairs = array(
        ']' => '[', # technically disallowed in URIs, but used in Java docs
        ')' => '(', # far too frequent in Wikipedia and MSDN
    );

    # NOTE(review): the caller escapes the text before matching, so a double
    # quote presumably arrives here as &quot; and is not caught by the
    # punctuation set below -- confirm whether that case matters.
    do {
        $changed = false;

        # Strip the whole run of trailing punctuation. The previous greedy
        # pattern /^(.*)([,.:"\']+)$/ only ever removed the last character.
        $stripped = rtrim($uri, ',.:"\'');
        if ($stripped !== $uri) {
            $trailer = substr($uri, strlen($stripped)) . $trailer;
            $uri = $stripped;
            $changed = true;
        }

        # More closers than openers: assume the bracket was opened outside
        # the URI and hand the closer back to the surrounding text.
        $final = substr($uri, -1);
        if (isset($pairs[$final])
            && substr_count($uri, $final) > substr_count($uri, $pairs[$final])) {
            $uri = substr($uri, 0, -1);
            $trailer = $final . $trailer;
            $changed = true;
        }

        # Loop again: removing a bracket can expose punctuation ("foo.)")
        # and removing punctuation can expose a bracket ("foo).").
    } while ($changed);

    if ($uri === '' || $uri === false) {
        # Everything was punctuation; there is nothing to link.
        return $matches[0];
    }

    # NOTE(review): the reviewed text had empty string literals where the
    # anchor markup belongs (the function returned its input unchanged);
    # restored as an external link -- confirm the attributes against upstream.
    return '<a href="' . $uri . '" class="extlink">' . $uri . '</a>' . $trailer;
}