git.mxchange.org Git - quix0rs-gnu-social.git/commitdiff
Improved the URL tests, and improve the matcher so more tests are passed. The remaini...
author Craig Andrews <candrews@integralblue.com>
Tue, 25 Aug 2009 15:21:45 +0000 (11:21 -0400)
committer Craig Andrews <candrews@integralblue.com>
Tue, 25 Aug 2009 15:21:45 +0000 (11:21 -0400)
lib/util.php
tests/URLDetectionTest.php

index 2be4213e794f74cae3457c994a7360eeeabfa305..ee3fe5ddcfd4b924de89eff0412f88ca55d91c8c 100644 (file)
@@ -412,30 +412,34 @@ function common_render_text($text)
 function common_replace_urls_callback($text, $callback, $notice_id = null) {
     // Start off with a regex
     $regex = '#'.
-    '(?:^|[\s\(\)\[\]\{\}]+)'.
-        '('.
+    '(?:^|[\s\(\)\[\]\{\}\\\'\\\";]+)(?![\@\!\#])'.
+    '('.
         '(?:'.
             '(?:'. //Known protocols
                 '(?:'.
                     '(?:https?|ftps?|mms|rtsp|gopher|news|nntp|telnet|wais|file|prospero|webcal|irc)://'.
                     '|'.
                     '(?:mailto|aim|tel|xmpp):'.
-                ')[^\s\/]+'.
+                ')'.
+                '(?:[\pN\pL\-\_\+]+(?:\:[\pN\pL\-\_\+]+)?\@)?'. //user:pass@
+                '[\pN\pL\-\_\:\.]+(?<![\.\:])'. //dns
             ')'.
             '|(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)'. //IPv4
             '|(?:'. //IPv6
                 '(?:(?:(?:[0-9A-Fa-f]{1,4}:){7}(?:(?:[0-9A-Fa-f]{1,4})|:))|(?:(?:[0-9A-Fa-f]{1,4}:){6}(?::|(?:(?:25[0-5]|2[0-4]\d|[01]?\d{1,2})(?:\.(?:25[0-5]|2[0-4]\d|[01]?\d{1,2})){3})|(?::[0-9A-Fa-f]{1,4})))|(?:(?:[0-9A-Fa-f]{1,4}:){5}(?:(?::(?:(?:25[0-5]|2[0-4]\d|[01]?\d{1,2})(?:\.(?:25[0-5]|2[0-4]\d|[01]?\d{1,2})){3})?)|(?:(?::[0-9A-Fa-f]{1,4}){1,2})))|(?:(?:[0-9A-Fa-f]{1,4}:){4}(?::[0-9A-Fa-f]{1,4}){0,1}(?:(?::(?:(?:25[0-5]|2[0-4]\d|[01]?\d{1,2})(?:\.(?:25[0-5]|2[0-4]\d|[01]?\d{1,2})){3})?)|(?:(?::[0-9A-Fa-f]{1,4}){1,2})))|(?:(?:[0-9A-Fa-f]{1,4}:){3}(?::[0-9A-Fa-f]{1,4}){0,2}(?:(?::(?:(?:25[0-5]|2[0-4]\d|[01]?\d{1,2})(?:\.(?:25[0-5]|2[0-4]\d|[01]?\d{1,2})){3})?)|(?:(?::[0-9A-Fa-f]{1,4}){1,2})))|(?:(?:[0-9A-Fa-f]{1,4}:){2}(?::[0-9A-Fa-f]{1,4}){0,3}(?:(?::(?:(?:25[0-5]|2[0-4]\d|[01]?\d{1,2})(?:\.(?:25[0-5]|2[0-4]\d|[01]?\d{1,2})){3})?)|(?:(?::[0-9A-Fa-f]{1,4}){1,2})))|(?:(?:[0-9A-Fa-f]{1,4}:)(?::[0-9A-Fa-f]{1,4}){0,4}(?:(?::(?:(?:25[0-5]|2[0-4]\d|[01]?\d{1,2})(?:\.(?:25[0-5]|2[0-4]\d|[01]?\d{1,2})){3})?)|(?:(?::[0-9A-Fa-f]{1,4}){1,2})))|(?::(?::[0-9A-Fa-f]{1,4}){0,5}(?:(?::(?:(?:25[0-5]|2[0-4]\d|[01]?\d{1,2})(?:\.(?:25[0-5]|2[0-4]\d|[01]?\d{1,2})){3})?)|(?:(?::[0-9A-Fa-f]{1,4}){1,2})))|(?:(?:(?:25[0-5]|2[0-4]\d|[01]?\d{1,2})(?:\.(?:25[0-5]|2[0-4]\d|[01]?\d{1,2})){3})))'.
             ')|(?:'. //DNS
-                '\S+\.(?:museum|travel|onion|local|[a-z]{2,4})'.
-            ')'.
+                '(?:[\pN\pL\-\_\+]+(?:\:[\pN\pL\-\_\+]+)?\@)?'. //user:pass@
+                '[\pN\pL\-\_]+(?:\.[\pN\pL\-\_]+)*\.(?:museum|travel|onion|local|[a-z]{2,4})'.
+            ')(?![\pN\pL\-\_])'.
         ')'.
         '(?:'.
-            '$|(?:'.
-                '/[^\s\(\)\[\]\{\}]*'.
-            ')'.
-        ')'.
+            '(?:\:\d+)?'. //:port
+            '(?:/[\pN\pL$\!\(\)\.\-\_\+\/\=\&\;]*)?'. // /path
+            '(?:\?[\pN\pL\$\!\(\)\.\-\_\+\/\=\&\;\/]*)?'. // ?query string
+            '(?:\#[\pN\pL$\!\(\)\.\-\_\+\/\=\&\;\/\?\#]*)?'. // #fragment
+        ')(?<![\?\.\,\#\)])'.
     ')'.
-    '#ix';
+    '#ixu';
     return preg_replace_callback($regex, curry(callback_helper,$callback,$notice_id) ,$text);
 }
 
index e69f1a2c384e06548ac71480c293b6db55d9c7b3..05c02d6bb5a9ed96ec6953b3f00c4fbdff071a53 100644 (file)
@@ -25,10 +25,50 @@ class URLDetectionTest extends PHPUnit_Framework_TestCase
     static public function provider()
     {
         return array(
+                     array('127.0.0.1',
+                           '<a href="http://127.0.0.1/" rel="external">127.0.0.1</a>'),
+                     array('127.0.0.1/test.php',
+                           '<a href="http://127.0.0.1/test.php" rel="external">127.0.0.1/test.php</a>'),
+                     array('http://::1/test.php',
+                           '<a href="http://::1/test.php" rel="external">http://::1/test.php</a>'),
+                     array('http://::1',
+                           '<a href="http://::1/" rel="external">http://::1</a>'),
+                     array('2001:4978:1b5:0:21d:e0ff:fe66:59ab/test.php',
+                           '<a href="http://2001:4978:1b5:0:21d:e0ff:fe66:59ab/test.php" rel="external">2001:4978:1b5:0:21d:e0ff:fe66:59ab/test.php</a>'),
+                     array('2001:4978:1b5:0:21d:e0ff:fe66:59ab',
+                           '<a href="http://2001:4978:1b5:0:21d:e0ff:fe66:59ab/" rel="external">2001:4978:1b5:0:21d:e0ff:fe66:59ab</a>'),
+                     array('http://127.0.0.1',
+                           '<a href="http://127.0.0.1/" rel="external">http://127.0.0.1</a>'),
+                     array('example.com',
+                           '<a href="http://example.com/" rel="external">example.com</a>'),
+                     array('example.com',
+                           '<a href="http://example.com/" rel="external">example.com</a>'),
+                     array('http://example.com',
+                           '<a href="http://example.com/" rel="external">http://example.com</a>'),
+                     array('http://example.com.',
+                           '<a href="http://example.com/" rel="external">http://example.com</a>.'),
+                     array('/var/lib/example.so',
+                           '/var/lib/example.so'),
                      array('example',
                            'example'),
+                     array('user@example.com',
+                           '<a href="mailto:user@example.com" rel="external">user@example.com</a>'),
+                     array('user_name+other@example.com',
+                           '<a href="mailto:user_name+other@example.com" rel="external">user_name+other@example.com</a>'),
+                     array('mailto:user@example.com',
+                           '<a href="mailto:user@example.com" rel="external">mailto:user@example.com</a>'),
+                     array('mailto:user@example.com?subject=test',
+                           '<a href="mailto:user@example.com?subject=test" rel="external">mailto:user@example.com?subject=test</a>'),
+                     array('#example',
+                           '#<span class="tag"><a href="' . common_local_url('tag', array('tag' => common_canonical_tag('example'))) . '" rel="tag">example</a></span>'),
+                     array('#example.com',
+                           '#<span class="tag"><a href="' . common_local_url('tag', array('tag' => common_canonical_tag('example.com'))) . '" rel="tag">example.com</a></span>'),
+                     array('#.net',
+                           '#<span class="tag"><a href="' . common_local_url('tag', array('tag' => common_canonical_tag('.net'))) . '" rel="tag">.net</a></span>'),
                      array('http://example',
                            '<a href="http://example/" rel="external">http://example</a>'),
+                     array('http://3xampl3',
+                           '<a href="http://3xampl3/" rel="external">http://3xampl3</a>'),
                      array('http://example/',
                            '<a href="http://example/" rel="external">http://example/</a>'),
                      array('http://example/path',
@@ -47,6 +87,10 @@ class URLDetectionTest extends PHPUnit_Framework_TestCase
                            '<a href="http://user:pass@example.com/" rel="external">http://user:pass@example.com</a>'),
                      array('http://example.com:8080',
                            '<a href="http://example.com:8080/" rel="external">http://example.com:8080</a>'),
+                     array('http://example.com:8080/test.php',
+                           '<a href="http://example.com:8080/test.php" rel="external">http://example.com:8080/test.php</a>'),
+                     array('example.com:8080/test.php',
+                           '<a href="http://example.com:8080/test.php" rel="external">example.com:8080/test.php</a>'),
                      array('http://www.example.com',
                            '<a href="http://www.example.com/" rel="external">http://www.example.com</a>'),
                      array('http://example.com/',
@@ -59,60 +103,65 @@ class URLDetectionTest extends PHPUnit_Framework_TestCase
                            '<a href="http://example.com/path.html#fragment" rel="external">http://example.com/path.html#fragment</a>'),
                      array('http://example.com/path.php?foo=bar&bar=foo',
                            '<a href="http://example.com/path.php?foo=bar&amp;bar=foo" rel="external">http://example.com/path.php?foo=bar&amp;bar=foo</a>'),
+                     array('http://example.com.',
+                           '<a href="http://example.com/" rel="external">http://example.com</a>.'),
                      array('http://müllärör.de',
-                           '<a href="http://müllärör.de" rel="external">http://müllärör.de</a>'),
+                           '<a href="http://m&#xFC;ll&#xE4;r&#xF6;r.de/" rel="external">http://müllärör.de</a>'),
                      array('http://ﺱﺲﺷ.com',
-                           '<a href="http://ﺱﺲﺷ.com" rel="external">http://ﺱﺲﺷ.com</a>'),
+                           '<a href="http://&#xFEB1;&#xFEB2;&#xFEB7;.com/" rel="external">http://ﺱﺲﺷ.com</a>'),
                      array('http://сделаткартинки.com',
-                           '<a href="http://сделаткартинки.com" rel="external">http://сделаткартинки.com</a>'),
+                           '<a href="http://&#x441;&#x434;&#x435;&#x43B;&#x430;&#x442;&#x43A;&#x430;&#x440;&#x442;&#x438;&#x43D;&#x43A;&#x438;.com/" rel="external">http://сделаткартинки.com</a>'),
                      array('http://tūdaliņ.lv',
-                           '<a href="http://tūdaliņ.lv" rel="external">http://tūdaliņ.lv</a>'),
+                           '<a href="http://t&#x16B;dali&#x146;.lv/" rel="external">http://tūdaliņ.lv</a>'),
                      array('http://brændendekærlighed.com',
-                           '<a href="http://brændendekærlighed.com" rel="external">http://brændendekærlighed.com</a>'),
+                           '<a href="http://br&#xE6;ndendek&#xE6;rlighed.com/" rel="external">http://brændendekærlighed.com</a>'),
                      array('http://あーるいん.com',
-                           '<a href="http://あーるいん.com" rel="external">http://あーるいん.com</a>'),
+                           '<a href="http://&#x3042;&#x30FC;&#x308B;&#x3044;&#x3093;.com/" rel="external">http://あーるいん.com</a>'),
                      array('http://예비교사.com',
-                           '<a href="http://예비교사.com" rel="external">http://예비교사.com</a>'),
+                           '<a href="http://&#xC608;&#xBE44;&#xAD50;&#xC0AC;.com/" rel="external">http://예비교사.com</a>'),
                      array('http://example.com.',
-                           '<a href="http://example.com" rel="external">http://example.com</a>.'),
+                           '<a href="http://example.com/" rel="external">http://example.com</a>.'),
                      array('http://example.com?',
-                           '<a href="http://example.com" rel="external">http://example.com</a>?'),
+                           '<a href="http://example.com/" rel="external">http://example.com</a>?'),
                      array('http://example.com!',
-                           '<a href="http://example.com" rel="external">http://example.com</a>!'),
+                           '<a href="http://example.com/" rel="external">http://example.com</a>!'),
                      array('http://example.com,',
-                           '<a href="http://example.com" rel="external">http://example.com</a>,'),
+                           '<a href="http://example.com/" rel="external">http://example.com</a>,'),
                      array('http://example.com;',
-                           '<a href="http://example.com" rel="external">http://example.com</a>;'),
+                           '<a href="http://example.com/" rel="external">http://example.com</a>;'),
                      array('http://example.com:',
-                           '<a href="http://example.com" rel="external">http://example.com</a>:'),
+                           '<a href="http://example.com/" rel="external">http://example.com</a>:'),
                      array('\'http://example.com\'',
-                           '\'<a href="http://example.com" rel="external">http://example.com</a>\''),
+                           '\'<a href="http://example.com/" rel="external">http://example.com</a>\''),
                      array('"http://example.com"',
-                           '"<a href="http://example.com" rel="external">http://example.com</a>"'),
-                     array('http://example.com\r',
-                           '<a href="http://example.com" rel="external">http://example.com</a>'),
+                           '&quot;<a href="http://example.com/" rel="external">http://example.com</a>&quot;'),
+                     array('http://example.com',
+                           '<a href="http://example.com/" rel="external">http://example.com</a>'),
                      array('(http://example.com)',
-                           '(<a href="http://example.com" rel="external">http://example.com</a>)'),
+                           '(<a href="http://example.com/" rel="external">http://example.com</a>)'),
                      array('[http://example.com]',
-                           '[<a href="http://example.com" rel="external">http://example.com</a>]'),
+                           '[<a href="http://example.com/" rel="external">http://example.com</a>]'),
                      array('<http://example.com>',
-                           '<<a href="http://example.com" rel="external">http://example.com</a>>'),
+                           '&lt;<a href="http://example.com/" rel="external">http://example.com</a>&gt;'),
                      array('http://example.com/path/(foo)/bar',
                            '<a href="http://example.com/path/(foo)/bar" rel="external">http://example.com/path/(foo)/bar</a>'),
+                     //Not a valid url - urls cannot contain unencoded square brackets
                      array('http://example.com/path/[foo]/bar',
                            '<a href="http://example.com/path/[foo]/bar" rel="external">http://example.com/path/[foo]/bar</a>'),
                      array('http://example.com/path/foo/(bar)',
                            '<a href="http://example.com/path/foo/(bar)" rel="external">http://example.com/path/foo/(bar)</a>'),
-                     array('http://example.com/path/foo/[bar]',
-                           '<a href="http://example.com/path/foo/[bar]" rel="external">http://example.com/path/foo/[bar]</a>'),
+                     //Not a valid url - urls cannot contain unencoded square brackets
+                     //array('http://example.com/path/foo/[bar]',
+                     //      '<a href="http://example.com/path/foo/[bar]" rel="external">http://example.com/path/foo/[bar]</a>'),
                      array('Hey, check out my cool site http://example.com okay?',
-                           'Hey, check out my cool site <a href="http://example.com" rel="external">http://example.com</a> okay?'),
+                           'Hey, check out my cool site <a href="http://example.com/" rel="external">http://example.com</a> okay?'),
                      array('What about parens (e.g. http://example.com/path/foo/(bar))?',
                            'What about parens (e.g. <a href="http://example.com/path/foo/(bar)" rel="external">http://example.com/path/foo/(bar)</a>)?'),
                      array('What about parens (e.g. http://example.com/path/foo/(bar)?',
                            'What about parens (e.g. <a href="http://example.com/path/foo/(bar)" rel="external">http://example.com/path/foo/(bar)</a>?'),
                      array('What about parens (e.g. http://example.com/path/foo/(bar).)?',
                            'What about parens (e.g. <a href="http://example.com/path/foo/(bar)" rel="external">http://example.com/path/foo/(bar)</a>.)?'),
+                     //Not a valid url - urls cannot contain unencoded commas
                      array('What about parens (e.g. http://example.com/path/(foo,bar)?',
                            'What about parens (e.g. <a href="http://example.com/path/(foo,bar)" rel="external">http://example.com/path/(foo,bar)</a>?'),
                      array('Unbalanced too (e.g. http://example.com/path/((((foo)/bar)?',
@@ -124,51 +173,51 @@ class URLDetectionTest extends PHPUnit_Framework_TestCase
                      array('Unbalanced too (e.g. http://example.com/path/foo/(bar))))?',
                            'Unbalanced too (e.g. <a href="http://example.com/path/foo/(bar)" rel="external">http://example.com/path/foo/(bar)</a>)))?'),
                      array('example.com',
-                           '<a href="http://example.com" rel="external">example.com</a>'),
+                           '<a href="http://example.com/" rel="external">example.com</a>'),
                      array('example.org',
-                           '<a href="http://example.org" rel="external">example.org</a>'),
+                           '<a href="http://example.org/" rel="external">example.org</a>'),
                      array('example.co.uk',
-                           '<a href="http://example.co.uk" rel="external">example.co.uk</a>'),
+                           '<a href="http://example.co.uk/" rel="external">example.co.uk</a>'),
                      array('www.example.co.uk',
-                           '<a href="http://www.example.co.uk" rel="external">www.example.co.uk</a>'),
+                           '<a href="http://www.example.co.uk/" rel="external">www.example.co.uk</a>'),
                      array('farm1.images.example.co.uk',
-                           '<a href="http://farm1.images.example.co.uk" rel="external">farm1.images.example.co.uk</a>'),
+                           '<a href="http://farm1.images.example.co.uk/" rel="external">farm1.images.example.co.uk</a>'),
                      array('example.museum',
-                           '<a href="http://example.museum" rel="external">example.museum</a>'),
+                           '<a href="http://example.museum/" rel="external">example.museum</a>'),
                      array('example.travel',
-                           '<a href="http://example.travel" rel="external">example.travel</a>'),
+                           '<a href="http://example.travel/" rel="external">example.travel</a>'),
                      array('example.com.',
-                           '<a href="http://example.com" rel="external">example.com</a>.'),
+                           '<a href="http://example.com/" rel="external">example.com</a>.'),
                      array('example.com?',
-                           '<a href="http://example.com" rel="external">example.com</a>?'),
+                           '<a href="http://example.com/" rel="external">example.com</a>?'),
                      array('example.com!',
-                           '<a href="http://example.com" rel="external">example.com</a>!'),
+                           '<a href="http://example.com/" rel="external">example.com</a>!'),
                      array('example.com,',
-                           '<a href="http://example.com" rel="external">example.com</a>,'),
+                           '<a href="http://example.com/" rel="external">example.com</a>,'),
                      array('example.com;',
-                           '<a href="http://example.com" rel="external">example.com</a>;'),
+                           '<a href="http://example.com/" rel="external">example.com</a>;'),
                      array('example.com:',
-                           '<a href="http://example.com" rel="external">example.com</a>:'),
+                           '<a href="http://example.com/" rel="external">example.com</a>:'),
                      array('\'example.com\'',
-                           '\'<a href="http://example.com" rel="external">example.com</a>\''),
+                           '\'<a href="http://example.com/" rel="external">example.com</a>\''),
                      array('"example.com"',
-                           '"<a href="http://example.com" rel="external">example.com</a>"'),
-                     array('example.com\r',
-                           '<a href="http://example.com" rel="external">example.com</a>'),
+                           '&quot;<a href="http://example.com/" rel="external">example.com</a>&quot;'),
+                     array('example.com',
+                           '<a href="http://example.com/" rel="external">example.com</a>'),
                      array('(example.com)',
-                           '(<a href="http://example.com" rel="external">example.com</a>)'),
+                           '(<a href="http://example.com/" rel="external">example.com</a>)'),
                      array('[example.com]',
-                           '[<a href="http://example.com" rel="external">example.com</a>]'),
+                           '[<a href="http://example.com/" rel="external">example.com</a>]'),
                      array('<example.com>',
-                           '<<a href="http://example.com" rel="external">example.com</a>>'),
+                           '&lt;<a href="http://example.com/" rel="external">example.com</a>&gt;'),
                      array('Hey, check out my cool site example.com okay?',
-                           'Hey, check out my cool site <a href="http://example.com" rel="external">example.com</a> okay?'),
+                           'Hey, check out my cool site <a href="http://example.com/" rel="external">example.com</a> okay?'),
                      array('Hey, check out my cool site example.com.I made it.',
-                           'Hey, check out my cool site <a href="http://example.com" rel="external">example.com</a>.I made it.'),
+                           'Hey, check out my cool site <a href="http://example.com/" rel="external">example.com</a>.I made it.'),
                      array('Hey, check out my cool site example.com.Funny thing...',
-                           'Hey, check out my cool site <a href="http://example.com" rel="external">example.com</a>.Funny thing...'),
+                           'Hey, check out my cool site <a href="http://example.com/" rel="external">example.com</a>.Funny thing...'),
                      array('Hey, check out my cool site example.com.You will love it.',
-                           'Hey, check out my cool site <a href="http://example.com" rel="external">example.com</a>.You will love it.'),
+                           'Hey, check out my cool site <a href="http://example.com/" rel="external">example.com</a>.You will love it.'),
                      array('What about parens (e.g. example.com/path/foo/(bar))?',
                            'What about parens (e.g. <a href="http://example.com/path/foo/(bar)" rel="external">example.com/path/foo/(bar)</a>)?'),
                      array('What about parens (e.g. example.com/path/foo/(bar)?',