]> git.mxchange.org Git - quix0rs-gnu-social.git/commitdiff
Ticket #1281: JID validation now more or less follows spec instead of calling e-mail...
author Brion Vibber <brion@pobox.com>
Wed, 31 Mar 2010 00:35:27 +0000 (17:35 -0700)
committer Brion Vibber <brion@pobox.com>
Wed, 31 Mar 2010 00:35:27 +0000 (17:35 -0700)
Basic splitting/validation code submitted via http://status.net/wiki/XMPP/JID_validation -- Copyright 2009 Patrick Georgi <patrick@georgi-clan.de> Licensed under ISC-L, which is compatible with everything else that keeps the copyright notice intact.

Added PEAR Net_IDNA package to extlib to handle IDN normalization (also used by Validate's email verifier if present).

* added test suite, supplemented my own test cases with JID validation and normalization test cases from libpurple
* follows XMPP rules for validation of name part
* fixes for normalization with non-ASCII names
* will do domain checks if $config['email']['domain_check'] is on, checking for an XMPP-server SRV record or any lookup. (We don't actually need to ping those direct though.)
* some more obscure stringprep validation rules aren't quite followed yet, but we err on the side of permissiveness.
* we still don't actually let you save your address with a resource on it, as we strip resources when looking up users who've sent us presence or message updates. I would recommend saving the outgoing resource as a separate field if/when we add that..?

actions/imsettings.php
extlib/Net/IDNA.php [new file with mode: 0644]
extlib/Net/IDNA/php5.php [new file with mode: 0644]
lib/jabber.php
lib/util.php
tests/JidValidateTest.php [new file with mode: 0644]

index af4915843d5f799fab73d4075795d65aa23dc648..c3360fb12a5b16483428f118d10a9699b703e3d0 100644 (file)
@@ -292,7 +292,7 @@ class ImsettingsAction extends ConnectSettingsAction
             $this->showForm(_('Cannot normalize that Jabber ID'));
             return;
         }
-        if (!jabber_valid_base_jid($jabber)) {
+        if (!jabber_valid_base_jid($jabber, common_config('email', 'domain_check'))) {
             $this->showForm(_('Not a valid Jabber ID'));
             return;
         } else if ($user->jabber == $jabber) {
diff --git a/extlib/Net/IDNA.php b/extlib/Net/IDNA.php
new file mode 100644 (file)
index 0000000..987a37e
--- /dev/null
@@ -0,0 +1,100 @@
+<?php
+
+// {{{ license
+
+/* vim: set expandtab tabstop=4 shiftwidth=4 softtabstop=4 foldmethod=marker: */
+//
+// +----------------------------------------------------------------------+
+// | This library is free software; you can redistribute it and/or modify |
+// | it under the terms of the GNU Lesser General Public License as       |
+// | published by the Free Software Foundation; either version 2.1 of the |
+// | License, or (at your option) any later version.                      |
+// |                                                                      |
+// | This library is distributed in the hope that it will be useful, but  |
+// | WITHOUT ANY WARRANTY; without even the implied warranty of           |
+// | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU    |
+// | Lesser General Public License for more details.                      |
+// |                                                                      |
+// | You should have received a copy of the GNU Lesser General Public     |
+// | License along with this library; if not, write to the Free Software  |
+// | Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 |
+// | USA.                                                                 |
+// +----------------------------------------------------------------------+
+//
+
+// }}}
+
+
+/**
+ * Encode/decode Internationalized Domain Names.
+ * Factory class to get correct implementation either for php4 or php5.
+ *
+ * @author  Markus Nix <mnix@docuverse.de>
+ * @author  Matthias Sommerfeld <mso@phlylabs.de>
+ * @package Net
+ * @version $Id: IDNA.php 284681 2009-07-24 04:24:27Z clockwerx $
+ */
+
+class Net_IDNA
+{
+    // {{{ factory
+    /**
+     * Builds the Net_IDNA_php5 (PHP 5+) or Net_IDNA_php4 implementation.
+     * Static, because it is invoked as Net_IDNA::getInstance().
+     * @param  array  $params   Set of parameters for the implementation
+     * @return object|false     New implementation instance, or false when
+     *                          its class could not be loaded.
+     * @access public
+     */
+    public static function getInstance($params = array())
+    {
+        $version   = explode( '.', phpversion() );
+        $handler   = ((int)$version[0] > 4) ? 'php5' : 'php4';
+        $class     = 'Net_IDNA_' . $handler;
+        $classfile = 'Net/IDNA/' . $handler . '.php';
+
+        /*
+         * Attempt to include our version of the named class, but don't treat
+         * a failure as fatal.  The caller may have already included their own
+         * version of the named class.
+         */
+        @include_once $classfile;
+
+        /* If the class exists, return a new instance of it. */
+        if (class_exists($class)) {
+            return new $class($params);
+        }
+
+        return false;
+    }
+    // }}}
+
+    // {{{ singleton
+    /**
+     * Returns the shared IDNA instance for the given parameters, creating
+     * it through getInstance() on first use. Results are cached per
+     * serialized $params; a false (failed) result is cached as well.
+     *
+     * @param  array  $params   Set of parameters
+     * @return object|false     The cached implementation instance, or
+     *                          false on an error.
+     * @access public
+     */
+    public static function singleton($params = array())
+    {
+        static $instances;
+        if (!isset($instances)) {
+            $instances = array();
+        }
+
+        $signature = serialize($params);
+        if (!isset($instances[$signature])) {
+            $instances[$signature] = self::getInstance($params);
+        }
+
+        return $instances[$signature];
+    }
+    // }}}
+}
+
+?>
diff --git a/extlib/Net/IDNA/php5.php b/extlib/Net/IDNA/php5.php
new file mode 100644 (file)
index 0000000..d617721
--- /dev/null
@@ -0,0 +1,3269 @@
+<?php
+
+// {{{ license
+
+/* vim: set expandtab tabstop=4 shiftwidth=4 softtabstop=4 foldmethod=marker: */
+//
+// +----------------------------------------------------------------------+
+// | This library is free software; you can redistribute it and/or modify |
+// | it under the terms of the GNU Lesser General Public License as       |
+// | published by the Free Software Foundation; either version 2.1 of the |
+// | License, or (at your option) any later version.                      |
+// |                                                                      |
+// | This library is distributed in the hope that it will be useful, but  |
+// | WITHOUT ANY WARRANTY; without even the implied warranty of           |
+// | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU    |
+// | Lesser General Public License for more details.                      |
+// |                                                                      |
+// | You should have received a copy of the GNU Lesser General Public     |
+// | License along with this library; if not, write to the Free Software  |
+// | Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 |
+// | USA.                                                                 |
+// +----------------------------------------------------------------------+
+//
+
+// }}}
+
+
+/**
+ * Encode/decode Internationalized Domain Names.
+ *
+ * The class allows to convert internationalized domain names
+ * (see RFC 3490 for details) as they can be used with various registries worldwide
+ * to be translated between their original (localized) form and their encoded form
+ * as it will be used in the DNS (Domain Name System).
+ *
+ * The class provides two public methods, encode() and decode(), which do exactly
+ * what you would expect them to do. You are allowed to use complete domain names,
+ * simple strings and complete email addresses as well. That means, that you might
+ * use any of the following notations:
+ *
+ * - www.nörgler.com
+ * - xn--nrgler-wxa
+ * - xn--brse-5qa.xn--knrz-1ra.info
+ *
+ * Unicode input might be given as either UTF-8 string, UCS-4 string or UCS-4
+ * array. Unicode output is available in the same formats.
+ * You can select your preferred format via {@link set_paramter()}.
+ *
+ * ACE input and output is always expected to be ASCII.
+ *
+ * @author  Markus Nix <mnix@docuverse.de>
+ * @author  Matthias Sommerfeld <mso@phlylabs.de>
+ * @author  Stefan Neufeind <pear.neufeind@speedpartner.de>
+ * @package Net
+ * @version $Id: php5.php 284682 2009-07-24 04:27:35Z clockwerx $
+ */
+
+class Net_IDNA_php5
+{
+    // {{{ npdata
+    /**
+     * These Unicode codepoints are
+     * mapped to nothing, See RFC3454 for details
+     *
+     * @static
+     * @var array
+     * @access private
+     */
+    private static $_np_map_nothing = array(
+        0xAD,
+        0x34F,
+        0x1806,
+        0x180B,
+        0x180C,
+        0x180D,
+        0x200B,
+        0x200C,
+        0x200D,
+        0x2060,
+        0xFE00,
+        0xFE01,
+        0xFE02,
+        0xFE03,
+        0xFE04,
+        0xFE05,
+        0xFE06,
+        0xFE07,
+        0xFE08,
+        0xFE09,
+        0xFE0A,
+        0xFE0B,
+        0xFE0C,
+        0xFE0D,
+        0xFE0E,
+        0xFE0F,
+        0xFEFF
+    );
+
+    /**
+     * Prohibited codepoints
+     *
+     * @static
+     * @var array
+     * @access private
+     */
+    private static $_general_prohibited = array(
+        0,
+        1,
+        2,
+        3,
+        4,
+        5,
+        6,
+        7,
+        8,
+        9,
+        0xA,
+        0xB,
+        0xC,
+        0xD,
+        0xE,
+        0xF,
+        0x10,
+        0x11,
+        0x12,
+        0x13,
+        0x14,
+        0x15,
+        0x16,
+        0x17,
+        0x18,
+        0x19,
+        0x1A,
+        0x1B,
+        0x1C,
+        0x1D,
+        0x1E,
+        0x1F,
+        0x20,
+        0x21,
+        0x22,
+        0x23,
+        0x24,
+        0x25,
+        0x26,
+        0x27,
+        0x28,
+        0x29,
+        0x2A,
+        0x2B,
+        0x2C,
+        0x2F,
+        0x3B,
+        0x3C,
+        0x3D,
+        0x3E,
+        0x3F,
+        0x40,
+        0x5B,
+        0x5C,
+        0x5D,
+        0x5E,
+        0x5F,
+        0x60,
+        0x7B,
+        0x7C,
+        0x7D,
+        0x7E,
+        0x7F,
+        0x3002
+    );
+
+    /**
+     * Codepoints prohibited by Nameprep
+     * @static
+     * @var array
+     * @access private
+     */
+    private static $_np_prohibit = array(
+        0xA0,
+        0x1680,
+        0x2000,
+        0x2001,
+        0x2002,
+        0x2003,
+        0x2004,
+        0x2005,
+        0x2006,
+        0x2007,
+        0x2008,
+        0x2009,
+        0x200A,
+        0x200B,
+        0x202F,
+        0x205F,
+        0x3000,
+        0x6DD,
+        0x70F,
+        0x180E,
+        0x200C,
+        0x200D,
+        0x2028,
+        0x2029,
+        0xFEFF,
+        0xFFF9,
+        0xFFFA,
+        0xFFFB,
+        0xFFFC,
+        0xFFFE,
+        0xFFFF,
+        0x1FFFE,
+        0x1FFFF,
+        0x2FFFE,
+        0x2FFFF,
+        0x3FFFE,
+        0x3FFFF,
+        0x4FFFE,
+        0x4FFFF,
+        0x5FFFE,
+        0x5FFFF,
+        0x6FFFE,
+        0x6FFFF,
+        0x7FFFE,
+        0x7FFFF,
+        0x8FFFE,
+        0x8FFFF,
+        0x9FFFE,
+        0x9FFFF,
+        0xAFFFE,
+        0xAFFFF,
+        0xBFFFE,
+        0xBFFFF,
+        0xCFFFE,
+        0xCFFFF,
+        0xDFFFE,
+        0xDFFFF,
+        0xEFFFE,
+        0xEFFFF,
+        0xFFFFE,
+        0xFFFFF,
+        0x10FFFE,
+        0x10FFFF,
+        0xFFF9,
+        0xFFFA,
+        0xFFFB,
+        0xFFFC,
+        0xFFFD,
+        0x340,
+        0x341,
+        0x200E,
+        0x200F,
+        0x202A,
+        0x202B,
+        0x202C,
+        0x202D,
+        0x202E,
+        0x206A,
+        0x206B,
+        0x206C,
+        0x206D,
+        0x206E,
+        0x206F,
+        0xE0001
+    );
+
+    /**
+     * Codepoint ranges prohibited by nameprep
+     *
+     * @static
+     * @var array
+     * @access private
+     */
+    private static $_np_prohibit_ranges = array(
+        array(0x80,     0x9F    ),
+        array(0x2060,   0x206F  ),
+        array(0x1D173,  0x1D17A ),
+        array(0xE000,   0xF8FF  ),
+        array(0xF0000,  0xFFFFD ),
+        array(0x100000, 0x10FFFD),
+        array(0xFDD0,   0xFDEF  ),
+        array(0xD800,   0xDFFF  ),
+        array(0x2FF0,   0x2FFB  ),
+        array(0xE0020,  0xE007F )
+    );
+
+    /**
+     * Replacement mappings (casemapping, replacement sequences, ...)
+     *
+     * @static
+     * @var array
+     * @access private
+     */
+    private static $_np_replacemaps = array(
+        0x41    => array(0x61),
+        0x42    => array(0x62),
+        0x43    => array(0x63),
+        0x44    => array(0x64),
+        0x45    => array(0x65),
+        0x46    => array(0x66),
+        0x47    => array(0x67),
+        0x48    => array(0x68),
+        0x49    => array(0x69),
+        0x4A    => array(0x6A),
+        0x4B    => array(0x6B),
+        0x4C    => array(0x6C),
+        0x4D    => array(0x6D),
+        0x4E    => array(0x6E),
+        0x4F    => array(0x6F),
+        0x50    => array(0x70),
+        0x51    => array(0x71),
+        0x52    => array(0x72),
+        0x53    => array(0x73),
+        0x54    => array(0x74),
+        0x55    => array(0x75),
+        0x56    => array(0x76),
+        0x57    => array(0x77),
+        0x58    => array(0x78),
+        0x59    => array(0x79),
+        0x5A    => array(0x7A),
+        0xB5    => array(0x3BC),
+        0xC0    => array(0xE0),
+        0xC1    => array(0xE1),
+        0xC2    => array(0xE2),
+        0xC3    => array(0xE3),
+        0xC4    => array(0xE4),
+        0xC5    => array(0xE5),
+        0xC6    => array(0xE6),
+        0xC7    => array(0xE7),
+        0xC8    => array(0xE8),
+        0xC9    => array(0xE9),
+        0xCA    => array(0xEA),
+        0xCB    => array(0xEB),
+        0xCC    => array(0xEC),
+        0xCD    => array(0xED),
+        0xCE    => array(0xEE),
+        0xCF    => array(0xEF),
+        0xD0    => array(0xF0),
+        0xD1    => array(0xF1),
+        0xD2    => array(0xF2),
+        0xD3    => array(0xF3),
+        0xD4    => array(0xF4),
+        0xD5    => array(0xF5),
+        0xD6    => array(0xF6),
+        0xD8    => array(0xF8),
+        0xD9    => array(0xF9),
+        0xDA    => array(0xFA),
+        0xDB    => array(0xFB),
+        0xDC    => array(0xFC),
+        0xDD    => array(0xFD),
+        0xDE    => array(0xFE),
+        0xDF    => array(0x73, 0x73),
+        0x100   => array(0x101),
+        0x102   => array(0x103),
+        0x104   => array(0x105),
+        0x106   => array(0x107),
+        0x108   => array(0x109),
+        0x10A   => array(0x10B),
+        0x10C   => array(0x10D),
+        0x10E   => array(0x10F),
+        0x110   => array(0x111),
+        0x112   => array(0x113),
+        0x114   => array(0x115),
+        0x116   => array(0x117),
+        0x118   => array(0x119),
+        0x11A   => array(0x11B),
+        0x11C   => array(0x11D),
+        0x11E   => array(0x11F),
+        0x120   => array(0x121),
+        0x122   => array(0x123),
+        0x124   => array(0x125),
+        0x126   => array(0x127),
+        0x128   => array(0x129),
+        0x12A   => array(0x12B),
+        0x12C   => array(0x12D),
+        0x12E   => array(0x12F),
+        0x130   => array(0x69, 0x307),
+        0x132   => array(0x133),
+        0x134   => array(0x135),
+        0x136   => array(0x137),
+        0x139   => array(0x13A),
+        0x13B   => array(0x13C),
+        0x13D   => array(0x13E),
+        0x13F   => array(0x140),
+        0x141   => array(0x142),
+        0x143   => array(0x144),
+        0x145   => array(0x146),
+        0x147   => array(0x148),
+        0x149   => array(0x2BC, 0x6E),
+        0x14A   => array(0x14B),
+        0x14C   => array(0x14D),
+        0x14E   => array(0x14F),
+        0x150   => array(0x151),
+        0x152   => array(0x153),
+        0x154   => array(0x155),
+        0x156   => array(0x157),
+        0x158   => array(0x159),
+        0x15A   => array(0x15B),
+        0x15C   => array(0x15D),
+        0x15E   => array(0x15F),
+        0x160   => array(0x161),
+        0x162   => array(0x163),
+        0x164   => array(0x165),
+        0x166   => array(0x167),
+        0x168   => array(0x169),
+        0x16A   => array(0x16B),
+        0x16C   => array(0x16D),
+        0x16E   => array(0x16F),
+        0x170   => array(0x171),
+        0x172   => array(0x173),
+        0x174   => array(0x175),
+        0x176   => array(0x177),
+        0x178   => array(0xFF),
+        0x179   => array(0x17A),
+        0x17B   => array(0x17C),
+        0x17D   => array(0x17E),
+        0x17F   => array(0x73),
+        0x181   => array(0x253),
+        0x182   => array(0x183),
+        0x184   => array(0x185),
+        0x186   => array(0x254),
+        0x187   => array(0x188),
+        0x189   => array(0x256),
+        0x18A   => array(0x257),
+        0x18B   => array(0x18C),
+        0x18E   => array(0x1DD),
+        0x18F   => array(0x259),
+        0x190   => array(0x25B),
+        0x191   => array(0x192),
+        0x193   => array(0x260),
+        0x194   => array(0x263),
+        0x196   => array(0x269),
+        0x197   => array(0x268),
+        0x198   => array(0x199),
+        0x19C   => array(0x26F),
+        0x19D   => array(0x272),
+        0x19F   => array(0x275),
+        0x1A0   => array(0x1A1),
+        0x1A2   => array(0x1A3),
+        0x1A4   => array(0x1A5),
+        0x1A6   => array(0x280),
+        0x1A7   => array(0x1A8),
+        0x1A9   => array(0x283),
+        0x1AC   => array(0x1AD),
+        0x1AE   => array(0x288),
+        0x1AF   => array(0x1B0),
+        0x1B1   => array(0x28A),
+        0x1B2   => array(0x28B),
+        0x1B3   => array(0x1B4),
+        0x1B5   => array(0x1B6),
+        0x1B7   => array(0x292),
+        0x1B8   => array(0x1B9),
+        0x1BC   => array(0x1BD),
+        0x1C4   => array(0x1C6),
+        0x1C5   => array(0x1C6),
+        0x1C7   => array(0x1C9),
+        0x1C8   => array(0x1C9),
+        0x1CA   => array(0x1CC),
+        0x1CB   => array(0x1CC),
+        0x1CD   => array(0x1CE),
+        0x1CF   => array(0x1D0),
+        0x1D1   => array(0x1D2),
+        0x1D3   => array(0x1D4),
+        0x1D5   => array(0x1D6),
+        0x1D7   => array(0x1D8),
+        0x1D9   => array(0x1DA),
+        0x1DB   => array(0x1DC),
+        0x1DE   => array(0x1DF),
+        0x1E0   => array(0x1E1),
+        0x1E2   => array(0x1E3),
+        0x1E4   => array(0x1E5),
+        0x1E6   => array(0x1E7),
+        0x1E8   => array(0x1E9),
+        0x1EA   => array(0x1EB),
+        0x1EC   => array(0x1ED),
+        0x1EE   => array(0x1EF),
+        0x1F0   => array(0x6A, 0x30C),
+        0x1F1   => array(0x1F3),
+        0x1F2   => array(0x1F3),
+        0x1F4   => array(0x1F5),
+        0x1F6   => array(0x195),
+        0x1F7   => array(0x1BF),
+        0x1F8   => array(0x1F9),
+        0x1FA   => array(0x1FB),
+        0x1FC   => array(0x1FD),
+        0x1FE   => array(0x1FF),
+        0x200   => array(0x201),
+        0x202   => array(0x203),
+        0x204   => array(0x205),
+        0x206   => array(0x207),
+        0x208   => array(0x209),
+        0x20A   => array(0x20B),
+        0x20C   => array(0x20D),
+        0x20E   => array(0x20F),
+        0x210   => array(0x211),
+        0x212   => array(0x213),
+        0x214   => array(0x215),
+        0x216   => array(0x217),
+        0x218   => array(0x219),
+        0x21A   => array(0x21B),
+        0x21C   => array(0x21D),
+        0x21E   => array(0x21F),
+        0x220   => array(0x19E),
+        0x222   => array(0x223),
+        0x224   => array(0x225),
+        0x226   => array(0x227),
+        0x228   => array(0x229),
+        0x22A   => array(0x22B),
+        0x22C   => array(0x22D),
+        0x22E   => array(0x22F),
+        0x230   => array(0x231),
+        0x232   => array(0x233),
+        0x345   => array(0x3B9),
+        0x37A   => array(0x20, 0x3B9),
+        0x386   => array(0x3AC),
+        0x388   => array(0x3AD),
+        0x389   => array(0x3AE),
+        0x38A   => array(0x3AF),
+        0x38C   => array(0x3CC),
+        0x38E   => array(0x3CD),
+        0x38F   => array(0x3CE),
+        0x390   => array(0x3B9, 0x308, 0x301),
+        0x391   => array(0x3B1),
+        0x392   => array(0x3B2),
+        0x393   => array(0x3B3),
+        0x394   => array(0x3B4),
+        0x395   => array(0x3B5),
+        0x396   => array(0x3B6),
+        0x397   => array(0x3B7),
+        0x398   => array(0x3B8),
+        0x399   => array(0x3B9),
+        0x39A   => array(0x3BA),
+        0x39B   => array(0x3BB),
+        0x39C   => array(0x3BC),
+        0x39D   => array(0x3BD),
+        0x39E   => array(0x3BE),
+        0x39F   => array(0x3BF),
+        0x3A0   => array(0x3C0),
+        0x3A1   => array(0x3C1),
+        0x3A3   => array(0x3C3),
+        0x3A4   => array(0x3C4),
+        0x3A5   => array(0x3C5),
+        0x3A6   => array(0x3C6),
+        0x3A7   => array(0x3C7),
+        0x3A8   => array(0x3C8),
+        0x3A9   => array(0x3C9),
+        0x3AA   => array(0x3CA),
+        0x3AB   => array(0x3CB),
+        0x3B0   => array(0x3C5, 0x308, 0x301),
+        0x3C2   => array(0x3C3),
+        0x3D0   => array(0x3B2),
+        0x3D1   => array(0x3B8),
+        0x3D2   => array(0x3C5),
+        0x3D3   => array(0x3CD),
+        0x3D4   => array(0x3CB),
+        0x3D5   => array(0x3C6),
+        0x3D6   => array(0x3C0),
+        0x3D8   => array(0x3D9),
+        0x3DA   => array(0x3DB),
+        0x3DC   => array(0x3DD),
+        0x3DE   => array(0x3DF),
+        0x3E0   => array(0x3E1),
+        0x3E2   => array(0x3E3),
+        0x3E4   => array(0x3E5),
+        0x3E6   => array(0x3E7),
+        0x3E8   => array(0x3E9),
+        0x3EA   => array(0x3EB),
+        0x3EC   => array(0x3ED),
+        0x3EE   => array(0x3EF),
+        0x3F0   => array(0x3BA),
+        0x3F1   => array(0x3C1),
+        0x3F2   => array(0x3C3),
+        0x3F4   => array(0x3B8),
+        0x3F5   => array(0x3B5),
+        0x400   => array(0x450),
+        0x401   => array(0x451),
+        0x402   => array(0x452),
+        0x403   => array(0x453),
+        0x404   => array(0x454),
+        0x405   => array(0x455),
+        0x406   => array(0x456),
+        0x407   => array(0x457),
+        0x408   => array(0x458),
+        0x409   => array(0x459),
+        0x40A   => array(0x45A),
+        0x40B   => array(0x45B),
+        0x40C   => array(0x45C),
+        0x40D   => array(0x45D),
+        0x40E   => array(0x45E),
+        0x40F   => array(0x45F),
+        0x410   => array(0x430),
+        0x411   => array(0x431),
+        0x412   => array(0x432),
+        0x413   => array(0x433),
+        0x414   => array(0x434),
+        0x415   => array(0x435),
+        0x416   => array(0x436),
+        0x417   => array(0x437),
+        0x418   => array(0x438),
+        0x419   => array(0x439),
+        0x41A   => array(0x43A),
+        0x41B   => array(0x43B),
+        0x41C   => array(0x43C),
+        0x41D   => array(0x43D),
+        0x41E   => array(0x43E),
+        0x41F   => array(0x43F),
+        0x420   => array(0x440),
+        0x421   => array(0x441),
+        0x422   => array(0x442),
+        0x423   => array(0x443),
+        0x424   => array(0x444),
+        0x425   => array(0x445),
+        0x426   => array(0x446),
+        0x427   => array(0x447),
+        0x428   => array(0x448),
+        0x429   => array(0x449),
+        0x42A   => array(0x44A),
+        0x42B   => array(0x44B),
+        0x42C   => array(0x44C),
+        0x42D   => array(0x44D),
+        0x42E   => array(0x44E),
+        0x42F   => array(0x44F),
+        0x460   => array(0x461),
+        0x462   => array(0x463),
+        0x464   => array(0x465),
+        0x466   => array(0x467),
+        0x468   => array(0x469),
+        0x46A   => array(0x46B),
+        0x46C   => array(0x46D),
+        0x46E   => array(0x46F),
+        0x470   => array(0x471),
+        0x472   => array(0x473),
+        0x474   => array(0x475),
+        0x476   => array(0x477),
+        0x478   => array(0x479),
+        0x47A   => array(0x47B),
+        0x47C   => array(0x47D),
+        0x47E   => array(0x47F),
+        0x480   => array(0x481),
+        0x48A   => array(0x48B),
+        0x48C   => array(0x48D),
+        0x48E   => array(0x48F),
+        0x490   => array(0x491),
+        0x492   => array(0x493),
+        0x494   => array(0x495),
+        0x496   => array(0x497),
+        0x498   => array(0x499),
+        0x49A   => array(0x49B),
+        0x49C   => array(0x49D),
+        0x49E   => array(0x49F),
+        0x4A0   => array(0x4A1),
+        0x4A2   => array(0x4A3),
+        0x4A4   => array(0x4A5),
+        0x4A6   => array(0x4A7),
+        0x4A8   => array(0x4A9),
+        0x4AA   => array(0x4AB),
+        0x4AC   => array(0x4AD),
+        0x4AE   => array(0x4AF),
+        0x4B0   => array(0x4B1),
+        0x4B2   => array(0x4B3),
+        0x4B4   => array(0x4B5),
+        0x4B6   => array(0x4B7),
+        0x4B8   => array(0x4B9),
+        0x4BA   => array(0x4BB),
+        0x4BC   => array(0x4BD),
+        0x4BE   => array(0x4BF),
+        0x4C1   => array(0x4C2),
+        0x4C3   => array(0x4C4),
+        0x4C5   => array(0x4C6),
+        0x4C7   => array(0x4C8),
+        0x4C9   => array(0x4CA),
+        0x4CB   => array(0x4CC),
+        0x4CD   => array(0x4CE),
+        0x4D0   => array(0x4D1),
+        0x4D2   => array(0x4D3),
+        0x4D4   => array(0x4D5),
+        0x4D6   => array(0x4D7),
+        0x4D8   => array(0x4D9),
+        0x4DA   => array(0x4DB),
+        0x4DC   => array(0x4DD),
+        0x4DE   => array(0x4DF),
+        0x4E0   => array(0x4E1),
+        0x4E2   => array(0x4E3),
+        0x4E4   => array(0x4E5),
+        0x4E6   => array(0x4E7),
+        0x4E8   => array(0x4E9),
+        0x4EA   => array(0x4EB),
+        0x4EC   => array(0x4ED),
+        0x4EE   => array(0x4EF),
+        0x4F0   => array(0x4F1),
+        0x4F2   => array(0x4F3),
+        0x4F4   => array(0x4F5),
+        0x4F8   => array(0x4F9),
+        0x500   => array(0x501),
+        0x502   => array(0x503),
+        0x504   => array(0x505),
+        0x506   => array(0x507),
+        0x508   => array(0x509),
+        0x50A   => array(0x50B),
+        0x50C   => array(0x50D),
+        0x50E   => array(0x50F),
+        0x531   => array(0x561),
+        0x532   => array(0x562),
+        0x533   => array(0x563),
+        0x534   => array(0x564),
+        0x535   => array(0x565),
+        0x536   => array(0x566),
+        0x537   => array(0x567),
+        0x538   => array(0x568),
+        0x539   => array(0x569),
+        0x53A   => array(0x56A),
+        0x53B   => array(0x56B),
+        0x53C   => array(0x56C),
+        0x53D   => array(0x56D),
+        0x53E   => array(0x56E),
+        0x53F   => array(0x56F),
+        0x540   => array(0x570),
+        0x541   => array(0x571),
+        0x542   => array(0x572),
+        0x543   => array(0x573),
+        0x544   => array(0x574),
+        0x545   => array(0x575),
+        0x546   => array(0x576),
+        0x547   => array(0x577),
+        0x548   => array(0x578),
+        0x549   => array(0x579),
+        0x54A   => array(0x57A),
+        0x54B   => array(0x57B),
+        0x54C   => array(0x57C),
+        0x54D   => array(0x57D),
+        0x54E   => array(0x57E),
+        0x54F   => array(0x57F),
+        0x550   => array(0x580),
+        0x551   => array(0x581),
+        0x552   => array(0x582),
+        0x553   => array(0x583),
+        0x554   => array(0x584),
+        0x555   => array(0x585),
+        0x556   => array(0x586),
+        0x587   => array(0x565, 0x582),
+        0x1E00  => array(0x1E01),
+        0x1E02  => array(0x1E03),
+        0x1E04  => array(0x1E05),
+        0x1E06  => array(0x1E07),
+        0x1E08  => array(0x1E09),
+        0x1E0A  => array(0x1E0B),
+        0x1E0C  => array(0x1E0D),
+        0x1E0E  => array(0x1E0F),
+        0x1E10  => array(0x1E11),
+        0x1E12  => array(0x1E13),
+        0x1E14  => array(0x1E15),
+        0x1E16  => array(0x1E17),
+        0x1E18  => array(0x1E19),
+        0x1E1A  => array(0x1E1B),
+        0x1E1C  => array(0x1E1D),
+        0x1E1E  => array(0x1E1F),
+        0x1E20  => array(0x1E21),
+        0x1E22  => array(0x1E23),
+        0x1E24  => array(0x1E25),
+        0x1E26  => array(0x1E27),
+        0x1E28  => array(0x1E29),
+        0x1E2A  => array(0x1E2B),
+        0x1E2C  => array(0x1E2D),
+        0x1E2E  => array(0x1E2F),
+        0x1E30  => array(0x1E31),
+        0x1E32  => array(0x1E33),
+        0x1E34  => array(0x1E35),
+        0x1E36  => array(0x1E37),
+        0x1E38  => array(0x1E39),
+        0x1E3A  => array(0x1E3B),
+        0x1E3C  => array(0x1E3D),
+        0x1E3E  => array(0x1E3F),
+        0x1E40  => array(0x1E41),
+        0x1E42  => array(0x1E43),
+        0x1E44  => array(0x1E45),
+        0x1E46  => array(0x1E47),
+        0x1E48  => array(0x1E49),
+        0x1E4A  => array(0x1E4B),
+        0x1E4C  => array(0x1E4D),
+        0x1E4E  => array(0x1E4F),
+        0x1E50  => array(0x1E51),
+        0x1E52  => array(0x1E53),
+        0x1E54  => array(0x1E55),
+        0x1E56  => array(0x1E57),
+        0x1E58  => array(0x1E59),
+        0x1E5A  => array(0x1E5B),
+        0x1E5C  => array(0x1E5D),
+        0x1E5E  => array(0x1E5F),
+        0x1E60  => array(0x1E61),
+        0x1E62  => array(0x1E63),
+        0x1E64  => array(0x1E65),
+        0x1E66  => array(0x1E67),
+        0x1E68  => array(0x1E69),
+        0x1E6A  => array(0x1E6B),
+        0x1E6C  => array(0x1E6D),
+        0x1E6E  => array(0x1E6F),
+        0x1E70  => array(0x1E71),
+        0x1E72  => array(0x1E73),
+        0x1E74  => array(0x1E75),
+        0x1E76  => array(0x1E77),
+        0x1E78  => array(0x1E79),
+        0x1E7A  => array(0x1E7B),
+        0x1E7C  => array(0x1E7D),
+        0x1E7E  => array(0x1E7F),
+        0x1E80  => array(0x1E81),
+        0x1E82  => array(0x1E83),
+        0x1E84  => array(0x1E85),
+        0x1E86  => array(0x1E87),
+        0x1E88  => array(0x1E89),
+        0x1E8A  => array(0x1E8B),
+        0x1E8C  => array(0x1E8D),
+        0x1E8E  => array(0x1E8F),
+        0x1E90  => array(0x1E91),
+        0x1E92  => array(0x1E93),
+        0x1E94  => array(0x1E95),
+        0x1E96  => array(0x68, 0x331),
+        0x1E97  => array(0x74, 0x308),
+        0x1E98  => array(0x77, 0x30A),
+        0x1E99  => array(0x79, 0x30A),
+        0x1E9A  => array(0x61, 0x2BE),
+        0x1E9B  => array(0x1E61),
+        0x1EA0  => array(0x1EA1),
+        0x1EA2  => array(0x1EA3),
+        0x1EA4  => array(0x1EA5),
+        0x1EA6  => array(0x1EA7),
+        0x1EA8  => array(0x1EA9),
+        0x1EAA  => array(0x1EAB),
+        0x1EAC  => array(0x1EAD),
+        0x1EAE  => array(0x1EAF),
+        0x1EB0  => array(0x1EB1),
+        0x1EB2  => array(0x1EB3),
+        0x1EB4  => array(0x1EB5),
+        0x1EB6  => array(0x1EB7),
+        0x1EB8  => array(0x1EB9),
+        0x1EBA  => array(0x1EBB),
+        0x1EBC  => array(0x1EBD),
+        0x1EBE  => array(0x1EBF),
+        0x1EC0  => array(0x1EC1),
+        0x1EC2  => array(0x1EC3),
+        0x1EC4  => array(0x1EC5),
+        0x1EC6  => array(0x1EC7),
+        0x1EC8  => array(0x1EC9),
+        0x1ECA  => array(0x1ECB),
+        0x1ECC  => array(0x1ECD),
+        0x1ECE  => array(0x1ECF),
+        0x1ED0  => array(0x1ED1),
+        0x1ED2  => array(0x1ED3),
+        0x1ED4  => array(0x1ED5),
+        0x1ED6  => array(0x1ED7),
+        0x1ED8  => array(0x1ED9),
+        0x1EDA  => array(0x1EDB),
+        0x1EDC  => array(0x1EDD),
+        0x1EDE  => array(0x1EDF),
+        0x1EE0  => array(0x1EE1),
+        0x1EE2  => array(0x1EE3),
+        0x1EE4  => array(0x1EE5),
+        0x1EE6  => array(0x1EE7),
+        0x1EE8  => array(0x1EE9),
+        0x1EEA  => array(0x1EEB),
+        0x1EEC  => array(0x1EED),
+        0x1EEE  => array(0x1EEF),
+        0x1EF0  => array(0x1EF1),
+        0x1EF2  => array(0x1EF3),
+        0x1EF4  => array(0x1EF5),
+        0x1EF6  => array(0x1EF7),
+        0x1EF8  => array(0x1EF9),
+        0x1F08  => array(0x1F00),
+        0x1F09  => array(0x1F01),
+        0x1F0A  => array(0x1F02),
+        0x1F0B  => array(0x1F03),
+        0x1F0C  => array(0x1F04),
+        0x1F0D  => array(0x1F05),
+        0x1F0E  => array(0x1F06),
+        0x1F0F  => array(0x1F07),
+        0x1F18  => array(0x1F10),
+        0x1F19  => array(0x1F11),
+        0x1F1A  => array(0x1F12),
+        0x1F1B  => array(0x1F13),
+        0x1F1C  => array(0x1F14),
+        0x1F1D  => array(0x1F15),
+        0x1F28  => array(0x1F20),
+        0x1F29  => array(0x1F21),
+        0x1F2A  => array(0x1F22),
+        0x1F2B  => array(0x1F23),
+        0x1F2C  => array(0x1F24),
+        0x1F2D  => array(0x1F25),
+        0x1F2E  => array(0x1F26),
+        0x1F2F  => array(0x1F27),
+        0x1F38  => array(0x1F30),
+        0x1F39  => array(0x1F31),
+        0x1F3A  => array(0x1F32),
+        0x1F3B  => array(0x1F33),
+        0x1F3C  => array(0x1F34),
+        0x1F3D  => array(0x1F35),
+        0x1F3E  => array(0x1F36),
+        0x1F3F  => array(0x1F37),
+        0x1F48  => array(0x1F40),
+        0x1F49  => array(0x1F41),
+        0x1F4A  => array(0x1F42),
+        0x1F4B  => array(0x1F43),
+        0x1F4C  => array(0x1F44),
+        0x1F4D  => array(0x1F45),
+        0x1F50  => array(0x3C5, 0x313),
+        0x1F52  => array(0x3C5, 0x313, 0x300),
+        0x1F54  => array(0x3C5, 0x313, 0x301),
+        0x1F56  => array(0x3C5, 0x313, 0x342),
+        0x1F59  => array(0x1F51),
+        0x1F5B  => array(0x1F53),
+        0x1F5D  => array(0x1F55),
+        0x1F5F  => array(0x1F57),
+        0x1F68  => array(0x1F60),
+        0x1F69  => array(0x1F61),
+        0x1F6A  => array(0x1F62),
+        0x1F6B  => array(0x1F63),
+        0x1F6C  => array(0x1F64),
+        0x1F6D  => array(0x1F65),
+        0x1F6E  => array(0x1F66),
+        0x1F6F  => array(0x1F67),
+        0x1F80  => array(0x1F00, 0x3B9),
+        0x1F81  => array(0x1F01, 0x3B9),
+        0x1F82  => array(0x1F02, 0x3B9),
+        0x1F83  => array(0x1F03, 0x3B9),
+        0x1F84  => array(0x1F04, 0x3B9),
+        0x1F85  => array(0x1F05, 0x3B9),
+        0x1F86  => array(0x1F06, 0x3B9),
+        0x1F87  => array(0x1F07, 0x3B9),
+        0x1F88  => array(0x1F00, 0x3B9),
+        0x1F89  => array(0x1F01, 0x3B9),
+        0x1F8A  => array(0x1F02, 0x3B9),
+        0x1F8B  => array(0x1F03, 0x3B9),
+        0x1F8C  => array(0x1F04, 0x3B9),
+        0x1F8D  => array(0x1F05, 0x3B9),
+        0x1F8E  => array(0x1F06, 0x3B9),
+        0x1F8F  => array(0x1F07, 0x3B9),
+        0x1F90  => array(0x1F20, 0x3B9),
+        0x1F91  => array(0x1F21, 0x3B9),
+        0x1F92  => array(0x1F22, 0x3B9),
+        0x1F93  => array(0x1F23, 0x3B9),
+        0x1F94  => array(0x1F24, 0x3B9),
+        0x1F95  => array(0x1F25, 0x3B9),
+        0x1F96  => array(0x1F26, 0x3B9),
+        0x1F97  => array(0x1F27, 0x3B9),
+        0x1F98  => array(0x1F20, 0x3B9),
+        0x1F99  => array(0x1F21, 0x3B9),
+        0x1F9A  => array(0x1F22, 0x3B9),
+        0x1F9B  => array(0x1F23, 0x3B9),
+        0x1F9C  => array(0x1F24, 0x3B9),
+        0x1F9D  => array(0x1F25, 0x3B9),
+        0x1F9E  => array(0x1F26, 0x3B9),
+        0x1F9F  => array(0x1F27, 0x3B9),
+        0x1FA0  => array(0x1F60, 0x3B9),
+        0x1FA1  => array(0x1F61, 0x3B9),
+        0x1FA2  => array(0x1F62, 0x3B9),
+        0x1FA3  => array(0x1F63, 0x3B9),
+        0x1FA4  => array(0x1F64, 0x3B9),
+        0x1FA5  => array(0x1F65, 0x3B9),
+        0x1FA6  => array(0x1F66, 0x3B9),
+        0x1FA7  => array(0x1F67, 0x3B9),
+        0x1FA8  => array(0x1F60, 0x3B9),
+        0x1FA9  => array(0x1F61, 0x3B9),
+        0x1FAA  => array(0x1F62, 0x3B9),
+        0x1FAB  => array(0x1F63, 0x3B9),
+        0x1FAC  => array(0x1F64, 0x3B9),
+        0x1FAD  => array(0x1F65, 0x3B9),
+        0x1FAE  => array(0x1F66, 0x3B9),
+        0x1FAF  => array(0x1F67, 0x3B9),
+        0x1FB2  => array(0x1F70, 0x3B9),
+        0x1FB3  => array(0x3B1, 0x3B9),
+        0x1FB4  => array(0x3AC, 0x3B9),
+        0x1FB6  => array(0x3B1, 0x342),
+        0x1FB7  => array(0x3B1, 0x342, 0x3B9),
+        0x1FB8  => array(0x1FB0),
+        0x1FB9  => array(0x1FB1),
+        0x1FBA  => array(0x1F70),
+        0x1FBB  => array(0x1F71),
+        0x1FBC  => array(0x3B1, 0x3B9),
+        0x1FBE  => array(0x3B9),
+        0x1FC2  => array(0x1F74, 0x3B9),
+        0x1FC3  => array(0x3B7, 0x3B9),
+        0x1FC4  => array(0x3AE, 0x3B9),
+        0x1FC6  => array(0x3B7, 0x342),
+        0x1FC7  => array(0x3B7, 0x342, 0x3B9),
+        0x1FC8  => array(0x1F72),
+        0x1FC9  => array(0x1F73),
+        0x1FCA  => array(0x1F74),
+        0x1FCB  => array(0x1F75),
+        0x1FCC  => array(0x3B7, 0x3B9),
+        0x1FD2  => array(0x3B9, 0x308, 0x300),
+        0x1FD3  => array(0x3B9, 0x308, 0x301),
+        0x1FD6  => array(0x3B9, 0x342),
+        0x1FD7  => array(0x3B9, 0x308, 0x342),
+        0x1FD8  => array(0x1FD0),
+        0x1FD9  => array(0x1FD1),
+        0x1FDA  => array(0x1F76),
+        0x1FDB  => array(0x1F77),
+        0x1FE2  => array(0x3C5, 0x308, 0x300),
+        0x1FE3  => array(0x3C5, 0x308, 0x301),
+        0x1FE4  => array(0x3C1, 0x313),
+        0x1FE6  => array(0x3C5, 0x342),
+        0x1FE7  => array(0x3C5, 0x308, 0x342),
+        0x1FE8  => array(0x1FE0),
+        0x1FE9  => array(0x1FE1),
+        0x1FEA  => array(0x1F7A),
+        0x1FEB  => array(0x1F7B),
+        0x1FEC  => array(0x1FE5),
+        0x1FF2  => array(0x1F7C, 0x3B9),
+        0x1FF3  => array(0x3C9, 0x3B9),
+        0x1FF4  => array(0x3CE, 0x3B9),
+        0x1FF6  => array(0x3C9, 0x342),
+        0x1FF7  => array(0x3C9, 0x342, 0x3B9),
+        0x1FF8  => array(0x1F78),
+        0x1FF9  => array(0x1F79),
+        0x1FFA  => array(0x1F7C),
+        0x1FFB  => array(0x1F7D),
+        0x1FFC  => array(0x3C9, 0x3B9),
+        0x20A8  => array(0x72, 0x73),
+        0x2102  => array(0x63),
+        0x2103  => array(0xB0, 0x63),
+        0x2107  => array(0x25B),
+        0x2109  => array(0xB0, 0x66),
+        0x210B  => array(0x68),
+        0x210C  => array(0x68),
+        0x210D  => array(0x68),
+        0x2110  => array(0x69),
+        0x2111  => array(0x69),
+        0x2112  => array(0x6C),
+        0x2115  => array(0x6E),
+        0x2116  => array(0x6E, 0x6F),
+        0x2119  => array(0x70),
+        0x211A  => array(0x71),
+        0x211B  => array(0x72),
+        0x211C  => array(0x72),
+        0x211D  => array(0x72),
+        0x2120  => array(0x73, 0x6D),
+        0x2121  => array(0x74, 0x65, 0x6C),
+        0x2122  => array(0x74, 0x6D),
+        0x2124  => array(0x7A),
+        0x2126  => array(0x3C9),
+        0x2128  => array(0x7A),
+        0x212A  => array(0x6B),
+        0x212B  => array(0xE5),
+        0x212C  => array(0x62),
+        0x212D  => array(0x63),
+        0x2130  => array(0x65),
+        0x2131  => array(0x66),
+        0x2133  => array(0x6D),
+        0x213E  => array(0x3B3),
+        0x213F  => array(0x3C0),
+        0x2145  => array(0x64),
+        0x2160  => array(0x2170),
+        0x2161  => array(0x2171),
+        0x2162  => array(0x2172),
+        0x2163  => array(0x2173),
+        0x2164  => array(0x2174),
+        0x2165  => array(0x2175),
+        0x2166  => array(0x2176),
+        0x2167  => array(0x2177),
+        0x2168  => array(0x2178),
+        0x2169  => array(0x2179),
+        0x216A  => array(0x217A),
+        0x216B  => array(0x217B),
+        0x216C  => array(0x217C),
+        0x216D  => array(0x217D),
+        0x216E  => array(0x217E),
+        0x216F  => array(0x217F),
+        0x24B6  => array(0x24D0),
+        0x24B7  => array(0x24D1),
+        0x24B8  => array(0x24D2),
+        0x24B9  => array(0x24D3),
+        0x24BA  => array(0x24D4),
+        0x24BB  => array(0x24D5),
+        0x24BC  => array(0x24D6),
+        0x24BD  => array(0x24D7),
+        0x24BE  => array(0x24D8),
+        0x24BF  => array(0x24D9),
+        0x24C0  => array(0x24DA),
+        0x24C1  => array(0x24DB),
+        0x24C2  => array(0x24DC),
+        0x24C3  => array(0x24DD),
+        0x24C4  => array(0x24DE),
+        0x24C5  => array(0x24DF),
+        0x24C6  => array(0x24E0),
+        0x24C7  => array(0x24E1),
+        0x24C8  => array(0x24E2),
+        0x24C9  => array(0x24E3),
+        0x24CA  => array(0x24E4),
+        0x24CB  => array(0x24E5),
+        0x24CC  => array(0x24E6),
+        0x24CD  => array(0x24E7),
+        0x24CE  => array(0x24E8),
+        0x24CF  => array(0x24E9),
+        0x3371  => array(0x68, 0x70, 0x61),
+        0x3373  => array(0x61, 0x75),
+        0x3375  => array(0x6F, 0x76),
+        0x3380  => array(0x70, 0x61),
+        0x3381  => array(0x6E, 0x61),
+        0x3382  => array(0x3BC, 0x61),
+        0x3383  => array(0x6D, 0x61),
+        0x3384  => array(0x6B, 0x61),
+        0x3385  => array(0x6B, 0x62),
+        0x3386  => array(0x6D, 0x62),
+        0x3387  => array(0x67, 0x62),
+        0x338A  => array(0x70, 0x66),
+        0x338B  => array(0x6E, 0x66),
+        0x338C  => array(0x3BC, 0x66),
+        0x3390  => array(0x68, 0x7A),
+        0x3391  => array(0x6B, 0x68, 0x7A),
+        0x3392  => array(0x6D, 0x68, 0x7A),
+        0x3393  => array(0x67, 0x68, 0x7A),
+        0x3394  => array(0x74, 0x68, 0x7A),
+        0x33A9  => array(0x70, 0x61),
+        0x33AA  => array(0x6B, 0x70, 0x61),
+        0x33AB  => array(0x6D, 0x70, 0x61),
+        0x33AC  => array(0x67, 0x70, 0x61),
+        0x33B4  => array(0x70, 0x76),
+        0x33B5  => array(0x6E, 0x76),
+        0x33B6  => array(0x3BC, 0x76),
+        0x33B7  => array(0x6D, 0x76),
+        0x33B8  => array(0x6B, 0x76),
+        0x33B9  => array(0x6D, 0x76),
+        0x33BA  => array(0x70, 0x77),
+        0x33BB  => array(0x6E, 0x77),
+        0x33BC  => array(0x3BC, 0x77),
+        0x33BD  => array(0x6D, 0x77),
+        0x33BE  => array(0x6B, 0x77),
+        0x33BF  => array(0x6D, 0x77),
+        0x33C0  => array(0x6B, 0x3C9),
+        0x33C1  => array(0x6D, 0x3C9), /*
+        0x33C2  => array(0x61, 0x2E, 0x6D, 0x2E), */
+        0x33C3  => array(0x62, 0x71),
+        0x33C6  => array(0x63, 0x2215, 0x6B, 0x67),
+        0x33C7  => array(0x63, 0x6F, 0x2E),
+        0x33C8  => array(0x64, 0x62),
+        0x33C9  => array(0x67, 0x79),
+        0x33CB  => array(0x68, 0x70),
+        0x33CD  => array(0x6B, 0x6B),
+        0x33CE  => array(0x6B, 0x6D),
+        0x33D7  => array(0x70, 0x68),
+        0x33D9  => array(0x70, 0x70, 0x6D),
+        0x33DA  => array(0x70, 0x72),
+        0x33DC  => array(0x73, 0x76),
+        0x33DD  => array(0x77, 0x62),
+        0xFB00  => array(0x66, 0x66),
+        0xFB01  => array(0x66, 0x69),
+        0xFB02  => array(0x66, 0x6C),
+        0xFB03  => array(0x66, 0x66, 0x69),
+        0xFB04  => array(0x66, 0x66, 0x6C),
+        0xFB05  => array(0x73, 0x74),
+        0xFB06  => array(0x73, 0x74),
+        0xFB13  => array(0x574, 0x576),
+        0xFB14  => array(0x574, 0x565),
+        0xFB15  => array(0x574, 0x56B),
+        0xFB16  => array(0x57E, 0x576),
+        0xFB17  => array(0x574, 0x56D),
+        0xFF21  => array(0xFF41),
+        0xFF22  => array(0xFF42),
+        0xFF23  => array(0xFF43),
+        0xFF24  => array(0xFF44),
+        0xFF25  => array(0xFF45),
+        0xFF26  => array(0xFF46),
+        0xFF27  => array(0xFF47),
+        0xFF28  => array(0xFF48),
+        0xFF29  => array(0xFF49),
+        0xFF2A  => array(0xFF4A),
+        0xFF2B  => array(0xFF4B),
+        0xFF2C  => array(0xFF4C),
+        0xFF2D  => array(0xFF4D),
+        0xFF2E  => array(0xFF4E),
+        0xFF2F  => array(0xFF4F),
+        0xFF30  => array(0xFF50),
+        0xFF31  => array(0xFF51),
+        0xFF32  => array(0xFF52),
+        0xFF33  => array(0xFF53),
+        0xFF34  => array(0xFF54),
+        0xFF35  => array(0xFF55),
+        0xFF36  => array(0xFF56),
+        0xFF37  => array(0xFF57),
+        0xFF38  => array(0xFF58),
+        0xFF39  => array(0xFF59),
+        0xFF3A  => array(0xFF5A),
+        0x10400 => array(0x10428),
+        0x10401 => array(0x10429),
+        0x10402 => array(0x1042A),
+        0x10403 => array(0x1042B),
+        0x10404 => array(0x1042C),
+        0x10405 => array(0x1042D),
+        0x10406 => array(0x1042E),
+        0x10407 => array(0x1042F),
+        0x10408 => array(0x10430),
+        0x10409 => array(0x10431),
+        0x1040A => array(0x10432),
+        0x1040B => array(0x10433),
+        0x1040C => array(0x10434),
+        0x1040D => array(0x10435),
+        0x1040E => array(0x10436),
+        0x1040F => array(0x10437),
+        0x10410 => array(0x10438),
+        0x10411 => array(0x10439),
+        0x10412 => array(0x1043A),
+        0x10413 => array(0x1043B),
+        0x10414 => array(0x1043C),
+        0x10415 => array(0x1043D),
+        0x10416 => array(0x1043E),
+        0x10417 => array(0x1043F),
+        0x10418 => array(0x10440),
+        0x10419 => array(0x10441),
+        0x1041A => array(0x10442),
+        0x1041B => array(0x10443),
+        0x1041C => array(0x10444),
+        0x1041D => array(0x10445),
+        0x1041E => array(0x10446),
+        0x1041F => array(0x10447),
+        0x10420 => array(0x10448),
+        0x10421 => array(0x10449),
+        0x10422 => array(0x1044A),
+        0x10423 => array(0x1044B),
+        0x10424 => array(0x1044C),
+        0x10425 => array(0x1044D),
+        0x1D400 => array(0x61),
+        0x1D401 => array(0x62),
+        0x1D402 => array(0x63),
+        0x1D403 => array(0x64),
+        0x1D404 => array(0x65),
+        0x1D405 => array(0x66),
+        0x1D406 => array(0x67),
+        0x1D407 => array(0x68),
+        0x1D408 => array(0x69),
+        0x1D409 => array(0x6A),
+        0x1D40A => array(0x6B),
+        0x1D40B => array(0x6C),
+        0x1D40C => array(0x6D),
+        0x1D40D => array(0x6E),
+        0x1D40E => array(0x6F),
+        0x1D40F => array(0x70),
+        0x1D410 => array(0x71),
+        0x1D411 => array(0x72),
+        0x1D412 => array(0x73),
+        0x1D413 => array(0x74),
+        0x1D414 => array(0x75),
+        0x1D415 => array(0x76),
+        0x1D416 => array(0x77),
+        0x1D417 => array(0x78),
+        0x1D418 => array(0x79),
+        0x1D419 => array(0x7A),
+        0x1D434 => array(0x61),
+        0x1D435 => array(0x62),
+        0x1D436 => array(0x63),
+        0x1D437 => array(0x64),
+        0x1D438 => array(0x65),
+        0x1D439 => array(0x66),
+        0x1D43A => array(0x67),
+        0x1D43B => array(0x68),
+        0x1D43C => array(0x69),
+        0x1D43D => array(0x6A),
+        0x1D43E => array(0x6B),
+        0x1D43F => array(0x6C),
+        0x1D440 => array(0x6D),
+        0x1D441 => array(0x6E),
+        0x1D442 => array(0x6F),
+        0x1D443 => array(0x70),
+        0x1D444 => array(0x71),
+        0x1D445 => array(0x72),
+        0x1D446 => array(0x73),
+        0x1D447 => array(0x74),
+        0x1D448 => array(0x75),
+        0x1D449 => array(0x76),
+        0x1D44A => array(0x77),
+        0x1D44B => array(0x78),
+        0x1D44C => array(0x79),
+        0x1D44D => array(0x7A),
+        0x1D468 => array(0x61),
+        0x1D469 => array(0x62),
+        0x1D46A => array(0x63),
+        0x1D46B => array(0x64),
+        0x1D46C => array(0x65),
+        0x1D46D => array(0x66),
+        0x1D46E => array(0x67),
+        0x1D46F => array(0x68),
+        0x1D470 => array(0x69),
+        0x1D471 => array(0x6A),
+        0x1D472 => array(0x6B),
+        0x1D473 => array(0x6C),
+        0x1D474 => array(0x6D),
+        0x1D475 => array(0x6E),
+        0x1D476 => array(0x6F),
+        0x1D477 => array(0x70),
+        0x1D478 => array(0x71),
+        0x1D479 => array(0x72),
+        0x1D47A => array(0x73),
+        0x1D47B => array(0x74),
+        0x1D47C => array(0x75),
+        0x1D47D => array(0x76),
+        0x1D47E => array(0x77),
+        0x1D47F => array(0x78),
+        0x1D480 => array(0x79),
+        0x1D481 => array(0x7A),
+        0x1D49C => array(0x61),
+        0x1D49E => array(0x63),
+        0x1D49F => array(0x64),
+        0x1D4A2 => array(0x67),
+        0x1D4A5 => array(0x6A),
+        0x1D4A6 => array(0x6B),
+        0x1D4A9 => array(0x6E),
+        0x1D4AA => array(0x6F),
+        0x1D4AB => array(0x70),
+        0x1D4AC => array(0x71),
+        0x1D4AE => array(0x73),
+        0x1D4AF => array(0x74),
+        0x1D4B0 => array(0x75),
+        0x1D4B1 => array(0x76),
+        0x1D4B2 => array(0x77),
+        0x1D4B3 => array(0x78),
+        0x1D4B4 => array(0x79),
+        0x1D4B5 => array(0x7A),
+        0x1D4D0 => array(0x61),
+        0x1D4D1 => array(0x62),
+        0x1D4D2 => array(0x63),
+        0x1D4D3 => array(0x64),
+        0x1D4D4 => array(0x65),
+        0x1D4D5 => array(0x66),
+        0x1D4D6 => array(0x67),
+        0x1D4D7 => array(0x68),
+        0x1D4D8 => array(0x69),
+        0x1D4D9 => array(0x6A),
+        0x1D4DA => array(0x6B),
+        0x1D4DB => array(0x6C),
+        0x1D4DC => array(0x6D),
+        0x1D4DD => array(0x6E),
+        0x1D4DE => array(0x6F),
+        0x1D4DF => array(0x70),
+        0x1D4E0 => array(0x71),
+        0x1D4E1 => array(0x72),
+        0x1D4E2 => array(0x73),
+        0x1D4E3 => array(0x74),
+        0x1D4E4 => array(0x75),
+        0x1D4E5 => array(0x76),
+        0x1D4E6 => array(0x77),
+        0x1D4E7 => array(0x78),
+        0x1D4E8 => array(0x79),
+        0x1D4E9 => array(0x7A),
+        0x1D504 => array(0x61),
+        0x1D505 => array(0x62),
+        0x1D507 => array(0x64),
+        0x1D508 => array(0x65),
+        0x1D509 => array(0x66),
+        0x1D50A => array(0x67),
+        0x1D50D => array(0x6A),
+        0x1D50E => array(0x6B),
+        0x1D50F => array(0x6C),
+        0x1D510 => array(0x6D),
+        0x1D511 => array(0x6E),
+        0x1D512 => array(0x6F),
+        0x1D513 => array(0x70),
+        0x1D514 => array(0x71),
+        0x1D516 => array(0x73),
+        0x1D517 => array(0x74),
+        0x1D518 => array(0x75),
+        0x1D519 => array(0x76),
+        0x1D51A => array(0x77),
+        0x1D51B => array(0x78),
+        0x1D51C => array(0x79),
+        0x1D538 => array(0x61),
+        0x1D539 => array(0x62),
+        0x1D53B => array(0x64),
+        0x1D53C => array(0x65),
+        0x1D53D => array(0x66),
+        0x1D53E => array(0x67),
+        0x1D540 => array(0x69),
+        0x1D541 => array(0x6A),
+        0x1D542 => array(0x6B),
+        0x1D543 => array(0x6C),
+        0x1D544 => array(0x6D),
+        0x1D546 => array(0x6F),
+        0x1D54A => array(0x73),
+        0x1D54B => array(0x74),
+        0x1D54C => array(0x75),
+        0x1D54D => array(0x76),
+        0x1D54E => array(0x77),
+        0x1D54F => array(0x78),
+        0x1D550 => array(0x79),
+        0x1D56C => array(0x61),
+        0x1D56D => array(0x62),
+        0x1D56E => array(0x63),
+        0x1D56F => array(0x64),
+        0x1D570 => array(0x65),
+        0x1D571 => array(0x66),
+        0x1D572 => array(0x67),
+        0x1D573 => array(0x68),
+        0x1D574 => array(0x69),
+        0x1D575 => array(0x6A),
+        0x1D576 => array(0x6B),
+        0x1D577 => array(0x6C),
+        0x1D578 => array(0x6D),
+        0x1D579 => array(0x6E),
+        0x1D57A => array(0x6F),
+        0x1D57B => array(0x70),
+        0x1D57C => array(0x71),
+        0x1D57D => array(0x72),
+        0x1D57E => array(0x73),
+        0x1D57F => array(0x74),
+        0x1D580 => array(0x75),
+        0x1D581 => array(0x76),
+        0x1D582 => array(0x77),
+        0x1D583 => array(0x78),
+        0x1D584 => array(0x79),
+        0x1D585 => array(0x7A),
+        0x1D5A0 => array(0x61),
+        0x1D5A1 => array(0x62),
+        0x1D5A2 => array(0x63),
+        0x1D5A3 => array(0x64),
+        0x1D5A4 => array(0x65),
+        0x1D5A5 => array(0x66),
+        0x1D5A6 => array(0x67),
+        0x1D5A7 => array(0x68),
+        0x1D5A8 => array(0x69),
+        0x1D5A9 => array(0x6A),
+        0x1D5AA => array(0x6B),
+        0x1D5AB => array(0x6C),
+        0x1D5AC => array(0x6D),
+        0x1D5AD => array(0x6E),
+        0x1D5AE => array(0x6F),
+        0x1D5AF => array(0x70),
+        0x1D5B0 => array(0x71),
+        0x1D5B1 => array(0x72),
+        0x1D5B2 => array(0x73),
+        0x1D5B3 => array(0x74),
+        0x1D5B4 => array(0x75),
+        0x1D5B5 => array(0x76),
+        0x1D5B6 => array(0x77),
+        0x1D5B7 => array(0x78),
+        0x1D5B8 => array(0x79),
+        0x1D5B9 => array(0x7A),
+        0x1D5D4 => array(0x61),
+        0x1D5D5 => array(0x62),
+        0x1D5D6 => array(0x63),
+        0x1D5D7 => array(0x64),
+        0x1D5D8 => array(0x65),
+        0x1D5D9 => array(0x66),
+        0x1D5DA => array(0x67),
+        0x1D5DB => array(0x68),
+        0x1D5DC => array(0x69),
+        0x1D5DD => array(0x6A),
+        0x1D5DE => array(0x6B),
+        0x1D5DF => array(0x6C),
+        0x1D5E0 => array(0x6D),
+        0x1D5E1 => array(0x6E),
+        0x1D5E2 => array(0x6F),
+        0x1D5E3 => array(0x70),
+        0x1D5E4 => array(0x71),
+        0x1D5E5 => array(0x72),
+        0x1D5E6 => array(0x73),
+        0x1D5E7 => array(0x74),
+        0x1D5E8 => array(0x75),
+        0x1D5E9 => array(0x76),
+        0x1D5EA => array(0x77),
+        0x1D5EB => array(0x78),
+        0x1D5EC => array(0x79),
+        0x1D5ED => array(0x7A),
+        0x1D608 => array(0x61),
+        0x1D609 => array(0x62),
+        0x1D60A => array(0x63),
+        0x1D60B => array(0x64),
+        0x1D60C => array(0x65),
+        0x1D60D => array(0x66),
+        0x1D60E => array(0x67),
+        0x1D60F => array(0x68),
+        0x1D610 => array(0x69),
+        0x1D611 => array(0x6A),
+        0x1D612 => array(0x6B),
+        0x1D613 => array(0x6C),
+        0x1D614 => array(0x6D),
+        0x1D615 => array(0x6E),
+        0x1D616 => array(0x6F),
+        0x1D617 => array(0x70),
+        0x1D618 => array(0x71),
+        0x1D619 => array(0x72),
+        0x1D61A => array(0x73),
+        0x1D61B => array(0x74),
+        0x1D61C => array(0x75),
+        0x1D61D => array(0x76),
+        0x1D61E => array(0x77),
+        0x1D61F => array(0x78),
+        0x1D620 => array(0x79),
+        0x1D621 => array(0x7A),
+        0x1D63C => array(0x61),
+        0x1D63D => array(0x62),
+        0x1D63E => array(0x63),
+        0x1D63F => array(0x64),
+        0x1D640 => array(0x65),
+        0x1D641 => array(0x66),
+        0x1D642 => array(0x67),
+        0x1D643 => array(0x68),
+        0x1D644 => array(0x69),
+        0x1D645 => array(0x6A),
+        0x1D646 => array(0x6B),
+        0x1D647 => array(0x6C),
+        0x1D648 => array(0x6D),
+        0x1D649 => array(0x6E),
+        0x1D64A => array(0x6F),
+        0x1D64B => array(0x70),
+        0x1D64C => array(0x71),
+        0x1D64D => array(0x72),
+        0x1D64E => array(0x73),
+        0x1D64F => array(0x74),
+        0x1D650 => array(0x75),
+        0x1D651 => array(0x76),
+        0x1D652 => array(0x77),
+        0x1D653 => array(0x78),
+        0x1D654 => array(0x79),
+        0x1D655 => array(0x7A),
+        0x1D670 => array(0x61),
+        0x1D671 => array(0x62),
+        0x1D672 => array(0x63),
+        0x1D673 => array(0x64),
+        0x1D674 => array(0x65),
+        0x1D675 => array(0x66),
+        0x1D676 => array(0x67),
+        0x1D677 => array(0x68),
+        0x1D678 => array(0x69),
+        0x1D679 => array(0x6A),
+        0x1D67A => array(0x6B),
+        0x1D67B => array(0x6C),
+        0x1D67C => array(0x6D),
+        0x1D67D => array(0x6E),
+        0x1D67E => array(0x6F),
+        0x1D67F => array(0x70),
+        0x1D680 => array(0x71),
+        0x1D681 => array(0x72),
+        0x1D682 => array(0x73),
+        0x1D683 => array(0x74),
+        0x1D684 => array(0x75),
+        0x1D685 => array(0x76),
+        0x1D686 => array(0x77),
+        0x1D687 => array(0x78),
+        0x1D688 => array(0x79),
+        0x1D689 => array(0x7A),
+        0x1D6A8 => array(0x3B1),
+        0x1D6A9 => array(0x3B2),
+        0x1D6AA => array(0x3B3),
+        0x1D6AB => array(0x3B4),
+        0x1D6AC => array(0x3B5),
+        0x1D6AD => array(0x3B6),
+        0x1D6AE => array(0x3B7),
+        0x1D6AF => array(0x3B8),
+        0x1D6B0 => array(0x3B9),
+        0x1D6B1 => array(0x3BA),
+        0x1D6B2 => array(0x3BB),
+        0x1D6B3 => array(0x3BC),
+        0x1D6B4 => array(0x3BD),
+        0x1D6B5 => array(0x3BE),
+        0x1D6B6 => array(0x3BF),
+        0x1D6B7 => array(0x3C0),
+        0x1D6B8 => array(0x3C1),
+        0x1D6B9 => array(0x3B8),
+        0x1D6BA => array(0x3C3),
+        0x1D6BB => array(0x3C4),
+        0x1D6BC => array(0x3C5),
+        0x1D6BD => array(0x3C6),
+        0x1D6BE => array(0x3C7),
+        0x1D6BF => array(0x3C8),
+        0x1D6C0 => array(0x3C9),
+        0x1D6D3 => array(0x3C3),
+        0x1D6E2 => array(0x3B1),
+        0x1D6E3 => array(0x3B2),
+        0x1D6E4 => array(0x3B3),
+        0x1D6E5 => array(0x3B4),
+        0x1D6E6 => array(0x3B5),
+        0x1D6E7 => array(0x3B6),
+        0x1D6E8 => array(0x3B7),
+        0x1D6E9 => array(0x3B8),
+        0x1D6EA => array(0x3B9),
+        0x1D6EB => array(0x3BA),
+        0x1D6EC => array(0x3BB),
+        0x1D6ED => array(0x3BC),
+        0x1D6EE => array(0x3BD),
+        0x1D6EF => array(0x3BE),
+        0x1D6F0 => array(0x3BF),
+        0x1D6F1 => array(0x3C0),
+        0x1D6F2 => array(0x3C1),
+        0x1D6F3 => array(0x3B8),
+        0x1D6F4 => array(0x3C3),
+        0x1D6F5 => array(0x3C4),
+        0x1D6F6 => array(0x3C5),
+        0x1D6F7 => array(0x3C6),
+        0x1D6F8 => array(0x3C7),
+        0x1D6F9 => array(0x3C8),
+        0x1D6FA => array(0x3C9),
+        0x1D70D => array(0x3C3),
+        0x1D71C => array(0x3B1),
+        0x1D71D => array(0x3B2),
+        0x1D71E => array(0x3B3),
+        0x1D71F => array(0x3B4),
+        0x1D720 => array(0x3B5),
+        0x1D721 => array(0x3B6),
+        0x1D722 => array(0x3B7),
+        0x1D723 => array(0x3B8),
+        0x1D724 => array(0x3B9),
+        0x1D725 => array(0x3BA),
+        0x1D726 => array(0x3BB),
+        0x1D727 => array(0x3BC),
+        0x1D728 => array(0x3BD),
+        0x1D729 => array(0x3BE),
+        0x1D72A => array(0x3BF),
+        0x1D72B => array(0x3C0),
+        0x1D72C => array(0x3C1),
+        0x1D72D => array(0x3B8),
+        0x1D72E => array(0x3C3),
+        0x1D72F => array(0x3C4),
+        0x1D730 => array(0x3C5),
+        0x1D731 => array(0x3C6),
+        0x1D732 => array(0x3C7),
+        0x1D733 => array(0x3C8),
+        0x1D734 => array(0x3C9),
+        0x1D747 => array(0x3C3),
+        0x1D756 => array(0x3B1),
+        0x1D757 => array(0x3B2),
+        0x1D758 => array(0x3B3),
+        0x1D759 => array(0x3B4),
+        0x1D75A => array(0x3B5),
+        0x1D75B => array(0x3B6),
+        0x1D75C => array(0x3B7),
+        0x1D75D => array(0x3B8),
+        0x1D75E => array(0x3B9),
+        0x1D75F => array(0x3BA),
+        0x1D760 => array(0x3BB),
+        0x1D761 => array(0x3BC),
+        0x1D762 => array(0x3BD),
+        0x1D763 => array(0x3BE),
+        0x1D764 => array(0x3BF),
+        0x1D765 => array(0x3C0),
+        0x1D766 => array(0x3C1),
+        0x1D767 => array(0x3B8),
+        0x1D768 => array(0x3C3),
+        0x1D769 => array(0x3C4),
+        0x1D76A => array(0x3C5),
+        0x1D76B => array(0x3C6),
+        0x1D76C => array(0x3C7),
+        0x1D76D => array(0x3C8),
+        0x1D76E => array(0x3C9),
+        0x1D781 => array(0x3C3),
+        0x1D790 => array(0x3B1),
+        0x1D791 => array(0x3B2),
+        0x1D792 => array(0x3B3),
+        0x1D793 => array(0x3B4),
+        0x1D794 => array(0x3B5),
+        0x1D795 => array(0x3B6),
+        0x1D796 => array(0x3B7),
+        0x1D797 => array(0x3B8),
+        0x1D798 => array(0x3B9),
+        0x1D799 => array(0x3BA),
+        0x1D79A => array(0x3BB),
+        0x1D79B => array(0x3BC),
+        0x1D79C => array(0x3BD),
+        0x1D79D => array(0x3BE),
+        0x1D79E => array(0x3BF),
+        0x1D79F => array(0x3C0),
+        0x1D7A0 => array(0x3C1),
+        0x1D7A1 => array(0x3B8),
+        0x1D7A2 => array(0x3C3),
+        0x1D7A3 => array(0x3C4),
+        0x1D7A4 => array(0x3C5),
+        0x1D7A5 => array(0x3C6),
+        0x1D7A6 => array(0x3C7),
+        0x1D7A7 => array(0x3C8),
+        0x1D7A8 => array(0x3C9),
+        0x1D7BB => array(0x3C3),
+        0x3F9   => array(0x3C3),
+        0x1D2C  => array(0x61),
+        0x1D2D  => array(0xE6),
+        0x1D2E  => array(0x62),
+        0x1D30  => array(0x64),
+        0x1D31  => array(0x65),
+        0x1D32  => array(0x1DD),
+        0x1D33  => array(0x67),
+        0x1D34  => array(0x68),
+        0x1D35  => array(0x69),
+        0x1D36  => array(0x6A),
+        0x1D37  => array(0x6B),
+        0x1D38  => array(0x6C),
+        0x1D39  => array(0x6D),
+        0x1D3A  => array(0x6E),
+        0x1D3C  => array(0x6F),
+        0x1D3D  => array(0x223),
+        0x1D3E  => array(0x70),
+        0x1D3F  => array(0x72),
+        0x1D40  => array(0x74),
+        0x1D41  => array(0x75),
+        0x1D42  => array(0x77),
+        0x213B  => array(0x66, 0x61, 0x78),
+        0x3250  => array(0x70, 0x74, 0x65),
+        0x32CC  => array(0x68, 0x67),
+        0x32CE  => array(0x65, 0x76),
+        0x32CF  => array(0x6C, 0x74, 0x64),
+        0x337A  => array(0x69, 0x75),
+        0x33DE  => array(0x76, 0x2215, 0x6D),
+        0x33DF  => array(0x61, 0x2215, 0x6D)
+    );
+
+    /**
+     * Normalization Combining Classes; code points not listed
+     * here have Combining Class 0.
+     *
+     * @static
+     * @var array
+     * @access private
+     */
+    private static $_np_norm_combcls = array(
+        0x334   => 1,
+        0x335   => 1,
+        0x336   => 1,
+        0x337   => 1,
+        0x338   => 1,
+        0x93C   => 7,
+        0x9BC   => 7,
+        0xA3C   => 7,
+        0xABC   => 7,
+        0xB3C   => 7,
+        0xCBC   => 7,
+        0x1037  => 7,
+        0x3099  => 8,
+        0x309A  => 8,
+        0x94D   => 9,
+        0x9CD   => 9,
+        0xA4D   => 9,
+        0xACD   => 9,
+        0xB4D   => 9,
+        0xBCD   => 9,
+        0xC4D   => 9,
+        0xCCD   => 9,
+        0xD4D   => 9,
+        0xDCA   => 9,
+        0xE3A   => 9,
+        0xF84   => 9,
+        0x1039  => 9,
+        0x1714  => 9,
+        0x1734  => 9,
+        0x17D2  => 9,
+        0x5B0   => 10,
+        0x5B1   => 11,
+        0x5B2   => 12,
+        0x5B3   => 13,
+        0x5B4   => 14,
+        0x5B5   => 15,
+        0x5B6   => 16,
+        0x5B7   => 17,
+        0x5B8   => 18,
+        0x5B9   => 19,
+        0x5BB   => 20,
+        0x5Bc   => 21,
+        0x5BD   => 22,
+        0x5BF   => 23,
+        0x5C1   => 24,
+        0x5C2   => 25,
+        0xFB1E  => 26,
+        0x64B   => 27,
+        0x64C   => 28,
+        0x64D   => 29,
+        0x64E   => 30,
+        0x64F   => 31,
+        0x650   => 32,
+        0x651   => 33,
+        0x652   => 34,
+        0x670   => 35,
+        0x711   => 36,
+        0xC55   => 84,
+        0xC56   => 91,
+        0xE38   => 103,
+        0xE39   => 103,
+        0xE48   => 107,
+        0xE49   => 107,
+        0xE4A   => 107,
+        0xE4B   => 107,
+        0xEB8   => 118,
+        0xEB9   => 118,
+        0xEC8   => 122,
+        0xEC9   => 122,
+        0xECA   => 122,
+        0xECB   => 122,
+        0xF71   => 129,
+        0xF72   => 130,
+        0xF7A   => 130,
+        0xF7B   => 130,
+        0xF7C   => 130,
+        0xF7D   => 130,
+        0xF80   => 130,
+        0xF74   => 132,
+        0x321   => 202,
+        0x322   => 202,
+        0x327   => 202,
+        0x328   => 202,
+        0x31B   => 216,
+        0xF39   => 216,
+        0x1D165 => 216,
+        0x1D166 => 216,
+        0x1D16E => 216,
+        0x1D16F => 216,
+        0x1D170 => 216,
+        0x1D171 => 216,
+        0x1D172 => 216,
+        0x302A  => 218,
+        0x316   => 220,
+        0x317   => 220,
+        0x318   => 220,
+        0x319   => 220,
+        0x31C   => 220,
+        0x31D   => 220,
+        0x31E   => 220,
+        0x31F   => 220,
+        0x320   => 220,
+        0x323   => 220,
+        0x324   => 220,
+        0x325   => 220,
+        0x326   => 220,
+        0x329   => 220,
+        0x32A   => 220,
+        0x32B   => 220,
+        0x32C   => 220,
+        0x32D   => 220,
+        0x32E   => 220,
+        0x32F   => 220,
+        0x330   => 220,
+        0x331   => 220,
+        0x332   => 220,
+        0x333   => 220,
+        0x339   => 220,
+        0x33A   => 220,
+        0x33B   => 220,
+        0x33C   => 220,
+        0x347   => 220,
+        0x348   => 220,
+        0x349   => 220,
+        0x34D   => 220,
+        0x34E   => 220,
+        0x353   => 220,
+        0x354   => 220,
+        0x355   => 220,
+        0x356   => 220,
+        0x591   => 220,
+        0x596   => 220,
+        0x59B   => 220,
+        0x5A3   => 220,
+        0x5A4   => 220,
+        0x5A5   => 220,
+        0x5A6   => 220,
+        0x5A7   => 220,
+        0x5AA   => 220,
+        0x655   => 220,
+        0x656   => 220,
+        0x6E3   => 220,
+        0x6EA   => 220,
+        0x6ED   => 220,
+        0x731   => 220,
+        0x734   => 220,
+        0x737   => 220,
+        0x738   => 220,
+        0x739   => 220,
+        0x73B   => 220,
+        0x73C   => 220,
+        0x73E   => 220,
+        0x742   => 220,
+        0x744   => 220,
+        0x746   => 220,
+        0x748   => 220,
+        0x952   => 220,
+        0xF18   => 220,
+        0xF19   => 220,
+        0xF35   => 220,
+        0xF37   => 220,
+        0xFC6   => 220,
+        0x193B  => 220,
+        0x20E8  => 220,
+        0x1D17B => 220,
+        0x1D17C => 220,
+        0x1D17D => 220,
+        0x1D17E => 220,
+        0x1D17F => 220,
+        0x1D180 => 220,
+        0x1D181 => 220,
+        0x1D182 => 220,
+        0x1D18A => 220,
+        0x1D18B => 220,
+        0x59A   => 222,
+        0x5AD   => 222,
+        0x1929  => 222,
+        0x302D  => 222,
+        0x302E  => 224,
+        0x302F  => 224,
+        0x1D16D => 226,
+        0x5AE   => 228,
+        0x18A9  => 228,
+        0x302B  => 228,
+        0x300   => 230,
+        0x301   => 230,
+        0x302   => 230,
+        0x303   => 230,
+        0x304   => 230,
+        0x305   => 230,
+        0x306   => 230,
+        0x307   => 230,
+        0x308   => 230,
+        0x309   => 230,
+        0x30A   => 230,
+        0x30B   => 230,
+        0x30C   => 230,
+        0x30D   => 230,
+        0x30E   => 230,
+        0x30F   => 230,
+        0x310   => 230,
+        0x311   => 230,
+        0x312   => 230,
+        0x313   => 230,
+        0x314   => 230,
+        0x33D   => 230,
+        0x33E   => 230,
+        0x33F   => 230,
+        0x340   => 230,
+        0x341   => 230,
+        0x342   => 230,
+        0x343   => 230,
+        0x344   => 230,
+        0x346   => 230,
+        0x34A   => 230,
+        0x34B   => 230,
+        0x34C   => 230,
+        0x350   => 230,
+        0x351   => 230,
+        0x352   => 230,
+        0x357   => 230,
+        0x363   => 230,
+        0x364   => 230,
+        0x365   => 230,
+        0x366   => 230,
+        0x367   => 230,
+        0x368   => 230,
+        0x369   => 230,
+        0x36A   => 230,
+        0x36B   => 230,
+        0x36C   => 230,
+        0x36D   => 230,
+        0x36E   => 230,
+        0x36F   => 230,
+        0x483   => 230,
+        0x484   => 230,
+        0x485   => 230,
+        0x486   => 230,
+        0x592   => 230,
+        0x593   => 230,
+        0x594   => 230,
+        0x595   => 230,
+        0x597   => 230,
+        0x598   => 230,
+        0x599   => 230,
+        0x59C   => 230,
+        0x59D   => 230,
+        0x59E   => 230,
+        0x59F   => 230,
+        0x5A0   => 230,
+        0x5A1   => 230,
+        0x5A8   => 230,
+        0x5A9   => 230,
+        0x5AB   => 230,
+        0x5AC   => 230,
+        0x5AF   => 230,
+        0x5C4   => 230,
+        0x610   => 230,
+        0x611   => 230,
+        0x612   => 230,
+        0x613   => 230,
+        0x614   => 230,
+        0x615   => 230,
+        0x653   => 230,
+        0x654   => 230,
+        0x657   => 230,
+        0x658   => 230,
+        0x6D6   => 230,
+        0x6D7   => 230,
+        0x6D8   => 230,
+        0x6D9   => 230,
+        0x6DA   => 230,
+        0x6DB   => 230,
+        0x6DC   => 230,
+        0x6DF   => 230,
+        0x6E0   => 230,
+        0x6E1   => 230,
+        0x6E2   => 230,
+        0x6E4   => 230,
+        0x6E7   => 230,
+        0x6E8   => 230,
+        0x6EB   => 230,
+        0x6EC   => 230,
+        0x730   => 230,
+        0x732   => 230,
+        0x733   => 230,
+        0x735   => 230,
+        0x736   => 230,
+        0x73A   => 230,
+        0x73D   => 230,
+        0x73F   => 230,
+        0x740   => 230,
+        0x741   => 230,
+        0x743   => 230,
+        0x745   => 230,
+        0x747   => 230,
+        0x749   => 230,
+        0x74A   => 230,
+        0x951   => 230,
+        0x953   => 230,
+        0x954   => 230,
+        0xF82   => 230,
+        0xF83   => 230,
+        0xF86   => 230,
+        0xF87   => 230,
+        0x170D  => 230,
+        0x193A  => 230,
+        0x20D0  => 230,
+        0x20D1  => 230,
+        0x20D4  => 230,
+        0x20D5  => 230,
+        0x20D6  => 230,
+        0x20D7  => 230,
+        0x20DB  => 230,
+        0x20DC  => 230,
+        0x20E1  => 230,
+        0x20E7  => 230,
+        0x20E9  => 230,
+        0xFE20  => 230,
+        0xFE21  => 230,
+        0xFE22  => 230,
+        0xFE23  => 230,
+        0x1D185 => 230,
+        0x1D186 => 230,
+        0x1D187 => 230,
+        0x1D189 => 230,
+        0x1D188 => 230,
+        0x1D1AA => 230,
+        0x1D1AB => 230,
+        0x1D1AC => 230,
+        0x1D1AD => 230,
+        0x315   => 232,
+        0x31A   => 232,
+        0x302C  => 232,
+        0x35F   => 233,
+        0x362   => 233,
+        0x35D   => 234,
+        0x35E   => 234,
+        0x360   => 234,
+        0x361   => 234,
+        0x345   => 240
+    );
+    // }}}
+
+    // {{{ properties
+    /**
+     * ACE prefix marking a Punycode-encoded label; _encode() prepends it
+     * and _decode() requires it.
+     *
+     * @var string
+     * @access private
+     */
+    private $_punycode_prefix = 'xn--';
+
+    /**
+     * NOTE(review): not referenced in the visible part of the class;
+     * presumably a marker for an invalid code point - confirm.
+     *
+     * @var int
+     * @access private
+     */
+    private $_invalid_ucs = 0x80000000;
+
+    /**
+     * Highest code point considered; _encode() starts its search for the
+     * next code point to emit from this value.
+     *
+     * @var int
+     * @access private
+     */
+    private $_max_ucs = 0x10FFFF;
+
+    /**
+     * Punycode parameter "base" (number of digits), used by _encode(),
+     * _decode() and _adapt().
+     *
+     * @var int
+     * @access private
+     */
+    private $_base = 36;
+
+    /**
+     * Punycode parameter "tmin" (lower clamp of the digit threshold).
+     *
+     * @var int
+     * @access private
+     */
+    private $_tmin = 1;
+
+    /**
+     * Punycode parameter "tmax" (upper clamp of the digit threshold).
+     *
+     * @var int
+     * @access private
+     */
+    private $_tmax = 26;
+
+    /**
+     * Punycode parameter "skew", used by _adapt().
+     *
+     * @var int
+     * @access private
+     */
+    private $_skew = 38;
+
+    /**
+     * Punycode parameter "damp": divisor applied to the first delta in _adapt().
+     *
+     * @var int
+     * @access private
+     */
+    private $_damp = 700;
+
+    /**
+     * Bias that _encode() and _decode() start with.
+     *
+     * @var int
+     * @access private
+     */
+    private $_initial_bias = 72;
+
+    /**
+     * Code point that _encode() and _decode() start counting from.
+     *
+     * @var int
+     * @access private
+     */
+    private $_initial_n = 0x80;
+
+    /**
+     * Set by the constructor to $_sbase + $_lcount * $_vcount * $_tcount.
+     *
+     * @var int
+     * @access private
+     */
+    private $_slast;
+
+    /**
+     * Base of the Hangul syllable index used by _hangulDecompose() and
+     * _hangulCompose().
+     *
+     * @var int
+     * @access private
+     */
+    private $_sbase = 0xAC00;
+
+    /**
+     * Base of the leading consonant (L) index.
+     *
+     * @var int
+     * @access private
+     */
+    private $_lbase = 0x1100;
+
+    /**
+     * Base of the vowel (V) index.
+     *
+     * @var int
+     * @access private
+     */
+    private $_vbase = 0x1161;
+
+    /**
+     * Base of the trailing consonant (T) index; _hangulDecompose() treats a
+     * T value equal to this base as "no trailing consonant".
+     *
+     * @var int
+     * @access private
+     */
+    private $_tbase = 0x11a7;
+
+    /**
+     * Number of leading consonant indexes.
+     *
+     * @var int
+     * @access private
+     */
+    private $_lcount = 19;
+
+    /**
+     * Number of vowel indexes.
+     *
+     * @var int
+     * @access private
+     */
+    private $_vcount = 21;
+
+    /**
+     * Number of trailing consonant indexes (including the "none" slot 0).
+     *
+     * @var int
+     * @access private
+     */
+    private $_tcount = 28;
+
+    /**
+     * vcount * tcount
+     *
+     * @var int
+     * @access private
+     */
+    private $_ncount = 588;
+
+    /**
+     * lcount * tcount * vcount
+     *
+     * @var int
+     * @access private
+     */
+    private $_scount = 11172;
+
+    /**
+     * Default encoding for encode()'s input and decode()'s output is UTF-8;
+     * Other possible encodings are ucs4_string and ucs4_array
+     * See {@link setParams()} for how to select these
+     *
+     * @var string
+     * @access private
+     */
+    private $_api_encoding = 'utf8';
+
+    /**
+     * Overlong UTF-8 encodings are forbidden
+     *
+     * @var bool
+     * @access private
+     */
+    private $_allow_overlong = false;
+
+    /**
+     * Behave strict or not; in strict mode encode() and decode() throw on
+     * e-mail addresses and URLs, and _encode() throws when there is nothing
+     * to encode.
+     *
+     * @var bool
+     * @access private
+     */
+    private $_strict_mode = false;
+
+    /**
+     * Cached value indicating whether or not mbstring function overloading is
+     * on for strlen
+     *
+     * This is cached for optimal performance. It is null until the first
+     * instance is constructed.
+     *
+     * @var boolean
+     * @see Net_IDNA_php5::_byteLength()
+     */
+    private static $_mb_string_overload = null;
+    // }}}
+
+
+    // {{{ constructor
+    /**
+     * Constructor.
+     *
+     * Computes $_slast from the Hangul constants, applies the given options
+     * (only when they arrive as an array) and fills the class-wide cache that
+     * records whether mbstring overloads the string functions.
+     *
+     * @param  array  $options  Option => value pairs, passed on to setParams()
+     * @access public
+     * @see    setParams()
+     */
+    public function __construct($options = null)
+    {
+        $syllables    = $this->_lcount * $this->_vcount * $this->_tcount;
+        $this->_slast = $this->_sbase + $syllables;
+
+        if (is_array($options)) {
+            $this->setParams($options);
+        }
+
+        // The overloading flag is shared by all instances; compute it only once
+        if (self::$_mb_string_overload !== null) {
+            return;
+        }
+
+        $overloaded = false;
+        if (extension_loaded('mbstring')) {
+            // Bit 0x02 of mbstring.func_overload is the one tested here
+            $overloaded = ((ini_get('mbstring.func_overload') & 0x02) === 0x02);
+        }
+
+        self::$_mb_string_overload = $overloaded;
+    }
+    // }}}
+
+
+    /**
+     * Sets one or several options. Recognised options and values:
+     *
+     * [encoding - Format of encode()'s input and decode()'s output: 'utf8',
+     *             'ucs4_string' or 'ucs4_array'; any other value raises an
+     *             Exception]
+     * [overlong - Unicode does not allow unnecessarily long encodings of chars,
+     *             to allow this, set this parameter to true, else to false;
+     *             default is false.]
+     * [strict -   true: strict mode, encode() and decode() refuse e-mail
+     *             addresses and URLs; false: loose mode; default is false.]
+     *
+     * Options are applied in array order. Processing stops at the first
+     * unknown option name; options handled before it stay applied.
+     *
+     * @param    mixed     $option      Parameter to set (string: single parameter; array of Parameter => Value pairs)
+     * @param    string    $value       Value to use (if parameter 1 is a string)
+     * @return   boolean                true on success, false on an unknown option name
+     * @throws   Exception              On an unknown value for the 'encoding' option
+     * @access   public
+     */
+    public function setParams($option, $value = false)
+    {
+        $settings = is_array($option) ? $option : array($option => $value);
+
+        foreach ($settings as $name => $setting) {
+            if ($name == 'encoding') {
+                if (!in_array($setting, array('utf8', 'ucs4_string', 'ucs4_array'))) {
+                    throw new Exception('Set Parameter: Unknown parameter '.$setting.' for option '.$name);
+                }
+                $this->_api_encoding = $setting;
+            } elseif ($name == 'overlong') {
+                $this->_allow_overlong = (bool) $setting;
+            } elseif ($name == 'strict') {
+                $this->_strict_mode = (bool) $setting;
+            } else {
+                // Unknown option name: give up right here
+                return false;
+            }
+        }
+
+        return true;
+    }
+
+    /**
+     * Encode a given domain name (or e-mail address / URL in loose mode).
+     *
+     * The input is split at the characters . / : ? @ (ideographic and
+     * fullwidth dots count as '.'); each piece is handed to _encode() and
+     * kept unchanged when _encode() yields nothing. A separator at the very
+     * first position is not copied to the output.
+     *
+     * @param    mixed      $decoded            Domain name (UTF-8 string, UCS-4 string or UCS-4 array)
+     * @param    string     $one_time_encoding  Input encoding for this call only, see {@link setParams()}
+     * @return   string                         Encoded Domain name (ACE string)
+     * @throws   Exception                      On an unsupported input format, on a separator
+     *                                          in strict mode, or when _encode() throws
+     * @access   public
+     */
+    public function encode($decoded, $one_time_encoding = false)
+    {
+        // Bring the input into UCS-4 array form; a one time encoding wins
+        // over the object's property
+        $format = ($one_time_encoding) ? $one_time_encoding : $this->_api_encoding;
+
+        if ($format == 'utf8') {
+            $decoded = $this->_utf8_to_ucs4($decoded);
+        } elseif ($format == 'ucs4_string') {
+            $decoded = $this->_ucs4_string_to_ucs4($decoded);
+        } elseif ($format != 'ucs4_array') {
+            throw new Exception('Unsupported input format');
+        }
+
+        // Nothing in, nothing out
+        if (empty($decoded)) {
+            return '';
+        }
+
+        // Start of the piece currently being collected
+        $segment_start = 0;
+        $result        = '';
+
+        foreach ($decoded as $pos => $cp) {
+            if ($cp == 0x3002 || $cp == 0xFF0E || $cp == 0xFF61) {
+                // Alternative dots are normalised to the plain dot
+                $decoded[$pos] = 0x2E;
+            } elseif (!($cp == 0x2E || $cp == 0x2F || $cp == 0x3A || $cp == 0x3F || $cp == 0x40)) {
+                // Not a separator
+                continue;
+            }
+
+            // Neither email addresses nor URLs allowed in strict mode
+            if ($this->_strict_mode) {
+                throw new Exception('Neither email addresses nor URLs are allowed in strict mode.');
+            }
+
+            // A separator at position 0 has nothing in front of it to encode
+            if ($pos) {
+                $piece = array_slice($decoded, $segment_start, $pos - $segment_start);
+                $ace   = $this->_encode($piece);
+                $result .= ($ace) ? $ace : $this->_ucs4_to_utf8($piece);
+                $result .= chr($decoded[$pos]);
+            }
+
+            $segment_start = $pos + 1;
+        }
+
+        // No separator seen (or only one at position 0 followed by nothing
+        // that moved the anchor): treat the whole input as one piece
+        if (!$segment_start) {
+            $ace = $this->_encode($decoded);
+            return ($ace) ? $ace : $this->_ucs4_to_utf8($decoded);
+        }
+
+        // Catch the rest of the string
+        $piece = array_slice($decoded, $segment_start, sizeof($decoded) - $segment_start);
+        $ace   = $this->_encode($piece);
+
+        return $result . (($ace) ? $ace : $this->_ucs4_to_utf8($piece));
+    }
+
+    /**
+     * Decode a given ACE domain name.
+     *
+     * In loose mode the input may also be an e-mail address or a URL; only
+     * the host labels carrying the Punycode prefix are converted, everything
+     * else is passed through. A URL is re-assembled component by component
+     * so that its separators ('://', '@', ':', '?', '#') are preserved.
+     *
+     * @param    string     $input              Domain name (ACE string)
+     * @param    string     $one_time_encoding  Output encoding for this call only, see {@link setParams()}
+     * @return   mixed                          Decoded Domain name (UTF-8 or UCS-4); for a plain
+     *                                          label the result of _decode(), which is false when
+     *                                          the label does not carry the Punycode prefix
+     * @throws   Exception                      On an unknown encoding, or on an e-mail address /
+     *                                          URL in strict mode
+     * @access   public
+     */
+    public function decode($input, $one_time_encoding = false)
+    {
+        // Optionally set
+        if ($one_time_encoding) {
+            switch ($one_time_encoding) {
+            case 'utf8':
+            case 'ucs4_string':
+            case 'ucs4_array':
+                break;
+            default:
+                throw new Exception('Unknown encoding '.$one_time_encoding);
+            }
+        }
+        // Make sure to drop any newline characters around
+        $input = trim($input);
+
+        // Negotiate input and try to determine, wether it is a plain string,
+        // an email address or something like a complete URL.
+        // Note: strpos() yields 0 for a leading '@', which is falsy, so such
+        // input falls through to the branches below.
+        if (strpos($input, '@')) { // Maybe it is an email address
+            // No no in strict mode
+            if ($this->_strict_mode) {
+                throw new Exception('Only simple domain name parts can be handled in strict mode');
+            }
+            list($email_pref, $input) = explode('@', $input, 2);
+            $arr = explode('.', $input);
+            foreach ($arr as $k => $v) {
+                $conv = $this->_decode($v);
+                if ($conv) $arr[$k] = $conv;
+            }
+            $return = $email_pref . '@' . join('.', $arr);
+        } elseif (preg_match('![:\./]!', $input)) { // Or a complete domain name (with or without paths / parameters)
+            // No no in strict mode
+            if ($this->_strict_mode) {
+                throw new Exception('Only simple domain name parts can be handled in strict mode');
+            }
+            $parsed = parse_url($input);
+            if (isset($parsed['host'])) {
+                $arr = explode('.', $parsed['host']);
+                foreach ($arr as $k => $v) {
+                    $conv = $this->_decode($v);
+                    if ($conv) $arr[$k] = $conv;
+                }
+                $parsed['host'] = join('.', $arr);
+
+                // Put the URL back together in URL order, restoring the
+                // separators that parse_url() strips from the components
+                $return = '';
+                if (isset($parsed['scheme'])) {
+                    $return .= $parsed['scheme'];
+                    $return .= (strtolower($parsed['scheme']) == 'mailto') ? ':' : '://';
+                }
+                if (isset($parsed['user'])) {
+                    $return .= $parsed['user'];
+                    if (isset($parsed['pass'])) {
+                        $return .= ':' . $parsed['pass'];
+                    }
+                    $return .= '@';
+                }
+                $return .= $parsed['host'];
+                if (isset($parsed['port'])) {
+                    $return .= ':' . $parsed['port'];
+                }
+                if (isset($parsed['path'])) {
+                    $return .= $parsed['path'];
+                }
+                if (isset($parsed['query'])) {
+                    $return .= '?' . $parsed['query'];
+                }
+                if (isset($parsed['fragment'])) {
+                    $return .= '#' . $parsed['fragment'];
+                }
+            } else { // parse_url seems to have failed, try without it
+                $arr = explode('.', $input);
+                foreach ($arr as $k => $v) {
+                    $conv = $this->_decode($v);
+                    if ($conv) $arr[$k] = $conv;
+                }
+                $return = join('.', $arr);
+            }
+        } else { // Otherwise we consider it being a pure domain name string
+            $return = $this->_decode($input);
+        }
+        // The output is UTF-8 by default, other output formats need conversion here
+        // If one time encoding is given, use this, else the objects property
+        switch (($one_time_encoding) ? $one_time_encoding : $this->_api_encoding) {
+        case 'utf8':
+            return $return;
+        case 'ucs4_string':
+            return $this->_ucs4_to_ucs4_string($this->_utf8_to_ucs4($return));
+        case 'ucs4_array':
+            return $this->_utf8_to_ucs4($return);
+        default:
+            throw new Exception('Unsupported output format');
+        }
+    }
+
+
+    // {{{ private
+    /**
+     * The actual encoding algorithm: turns one piece of a name, given as an
+     * array of code points, into its Punycode (ACE) form.
+     *
+     * @param    array      $decoded     Code points of the piece to encode
+     * @return   mixed                   ACE string; the plain string when only basic
+     *                                   code points remain after Nameprep; false when
+     *                                   there is nothing to encode (loose mode) or
+     *                                   Nameprep leaves nothing
+     * @throws   Exception               If the input already starts with the Punycode
+     *                                   prefix, if strict mode finds nothing to encode,
+     *                                   or if _nameprep() rejects the input
+     * @access   private
+     */
+    private function _encode($decoded)
+    {
+        // We cannot encode a domain name containing the Punycode prefix
+        $extract = self::_byteLength($this->_punycode_prefix);
+        $check_pref = $this->_utf8_to_ucs4($this->_punycode_prefix);
+        $check_deco = array_slice($decoded, 0, $extract);
+
+        if ($check_pref == $check_deco) {
+            throw new Exception('This is already a punycode string');
+        }
+        // We will not try to encode strings consisting of basic code points only
+        $encodable = false;
+        foreach ($decoded as $v) {
+            if ($v > 0x7a) {
+                $encodable = true;
+                break;
+            }
+        }
+        if (!$encodable) {
+            if ($this->_strict_mode) {
+                throw new Exception('The given string does not contain encodable chars');
+            } else {
+                return false;
+            }
+        }
+
+        // Do NAMEPREP; its exceptions are serious and simply propagate
+        $decoded = $this->_nameprep($decoded);
+
+        $deco_len = count($decoded);
+
+        // Empty array
+        if (!$deco_len) {
+            return false;
+        }
+
+        // How many chars have been consumed
+        $codecount = 0;
+
+        $encoded = '';
+        // Copy all basic code points to output.
+        // Intended to match [0-9a-zA-Z-]; as written the ranges also admit
+        // 0x3A-0x3F and 0x7B.
+        for ($i = 0; $i < $deco_len; ++$i) {
+            $test = $decoded[$i];
+            if ((0x2F < $test && $test < 0x40)
+                    || (0x40 < $test && $test < 0x5B)
+                    || (0x60 < $test && $test <= 0x7B)
+                    || (0x2D == $test)) {
+                $encoded .= chr($decoded[$i]);
+                $codecount++;
+            }
+        }
+
+        // All codepoints were basic ones
+        if ($codecount == $deco_len) {
+            return $encoded;
+        }
+
+        // Start with the prefix; copy it to output
+        $encoded = $this->_punycode_prefix . $encoded;
+
+        // If we have basic code points in output, add an hyphen to the end
+        if ($codecount) {
+            $encoded .= '-';
+        }
+
+        // Now find and encode all non-basic code points
+        $is_first  = true;
+        $cur_code  = $this->_initial_n;
+        $bias      = $this->_initial_bias;
+        $delta     = 0;
+
+        while ($codecount < $deco_len) {
+            // Find the smallest code point >= the current code point and
+            // remember the last ouccrence of it in the input
+            for ($i = 0, $next_code = $this->_max_ucs; $i < $deco_len; $i++) {
+                if ($decoded[$i] >= $cur_code && $decoded[$i] <= $next_code) {
+                    $next_code = $decoded[$i];
+                }
+            }
+
+            $delta += ($next_code - $cur_code) * ($codecount + 1);
+            $cur_code = $next_code;
+
+            // Scan input again and encode all characters whose code point is $cur_code
+            for ($i = 0; $i < $deco_len; $i++) {
+                if ($decoded[$i] < $cur_code) {
+                    $delta++;
+                } else if ($decoded[$i] == $cur_code) {
+                    // Emit $delta as a variable-length integer
+                    for ($q = $delta, $k = $this->_base; 1; $k += $this->_base) {
+                        $t = ($k <= $bias)?
+                            $this->_tmin :
+                            (($k >= $bias + $this->_tmax)? $this->_tmax : $k - $bias);
+
+                        if ($q < $t) {
+                            break;
+                        }
+
+                        $encoded .= $this->_encodeDigit($t + (($q - $t) % ($this->_base - $t)));
+                        // Integer division: keep $q an int so that neither
+                        // '%' nor chr() has to truncate a fractional float
+                        $q = (int) (($q - $t) / ($this->_base - $t));
+                    }
+
+                    $encoded .= $this->_encodeDigit($q);
+                    $bias = $this->_adapt($delta, $codecount + 1, $is_first);
+                    $codecount++;
+                    $delta = 0;
+                    $is_first = false;
+                }
+            }
+
+            $delta++;
+            $cur_code++;
+        }
+
+        return $encoded;
+    }
+
+    /**
+     * The actual decoding algorithm: converts one Punycode label back to UTF-8.
+     *
+     * @param    string     $encoded     Single label, expected to start with the Punycode prefix
+     * @return   mixed                   UTF-8 string, or false when the label lacks the
+     *                                   prefix, is empty after it, ends in the middle of
+     *                                   a number or contains a character that is not a
+     *                                   Punycode digit
+     * @throws   Exception               Whatever _ucs4_to_utf8() throws
+     * @access   private
+     */
+    private function _decode($encoded)
+    {
+        // We do need to find the Punycode prefix
+        if (!preg_match('!^' . preg_quote($this->_punycode_prefix, '!') . '!', $encoded)) {
+            return false;
+        }
+
+        $encode_test = preg_replace('!^' . preg_quote($this->_punycode_prefix, '!') . '!', '', $encoded);
+
+        // If nothing left after removing the prefix, it is hopeless
+        if (!$encode_test) {
+            return false;
+        }
+
+        $prefix_len = self::_byteLength($this->_punycode_prefix);
+
+        // Find last occurence of the delimiter
+        $delim_pos = strrpos($encoded, '-');
+
+        // Everything between prefix and last delimiter is basic code points
+        $decoded = array();
+        if ($delim_pos > $prefix_len) {
+            for ($k = $prefix_len; $k < $delim_pos; ++$k) {
+                $decoded[] = ord($encoded[$k]);
+            }
+        }
+
+        $deco_len = count($decoded);
+        $enco_len = self::_byteLength($encoded);
+
+        // Wandering through the strings; init
+        $is_first = true;
+        $bias     = $this->_initial_bias;
+        $idx      = 0;
+        $char     = $this->_initial_n;
+
+        for ($enco_idx = ($delim_pos)? ($delim_pos + 1) : 0; $enco_idx < $enco_len; ++$deco_len) {
+            // Read one variable-length integer
+            for ($old_idx = $idx, $w = 1, $k = $this->_base; 1 ; $k += $this->_base) {
+                // Input ended in the middle of a number: not valid Punycode
+                if ($enco_idx >= $enco_len) {
+                    return false;
+                }
+
+                $symbol = $encoded[$enco_idx++];
+
+                // Only letters and digits are Punycode digits
+                if (!preg_match('/^[0-9A-Za-z]$/D', $symbol)) {
+                    return false;
+                }
+
+                $digit = $this->_decodeDigit($symbol);
+                $idx += $digit * $w;
+
+                $t = ($k <= $bias) ?
+                    $this->_tmin :
+                    (($k >= $bias + $this->_tmax)? $this->_tmax : ($k - $bias));
+
+                if ($digit < $t) {
+                    break;
+                }
+
+                $w = (int)($w * ($this->_base - $t));
+            }
+
+            $bias      = $this->_adapt($idx - $old_idx, $deco_len + 1, $is_first);
+            $is_first  = false;
+            $char     += (int) ($idx / ($deco_len + 1));
+            $idx      %= ($deco_len + 1);
+
+            if ($deco_len > 0) {
+                // Make room for the decoded char
+                for ($i = $deco_len; $i > $idx; $i--) {
+                    $decoded[$i] = $decoded[($i - 1)];
+                }
+            }
+
+            $decoded[$idx++] = $char;
+        }
+
+        // Exceptions from the conversion simply propagate
+        return $this->_ucs4_to_utf8($decoded);
+    }
+
+    /**
+     * Adapt the bias according to the current code point and position.
+     *
+     * @param    int        $delta       Delta just encoded / decoded
+     * @param    int        $npoints     Number of code points handled so far
+     * @param    bool       $is_first    True for the first delta (divided by $_damp
+     *                                   instead of 2)
+     * @return   int                     New bias
+     * @access   private
+     */
+    private function _adapt($delta, $npoints, $is_first)
+    {
+        $divisor = $is_first ? $this->_damp : 2;
+        $delta   = (int) ($delta / $divisor);
+        $delta  += (int) ($delta / $npoints);
+
+        $span  = $this->_base - $this->_tmin;
+        $limit = ($span * $this->_tmax) / 2;
+
+        $k = 0;
+        while ($delta > $limit) {
+            $delta = (int) ($delta / $span);
+            $k    += $this->_base;
+        }
+
+        return (int) ($k + ($span + 1) * $delta / ($delta + $this->_skew));
+    }
+
+    /**
+     * Encoding a certain digit: values below 26 map to 'a'..'z' (offset 97),
+     * all other values are shifted by 22 (26 becomes '0').
+     *
+     * @param    int        $d           Digit value
+     * @return   string                  One character
+     * @access   private
+     */
+    private function _encodeDigit($d)
+    {
+        $offset = ($d < 26) ? 97 : 22;
+
+        return chr($d + $offset);
+    }
+
+    /**
+     * Decode a certain digit.
+     *
+     * '0'..'9' give 26..35, 'A'..'Z' and 'a'..'z' give 0..25.
+     * NOTE(review): the range tests have no lower bound, so any byte below
+     * ':' (58) yields its code minus 22 and bytes between the ranges fall
+     * into the next test; $_base is only returned for bytes above 'z'.
+     *
+     * @param    string     $cp          Character to decode
+     * @return   int                     Digit value
+     * @access   private
+     */
+    private function _decodeDigit($cp)
+    {
+        $code = ord($cp);
+
+        if ($code - 48 < 10) {
+            return $code - 22;
+        }
+
+        if ($code - 65 < 26) {
+            return $code - 65;
+        }
+
+        if ($code - 97 < 26) {
+            return $code - 97;
+        }
+
+        return $this->_base;
+    }
+
+    /**
+     * Do Nameprep according to RFC3491 and RFC3454.
+     *
+     * Works in two passes: the first drops, rejects, decomposes or copies
+     * each input code point; the second tries to combine sequences again
+     * via _combine().
+     *
+     * @param    array      $input       Unicode Characters
+     * @return   array                   Unicode Characters, Nameprep'd
+     * @throws   Exception               On a prohibited code point
+     * @access   private
+     */
+    private function _nameprep($input)
+    {
+        $output = array();
+
+        // Walking through the input array, performing the required steps on each of
+        // the input chars and putting the result into the output array
+        // While mapping required chars we apply the cannonical ordering
+
+        foreach ($input as $v) {
+            // Map to nothing == skip that code point
+            if (in_array($v, self::$_np_map_nothing)) {
+                continue;
+            }
+
+            // Try to find prohibited input
+            if (in_array($v, self::$_np_prohibit) || in_array($v, self::$_general_prohibited)) {
+                throw new Exception('NAMEPREP: Prohibited input U+' . sprintf('%08X', $v));
+            }
+
+            // Prohibited ranges are stored as array(first, last) pairs, both inclusive
+            foreach (self::$_np_prohibit_ranges as $range) {
+                if ($range[0] <= $v && $v <= $range[1]) {
+                    throw new Exception('NAMEPREP: Prohibited input U+' . sprintf('%08X', $v));
+                }
+            }
+
+            // Hangul syllable decomposition
+            // (_hangulDecompose() returns the code point unchanged when it lies
+            // beyond the last syllable of this range)
+            if (0xAC00 <= $v && $v <= 0xD7AF) {
+                foreach ($this->_hangulDecompose($v) as $out) {
+                    $output[] = $out;
+                }
+            } else if (isset(self::$_np_replacemaps[$v])) { // There's a decomposition mapping for that code point
+                foreach ($this->_applyCannonicalOrdering(self::$_np_replacemaps[$v]) as $out) {
+                    $output[] = $out;
+                }
+            } else {
+                $output[] = $v;
+            }
+        }
+
+        // Combine code points
+
+        $last_class   = 0; // combining class of the previous code point
+        $last_starter = 0; // index of the most recent class-0 code point
+        $out_len      = count($output);
+
+        for ($i = 0; $i < $out_len; ++$i) {
+            $class = $this->_getCombiningClass($output[$i]);
+
+            // Only a non-starter whose class differs from its predecessor's
+            // (or that follows a starter) triggers a combination attempt
+            if ((!$last_class || $last_class != $class) && $class) {
+                // Try to match
+                // (the slice runs from the last starter up to, but not including, position $i)
+                $seq_len = $i - $last_starter;
+                $out = $this->_combine(array_slice($output, $last_starter, $seq_len));
+
+                // On match: Replace the last starter with the composed character and remove
+                // the now redundant non-starter(s)
+                if ($out) {
+                    $output[$last_starter] = $out;
+
+                    // NOTE(review): $out is stored above as a single code point, yet it
+                    // is passed to count() here; presumably _combine() returns a scalar,
+                    // which count() rejects on PHP 8 - confirm against _combine().
+                    if (count($out) != $seq_len) {
+                        // Shift everything behind position $i one slot to the left
+                        for ($j = $i + 1; $j < $out_len; ++$j) {
+                            $output[$j - 1] = $output[$j];
+                        }
+
+                        // NOTE(review): $out_len has not been decremented yet, so this
+                        // targets the slot after the last shifted element; it looks like
+                        // the duplicated tail left by the shift survives - confirm.
+                        unset($output[$out_len]);
+                    }
+
+                    // Rewind the for loop by one, since there can be more possible compositions
+                    $i--;
+                    $out_len--;
+                    $last_class = ($i == $last_starter)? 0 : $this->_getCombiningClass($output[$i - 1]);
+
+                    continue;
+                }
+            }
+
+            // The current class is 0
+            if (!$class) {
+                $last_starter = $i;
+            }
+
+            $last_class = $class;
+        }
+
+        return $output;
+    }
+
+    /**
+     * Decomposes a Hangul syllable into its leading consonant, vowel and,
+     * if present, trailing consonant
+     * (see http://www.unicode.org/unicode/reports/tr15/#Hangul).
+     *
+     * @param    integer    $char        32bit UCS4 code point
+     * @return   array                   Two or three jamo code points, or the original
+     *                                   value as one value array when $char is not a
+     *                                   Hangul syllable
+     * @access   private
+     */
+    private function _hangulDecompose($char)
+    {
+        $offset = $char - $this->_sbase;
+
+        // Outside the syllable block: hand the code point back untouched
+        if (!(0 <= $offset && $offset < $this->_scount)) {
+            return array($char);
+        }
+
+        $lead  = (int)($this->_lbase +  $offset / $this->_ncount);
+        $vowel = (int)($this->_vbase + ($offset % $this->_ncount) / $this->_tcount);
+        $trail = $this->_tbase + $offset % $this->_tcount;
+
+        // A trailing index of zero means the syllable has no trailing consonant
+        if ($trail == $this->_tbase) {
+            return array($lead, $vowel);
+        }
+
+        return array($lead, $vowel, $trail);
+    }
+
+    /**
+     * Composes Hangul syllables: merges a leading consonant with a following
+     * vowel into an LV syllable, and an LV syllable with a following trailing
+     * consonant into an LVT syllable
+     * (see http://www.unicode.org/unicode/reports/tr15/#Hangul).
+     *
+     * @param    array      $input       Decomposed UCS4 sequence
+     * @return   array                   UCS4 sequence with syllables composed
+     * @access   private
+     */
+    private function _hangulCompose($input)
+    {
+        $inp_len = count($input);
+
+        if (!$inp_len) {
+            return array();
+        }
+
+        $result   = array();
+        $last     = $input[0];
+        $result[] = $last; // copy first char from input to output
+
+        for ($i = 1; $i < $inp_len; ++$i) {
+            $char = $input[$i];
+
+            // Find out, wether two current characters from L and V
+            $lindex = $last - $this->_lbase;
+
+            if (0 <= $lindex && $lindex < $this->_lcount) {
+                $vindex = $char - $this->_vbase;
+
+                if (0 <= $vindex && $vindex < $this->_vcount) {
+                    // create syllable of form LV
+                    $last    = ($this->_sbase + ($lindex * $this->_vcount + $vindex) * $this->_tcount);
+                    $out_off = count($result) - 1;
+                    $result[$out_off] = $last; // reset last
+
+                    // discard char
+                    continue;
+                }
+            }
+
+            // Find out, wether two current characters are LV and T
+            $sindex = $last - $this->_sbase;
+
+            if (0 <= $sindex && $sindex < $this->_scount && ($sindex % $this->_tcount) == 0) {
+                $tindex = $char - $this->_tbase;
+
+                // Valid trailing consonants have index 1 .. tcount-1: index 0
+                // is $_tbase itself (the "no trailing consonant" slot) and
+                // index tcount would carry over into the next LV syllable
+                if (0 < $tindex && $tindex < $this->_tcount) {
+                    // create syllable of form LVT
+                    $last += $tindex;
+                    $out_off = count($result) - 1;
+                    $result[$out_off] = $last; // reset last
+
+                    // discard char
+                    continue;
+                }
+            }
+
+            // if neither case was true, just add the character
+            $last = $char;
+            $result[] = $char;
+        }
+
+        return $result;
+    }
+
+    /**
+     * Looks up the combining class of a wide char in the
+     * $_np_norm_combcls table.
+     *
+     * @param    integer    $char        Wide char to check (32bit integer)
+     * @return   integer                 Combining class if listed, else 0
+     * @access   private
+     */
+    private function _getCombiningClass($char)
+    {
+        if (isset(self::$_np_norm_combcls[$char])) {
+            return self::$_np_norm_combcls[$char];
+        }
+
+        return 0;
+    }
+
+    /**
+     * Applies the canonical ordering to a decomposed UCS4 sequence.
+     *
+     * Repeatedly sweeps the sequence and moves any code point with a non-zero
+     * combining class leftwards past neighbours with a higher combining class,
+     * until a full sweep performs no swap.
+     *
+     * @param    array      $input       Decomposed UCS4 sequence
+     * @return   array                   Ordered UCS4 sequence
+     * @access   private
+     */
+    private function _applyCannonicalOrdering($input)
+    {
+        $size = count($input);
+
+        // Fewer than two items cannot be out of order; returning early also
+        // avoids reading $input[0] of an empty array below.
+        if ($size < 2) {
+            return $input;
+        }
+
+        $swap = true;
+
+        while ($swap) {
+            $swap = false;
+            $last = $this->_getCombiningClass($input[0]);
+
+            for ($i = 0; $i < $size - 1; ++$i) {
+                $next = $this->_getCombiningClass($input[$i + 1]);
+
+                if ($next != 0 && $last > $next) {
+                    // Move item leftward until it fits
+                    for ($j = $i + 1; $j > 0; --$j) {
+                        if ($this->_getCombiningClass($input[$j - 1]) <= $next) {
+                            break;
+                        }
+
+                        $t = $input[$j];
+                        $input[$j] = $input[$j - 1];
+                        $input[$j - 1] = $t;
+                        $swap = true;
+                    }
+
+                    // Reentering the loop looking at the old character again
+                    $next = $last;
+                }
+
+                $last = $next;
+            }
+        }
+
+        return $input;
+    }
+
+    /**
+     * Tries to compose a sequence of starter and non-starters.
+     *
+     * For sequences of more than one item Hangul composition is attempted
+     * first; if that shortens the sequence its result is returned directly.
+     * Otherwise the $_np_replacemaps table is searched for an entry whose
+     * value equals the input item by item.
+     *
+     * @param    array      $input       UCS4 Decomposed sequence
+     * @return   mixed                   Array from _hangulCompose() when it
+     *                                   shortened the input, else the key of
+     *                                   the matching $_np_replacemaps entry,
+     *                                   else false when nothing matched
+     * @access   private
+     */
+    private function _combine($input)
+    {
+        $length = count($input);
+
+        // Is it a Hangul syllable?
+        if ($length != 1) {
+            $composed = $this->_hangulCompose($input);
+
+            // This place is probably wrong
+            if (count($composed) != $length) {
+                return $composed;
+            }
+        }
+
+        foreach (self::$_np_replacemaps as $codepoint => $sequence) {
+            // Cheap rejections first: different first item or different length
+            if ($sequence[0] != $input[0] || count($sequence) != $length) {
+                continue;
+            }
+
+            $matched = false;
+
+            foreach ($input as $pos => $value) {
+                $matched = ($value == $sequence[$pos]);
+
+                if (!$matched) {
+                    break;
+                }
+            }
+
+            if ($matched) {
+                return $codepoint;
+            }
+        }
+
+        return false;
+    }
+
+    /**
+     * This converts an UTF-8 encoded string to its UCS-4 (array) representation
+     * By talking about UCS-4 we mean arrays of 32bit integers representing
+     * each of the "chars". This is due to PHP not being able to handle strings with
+     * bit depth different from 8. This applies to the reverse method _ucs4_to_utf8(), too.
+     * The following UTF-8 encodings are supported:
+     *
+     * bytes bits  representation
+     * 1        7  0xxxxxxx
+     * 2       11  110xxxxx 10xxxxxx
+     * 3       16  1110xxxx 10xxxxxx 10xxxxxx
+     * 4       21  11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
+     * 5       26  111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
+     * 6       31  1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
+     *
+     * Each x represents a bit that can be used to store character data.
+     *
+     * @param    string     $input       UTF-8 encoded string
+     * @return   array                   One integer code point per character
+     * @throws   Exception               On malformed, out-of-range or truncated input
+     * @access   private
+     */
+    private function _utf8_to_ucs4($input)
+    {
+        $output = array();
+        $out_len = 0;
+        $inp_len = self::_byteLength($input);
+        $mode = 'next'; // 'next': expecting a start byte, 'add': expecting a continuation byte
+        $test = 'none'; // 'range': the next continuation byte must pass the range check
+        for ($k = 0; $k < $inp_len; ++$k) {
+            $v = ord($input[$k]); // Extract byte from input string
+
+            if ($v < 128) { // We found an ASCII char - put into string as is
+                if ('add' == $mode) {
+                    // An ASCII byte in the middle of a multi-byte sequence
+                    throw new Exception('Conversion from UTF-8 to UCS-4 failed: malformed input at byte '.$k);
+                }
+                $output[$out_len] = $v;
+                ++$out_len;
+                continue;
+            }
+            if ('next' == $mode) { // Try to find the next start byte; determine the width of the Unicode char
+                $start_byte = $v;
+                $mode = 'add';
+                $test = 'range';
+                if ($v >> 5 == 6) { // &110xxxxx 10xxxxx
+                    $next_byte = 0; // Tells, how many times subsequent bitmasks must rotate 6bits to the left
+                    $v = ($v - 192) << 6;
+                } elseif ($v >> 4 == 14) { // &1110xxxx 10xxxxxx 10xxxxxx
+                    $next_byte = 1;
+                    $v = ($v - 224) << 12;
+                } elseif ($v >> 3 == 30) { // &11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
+                    $next_byte = 2;
+                    $v = ($v - 240) << 18;
+                } elseif ($v >> 2 == 62) { // &111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
+                    $next_byte = 3;
+                    $v = ($v - 248) << 24;
+                } elseif ($v >> 1 == 126) { // &1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
+                    $next_byte = 4;
+                    $v = ($v - 252) << 30;
+                } else {
+                    throw new Exception('This might be UTF-8, but I don\'t understand it at byte '.$k);
+                }
+                // Store the bits of the start byte; continuation bytes are added onto this slot
+                $output[$out_len] = (int) $v;
+                ++$out_len;
+                continue;
+            }
+            if ('add' == $mode) {
+                if (!$this->_allow_overlong && $test == 'range') {
+                    $test = 'none';
+                    if (($v < 0xA0 && $start_byte == 0xE0) || ($v < 0x90 && $start_byte == 0xF0) || ($v > 0x8F && $start_byte == 0xF4)) {
+                        throw new Exception('Bogus UTF-8 character detected (out of legal range) at byte '.$k);
+                    }
+                }
+                if ($v >> 6 == 2) { // Bit mask must be 10xxxxxx
+                    $v = ($v - 128) << ($next_byte * 6);
+                    $output[($out_len - 1)] += $v;
+                    --$next_byte;
+                } else {
+                    throw new Exception('Conversion from UTF-8 to UCS-4 failed: malformed input at byte '.$k);
+                }
+                if ($next_byte < 0) {
+                    $mode = 'next';
+                }
+            }
+        } // for
+
+        // Still waiting for continuation bytes: the last sequence was cut short
+        if ('add' == $mode) {
+            throw new Exception('Conversion from UTF-8 to UCS-4 failed: malformed input at byte '.$k);
+        }
+
+        return $output;
+    }
+
+    /**
+     * Convert UCS-4 array into UTF-8 string.
+     *
+     * @param    array      $input       Array of integer code points
+     * @return   string                  UTF-8 encoded string
+     * @throws   Exception               If a value does not fit into 31 bits
+     * @access   private
+     */
+    private function _ucs4_to_utf8($input)
+    {
+        $output = '';
+
+        // $k is kept so the error message can name the offending position
+        foreach ($input as $k => $v) {
+            if ($v < 128) {
+                // 7bit are transferred literally
+                $output .= chr($v);
+            } else if ($v < 1 << 11) {
+                // 2 bytes
+                $output .= chr(192 + ($v >> 6))
+                    . chr(128 + ($v & 63));
+            } else if ($v < 1 << 16) {
+                // 3 bytes
+                $output .= chr(224 + ($v >> 12))
+                    . chr(128 + (($v >> 6) & 63))
+                    . chr(128 + ($v & 63));
+            } else if ($v < 1 << 21) {
+                // 4 bytes
+                $output .= chr(240 + ($v >> 18))
+                    . chr(128 + (($v >> 12) & 63))
+                    . chr(128 + (($v >>  6) & 63))
+                    . chr(128 + ($v & 63));
+            } else if ($v < 1 << 26) {
+                // 5 bytes
+                $output .= chr(248 + ($v >> 24))
+                    . chr(128 + (($v >> 18) & 63))
+                    . chr(128 + (($v >> 12) & 63))
+                    . chr(128 + (($v >>  6) & 63))
+                    . chr(128 + ($v & 63));
+            } else if ($v < 1 << 31) {
+                // 6 bytes
+                $output .= chr(252 + ($v >> 30))
+                    . chr(128 + (($v >> 24) & 63))
+                    . chr(128 + (($v >> 18) & 63))
+                    . chr(128 + (($v >> 12) & 63))
+                    . chr(128 + (($v >>  6) & 63))
+                    . chr(128 + ($v & 63));
+            } else {
+                throw new Exception('Conversion from UCS-4 to UTF-8 failed: malformed input at byte ' . $k);
+            }
+        }
+
+        return $output;
+    }
+
+    /**
+     * Convert UCS-4 array into UCS-4 string
+     *
+     * Each value becomes four bytes, most significant byte first, which is
+     * the layout _ucs4_string_to_ucs4() reads back.
+     *
+     * @param    array      $input       Array of integer code points
+     * @return   string                  Four bytes per code point
+     * @access   private
+     */
+    private function _ucs4_to_ucs4_string($input)
+    {
+        $output = '';
+        // Take array values and split output to 4 bytes per value.
+        // Shift first, then mask with 255 (&11111111), and emit each byte
+        // with chr(): ">>" binds tighter than "&", so masking before shifting
+        // without parentheses would always yield the lowest byte.
+        foreach ($input as $v) {
+            $output .= chr(($v >> 24) & 255)
+                . chr(($v >> 16) & 255)
+                . chr(($v >> 8) & 255)
+                . chr($v & 255);
+        }
+        return $output;
+    }
+
+    /**
+     * Convert UCS-4 string into UCS-4 array
+     *
+     * Reads the input four bytes at a time, most significant byte first.
+     *
+     * @param    string     $input       UCS-4 string, four bytes per code point
+     * @return   array                   Array of integer code points
+     * @throws   Exception               If the byte length is not a multiple of 4
+     * @access   private
+     */
+    private function _ucs4_string_to_ucs4($input)
+    {
+        $output = array();
+
+        $inp_len = self::_byteLength($input);
+        // Input length must be dividable by 4
+        if ($inp_len % 4) {
+            throw new Exception('Input UCS4 string is broken');
+        }
+
+        // Empty input - return empty output
+        if (!$inp_len) return $output;
+
+        for ($i = 0, $out_len = -1; $i < $inp_len; ++$i) {
+            // Increment output position every 4 input bytes.
+            // The parentheses matter: "!" binds tighter than "%".
+            if (($i % 4) == 0) {
+                $out_len++;
+                $output[$out_len] = 0;
+            }
+            $output[$out_len] += ord($input[$i]) << (8 * (3 - ($i % 4)));
+        }
+        return $output;
+    }
+
+    /**
+     * Echo hex representation of UCS4 sequence.
+     *
+     * Prints one "[index] => HEX" line per item, optionally followed by the
+     * bit pattern of the value in parentheses.
+     *
+     * @param    array      $input       UCS4 sequence
+     * @param    boolean    $include_bit Include bitmask in output
+     * @return   void
+     * @static
+     * @access   private
+     */
+    private static function _showHex($input, $include_bit = false)
+    {
+        foreach ($input as $k => $v) {
+            echo '[', $k, '] => ', sprintf('%X', $v);
+
+            if ($include_bit) {
+                // _showBitmask() is a private static helper of this class,
+                // so it has to be reached through self::
+                echo ' (', self::_showBitmask($v), ')';
+            }
+
+            echo "\n";
+        }
+    }
+
+    /**
+     * Renders a value as a string of '0' and '1' characters.
+     * The width is 8 bits for values below 2^8, 16 bits for values below
+     * 2^16 and 32 bits otherwise.
+     *
+     * @param    integer    $octet       Value to render
+     * @return   string                  Bit pattern, most significant bit first
+     * @static
+     * @access   private
+     */
+    private static function _showBitmask($octet)
+    {
+        // Highest bit position to print
+        $top = 7;
+        if ($octet >= (1 << 8)) {
+            $top = 15;
+        }
+        if ($octet >= (1 << 16)) {
+            $top = 31;
+        }
+
+        $bits = '';
+        $pos  = $top;
+
+        while ($pos >= 0) {
+            $bits .= ($octet & (1 << $pos)) ? '1' : '0';
+            $pos--;
+        }
+
+        return $bits;
+    }
+
+    /**
+     * Returns the number of bytes in a string, independent of whether
+     * mbstring function overloading is switched on
+     *
+     * @param string $string the string to measure.
+     *
+     * @return integer the length of the string in bytes.
+     *
+     * @see Net_IDNA_php5::$_mb_string_overload
+     */
+    private static function _byteLength($string)
+    {
+        // With overloading active, plain strlen() may count characters, so
+        // ask mbstring for the length in the single-byte '8bit' encoding.
+        return self::$_mb_string_overload
+            ? mb_strlen($string, '8bit')
+            : strlen((binary)$string);
+    }
+
+    // }}}}
+}
+
+?>
index db4e2e9a706a62f8bf905e4b34167f37416492f3..cdcfc44232b8f659d325bbedabbc665ae2510e70 100644 (file)
@@ -34,38 +34,197 @@ if (!defined('STATUSNET') && !defined('LACONICA')) {
 require_once 'XMPPHP/XMPP.php';
 
 /**
- * checks whether a string is a syntactically valid Jabber ID (JID)
+ * Splits a Jabber ID (JID) into node, domain, and resource portions.
+ * 
+ * Based on validation routine submitted by:
+ * @copyright 2009 Patrick Georgi <patrick@georgi-clan.de>
+ * @license Licensed under ISC-L, which is compatible with everything else that keeps the copyright notice intact. 
  *
  * @param string $jid string to check
  *
+ * @return array with "node", "domain", and "resource" indices
+ * @throws Exception if input is not valid
+ */
+
+function jabber_split_jid($jid)
+{
+    // Builds two PCRE character classes of prohibited code points, splits the
+    // JID at the first "/" and at "@", validates each portion and returns the
+    // node and domain lowercased. Throws Exception on any violation.
+    $chars = '';
+    /* the following definitions come from stringprep, Appendix C,
+       which is used in its entirety by nodeprep, Chapter 5, "Prohibited Output" */
+    /* C1.1 ASCII space characters */
+    $chars .= "\x{20}";
+    /* C1.2 Non-ASCII space characters (the last one is U+3000 IDEOGRAPHIC SPACE) */
+    $chars .= "\x{a0}\x{1680}\x{2000}-\x{200b}\x{202f}\x{205f}\x{3000}";
+    /* C2.1 ASCII control characters */
+    $chars .= "\x{00}-\x{1f}\x{7f}";
+    /* C2.2 Non-ASCII control characters */
+    $chars .= "\x{80}-\x{9f}\x{6dd}\x{70f}\x{180e}\x{200c}\x{200d}\x{2028}\x{2029}\x{2060}-\x{2063}\x{206a}-\x{206f}\x{feff}\x{fff9}-\x{fffc}\x{1d173}-\x{1d17a}";
+    /* C3 - Private Use */
+    $chars .= "\x{e000}-\x{f8ff}\x{f0000}-\x{ffffd}\x{100000}-\x{10fffd}";
+    /* C4 - Non-character code points */
+    $chars .= "\x{fdd0}-\x{fdef}\x{fffe}\x{ffff}\x{1fffe}\x{1ffff}\x{2fffe}\x{2ffff}\x{3fffe}\x{3ffff}\x{4fffe}\x{4ffff}\x{5fffe}\x{5ffff}\x{6fffe}\x{6ffff}\x{7fffe}\x{7ffff}\x{8fffe}\x{8ffff}\x{9fffe}\x{9ffff}\x{afffe}\x{affff}\x{bfffe}\x{bffff}\x{cfffe}\x{cffff}\x{dfffe}\x{dffff}\x{efffe}\x{effff}\x{ffffe}\x{fffff}\x{10fffe}\x{10ffff}";
+    /* C5 - Surrogate codes (U+D800-U+DFFF) are intentionally not listed:
+       PCRE refuses surrogate code points in a UTF-8 mode pattern, which would
+       make the whole class fail to compile. They cannot occur in a valid
+       UTF-8 subject anyway, and an invalid subject is rejected below. */
+    /* C6 - Inappropriate for plain text */
+    $chars .= "\x{fff9}-\x{fffd}";
+    /* C7 - Inappropriate for canonical representation */
+    $chars .= "\x{2ff0}-\x{2ffb}";
+    /* C8 - Change display properties or are deprecated */
+    $chars .= "\x{340}\x{341}\x{200e}\x{200f}\x{202a}-\x{202e}\x{206a}-\x{206f}";
+    /* C9 - Tagging characters */
+    $chars .= "\x{e0001}\x{e0020}-\x{e007f}";
+
+    /* Nodeprep forbids some more characters */
+    $nodeprepchars = $chars;
+    $nodeprepchars .= "\x{22}\x{26}\x{27}\x{2f}\x{3a}\x{3c}\x{3e}\x{40}";
+
+    // Everything after the first "/" is the resource, further slashes included
+    $parts = explode("/", $jid, 2);
+    if (count($parts) > 1) {
+        $resource = $parts[1];
+        if ($resource == '') {
+            // Warning: empty resource isn't legit.
+            // But if we're normalizing, we may as well take it...
+        }
+    } else {
+        $resource = null;
+    }
+
+    $node = explode("@", $parts[0]);
+    if ((count($node) > 2) || (count($node) == 0)) {
+        throw new Exception("Invalid JID: too many @s");
+    } else if (count($node) == 1) {
+        // No "@" at all: a bare domain
+        $domain = $node[0];
+        $node = null;
+    } else {
+        $domain = $node[1];
+        $node = $node[0];
+        if ($node == '') {
+            throw new Exception("Invalid JID: @ but no node");
+        }
+    }
+
+    // Length limits per http://xmpp.org/rfcs/rfc3920.html#addressing
+    if ($node !== null) {
+        if (strlen($node) > 1023) {
+            throw new Exception("Invalid JID: node too long.");
+        }
+        // preg_match() yields 1 on a prohibited character and false on an
+        // error (e.g. the node is not valid UTF-8); only 0 means "clean".
+        if (preg_match("/[".$nodeprepchars."]/u", $node) !== 0) {
+            throw new Exception("Invalid JID node '$node'");
+        }
+    }
+
+    if (strlen($domain) > 1023) {
+        throw new Exception("Invalid JID: domain too long.");
+    }
+    if (!common_valid_domain($domain)) {
+        throw new Exception("Invalid JID domain name '$domain'");
+    }
+
+    if ($resource !== null) {
+        if (strlen($resource) > 1023) {
+            throw new Exception("Invalid JID: resource too long.");
+        }
+        // Same three-way result as for the node check above
+        if (preg_match("/[".$chars."]/u", $resource) !== 0) {
+            throw new Exception("Invalid JID resource '$resource'");
+        }
+    }
+
+    return array('node' => is_null($node) ? null : mb_strtolower($node),
+                 'domain' => is_null($domain) ? null : mb_strtolower($domain),
+                 'resource' => $resource);
+}
+
+/**
+ * Checks whether a string is a syntactically valid Jabber ID (JID),
+ * either with or without a resource.
+ * 
+ * Note that a bare domain can be a valid JID.
+ * 
+ * @param string $jid string to check
+ * @param bool $check_domain whether we should validate that domain...
+ *
  * @return     boolean whether the string is a valid JID
  */
+function jabber_valid_full_jid($jid, $check_domain=false)
+{
+    // Any Exception raised while splitting or checking means "not valid"
+    try {
+        $pieces = jabber_split_jid($jid);
+
+        $domain_ok = !$check_domain || jabber_check_domain($pieces['domain']);
+        if (!$domain_ok) {
+            return false;
+        }
+
+        // A resource may be absent (null) or non-empty, but not the empty string
+        $has_empty_resource = ($pieces['resource'] === '');
+        return !$has_empty_resource;
+    } catch (Exception $e) {
+        return false;
+    }
+}
 
-function jabber_valid_base_jid($jid)
+/**
+ * Checks whether a string is a syntactically valid base Jabber ID (JID).
+ * A base JID won't include a resource specifier on the end; since we
+ * take it off when reading input we can't really use them reliably
+ * to direct outgoing messages yet (sorry guys!)
+ * 
+ * Note that a bare domain can be a valid JID.
+ * 
+ * @param string $jid string to check
+ * @param bool $check_domain whether we should validate that domain...
+ *
+ * @return     boolean whether the string is a valid JID
+ */
+function jabber_valid_base_jid($jid, $check_domain=false)
 {
-    // Cheap but effective
-    return Validate::email($jid);
+    try {
+        $parts = jabber_split_jid($jid);
+        if ($check_domain) {
+            if (!jabber_check_domain($parts['domain'])) {
+                return false;
+            }
+        }
+        return ($parts['resource'] === null); // missing; empty ain't kosher
+    } catch (Exception $e) {
+        return false;
+    }
 }
 
 /**
- * normalizes a Jabber ID for comparison
+ * Normalizes a Jabber ID for comparison, dropping the resource component if any.
  *
  * @param string $jid JID to check
+ * @param bool $check_domain if true, reject if the domain isn't findable
  *
  * @return string an equivalent JID in normalized (lowercase) form
  */
 
 function jabber_normalize_jid($jid)
 {
-    if (preg_match("/(?:([^\@]+)\@)?([^\/]+)(?:\/(.*))?$/", $jid, $matches)) {
-        $node   = $matches[1];
-        $server = $matches[2];
-        return strtolower($node.'@'.$server);
-    } else {
+    try {
+        $parts = jabber_split_jid($jid);
+        if ($parts['node'] !== null) {
+            return $parts['node'] . '@' . $parts['domain'];
+        } else {
+            return $parts['domain'];
+        }
+    } catch (Exception $e) {
         return null;
     }
 }
 
+/**
+ * Check whether DNS knows this domain: either an XMPP server SRV record
+ * (_xmpp-server._tcp.<domain>) or, failing that, any record at all.
+ *
+ * @param string $domain domain name to look up
+ *
+ * @return boolean true if one of the two lookups succeeds
+ */
+function jabber_check_domain($domain)
+{
+    // The generic lookup only runs when the SRV lookup finds nothing
+    return checkdnsrr("_xmpp-server._tcp." . $domain, "SRV")
+        || checkdnsrr($domain, "ANY");
+}
+
 /**
  * the JID of the Jabber daemon for this StatusNet instance
  *
index 795997868305ff09b2ada3614cf7a9a8d1089cf3..f4ee26bbf0ecde3bd6692a2ea21f5bc1e11c0d23 100644 (file)
@@ -1397,6 +1397,55 @@ function common_valid_tag($tag)
     return false;
 }
 
+/**
+ * Determine if given domain or address literal is valid
+ * eg for use in JIDs and URLs. Does not check if the domain
+ * exists!
+ *
+ * Accepted forms are a dotted-quad IPv4 address, a bracketed IPv6
+ * address, or a host name that Net_IDNA can encode into dot-separated
+ * labels of ASCII letters, digits and hyphens.
+ *
+ * @param string $domain
+ * @return boolean valid or not
+ */
+function common_valid_domain($domain)
+{
+    $octet = "(?:25[0-5]|2[0-4][0-9]|1[0-9]{2}|[1-9][0-9]|[0-9])";
+    $ipv4 = "(?:$octet(?:\.$octet){3})";
+    if (preg_match("/^$ipv4$/u", $domain)) return true;
+
+    $group = "(?:[0-9a-f]{1,4})";
+    $ipv6 = "(?:\[($group(?::$group){0,7})?(::)?($group(?::$group){0,7})?\])"; // http://tools.ietf.org/html/rfc3513#section-2.2
+
+    if (preg_match("/^$ipv6$/ui", $domain, $matches)) {
+        // Captures that did not take part in the match are empty or missing
+        // from $matches; those must count as zero groups, not as one.
+        $before = (isset($matches[1]) && $matches[1] !== '')
+            ? count(explode(":", $matches[1])) : 0;
+        $after = (isset($matches[3]) && $matches[3] !== '')
+            ? count(explode(":", $matches[3])) : 0;
+
+        if (!empty($matches[2])) {
+            // "::" stands for at least one group of zeroes,
+            // leaving room for at most 7 explicit groups.
+            return ($before + $after) <= 7;
+        }
+
+        // Without "::" all 8 groups must be spelled out in one colon
+        // separated run. A non-empty third capture here would mean the
+        // pattern split an over-long group in two (e.g. "12345").
+        return $before == 8 && $after == 0;
+    }
+
+    try {
+        require_once "Net/IDNA.php";
+        $idn = Net_IDNA::getInstance();
+        $domain = $idn->encode($domain);
+    } catch (Exception $e) {
+        return false;
+    }
+
+    $subdomain = "(?:[a-z0-9][a-z0-9-]*)"; // @fixme
+    $fqdn = "(?:$subdomain(?:\.$subdomain)*\.?)";
+
+    // preg_match() returns 1, 0 or false; report a real boolean
+    return preg_match("/^$fqdn$/ui", $domain) === 1;
+}
+
 /* Following functions are copied from MediaWiki GlobalFunctions.php
  * and written by Evan Prodromou. */
 
diff --git a/tests/JidValidateTest.php b/tests/JidValidateTest.php
new file mode 100644 (file)
index 0000000..9f59011
--- /dev/null
@@ -0,0 +1,146 @@
+<?php
+
+// Refuse to run when a REQUEST_METHOD is present, i.e. when invoked through a web server.
+if (isset($_SERVER) && array_key_exists('REQUEST_METHOD', $_SERVER)) {
+    print "This script must be run from the command line\n";
+    exit();
+}
+
+// INSTALLDIR is the parent directory of this tests directory; the two
+// flag constants satisfy the defined() guard at the top of the library files.
+define('INSTALLDIR', realpath(dirname(__FILE__) . '/..'));
+define('STATUSNET', true);
+define('LACONICA', true);
+
+mb_internal_encoding('UTF-8'); // @fixme this probably belongs in common.php?
+
+require_once INSTALLDIR . '/lib/common.php';
+require_once INSTALLDIR . '/lib/jabber.php';
+
+/**
+ * Test cases for the JID helpers in lib/jabber.php: validation as full or
+ * base JID, normalization, and the DNS based domain check.
+ */
+class JidValidateTest extends PHPUnit_Framework_TestCase
+{
+    /**
+     * Checks one JID against both validators.
+     *
+     * @dataProvider validationCases
+     *
+     */
+    public function testValidate($jid, $validFull, $validBase)
+    {
+        $this->assertEquals($validFull, jabber_valid_full_jid($jid), "validating as full or base JID");
+
+        $this->assertEquals($validBase, jabber_valid_base_jid($jid), "validating as base JID only");
+    }
+
+    /**
+     * Checks that a JID normalizes to the expected string.
+     *
+     * @dataProvider normalizationCases
+     *
+     */
+    public function testNormalize($jid, $expected)
+    {
+        $this->assertEquals($expected, jabber_normalize_jid($jid));
+    }
+
+    /**
+     * Checks jabber_check_domain() against real domains; this performs
+     * live DNS lookups.
+     *
+     * @dataProvider domainCheckCases()
+     */
+    public function testDomainCheck($domain, $expected, $note)
+    {
+        $this->assertEquals($expected, jabber_check_domain($domain), $note);
+    }
+
+    // Rows are (jid, expected result of jabber_valid_full_jid, expected result of jabber_valid_base_jid)
+    static public function validationCases()
+    {
+        // Portions of exactly 1023 and 1024 bytes to probe the length limit
+        $long1023 = "long1023" . str_repeat('x', 1023 - 8);
+        $long1024 = "long1024" . str_repeat('x', 1024 - 8);
+        return array(
+            // Our own test cases for standard things & those mentioned in bug reports
+            // (jid, valid_full, valid_base)
+            array('user@example.com', true, true),
+            array('user@example.com/resource', true, false),
+            array('user with spaces@example.com', false, false), // not kosher
+
+            array('user.@example.com', true, true), // "common in intranets"
+            array('example.com', true, true),
+            array('example.com/resource', true, false),
+            array('jabchat', true, true),
+            
+            array("$long1023@$long1023/$long1023", true, false), // max 1023 "bytes" per portion per spec. Do they really mean bytes though?
+            array("$long1024@$long1023/$long1023", false, false),
+            array("$long1023@$long1024/$long1023", false, false),
+            array("$long1023@$long1023/$long1024", false, false),
+
+            // Borrowed from test_jabber_jutil.c in libpurple
+            array("gmail.com", true, true),
+            array("gmail.com/Test", true, false),
+            array("gmail.com/Test@", true, false),
+            array("gmail.com/@", true, false),
+            array("gmail.com/Test@alkjaweflkj", true, false),
+            array("mark.doliner@gmail.com", true, true),
+            array("mark.doliner@gmail.com/Test12345", true, false),
+            array("mark.doliner@gmail.com/Test@12345", true, false),
+            array("mark.doliner@gmail.com/Te/st@12@//345", true, false),
+            array("わいど@conference.jabber.org", true, true),
+            array("まりるーむ@conference.jabber.org", true, true),
+            array("mark.doliner@gmail.com/まりるーむ", true, false),
+            array("mark.doliner@gmail/stuff.org", true, false),
+            array("stuart@nödåtXäYZ.se", true, true),
+            array("stuart@nödåtXäYZ.se/まりるーむ", true, false),
+            array("mark.doliner@わいど.org", true, true),
+            array("nick@まつ.おおかみ.net", true, true),
+            array("paul@10.0.42.230/s", true, false),
+            array("paul@[::1]", true, true), /* IPv6 */
+            array("paul@[2001:470:1f05:d58::2]", true, true),
+            array("paul@[2001:470:1f05:d58::2]/foo", true, false),
+            array("pa=ul@10.0.42.230", true, true),
+            array("pa,ul@10.0.42.230", true, true),
+
+            array("@gmail.com", false, false),
+            array("@@gmail.com", false, false),
+            array("mark.doliner@@gmail.com/Test12345", false, false),
+            array("mark@doliner@gmail.com/Test12345", false, false),
+            array("@gmail.com/Test@12345", false, false),
+            array("/Test@12345", false, false),
+            array("mark.doliner@", false, false),
+            array("mark.doliner/", false, false),
+            array("mark.doliner@gmail_stuff.org", false, false),
+            array("mark.doliner@gmail[stuff.org", false, false),
+            array("mark.doliner@gmail\\stuff.org", false, false),
+            array("paul@[::1]124", false, false),
+            array("paul@2[::1]124/as", false, false),
+            array("paul@まつ.おおかみ/\x01", false, false),
+
+            /*
+             * RFC 3454 Section 6 reads, in part,
+             * "If a string contains any RandALCat character, the
+             *  string MUST NOT contain any LCat character."
+             * The character is U+066D (ARABIC FIVE POINTED STAR).
+             */
+            // Leaving this one commented out for the moment
+            // as it shouldn't hurt anything for our purposes.
+            //array("foo@example.com/٭simplexe٭", false, false)
+        );
+    }
+    
+    // Rows are (input jid, expected normalized form)
+    static public function normalizationCases()
+    {
+        return array(
+            // Borrowed from test_jabber_jutil.c in libpurple
+            array('PaUL@DaRkRain42.org', 'paul@darkrain42.org'),
+            array('PaUL@DaRkRain42.org/', 'paul@darkrain42.org'),
+            array('PaUL@DaRkRain42.org/resource', 'paul@darkrain42.org'),
+
+            // Also adapted from libpurple tests...
+            array('Ф@darkrain42.org', 'ф@darkrain42.org'),
+            array('paul@Өarkrain.org', 'paul@өarkrain.org'),
+        );
+    }
+
+    // Rows are (domain, expected result of jabber_check_domain, assertion message)
+    static public function domainCheckCases()
+    {
+        return array(
+            array('gmail.com', true, 'known SRV record'),
+            array('jabber.org', true, 'known SRV record'),
+            array('status.net', true, 'known SRV record'),
+            array('status.leuksman.com', true, 'known no SRV record but valid domain'),
+        );
+    }
+
+
+}
+