]> git.mxchange.org Git - quix0rs-gnu-social.git/blob - extlib/Net/IDNA/php5.php
Ticket #1281: JID validation now more or less follows spec instead of calling e-mail...
[quix0rs-gnu-social.git] / extlib / Net / IDNA / php5.php
1 <?php
2
3 // {{{ license
4
5 /* vim: set expandtab tabstop=4 shiftwidth=4 softtabstop=4 foldmethod=marker: */
6 //
7 // +----------------------------------------------------------------------+
8 // | This library is free software; you can redistribute it and/or modify |
9 // | it under the terms of the GNU Lesser General Public License as       |
10 // | published by the Free Software Foundation; either version 2.1 of the |
11 // | License, or (at your option) any later version.                      |
12 // |                                                                      |
13 // | This library is distributed in the hope that it will be useful, but  |
14 // | WITHOUT ANY WARRANTY; without even the implied warranty of           |
15 // | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU    |
16 // | Lesser General Public License for more details.                      |
17 // |                                                                      |
18 // | You should have received a copy of the GNU Lesser General Public     |
19 // | License along with this library; if not, write to the Free Software  |
20 // | Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 |
21 // | USA.                                                                 |
22 // +----------------------------------------------------------------------+
23 //
24
25 // }}}
26
27
28 /**
29  * Encode/decode Internationalized Domain Names.
30  *
31  * This class converts internationalized domain names (see RFC 3490 for
32  * details), as used with various registries worldwide, between their
33  * original (localized) form and the encoded form in which they are used
34  * in the DNS (Domain Name System).
35  *
36  * The class provides two public methods, encode() and decode(), which do exactly
37  * what you would expect them to do. You are allowed to use complete domain names,
38  * simple strings and complete email addresses as well. That means that you might
39  * use any of the following notations:
40  *
41  * - www.nörgler.com
42  * - xn--nrgler-wxa
43  * - xn--brse-5qa.xn--knrz-1ra.info
44  *
45  * Unicode input might be given as either UTF-8 string, UCS-4 string or UCS-4
46  * array. Unicode output is available in the same formats.
47  * You can select your preferred format via {@link set_paramter()}.
48  *
49  * ACE input and output is always expected to be ASCII.
50  *
51  * @author  Markus Nix <mnix@docuverse.de>
52  * @author  Matthias Sommerfeld <mso@phlylabs.de>
53  * @author  Stefan Neufeind <pear.neufeind@speedpartner.de>
54  * @package Net
55  * @version $Id: php5.php 284682 2009-07-24 04:27:35Z clockwerx $
56  */
57
58 class Net_IDNA_php5
59 {
60     // {{{ npdata
61     /**
62      * These Unicode codepoints are
63      * mapped to nothing; see RFC 3454 for details.
64      *
65      * @static
66      * @var array
67      * @access private
68      */
69     private static $_np_map_nothing = array(
70         0xAD,
71         0x34F,
72         0x1806,
73         0x180B,
74         0x180C,
75         0x180D,
76         0x200B,
77         0x200C,
78         0x200D,
79         0x2060,
80         0xFE00,
81         0xFE01,
82         0xFE02,
83         0xFE03,
84         0xFE04,
85         0xFE05,
86         0xFE06,
87         0xFE07,
88         0xFE08,
89         0xFE09,
90         0xFE0A,
91         0xFE0B,
92         0xFE0C,
93         0xFE0D,
94         0xFE0E,
95         0xFE0F,
96         0xFEFF
97     );
98
99     /**
100      * Prohibited codepoints
101      *
102      * @static
103      * @var array
104      * @access private
105      */
106     private static $_general_prohibited = array(
107         0,
108         1,
109         2,
110         3,
111         4,
112         5,
113         6,
114         7,
115         8,
116         9,
117         0xA,
118         0xB,
119         0xC,
120         0xD,
121         0xE,
122         0xF,
123         0x10,
124         0x11,
125         0x12,
126         0x13,
127         0x14,
128         0x15,
129         0x16,
130         0x17,
131         0x18,
132         0x19,
133         0x1A,
134         0x1B,
135         0x1C,
136         0x1D,
137         0x1E,
138         0x1F,
139         0x20,
140         0x21,
141         0x22,
142         0x23,
143         0x24,
144         0x25,
145         0x26,
146         0x27,
147         0x28,
148         0x29,
149         0x2A,
150         0x2B,
151         0x2C,
152         0x2F,
153         0x3B,
154         0x3C,
155         0x3D,
156         0x3E,
157         0x3F,
158         0x40,
159         0x5B,
160         0x5C,
161         0x5D,
162         0x5E,
163         0x5F,
164         0x60,
165         0x7B,
166         0x7C,
167         0x7D,
168         0x7E,
169         0x7F,
170         0x3002
171     );
172
173     /**
174      * Codepoints prohibited by Nameprep
175      * @static
176      * @var array
177      * @access private
178      */
179     private static $_np_prohibit = array(
180         0xA0,
181         0x1680,
182         0x2000,
183         0x2001,
184         0x2002,
185         0x2003,
186         0x2004,
187         0x2005,
188         0x2006,
189         0x2007,
190         0x2008,
191         0x2009,
192         0x200A,
193         0x200B,
194         0x202F,
195         0x205F,
196         0x3000,
197         0x6DD,
198         0x70F,
199         0x180E,
200         0x200C,
201         0x200D,
202         0x2028,
203         0x2029,
204         0xFEFF,
205         0xFFF9,
206         0xFFFA,
207         0xFFFB,
208         0xFFFC,
209         0xFFFE,
210         0xFFFF,
211         0x1FFFE,
212         0x1FFFF,
213         0x2FFFE,
214         0x2FFFF,
215         0x3FFFE,
216         0x3FFFF,
217         0x4FFFE,
218         0x4FFFF,
219         0x5FFFE,
220         0x5FFFF,
221         0x6FFFE,
222         0x6FFFF,
223         0x7FFFE,
224         0x7FFFF,
225         0x8FFFE,
226         0x8FFFF,
227         0x9FFFE,
228         0x9FFFF,
229         0xAFFFE,
230         0xAFFFF,
231         0xBFFFE,
232         0xBFFFF,
233         0xCFFFE,
234         0xCFFFF,
235         0xDFFFE,
236         0xDFFFF,
237         0xEFFFE,
238         0xEFFFF,
239         0xFFFFE,
240         0xFFFFF,
241         0x10FFFE,
242         0x10FFFF,
243         0xFFF9,
244         0xFFFA,
245         0xFFFB,
246         0xFFFC,
247         0xFFFD,
248         0x340,
249         0x341,
250         0x200E,
251         0x200F,
252         0x202A,
253         0x202B,
254         0x202C,
255         0x202D,
256         0x202E,
257         0x206A,
258         0x206B,
259         0x206C,
260         0x206D,
261         0x206E,
262         0x206F,
263         0xE0001
264     );
265
266     /**
267      * Codepoint ranges prohibited by nameprep
268      *
269      * @static
270      * @var array
271      * @access private
272      */
273     private static $_np_prohibit_ranges = array(
274         array(0x80,     0x9F    ),
275         array(0x2060,   0x206F  ),
276         array(0x1D173,  0x1D17A ),
277         array(0xE000,   0xF8FF  ),
278         array(0xF0000,  0xFFFFD ),
279         array(0x100000, 0x10FFFD),
280         array(0xFDD0,   0xFDEF  ),
281         array(0xD800,   0xDFFF  ),
282         array(0x2FF0,   0x2FFB  ),
283         array(0xE0020,  0xE007F )
284     );
285
286     /**
287      * Replacement mappings (casemapping, replacement sequences, ...)
288      *
289      * @static
290      * @var array
291      * @access private
292      */
293     private static $_np_replacemaps = array(
294         0x41    => array(0x61),
295         0x42    => array(0x62),
296         0x43    => array(0x63),
297         0x44    => array(0x64),
298         0x45    => array(0x65),
299         0x46    => array(0x66),
300         0x47    => array(0x67),
301         0x48    => array(0x68),
302         0x49    => array(0x69),
303         0x4A    => array(0x6A),
304         0x4B    => array(0x6B),
305         0x4C    => array(0x6C),
306         0x4D    => array(0x6D),
307         0x4E    => array(0x6E),
308         0x4F    => array(0x6F),
309         0x50    => array(0x70),
310         0x51    => array(0x71),
311         0x52    => array(0x72),
312         0x53    => array(0x73),
313         0x54    => array(0x74),
314         0x55    => array(0x75),
315         0x56    => array(0x76),
316         0x57    => array(0x77),
317         0x58    => array(0x78),
318         0x59    => array(0x79),
319         0x5A    => array(0x7A),
320         0xB5    => array(0x3BC),
321         0xC0    => array(0xE0),
322         0xC1    => array(0xE1),
323         0xC2    => array(0xE2),
324         0xC3    => array(0xE3),
325         0xC4    => array(0xE4),
326         0xC5    => array(0xE5),
327         0xC6    => array(0xE6),
328         0xC7    => array(0xE7),
329         0xC8    => array(0xE8),
330         0xC9    => array(0xE9),
331         0xCA    => array(0xEA),
332         0xCB    => array(0xEB),
333         0xCC    => array(0xEC),
334         0xCD    => array(0xED),
335         0xCE    => array(0xEE),
336         0xCF    => array(0xEF),
337         0xD0    => array(0xF0),
338         0xD1    => array(0xF1),
339         0xD2    => array(0xF2),
340         0xD3    => array(0xF3),
341         0xD4    => array(0xF4),
342         0xD5    => array(0xF5),
343         0xD6    => array(0xF6),
344         0xD8    => array(0xF8),
345         0xD9    => array(0xF9),
346         0xDA    => array(0xFA),
347         0xDB    => array(0xFB),
348         0xDC    => array(0xFC),
349         0xDD    => array(0xFD),
350         0xDE    => array(0xFE),
351         0xDF    => array(0x73, 0x73),
352         0x100   => array(0x101),
353         0x102   => array(0x103),
354         0x104   => array(0x105),
355         0x106   => array(0x107),
356         0x108   => array(0x109),
357         0x10A   => array(0x10B),
358         0x10C   => array(0x10D),
359         0x10E   => array(0x10F),
360         0x110   => array(0x111),
361         0x112   => array(0x113),
362         0x114   => array(0x115),
363         0x116   => array(0x117),
364         0x118   => array(0x119),
365         0x11A   => array(0x11B),
366         0x11C   => array(0x11D),
367         0x11E   => array(0x11F),
368         0x120   => array(0x121),
369         0x122   => array(0x123),
370         0x124   => array(0x125),
371         0x126   => array(0x127),
372         0x128   => array(0x129),
373         0x12A   => array(0x12B),
374         0x12C   => array(0x12D),
375         0x12E   => array(0x12F),
376         0x130   => array(0x69, 0x307),
377         0x132   => array(0x133),
378         0x134   => array(0x135),
379         0x136   => array(0x137),
380         0x139   => array(0x13A),
381         0x13B   => array(0x13C),
382         0x13D   => array(0x13E),
383         0x13F   => array(0x140),
384         0x141   => array(0x142),
385         0x143   => array(0x144),
386         0x145   => array(0x146),
387         0x147   => array(0x148),
388         0x149   => array(0x2BC, 0x6E),
389         0x14A   => array(0x14B),
390         0x14C   => array(0x14D),
391         0x14E   => array(0x14F),
392         0x150   => array(0x151),
393         0x152   => array(0x153),
394         0x154   => array(0x155),
395         0x156   => array(0x157),
396         0x158   => array(0x159),
397         0x15A   => array(0x15B),
398         0x15C   => array(0x15D),
399         0x15E   => array(0x15F),
400         0x160   => array(0x161),
401         0x162   => array(0x163),
402         0x164   => array(0x165),
403         0x166   => array(0x167),
404         0x168   => array(0x169),
405         0x16A   => array(0x16B),
406         0x16C   => array(0x16D),
407         0x16E   => array(0x16F),
408         0x170   => array(0x171),
409         0x172   => array(0x173),
410         0x174   => array(0x175),
411         0x176   => array(0x177),
412         0x178   => array(0xFF),
413         0x179   => array(0x17A),
414         0x17B   => array(0x17C),
415         0x17D   => array(0x17E),
416         0x17F   => array(0x73),
417         0x181   => array(0x253),
418         0x182   => array(0x183),
419         0x184   => array(0x185),
420         0x186   => array(0x254),
421         0x187   => array(0x188),
422         0x189   => array(0x256),
423         0x18A   => array(0x257),
424         0x18B   => array(0x18C),
425         0x18E   => array(0x1DD),
426         0x18F   => array(0x259),
427         0x190   => array(0x25B),
428         0x191   => array(0x192),
429         0x193   => array(0x260),
430         0x194   => array(0x263),
431         0x196   => array(0x269),
432         0x197   => array(0x268),
433         0x198   => array(0x199),
434         0x19C   => array(0x26F),
435         0x19D   => array(0x272),
436         0x19F   => array(0x275),
437         0x1A0   => array(0x1A1),
438         0x1A2   => array(0x1A3),
439         0x1A4   => array(0x1A5),
440         0x1A6   => array(0x280),
441         0x1A7   => array(0x1A8),
442         0x1A9   => array(0x283),
443         0x1AC   => array(0x1AD),
444         0x1AE   => array(0x288),
445         0x1AF   => array(0x1B0),
446         0x1B1   => array(0x28A),
447         0x1B2   => array(0x28B),
448         0x1B3   => array(0x1B4),
449         0x1B5   => array(0x1B6),
450         0x1B7   => array(0x292),
451         0x1B8   => array(0x1B9),
452         0x1BC   => array(0x1BD),
453         0x1C4   => array(0x1C6),
454         0x1C5   => array(0x1C6),
455         0x1C7   => array(0x1C9),
456         0x1C8   => array(0x1C9),
457         0x1CA   => array(0x1CC),
458         0x1CB   => array(0x1CC),
459         0x1CD   => array(0x1CE),
460         0x1CF   => array(0x1D0),
461         0x1D1   => array(0x1D2),
462         0x1D3   => array(0x1D4),
463         0x1D5   => array(0x1D6),
464         0x1D7   => array(0x1D8),
465         0x1D9   => array(0x1DA),
466         0x1DB   => array(0x1DC),
467         0x1DE   => array(0x1DF),
468         0x1E0   => array(0x1E1),
469         0x1E2   => array(0x1E3),
470         0x1E4   => array(0x1E5),
471         0x1E6   => array(0x1E7),
472         0x1E8   => array(0x1E9),
473         0x1EA   => array(0x1EB),
474         0x1EC   => array(0x1ED),
475         0x1EE   => array(0x1EF),
476         0x1F0   => array(0x6A, 0x30C),
477         0x1F1   => array(0x1F3),
478         0x1F2   => array(0x1F3),
479         0x1F4   => array(0x1F5),
480         0x1F6   => array(0x195),
481         0x1F7   => array(0x1BF),
482         0x1F8   => array(0x1F9),
483         0x1FA   => array(0x1FB),
484         0x1FC   => array(0x1FD),
485         0x1FE   => array(0x1FF),
486         0x200   => array(0x201),
487         0x202   => array(0x203),
488         0x204   => array(0x205),
489         0x206   => array(0x207),
490         0x208   => array(0x209),
491         0x20A   => array(0x20B),
492         0x20C   => array(0x20D),
493         0x20E   => array(0x20F),
494         0x210   => array(0x211),
495         0x212   => array(0x213),
496         0x214   => array(0x215),
497         0x216   => array(0x217),
498         0x218   => array(0x219),
499         0x21A   => array(0x21B),
500         0x21C   => array(0x21D),
501         0x21E   => array(0x21F),
502         0x220   => array(0x19E),
503         0x222   => array(0x223),
504         0x224   => array(0x225),
505         0x226   => array(0x227),
506         0x228   => array(0x229),
507         0x22A   => array(0x22B),
508         0x22C   => array(0x22D),
509         0x22E   => array(0x22F),
510         0x230   => array(0x231),
511         0x232   => array(0x233),
512         0x345   => array(0x3B9),
513         0x37A   => array(0x20, 0x3B9),
514         0x386   => array(0x3AC),
515         0x388   => array(0x3AD),
516         0x389   => array(0x3AE),
517         0x38A   => array(0x3AF),
518         0x38C   => array(0x3CC),
519         0x38E   => array(0x3CD),
520         0x38F   => array(0x3CE),
521         0x390   => array(0x3B9, 0x308, 0x301),
522         0x391   => array(0x3B1),
523         0x392   => array(0x3B2),
524         0x393   => array(0x3B3),
525         0x394   => array(0x3B4),
526         0x395   => array(0x3B5),
527         0x396   => array(0x3B6),
528         0x397   => array(0x3B7),
529         0x398   => array(0x3B8),
530         0x399   => array(0x3B9),
531         0x39A   => array(0x3BA),
532         0x39B   => array(0x3BB),
533         0x39C   => array(0x3BC),
534         0x39D   => array(0x3BD),
535         0x39E   => array(0x3BE),
536         0x39F   => array(0x3BF),
537         0x3A0   => array(0x3C0),
538         0x3A1   => array(0x3C1),
539         0x3A3   => array(0x3C3),
540         0x3A4   => array(0x3C4),
541         0x3A5   => array(0x3C5),
542         0x3A6   => array(0x3C6),
543         0x3A7   => array(0x3C7),
544         0x3A8   => array(0x3C8),
545         0x3A9   => array(0x3C9),
546         0x3AA   => array(0x3CA),
547         0x3AB   => array(0x3CB),
548         0x3B0   => array(0x3C5, 0x308, 0x301),
549         0x3C2   => array(0x3C3),
550         0x3D0   => array(0x3B2),
551         0x3D1   => array(0x3B8),
552         0x3D2   => array(0x3C5),
553         0x3D3   => array(0x3CD),
554         0x3D4   => array(0x3CB),
555         0x3D5   => array(0x3C6),
556         0x3D6   => array(0x3C0),
557         0x3D8   => array(0x3D9),
558         0x3DA   => array(0x3DB),
559         0x3DC   => array(0x3DD),
560         0x3DE   => array(0x3DF),
561         0x3E0   => array(0x3E1),
562         0x3E2   => array(0x3E3),
563         0x3E4   => array(0x3E5),
564         0x3E6   => array(0x3E7),
565         0x3E8   => array(0x3E9),
566         0x3EA   => array(0x3EB),
567         0x3EC   => array(0x3ED),
568         0x3EE   => array(0x3EF),
569         0x3F0   => array(0x3BA),
570         0x3F1   => array(0x3C1),
571         0x3F2   => array(0x3C3),
572         0x3F4   => array(0x3B8),
573         0x3F5   => array(0x3B5),
574         0x400   => array(0x450),
575         0x401   => array(0x451),
576         0x402   => array(0x452),
577         0x403   => array(0x453),
578         0x404   => array(0x454),
579         0x405   => array(0x455),
580         0x406   => array(0x456),
581         0x407   => array(0x457),
582         0x408   => array(0x458),
583         0x409   => array(0x459),
584         0x40A   => array(0x45A),
585         0x40B   => array(0x45B),
586         0x40C   => array(0x45C),
587         0x40D   => array(0x45D),
588         0x40E   => array(0x45E),
589         0x40F   => array(0x45F),
590         0x410   => array(0x430),
591         0x411   => array(0x431),
592         0x412   => array(0x432),
593         0x413   => array(0x433),
594         0x414   => array(0x434),
595         0x415   => array(0x435),
596         0x416   => array(0x436),
597         0x417   => array(0x437),
598         0x418   => array(0x438),
599         0x419   => array(0x439),
600         0x41A   => array(0x43A),
601         0x41B   => array(0x43B),
602         0x41C   => array(0x43C),
603         0x41D   => array(0x43D),
604         0x41E   => array(0x43E),
605         0x41F   => array(0x43F),
606         0x420   => array(0x440),
607         0x421   => array(0x441),
608         0x422   => array(0x442),
609         0x423   => array(0x443),
610         0x424   => array(0x444),
611         0x425   => array(0x445),
612         0x426   => array(0x446),
613         0x427   => array(0x447),
614         0x428   => array(0x448),
615         0x429   => array(0x449),
616         0x42A   => array(0x44A),
617         0x42B   => array(0x44B),
618         0x42C   => array(0x44C),
619         0x42D   => array(0x44D),
620         0x42E   => array(0x44E),
621         0x42F   => array(0x44F),
622         0x460   => array(0x461),
623         0x462   => array(0x463),
624         0x464   => array(0x465),
625         0x466   => array(0x467),
626         0x468   => array(0x469),
627         0x46A   => array(0x46B),
628         0x46C   => array(0x46D),
629         0x46E   => array(0x46F),
630         0x470   => array(0x471),
631         0x472   => array(0x473),
632         0x474   => array(0x475),
633         0x476   => array(0x477),
634         0x478   => array(0x479),
635         0x47A   => array(0x47B),
636         0x47C   => array(0x47D),
637         0x47E   => array(0x47F),
638         0x480   => array(0x481),
639         0x48A   => array(0x48B),
640         0x48C   => array(0x48D),
641         0x48E   => array(0x48F),
642         0x490   => array(0x491),
643         0x492   => array(0x493),
644         0x494   => array(0x495),
645         0x496   => array(0x497),
646         0x498   => array(0x499),
647         0x49A   => array(0x49B),
648         0x49C   => array(0x49D),
649         0x49E   => array(0x49F),
650         0x4A0   => array(0x4A1),
651         0x4A2   => array(0x4A3),
652         0x4A4   => array(0x4A5),
653         0x4A6   => array(0x4A7),
654         0x4A8   => array(0x4A9),
655         0x4AA   => array(0x4AB),
656         0x4AC   => array(0x4AD),
657         0x4AE   => array(0x4AF),
658         0x4B0   => array(0x4B1),
659         0x4B2   => array(0x4B3),
660         0x4B4   => array(0x4B5),
661         0x4B6   => array(0x4B7),
662         0x4B8   => array(0x4B9),
663         0x4BA   => array(0x4BB),
664         0x4BC   => array(0x4BD),
665         0x4BE   => array(0x4BF),
666         0x4C1   => array(0x4C2),
667         0x4C3   => array(0x4C4),
668         0x4C5   => array(0x4C6),
669         0x4C7   => array(0x4C8),
670         0x4C9   => array(0x4CA),
671         0x4CB   => array(0x4CC),
672         0x4CD   => array(0x4CE),
673         0x4D0   => array(0x4D1),
674         0x4D2   => array(0x4D3),
675         0x4D4   => array(0x4D5),
676         0x4D6   => array(0x4D7),
677         0x4D8   => array(0x4D9),
678         0x4DA   => array(0x4DB),
679         0x4DC   => array(0x4DD),
680         0x4DE   => array(0x4DF),
681         0x4E0   => array(0x4E1),
682         0x4E2   => array(0x4E3),
683         0x4E4   => array(0x4E5),
684         0x4E6   => array(0x4E7),
685         0x4E8   => array(0x4E9),
686         0x4EA   => array(0x4EB),
687         0x4EC   => array(0x4ED),
688         0x4EE   => array(0x4EF),
689         0x4F0   => array(0x4F1),
690         0x4F2   => array(0x4F3),
691         0x4F4   => array(0x4F5),
692         0x4F8   => array(0x4F9),
693         0x500   => array(0x501),
694         0x502   => array(0x503),
695         0x504   => array(0x505),
696         0x506   => array(0x507),
697         0x508   => array(0x509),
698         0x50A   => array(0x50B),
699         0x50C   => array(0x50D),
700         0x50E   => array(0x50F),
701         0x531   => array(0x561),
702         0x532   => array(0x562),
703         0x533   => array(0x563),
704         0x534   => array(0x564),
705         0x535   => array(0x565),
706         0x536   => array(0x566),
707         0x537   => array(0x567),
708         0x538   => array(0x568),
709         0x539   => array(0x569),
710         0x53A   => array(0x56A),
711         0x53B   => array(0x56B),
712         0x53C   => array(0x56C),
713         0x53D   => array(0x56D),
714         0x53E   => array(0x56E),
715         0x53F   => array(0x56F),
716         0x540   => array(0x570),
717         0x541   => array(0x571),
718         0x542   => array(0x572),
719         0x543   => array(0x573),
720         0x544   => array(0x574),
721         0x545   => array(0x575),
722         0x546   => array(0x576),
723         0x547   => array(0x577),
724         0x548   => array(0x578),
725         0x549   => array(0x579),
726         0x54A   => array(0x57A),
727         0x54B   => array(0x57B),
728         0x54C   => array(0x57C),
729         0x54D   => array(0x57D),
730         0x54E   => array(0x57E),
731         0x54F   => array(0x57F),
732         0x550   => array(0x580),
733         0x551   => array(0x581),
734         0x552   => array(0x582),
735         0x553   => array(0x583),
736         0x554   => array(0x584),
737         0x555   => array(0x585),
738         0x556   => array(0x586),
739         0x587   => array(0x565, 0x582),
740         0x1E00  => array(0x1E01),
741         0x1E02  => array(0x1E03),
742         0x1E04  => array(0x1E05),
743         0x1E06  => array(0x1E07),
744         0x1E08  => array(0x1E09),
745         0x1E0A  => array(0x1E0B),
746         0x1E0C  => array(0x1E0D),
747         0x1E0E  => array(0x1E0F),
748         0x1E10  => array(0x1E11),
749         0x1E12  => array(0x1E13),
750         0x1E14  => array(0x1E15),
751         0x1E16  => array(0x1E17),
752         0x1E18  => array(0x1E19),
753         0x1E1A  => array(0x1E1B),
754         0x1E1C  => array(0x1E1D),
755         0x1E1E  => array(0x1E1F),
756         0x1E20  => array(0x1E21),
757         0x1E22  => array(0x1E23),
758         0x1E24  => array(0x1E25),
759         0x1E26  => array(0x1E27),
760         0x1E28  => array(0x1E29),
761         0x1E2A  => array(0x1E2B),
762         0x1E2C  => array(0x1E2D),
763         0x1E2E  => array(0x1E2F),
764         0x1E30  => array(0x1E31),
765         0x1E32  => array(0x1E33),
766         0x1E34  => array(0x1E35),
767         0x1E36  => array(0x1E37),
768         0x1E38  => array(0x1E39),
769         0x1E3A  => array(0x1E3B),
770         0x1E3C  => array(0x1E3D),
771         0x1E3E  => array(0x1E3F),
772         0x1E40  => array(0x1E41),
773         0x1E42  => array(0x1E43),
774         0x1E44  => array(0x1E45),
775         0x1E46  => array(0x1E47),
776         0x1E48  => array(0x1E49),
777         0x1E4A  => array(0x1E4B),
778         0x1E4C  => array(0x1E4D),
779         0x1E4E  => array(0x1E4F),
780         0x1E50  => array(0x1E51),
781         0x1E52  => array(0x1E53),
782         0x1E54  => array(0x1E55),
783         0x1E56  => array(0x1E57),
784         0x1E58  => array(0x1E59),
785         0x1E5A  => array(0x1E5B),
786         0x1E5C  => array(0x1E5D),
787         0x1E5E  => array(0x1E5F),
788         0x1E60  => array(0x1E61),
789         0x1E62  => array(0x1E63),
790         0x1E64  => array(0x1E65),
791         0x1E66  => array(0x1E67),
792         0x1E68  => array(0x1E69),
793         0x1E6A  => array(0x1E6B),
794         0x1E6C  => array(0x1E6D),
795         0x1E6E  => array(0x1E6F),
796         0x1E70  => array(0x1E71),
797         0x1E72  => array(0x1E73),
798         0x1E74  => array(0x1E75),
799         0x1E76  => array(0x1E77),
800         0x1E78  => array(0x1E79),
801         0x1E7A  => array(0x1E7B),
802         0x1E7C  => array(0x1E7D),
803         0x1E7E  => array(0x1E7F),
804         0x1E80  => array(0x1E81),
805         0x1E82  => array(0x1E83),
806         0x1E84  => array(0x1E85),
807         0x1E86  => array(0x1E87),
808         0x1E88  => array(0x1E89),
809         0x1E8A  => array(0x1E8B),
810         0x1E8C  => array(0x1E8D),
811         0x1E8E  => array(0x1E8F),
812         0x1E90  => array(0x1E91),
813         0x1E92  => array(0x1E93),
814         0x1E94  => array(0x1E95),
815         0x1E96  => array(0x68, 0x331),
816         0x1E97  => array(0x74, 0x308),
817         0x1E98  => array(0x77, 0x30A),
818         0x1E99  => array(0x79, 0x30A),
819         0x1E9A  => array(0x61, 0x2BE),
820         0x1E9B  => array(0x1E61),
821         0x1EA0  => array(0x1EA1),
822         0x1EA2  => array(0x1EA3),
823         0x1EA4  => array(0x1EA5),
824         0x1EA6  => array(0x1EA7),
825         0x1EA8  => array(0x1EA9),
826         0x1EAA  => array(0x1EAB),
827         0x1EAC  => array(0x1EAD),
828         0x1EAE  => array(0x1EAF),
829         0x1EB0  => array(0x1EB1),
830         0x1EB2  => array(0x1EB3),
831         0x1EB4  => array(0x1EB5),
832         0x1EB6  => array(0x1EB7),
833         0x1EB8  => array(0x1EB9),
834         0x1EBA  => array(0x1EBB),
835         0x1EBC  => array(0x1EBD),
836         0x1EBE  => array(0x1EBF),
837         0x1EC0  => array(0x1EC1),
838         0x1EC2  => array(0x1EC3),
839         0x1EC4  => array(0x1EC5),
840         0x1EC6  => array(0x1EC7),
841         0x1EC8  => array(0x1EC9),
842         0x1ECA  => array(0x1ECB),
843         0x1ECC  => array(0x1ECD),
844         0x1ECE  => array(0x1ECF),
845         0x1ED0  => array(0x1ED1),
846         0x1ED2  => array(0x1ED3),
847         0x1ED4  => array(0x1ED5),
848         0x1ED6  => array(0x1ED7),
849         0x1ED8  => array(0x1ED9),
850         0x1EDA  => array(0x1EDB),
851         0x1EDC  => array(0x1EDD),
852         0x1EDE  => array(0x1EDF),
853         0x1EE0  => array(0x1EE1),
854         0x1EE2  => array(0x1EE3),
855         0x1EE4  => array(0x1EE5),
856         0x1EE6  => array(0x1EE7),
857         0x1EE8  => array(0x1EE9),
858         0x1EEA  => array(0x1EEB),
859         0x1EEC  => array(0x1EED),
860         0x1EEE  => array(0x1EEF),
861         0x1EF0  => array(0x1EF1),
862         0x1EF2  => array(0x1EF3),
863         0x1EF4  => array(0x1EF5),
864         0x1EF6  => array(0x1EF7),
865         0x1EF8  => array(0x1EF9),
866         0x1F08  => array(0x1F00),
867         0x1F09  => array(0x1F01),
868         0x1F0A  => array(0x1F02),
869         0x1F0B  => array(0x1F03),
870         0x1F0C  => array(0x1F04),
871         0x1F0D  => array(0x1F05),
872         0x1F0E  => array(0x1F06),
873         0x1F0F  => array(0x1F07),
874         0x1F18  => array(0x1F10),
875         0x1F19  => array(0x1F11),
876         0x1F1A  => array(0x1F12),
877         0x1F1B  => array(0x1F13),
878         0x1F1C  => array(0x1F14),
879         0x1F1D  => array(0x1F15),
880         0x1F28  => array(0x1F20),
881         0x1F29  => array(0x1F21),
882         0x1F2A  => array(0x1F22),
883         0x1F2B  => array(0x1F23),
884         0x1F2C  => array(0x1F24),
885         0x1F2D  => array(0x1F25),
886         0x1F2E  => array(0x1F26),
887         0x1F2F  => array(0x1F27),
888         0x1F38  => array(0x1F30),
889         0x1F39  => array(0x1F31),
890         0x1F3A  => array(0x1F32),
891         0x1F3B  => array(0x1F33),
892         0x1F3C  => array(0x1F34),
893         0x1F3D  => array(0x1F35),
894         0x1F3E  => array(0x1F36),
895         0x1F3F  => array(0x1F37),
896         0x1F48  => array(0x1F40),
897         0x1F49  => array(0x1F41),
898         0x1F4A  => array(0x1F42),
899         0x1F4B  => array(0x1F43),
900         0x1F4C  => array(0x1F44),
901         0x1F4D  => array(0x1F45),
902         0x1F50  => array(0x3C5, 0x313),
903         0x1F52  => array(0x3C5, 0x313, 0x300),
904         0x1F54  => array(0x3C5, 0x313, 0x301),
905         0x1F56  => array(0x3C5, 0x313, 0x342),
906         0x1F59  => array(0x1F51),
907         0x1F5B  => array(0x1F53),
908         0x1F5D  => array(0x1F55),
909         0x1F5F  => array(0x1F57),
910         0x1F68  => array(0x1F60),
911         0x1F69  => array(0x1F61),
912         0x1F6A  => array(0x1F62),
913         0x1F6B  => array(0x1F63),
914         0x1F6C  => array(0x1F64),
915         0x1F6D  => array(0x1F65),
916         0x1F6E  => array(0x1F66),
917         0x1F6F  => array(0x1F67),
918         0x1F80  => array(0x1F00, 0x3B9),
919         0x1F81  => array(0x1F01, 0x3B9),
920         0x1F82  => array(0x1F02, 0x3B9),
921         0x1F83  => array(0x1F03, 0x3B9),
922         0x1F84  => array(0x1F04, 0x3B9),
923         0x1F85  => array(0x1F05, 0x3B9),
924         0x1F86  => array(0x1F06, 0x3B9),
925         0x1F87  => array(0x1F07, 0x3B9),
926         0x1F88  => array(0x1F00, 0x3B9),
927         0x1F89  => array(0x1F01, 0x3B9),
928         0x1F8A  => array(0x1F02, 0x3B9),
929         0x1F8B  => array(0x1F03, 0x3B9),
930         0x1F8C  => array(0x1F04, 0x3B9),
931         0x1F8D  => array(0x1F05, 0x3B9),
932         0x1F8E  => array(0x1F06, 0x3B9),
933         0x1F8F  => array(0x1F07, 0x3B9),
934         0x1F90  => array(0x1F20, 0x3B9),
935         0x1F91  => array(0x1F21, 0x3B9),
936         0x1F92  => array(0x1F22, 0x3B9),
937         0x1F93  => array(0x1F23, 0x3B9),
938         0x1F94  => array(0x1F24, 0x3B9),
939         0x1F95  => array(0x1F25, 0x3B9),
940         0x1F96  => array(0x1F26, 0x3B9),
941         0x1F97  => array(0x1F27, 0x3B9),
942         0x1F98  => array(0x1F20, 0x3B9),
943         0x1F99  => array(0x1F21, 0x3B9),
944         0x1F9A  => array(0x1F22, 0x3B9),
945         0x1F9B  => array(0x1F23, 0x3B9),
946         0x1F9C  => array(0x1F24, 0x3B9),
947         0x1F9D  => array(0x1F25, 0x3B9),
948         0x1F9E  => array(0x1F26, 0x3B9),
949         0x1F9F  => array(0x1F27, 0x3B9),
950         0x1FA0  => array(0x1F60, 0x3B9),
951         0x1FA1  => array(0x1F61, 0x3B9),
952         0x1FA2  => array(0x1F62, 0x3B9),
953         0x1FA3  => array(0x1F63, 0x3B9),
954         0x1FA4  => array(0x1F64, 0x3B9),
955         0x1FA5  => array(0x1F65, 0x3B9),
956         0x1FA6  => array(0x1F66, 0x3B9),
957         0x1FA7  => array(0x1F67, 0x3B9),
958         0x1FA8  => array(0x1F60, 0x3B9),
959         0x1FA9  => array(0x1F61, 0x3B9),
960         0x1FAA  => array(0x1F62, 0x3B9),
961         0x1FAB  => array(0x1F63, 0x3B9),
962         0x1FAC  => array(0x1F64, 0x3B9),
963         0x1FAD  => array(0x1F65, 0x3B9),
964         0x1FAE  => array(0x1F66, 0x3B9),
965         0x1FAF  => array(0x1F67, 0x3B9),
966         0x1FB2  => array(0x1F70, 0x3B9),
967         0x1FB3  => array(0x3B1, 0x3B9),
968         0x1FB4  => array(0x3AC, 0x3B9),
969         0x1FB6  => array(0x3B1, 0x342),
970         0x1FB7  => array(0x3B1, 0x342, 0x3B9),
971         0x1FB8  => array(0x1FB0),
972         0x1FB9  => array(0x1FB1),
973         0x1FBA  => array(0x1F70),
974         0x1FBB  => array(0x1F71),
975         0x1FBC  => array(0x3B1, 0x3B9),
976         0x1FBE  => array(0x3B9),
977         0x1FC2  => array(0x1F74, 0x3B9),
978         0x1FC3  => array(0x3B7, 0x3B9),
979         0x1FC4  => array(0x3AE, 0x3B9),
980         0x1FC6  => array(0x3B7, 0x342),
981         0x1FC7  => array(0x3B7, 0x342, 0x3B9),
982         0x1FC8  => array(0x1F72),
983         0x1FC9  => array(0x1F73),
984         0x1FCA  => array(0x1F74),
985         0x1FCB  => array(0x1F75),
986         0x1FCC  => array(0x3B7, 0x3B9),
987         0x1FD2  => array(0x3B9, 0x308, 0x300),
988         0x1FD3  => array(0x3B9, 0x308, 0x301),
989         0x1FD6  => array(0x3B9, 0x342),
990         0x1FD7  => array(0x3B9, 0x308, 0x342),
991         0x1FD8  => array(0x1FD0),
992         0x1FD9  => array(0x1FD1),
993         0x1FDA  => array(0x1F76),
994         0x1FDB  => array(0x1F77),
995         0x1FE2  => array(0x3C5, 0x308, 0x300),
996         0x1FE3  => array(0x3C5, 0x308, 0x301),
997         0x1FE4  => array(0x3C1, 0x313),
998         0x1FE6  => array(0x3C5, 0x342),
999         0x1FE7  => array(0x3C5, 0x308, 0x342),
1000         0x1FE8  => array(0x1FE0),
1001         0x1FE9  => array(0x1FE1),
1002         0x1FEA  => array(0x1F7A),
1003         0x1FEB  => array(0x1F7B),
1004         0x1FEC  => array(0x1FE5),
1005         0x1FF2  => array(0x1F7C, 0x3B9),
1006         0x1FF3  => array(0x3C9, 0x3B9),
1007         0x1FF4  => array(0x3CE, 0x3B9),
1008         0x1FF6  => array(0x3C9, 0x342),
1009         0x1FF7  => array(0x3C9, 0x342, 0x3B9),
1010         0x1FF8  => array(0x1F78),
1011         0x1FF9  => array(0x1F79),
1012         0x1FFA  => array(0x1F7C),
1013         0x1FFB  => array(0x1F7D),
1014         0x1FFC  => array(0x3C9, 0x3B9),
1015         0x20A8  => array(0x72, 0x73),
1016         0x2102  => array(0x63),
1017         0x2103  => array(0xB0, 0x63),
1018         0x2107  => array(0x25B),
1019         0x2109  => array(0xB0, 0x66),
1020         0x210B  => array(0x68),
1021         0x210C  => array(0x68),
1022         0x210D  => array(0x68),
1023         0x2110  => array(0x69),
1024         0x2111  => array(0x69),
1025         0x2112  => array(0x6C),
1026         0x2115  => array(0x6E),
1027         0x2116  => array(0x6E, 0x6F),
1028         0x2119  => array(0x70),
1029         0x211A  => array(0x71),
1030         0x211B  => array(0x72),
1031         0x211C  => array(0x72),
1032         0x211D  => array(0x72),
1033         0x2120  => array(0x73, 0x6D),
1034         0x2121  => array(0x74, 0x65, 0x6C),
1035         0x2122  => array(0x74, 0x6D),
1036         0x2124  => array(0x7A),
1037         0x2126  => array(0x3C9),
1038         0x2128  => array(0x7A),
1039         0x212A  => array(0x6B),
1040         0x212B  => array(0xE5),
1041         0x212C  => array(0x62),
1042         0x212D  => array(0x63),
1043         0x2130  => array(0x65),
1044         0x2131  => array(0x66),
1045         0x2133  => array(0x6D),
1046         0x213E  => array(0x3B3),
1047         0x213F  => array(0x3C0),
1048         0x2145  => array(0x64),
1049         0x2160  => array(0x2170),
1050         0x2161  => array(0x2171),
1051         0x2162  => array(0x2172),
1052         0x2163  => array(0x2173),
1053         0x2164  => array(0x2174),
1054         0x2165  => array(0x2175),
1055         0x2166  => array(0x2176),
1056         0x2167  => array(0x2177),
1057         0x2168  => array(0x2178),
1058         0x2169  => array(0x2179),
1059         0x216A  => array(0x217A),
1060         0x216B  => array(0x217B),
1061         0x216C  => array(0x217C),
1062         0x216D  => array(0x217D),
1063         0x216E  => array(0x217E),
1064         0x216F  => array(0x217F),
1065         0x24B6  => array(0x24D0),
1066         0x24B7  => array(0x24D1),
1067         0x24B8  => array(0x24D2),
1068         0x24B9  => array(0x24D3),
1069         0x24BA  => array(0x24D4),
1070         0x24BB  => array(0x24D5),
1071         0x24BC  => array(0x24D6),
1072         0x24BD  => array(0x24D7),
1073         0x24BE  => array(0x24D8),
1074         0x24BF  => array(0x24D9),
1075         0x24C0  => array(0x24DA),
1076         0x24C1  => array(0x24DB),
1077         0x24C2  => array(0x24DC),
1078         0x24C3  => array(0x24DD),
1079         0x24C4  => array(0x24DE),
1080         0x24C5  => array(0x24DF),
1081         0x24C6  => array(0x24E0),
1082         0x24C7  => array(0x24E1),
1083         0x24C8  => array(0x24E2),
1084         0x24C9  => array(0x24E3),
1085         0x24CA  => array(0x24E4),
1086         0x24CB  => array(0x24E5),
1087         0x24CC  => array(0x24E6),
1088         0x24CD  => array(0x24E7),
1089         0x24CE  => array(0x24E8),
1090         0x24CF  => array(0x24E9),
1091         0x3371  => array(0x68, 0x70, 0x61),
1092         0x3373  => array(0x61, 0x75),
1093         0x3375  => array(0x6F, 0x76),
1094         0x3380  => array(0x70, 0x61),
1095         0x3381  => array(0x6E, 0x61),
1096         0x3382  => array(0x3BC, 0x61),
1097         0x3383  => array(0x6D, 0x61),
1098         0x3384  => array(0x6B, 0x61),
1099         0x3385  => array(0x6B, 0x62),
1100         0x3386  => array(0x6D, 0x62),
1101         0x3387  => array(0x67, 0x62),
1102         0x338A  => array(0x70, 0x66),
1103         0x338B  => array(0x6E, 0x66),
1104         0x338C  => array(0x3BC, 0x66),
1105         0x3390  => array(0x68, 0x7A),
1106         0x3391  => array(0x6B, 0x68, 0x7A),
1107         0x3392  => array(0x6D, 0x68, 0x7A),
1108         0x3393  => array(0x67, 0x68, 0x7A),
1109         0x3394  => array(0x74, 0x68, 0x7A),
1110         0x33A9  => array(0x70, 0x61),
1111         0x33AA  => array(0x6B, 0x70, 0x61),
1112         0x33AB  => array(0x6D, 0x70, 0x61),
1113         0x33AC  => array(0x67, 0x70, 0x61),
1114         0x33B4  => array(0x70, 0x76),
1115         0x33B5  => array(0x6E, 0x76),
1116         0x33B6  => array(0x3BC, 0x76),
1117         0x33B7  => array(0x6D, 0x76),
1118         0x33B8  => array(0x6B, 0x76),
1119         0x33B9  => array(0x6D, 0x76),
1120         0x33BA  => array(0x70, 0x77),
1121         0x33BB  => array(0x6E, 0x77),
1122         0x33BC  => array(0x3BC, 0x77),
1123         0x33BD  => array(0x6D, 0x77),
1124         0x33BE  => array(0x6B, 0x77),
1125         0x33BF  => array(0x6D, 0x77),
1126         0x33C0  => array(0x6B, 0x3C9),
1127         0x33C1  => array(0x6D, 0x3C9), /*
1128         0x33C2  => array(0x61, 0x2E, 0x6D, 0x2E), */
1129         0x33C3  => array(0x62, 0x71),
1130         0x33C6  => array(0x63, 0x2215, 0x6B, 0x67),
1131         0x33C7  => array(0x63, 0x6F, 0x2E),
1132         0x33C8  => array(0x64, 0x62),
1133         0x33C9  => array(0x67, 0x79),
1134         0x33CB  => array(0x68, 0x70),
1135         0x33CD  => array(0x6B, 0x6B),
1136         0x33CE  => array(0x6B, 0x6D),
1137         0x33D7  => array(0x70, 0x68),
1138         0x33D9  => array(0x70, 0x70, 0x6D),
1139         0x33DA  => array(0x70, 0x72),
1140         0x33DC  => array(0x73, 0x76),
1141         0x33DD  => array(0x77, 0x62),
1142         0xFB00  => array(0x66, 0x66),
1143         0xFB01  => array(0x66, 0x69),
1144         0xFB02  => array(0x66, 0x6C),
1145         0xFB03  => array(0x66, 0x66, 0x69),
1146         0xFB04  => array(0x66, 0x66, 0x6C),
1147         0xFB05  => array(0x73, 0x74),
1148         0xFB06  => array(0x73, 0x74),
1149         0xFB13  => array(0x574, 0x576),
1150         0xFB14  => array(0x574, 0x565),
1151         0xFB15  => array(0x574, 0x56B),
1152         0xFB16  => array(0x57E, 0x576),
1153         0xFB17  => array(0x574, 0x56D),
1154         0xFF21  => array(0xFF41),
1155         0xFF22  => array(0xFF42),
1156         0xFF23  => array(0xFF43),
1157         0xFF24  => array(0xFF44),
1158         0xFF25  => array(0xFF45),
1159         0xFF26  => array(0xFF46),
1160         0xFF27  => array(0xFF47),
1161         0xFF28  => array(0xFF48),
1162         0xFF29  => array(0xFF49),
1163         0xFF2A  => array(0xFF4A),
1164         0xFF2B  => array(0xFF4B),
1165         0xFF2C  => array(0xFF4C),
1166         0xFF2D  => array(0xFF4D),
1167         0xFF2E  => array(0xFF4E),
1168         0xFF2F  => array(0xFF4F),
1169         0xFF30  => array(0xFF50),
1170         0xFF31  => array(0xFF51),
1171         0xFF32  => array(0xFF52),
1172         0xFF33  => array(0xFF53),
1173         0xFF34  => array(0xFF54),
1174         0xFF35  => array(0xFF55),
1175         0xFF36  => array(0xFF56),
1176         0xFF37  => array(0xFF57),
1177         0xFF38  => array(0xFF58),
1178         0xFF39  => array(0xFF59),
1179         0xFF3A  => array(0xFF5A),
1180         0x10400 => array(0x10428),
1181         0x10401 => array(0x10429),
1182         0x10402 => array(0x1042A),
1183         0x10403 => array(0x1042B),
1184         0x10404 => array(0x1042C),
1185         0x10405 => array(0x1042D),
1186         0x10406 => array(0x1042E),
1187         0x10407 => array(0x1042F),
1188         0x10408 => array(0x10430),
1189         0x10409 => array(0x10431),
1190         0x1040A => array(0x10432),
1191         0x1040B => array(0x10433),
1192         0x1040C => array(0x10434),
1193         0x1040D => array(0x10435),
1194         0x1040E => array(0x10436),
1195         0x1040F => array(0x10437),
1196         0x10410 => array(0x10438),
1197         0x10411 => array(0x10439),
1198         0x10412 => array(0x1043A),
1199         0x10413 => array(0x1043B),
1200         0x10414 => array(0x1043C),
1201         0x10415 => array(0x1043D),
1202         0x10416 => array(0x1043E),
1203         0x10417 => array(0x1043F),
1204         0x10418 => array(0x10440),
1205         0x10419 => array(0x10441),
1206         0x1041A => array(0x10442),
1207         0x1041B => array(0x10443),
1208         0x1041C => array(0x10444),
1209         0x1041D => array(0x10445),
1210         0x1041E => array(0x10446),
1211         0x1041F => array(0x10447),
1212         0x10420 => array(0x10448),
1213         0x10421 => array(0x10449),
1214         0x10422 => array(0x1044A),
1215         0x10423 => array(0x1044B),
1216         0x10424 => array(0x1044C),
1217         0x10425 => array(0x1044D),
1218         0x1D400 => array(0x61),
1219         0x1D401 => array(0x62),
1220         0x1D402 => array(0x63),
1221         0x1D403 => array(0x64),
1222         0x1D404 => array(0x65),
1223         0x1D405 => array(0x66),
1224         0x1D406 => array(0x67),
1225         0x1D407 => array(0x68),
1226         0x1D408 => array(0x69),
1227         0x1D409 => array(0x6A),
1228         0x1D40A => array(0x6B),
1229         0x1D40B => array(0x6C),
1230         0x1D40C => array(0x6D),
1231         0x1D40D => array(0x6E),
1232         0x1D40E => array(0x6F),
1233         0x1D40F => array(0x70),
1234         0x1D410 => array(0x71),
1235         0x1D411 => array(0x72),
1236         0x1D412 => array(0x73),
1237         0x1D413 => array(0x74),
1238         0x1D414 => array(0x75),
1239         0x1D415 => array(0x76),
1240         0x1D416 => array(0x77),
1241         0x1D417 => array(0x78),
1242         0x1D418 => array(0x79),
1243         0x1D419 => array(0x7A),
1244         0x1D434 => array(0x61),
1245         0x1D435 => array(0x62),
1246         0x1D436 => array(0x63),
1247         0x1D437 => array(0x64),
1248         0x1D438 => array(0x65),
1249         0x1D439 => array(0x66),
1250         0x1D43A => array(0x67),
1251         0x1D43B => array(0x68),
1252         0x1D43C => array(0x69),
1253         0x1D43D => array(0x6A),
1254         0x1D43E => array(0x6B),
1255         0x1D43F => array(0x6C),
1256         0x1D440 => array(0x6D),
1257         0x1D441 => array(0x6E),
1258         0x1D442 => array(0x6F),
1259         0x1D443 => array(0x70),
1260         0x1D444 => array(0x71),
1261         0x1D445 => array(0x72),
1262         0x1D446 => array(0x73),
1263         0x1D447 => array(0x74),
1264         0x1D448 => array(0x75),
1265         0x1D449 => array(0x76),
1266         0x1D44A => array(0x77),
1267         0x1D44B => array(0x78),
1268         0x1D44C => array(0x79),
1269         0x1D44D => array(0x7A),
1270         0x1D468 => array(0x61),
1271         0x1D469 => array(0x62),
1272         0x1D46A => array(0x63),
1273         0x1D46B => array(0x64),
1274         0x1D46C => array(0x65),
1275         0x1D46D => array(0x66),
1276         0x1D46E => array(0x67),
1277         0x1D46F => array(0x68),
1278         0x1D470 => array(0x69),
1279         0x1D471 => array(0x6A),
1280         0x1D472 => array(0x6B),
1281         0x1D473 => array(0x6C),
1282         0x1D474 => array(0x6D),
1283         0x1D475 => array(0x6E),
1284         0x1D476 => array(0x6F),
1285         0x1D477 => array(0x70),
1286         0x1D478 => array(0x71),
1287         0x1D479 => array(0x72),
1288         0x1D47A => array(0x73),
1289         0x1D47B => array(0x74),
1290         0x1D47C => array(0x75),
1291         0x1D47D => array(0x76),
1292         0x1D47E => array(0x77),
1293         0x1D47F => array(0x78),
1294         0x1D480 => array(0x79),
1295         0x1D481 => array(0x7A),
1296         0x1D49C => array(0x61),
1297         0x1D49E => array(0x63),
1298         0x1D49F => array(0x64),
1299         0x1D4A2 => array(0x67),
1300         0x1D4A5 => array(0x6A),
1301         0x1D4A6 => array(0x6B),
1302         0x1D4A9 => array(0x6E),
1303         0x1D4AA => array(0x6F),
1304         0x1D4AB => array(0x70),
1305         0x1D4AC => array(0x71),
1306         0x1D4AE => array(0x73),
1307         0x1D4AF => array(0x74),
1308         0x1D4B0 => array(0x75),
1309         0x1D4B1 => array(0x76),
1310         0x1D4B2 => array(0x77),
1311         0x1D4B3 => array(0x78),
1312         0x1D4B4 => array(0x79),
1313         0x1D4B5 => array(0x7A),
1314         0x1D4D0 => array(0x61),
1315         0x1D4D1 => array(0x62),
1316         0x1D4D2 => array(0x63),
1317         0x1D4D3 => array(0x64),
1318         0x1D4D4 => array(0x65),
1319         0x1D4D5 => array(0x66),
1320         0x1D4D6 => array(0x67),
1321         0x1D4D7 => array(0x68),
1322         0x1D4D8 => array(0x69),
1323         0x1D4D9 => array(0x6A),
1324         0x1D4DA => array(0x6B),
1325         0x1D4DB => array(0x6C),
1326         0x1D4DC => array(0x6D),
1327         0x1D4DD => array(0x6E),
1328         0x1D4DE => array(0x6F),
1329         0x1D4DF => array(0x70),
1330         0x1D4E0 => array(0x71),
1331         0x1D4E1 => array(0x72),
1332         0x1D4E2 => array(0x73),
1333         0x1D4E3 => array(0x74),
1334         0x1D4E4 => array(0x75),
1335         0x1D4E5 => array(0x76),
1336         0x1D4E6 => array(0x77),
1337         0x1D4E7 => array(0x78),
1338         0x1D4E8 => array(0x79),
1339         0x1D4E9 => array(0x7A),
1340         0x1D504 => array(0x61),
1341         0x1D505 => array(0x62),
1342         0x1D507 => array(0x64),
1343         0x1D508 => array(0x65),
1344         0x1D509 => array(0x66),
1345         0x1D50A => array(0x67),
1346         0x1D50D => array(0x6A),
1347         0x1D50E => array(0x6B),
1348         0x1D50F => array(0x6C),
1349         0x1D510 => array(0x6D),
1350         0x1D511 => array(0x6E),
1351         0x1D512 => array(0x6F),
1352         0x1D513 => array(0x70),
1353         0x1D514 => array(0x71),
1354         0x1D516 => array(0x73),
1355         0x1D517 => array(0x74),
1356         0x1D518 => array(0x75),
1357         0x1D519 => array(0x76),
1358         0x1D51A => array(0x77),
1359         0x1D51B => array(0x78),
1360         0x1D51C => array(0x79),
1361         0x1D538 => array(0x61),
1362         0x1D539 => array(0x62),
1363         0x1D53B => array(0x64),
1364         0x1D53C => array(0x65),
1365         0x1D53D => array(0x66),
1366         0x1D53E => array(0x67),
1367         0x1D540 => array(0x69),
1368         0x1D541 => array(0x6A),
1369         0x1D542 => array(0x6B),
1370         0x1D543 => array(0x6C),
1371         0x1D544 => array(0x6D),
1372         0x1D546 => array(0x6F),
1373         0x1D54A => array(0x73),
1374         0x1D54B => array(0x74),
1375         0x1D54C => array(0x75),
1376         0x1D54D => array(0x76),
1377         0x1D54E => array(0x77),
1378         0x1D54F => array(0x78),
1379         0x1D550 => array(0x79),
1380         0x1D56C => array(0x61),
1381         0x1D56D => array(0x62),
1382         0x1D56E => array(0x63),
1383         0x1D56F => array(0x64),
1384         0x1D570 => array(0x65),
1385         0x1D571 => array(0x66),
1386         0x1D572 => array(0x67),
1387         0x1D573 => array(0x68),
1388         0x1D574 => array(0x69),
1389         0x1D575 => array(0x6A),
1390         0x1D576 => array(0x6B),
1391         0x1D577 => array(0x6C),
1392         0x1D578 => array(0x6D),
1393         0x1D579 => array(0x6E),
1394         0x1D57A => array(0x6F),
1395         0x1D57B => array(0x70),
1396         0x1D57C => array(0x71),
1397         0x1D57D => array(0x72),
1398         0x1D57E => array(0x73),
1399         0x1D57F => array(0x74),
1400         0x1D580 => array(0x75),
1401         0x1D581 => array(0x76),
1402         0x1D582 => array(0x77),
1403         0x1D583 => array(0x78),
1404         0x1D584 => array(0x79),
1405         0x1D585 => array(0x7A),
1406         0x1D5A0 => array(0x61),
1407         0x1D5A1 => array(0x62),
1408         0x1D5A2 => array(0x63),
1409         0x1D5A3 => array(0x64),
1410         0x1D5A4 => array(0x65),
1411         0x1D5A5 => array(0x66),
1412         0x1D5A6 => array(0x67),
1413         0x1D5A7 => array(0x68),
1414         0x1D5A8 => array(0x69),
1415         0x1D5A9 => array(0x6A),
1416         0x1D5AA => array(0x6B),
1417         0x1D5AB => array(0x6C),
1418         0x1D5AC => array(0x6D),
1419         0x1D5AD => array(0x6E),
1420         0x1D5AE => array(0x6F),
1421         0x1D5AF => array(0x70),
1422         0x1D5B0 => array(0x71),
1423         0x1D5B1 => array(0x72),
1424         0x1D5B2 => array(0x73),
1425         0x1D5B3 => array(0x74),
1426         0x1D5B4 => array(0x75),
1427         0x1D5B5 => array(0x76),
1428         0x1D5B6 => array(0x77),
1429         0x1D5B7 => array(0x78),
1430         0x1D5B8 => array(0x79),
1431         0x1D5B9 => array(0x7A),
1432         0x1D5D4 => array(0x61),
1433         0x1D5D5 => array(0x62),
1434         0x1D5D6 => array(0x63),
1435         0x1D5D7 => array(0x64),
1436         0x1D5D8 => array(0x65),
1437         0x1D5D9 => array(0x66),
1438         0x1D5DA => array(0x67),
1439         0x1D5DB => array(0x68),
1440         0x1D5DC => array(0x69),
1441         0x1D5DD => array(0x6A),
1442         0x1D5DE => array(0x6B),
1443         0x1D5DF => array(0x6C),
1444         0x1D5E0 => array(0x6D),
1445         0x1D5E1 => array(0x6E),
1446         0x1D5E2 => array(0x6F),
1447         0x1D5E3 => array(0x70),
1448         0x1D5E4 => array(0x71),
1449         0x1D5E5 => array(0x72),
1450         0x1D5E6 => array(0x73),
1451         0x1D5E7 => array(0x74),
1452         0x1D5E8 => array(0x75),
1453         0x1D5E9 => array(0x76),
1454         0x1D5EA => array(0x77),
1455         0x1D5EB => array(0x78),
1456         0x1D5EC => array(0x79),
1457         0x1D5ED => array(0x7A),
1458         0x1D608 => array(0x61),
1459         0x1D609 => array(0x62),
1460         0x1D60A => array(0x63),
1461         0x1D60B => array(0x64),
1462         0x1D60C => array(0x65),
1463         0x1D60D => array(0x66),
1464         0x1D60E => array(0x67),
1465         0x1D60F => array(0x68),
1466         0x1D610 => array(0x69),
1467         0x1D611 => array(0x6A),
1468         0x1D612 => array(0x6B),
1469         0x1D613 => array(0x6C),
1470         0x1D614 => array(0x6D),
1471         0x1D615 => array(0x6E),
1472         0x1D616 => array(0x6F),
1473         0x1D617 => array(0x70),
1474         0x1D618 => array(0x71),
1475         0x1D619 => array(0x72),
1476         0x1D61A => array(0x73),
1477         0x1D61B => array(0x74),
1478         0x1D61C => array(0x75),
1479         0x1D61D => array(0x76),
1480         0x1D61E => array(0x77),
1481         0x1D61F => array(0x78),
1482         0x1D620 => array(0x79),
1483         0x1D621 => array(0x7A),
1484         0x1D63C => array(0x61),
1485         0x1D63D => array(0x62),
1486         0x1D63E => array(0x63),
1487         0x1D63F => array(0x64),
1488         0x1D640 => array(0x65),
1489         0x1D641 => array(0x66),
1490         0x1D642 => array(0x67),
1491         0x1D643 => array(0x68),
1492         0x1D644 => array(0x69),
1493         0x1D645 => array(0x6A),
1494         0x1D646 => array(0x6B),
1495         0x1D647 => array(0x6C),
1496         0x1D648 => array(0x6D),
1497         0x1D649 => array(0x6E),
1498         0x1D64A => array(0x6F),
1499         0x1D64B => array(0x70),
1500         0x1D64C => array(0x71),
1501         0x1D64D => array(0x72),
1502         0x1D64E => array(0x73),
1503         0x1D64F => array(0x74),
1504         0x1D650 => array(0x75),
1505         0x1D651 => array(0x76),
1506         0x1D652 => array(0x77),
1507         0x1D653 => array(0x78),
1508         0x1D654 => array(0x79),
1509         0x1D655 => array(0x7A),
1510         0x1D670 => array(0x61),
1511         0x1D671 => array(0x62),
1512         0x1D672 => array(0x63),
1513         0x1D673 => array(0x64),
1514         0x1D674 => array(0x65),
1515         0x1D675 => array(0x66),
1516         0x1D676 => array(0x67),
1517         0x1D677 => array(0x68),
1518         0x1D678 => array(0x69),
1519         0x1D679 => array(0x6A),
1520         0x1D67A => array(0x6B),
1521         0x1D67B => array(0x6C),
1522         0x1D67C => array(0x6D),
1523         0x1D67D => array(0x6E),
1524         0x1D67E => array(0x6F),
1525         0x1D67F => array(0x70),
1526         0x1D680 => array(0x71),
1527         0x1D681 => array(0x72),
1528         0x1D682 => array(0x73),
1529         0x1D683 => array(0x74),
1530         0x1D684 => array(0x75),
1531         0x1D685 => array(0x76),
1532         0x1D686 => array(0x77),
1533         0x1D687 => array(0x78),
1534         0x1D688 => array(0x79),
1535         0x1D689 => array(0x7A),
1536         0x1D6A8 => array(0x3B1),
1537         0x1D6A9 => array(0x3B2),
1538         0x1D6AA => array(0x3B3),
1539         0x1D6AB => array(0x3B4),
1540         0x1D6AC => array(0x3B5),
1541         0x1D6AD => array(0x3B6),
1542         0x1D6AE => array(0x3B7),
1543         0x1D6AF => array(0x3B8),
1544         0x1D6B0 => array(0x3B9),
1545         0x1D6B1 => array(0x3BA),
1546         0x1D6B2 => array(0x3BB),
1547         0x1D6B3 => array(0x3BC),
1548         0x1D6B4 => array(0x3BD),
1549         0x1D6B5 => array(0x3BE),
1550         0x1D6B6 => array(0x3BF),
1551         0x1D6B7 => array(0x3C0),
1552         0x1D6B8 => array(0x3C1),
1553         0x1D6B9 => array(0x3B8),
1554         0x1D6BA => array(0x3C3),
1555         0x1D6BB => array(0x3C4),
1556         0x1D6BC => array(0x3C5),
1557         0x1D6BD => array(0x3C6),
1558         0x1D6BE => array(0x3C7),
1559         0x1D6BF => array(0x3C8),
1560         0x1D6C0 => array(0x3C9),
1561         0x1D6D3 => array(0x3C3),
1562         0x1D6E2 => array(0x3B1),
1563         0x1D6E3 => array(0x3B2),
1564         0x1D6E4 => array(0x3B3),
1565         0x1D6E5 => array(0x3B4),
1566         0x1D6E6 => array(0x3B5),
1567         0x1D6E7 => array(0x3B6),
1568         0x1D6E8 => array(0x3B7),
1569         0x1D6E9 => array(0x3B8),
1570         0x1D6EA => array(0x3B9),
1571         0x1D6EB => array(0x3BA),
1572         0x1D6EC => array(0x3BB),
1573         0x1D6ED => array(0x3BC),
1574         0x1D6EE => array(0x3BD),
1575         0x1D6EF => array(0x3BE),
1576         0x1D6F0 => array(0x3BF),
1577         0x1D6F1 => array(0x3C0),
1578         0x1D6F2 => array(0x3C1),
1579         0x1D6F3 => array(0x3B8),
1580         0x1D6F4 => array(0x3C3),
1581         0x1D6F5 => array(0x3C4),
1582         0x1D6F6 => array(0x3C5),
1583         0x1D6F7 => array(0x3C6),
1584         0x1D6F8 => array(0x3C7),
1585         0x1D6F9 => array(0x3C8),
1586         0x1D6FA => array(0x3C9),
1587         0x1D70D => array(0x3C3),
1588         0x1D71C => array(0x3B1),
1589         0x1D71D => array(0x3B2),
1590         0x1D71E => array(0x3B3),
1591         0x1D71F => array(0x3B4),
1592         0x1D720 => array(0x3B5),
1593         0x1D721 => array(0x3B6),
1594         0x1D722 => array(0x3B7),
1595         0x1D723 => array(0x3B8),
1596         0x1D724 => array(0x3B9),
1597         0x1D725 => array(0x3BA),
1598         0x1D726 => array(0x3BB),
1599         0x1D727 => array(0x3BC),
1600         0x1D728 => array(0x3BD),
1601         0x1D729 => array(0x3BE),
1602         0x1D72A => array(0x3BF),
1603         0x1D72B => array(0x3C0),
1604         0x1D72C => array(0x3C1),
1605         0x1D72D => array(0x3B8),
1606         0x1D72E => array(0x3C3),
1607         0x1D72F => array(0x3C4),
1608         0x1D730 => array(0x3C5),
1609         0x1D731 => array(0x3C6),
1610         0x1D732 => array(0x3C7),
1611         0x1D733 => array(0x3C8),
1612         0x1D734 => array(0x3C9),
1613         0x1D747 => array(0x3C3),
1614         0x1D756 => array(0x3B1),
1615         0x1D757 => array(0x3B2),
1616         0x1D758 => array(0x3B3),
1617         0x1D759 => array(0x3B4),
1618         0x1D75A => array(0x3B5),
1619         0x1D75B => array(0x3B6),
1620         0x1D75C => array(0x3B7),
1621         0x1D75D => array(0x3B8),
1622         0x1D75E => array(0x3B9),
1623         0x1D75F => array(0x3BA),
1624         0x1D760 => array(0x3BB),
1625         0x1D761 => array(0x3BC),
1626         0x1D762 => array(0x3BD),
1627         0x1D763 => array(0x3BE),
1628         0x1D764 => array(0x3BF),
1629         0x1D765 => array(0x3C0),
1630         0x1D766 => array(0x3C1),
1631         0x1D767 => array(0x3B8),
1632         0x1D768 => array(0x3C3),
1633         0x1D769 => array(0x3C4),
1634         0x1D76A => array(0x3C5),
1635         0x1D76B => array(0x3C6),
1636         0x1D76C => array(0x3C7),
1637         0x1D76D => array(0x3C8),
1638         0x1D76E => array(0x3C9),
1639         0x1D781 => array(0x3C3),
1640         0x1D790 => array(0x3B1),
1641         0x1D791 => array(0x3B2),
1642         0x1D792 => array(0x3B3),
1643         0x1D793 => array(0x3B4),
1644         0x1D794 => array(0x3B5),
1645         0x1D795 => array(0x3B6),
1646         0x1D796 => array(0x3B7),
1647         0x1D797 => array(0x3B8),
1648         0x1D798 => array(0x3B9),
1649         0x1D799 => array(0x3BA),
1650         0x1D79A => array(0x3BB),
1651         0x1D79B => array(0x3BC),
1652         0x1D79C => array(0x3BD),
1653         0x1D79D => array(0x3BE),
1654         0x1D79E => array(0x3BF),
1655         0x1D79F => array(0x3C0),
1656         0x1D7A0 => array(0x3C1),
1657         0x1D7A1 => array(0x3B8),
1658         0x1D7A2 => array(0x3C3),
1659         0x1D7A3 => array(0x3C4),
1660         0x1D7A4 => array(0x3C5),
1661         0x1D7A5 => array(0x3C6),
1662         0x1D7A6 => array(0x3C7),
1663         0x1D7A7 => array(0x3C8),
1664         0x1D7A8 => array(0x3C9),
1665         0x1D7BB => array(0x3C3),
1666         0x3F9   => array(0x3C3),
1667         0x1D2C  => array(0x61),
1668         0x1D2D  => array(0xE6),
1669         0x1D2E  => array(0x62),
1670         0x1D30  => array(0x64),
1671         0x1D31  => array(0x65),
1672         0x1D32  => array(0x1DD),
1673         0x1D33  => array(0x67),
1674         0x1D34  => array(0x68),
1675         0x1D35  => array(0x69),
1676         0x1D36  => array(0x6A),
1677         0x1D37  => array(0x6B),
1678         0x1D38  => array(0x6C),
1679         0x1D39  => array(0x6D),
1680         0x1D3A  => array(0x6E),
1681         0x1D3C  => array(0x6F),
1682         0x1D3D  => array(0x223),
1683         0x1D3E  => array(0x70),
1684         0x1D3F  => array(0x72),
1685         0x1D40  => array(0x74),
1686         0x1D41  => array(0x75),
1687         0x1D42  => array(0x77),
1688         0x213B  => array(0x66, 0x61, 0x78),
1689         0x3250  => array(0x70, 0x74, 0x65),
1690         0x32CC  => array(0x68, 0x67),
1691         0x32CE  => array(0x65, 0x76),
1692         0x32CF  => array(0x6C, 0x74, 0x64),
1693         0x337A  => array(0x69, 0x75),
1694         0x33DE  => array(0x76, 0x2215, 0x6D),
1695         0x33DF  => array(0x61, 0x2215, 0x6D)
1696     );
1697
1698     /**
1699      * Normalization combining classes; code points not listed here
1700      * have combining class 0.
1701      *
1702      * @static
1703      * @var array
1704      * @access private
1705      */
1706     private static $_np_norm_combcls = array(
1707         0x334   => 1,
1708         0x335   => 1,
1709         0x336   => 1,
1710         0x337   => 1,
1711         0x338   => 1,
1712         0x93C   => 7,
1713         0x9BC   => 7,
1714         0xA3C   => 7,
1715         0xABC   => 7,
1716         0xB3C   => 7,
1717         0xCBC   => 7,
1718         0x1037  => 7,
1719         0x3099  => 8,
1720         0x309A  => 8,
1721         0x94D   => 9,
1722         0x9CD   => 9,
1723         0xA4D   => 9,
1724         0xACD   => 9,
1725         0xB4D   => 9,
1726         0xBCD   => 9,
1727         0xC4D   => 9,
1728         0xCCD   => 9,
1729         0xD4D   => 9,
1730         0xDCA   => 9,
1731         0xE3A   => 9,
1732         0xF84   => 9,
1733         0x1039  => 9,
1734         0x1714  => 9,
1735         0x1734  => 9,
1736         0x17D2  => 9,
1737         0x5B0   => 10,
1738         0x5B1   => 11,
1739         0x5B2   => 12,
1740         0x5B3   => 13,
1741         0x5B4   => 14,
1742         0x5B5   => 15,
1743         0x5B6   => 16,
1744         0x5B7   => 17,
1745         0x5B8   => 18,
1746         0x5B9   => 19,
1747         0x5BB   => 20,
1748         0x5Bc   => 21,
1749         0x5BD   => 22,
1750         0x5BF   => 23,
1751         0x5C1   => 24,
1752         0x5C2   => 25,
1753         0xFB1E  => 26,
1754         0x64B   => 27,
1755         0x64C   => 28,
1756         0x64D   => 29,
1757         0x64E   => 30,
1758         0x64F   => 31,
1759         0x650   => 32,
1760         0x651   => 33,
1761         0x652   => 34,
1762         0x670   => 35,
1763         0x711   => 36,
1764         0xC55   => 84,
1765         0xC56   => 91,
1766         0xE38   => 103,
1767         0xE39   => 103,
1768         0xE48   => 107,
1769         0xE49   => 107,
1770         0xE4A   => 107,
1771         0xE4B   => 107,
1772         0xEB8   => 118,
1773         0xEB9   => 118,
1774         0xEC8   => 122,
1775         0xEC9   => 122,
1776         0xECA   => 122,
1777         0xECB   => 122,
1778         0xF71   => 129,
1779         0xF72   => 130,
1780         0xF7A   => 130,
1781         0xF7B   => 130,
1782         0xF7C   => 130,
1783         0xF7D   => 130,
1784         0xF80   => 130,
1785         0xF74   => 132,
1786         0x321   => 202,
1787         0x322   => 202,
1788         0x327   => 202,
1789         0x328   => 202,
1790         0x31B   => 216,
1791         0xF39   => 216,
1792         0x1D165 => 216,
1793         0x1D166 => 216,
1794         0x1D16E => 216,
1795         0x1D16F => 216,
1796         0x1D170 => 216,
1797         0x1D171 => 216,
1798         0x1D172 => 216,
1799         0x302A  => 218,
1800         0x316   => 220,
1801         0x317   => 220,
1802         0x318   => 220,
1803         0x319   => 220,
1804         0x31C   => 220,
1805         0x31D   => 220,
1806         0x31E   => 220,
1807         0x31F   => 220,
1808         0x320   => 220,
1809         0x323   => 220,
1810         0x324   => 220,
1811         0x325   => 220,
1812         0x326   => 220,
1813         0x329   => 220,
1814         0x32A   => 220,
1815         0x32B   => 220,
1816         0x32C   => 220,
1817         0x32D   => 220,
1818         0x32E   => 220,
1819         0x32F   => 220,
1820         0x330   => 220,
1821         0x331   => 220,
1822         0x332   => 220,
1823         0x333   => 220,
1824         0x339   => 220,
1825         0x33A   => 220,
1826         0x33B   => 220,
1827         0x33C   => 220,
1828         0x347   => 220,
1829         0x348   => 220,
1830         0x349   => 220,
1831         0x34D   => 220,
1832         0x34E   => 220,
1833         0x353   => 220,
1834         0x354   => 220,
1835         0x355   => 220,
1836         0x356   => 220,
1837         0x591   => 220,
1838         0x596   => 220,
1839         0x59B   => 220,
1840         0x5A3   => 220,
1841         0x5A4   => 220,
1842         0x5A5   => 220,
1843         0x5A6   => 220,
1844         0x5A7   => 220,
1845         0x5AA   => 220,
1846         0x655   => 220,
1847         0x656   => 220,
1848         0x6E3   => 220,
1849         0x6EA   => 220,
1850         0x6ED   => 220,
1851         0x731   => 220,
1852         0x734   => 220,
1853         0x737   => 220,
1854         0x738   => 220,
1855         0x739   => 220,
1856         0x73B   => 220,
1857         0x73C   => 220,
1858         0x73E   => 220,
1859         0x742   => 220,
1860         0x744   => 220,
1861         0x746   => 220,
1862         0x748   => 220,
1863         0x952   => 220,
1864         0xF18   => 220,
1865         0xF19   => 220,
1866         0xF35   => 220,
1867         0xF37   => 220,
1868         0xFC6   => 220,
1869         0x193B  => 220,
1870         0x20E8  => 220,
1871         0x1D17B => 220,
1872         0x1D17C => 220,
1873         0x1D17D => 220,
1874         0x1D17E => 220,
1875         0x1D17F => 220,
1876         0x1D180 => 220,
1877         0x1D181 => 220,
1878         0x1D182 => 220,
1879         0x1D18A => 220,
1880         0x1D18B => 220,
1881         0x59A   => 222,
1882         0x5AD   => 222,
1883         0x1929  => 222,
1884         0x302D  => 222,
1885         0x302E  => 224,
1886         0x302F  => 224,
1887         0x1D16D => 226,
1888         0x5AE   => 228,
1889         0x18A9  => 228,
1890         0x302B  => 228,
1891         0x300   => 230,
1892         0x301   => 230,
1893         0x302   => 230,
1894         0x303   => 230,
1895         0x304   => 230,
1896         0x305   => 230,
1897         0x306   => 230,
1898         0x307   => 230,
1899         0x308   => 230,
1900         0x309   => 230,
1901         0x30A   => 230,
1902         0x30B   => 230,
1903         0x30C   => 230,
1904         0x30D   => 230,
1905         0x30E   => 230,
1906         0x30F   => 230,
1907         0x310   => 230,
1908         0x311   => 230,
1909         0x312   => 230,
1910         0x313   => 230,
1911         0x314   => 230,
1912         0x33D   => 230,
1913         0x33E   => 230,
1914         0x33F   => 230,
1915         0x340   => 230,
1916         0x341   => 230,
1917         0x342   => 230,
1918         0x343   => 230,
1919         0x344   => 230,
1920         0x346   => 230,
1921         0x34A   => 230,
1922         0x34B   => 230,
1923         0x34C   => 230,
1924         0x350   => 230,
1925         0x351   => 230,
1926         0x352   => 230,
1927         0x357   => 230,
1928         0x363   => 230,
1929         0x364   => 230,
1930         0x365   => 230,
1931         0x366   => 230,
1932         0x367   => 230,
1933         0x368   => 230,
1934         0x369   => 230,
1935         0x36A   => 230,
1936         0x36B   => 230,
1937         0x36C   => 230,
1938         0x36D   => 230,
1939         0x36E   => 230,
1940         0x36F   => 230,
1941         0x483   => 230,
1942         0x484   => 230,
1943         0x485   => 230,
1944         0x486   => 230,
1945         0x592   => 230,
1946         0x593   => 230,
1947         0x594   => 230,
1948         0x595   => 230,
1949         0x597   => 230,
1950         0x598   => 230,
1951         0x599   => 230,
1952         0x59C   => 230,
1953         0x59D   => 230,
1954         0x59E   => 230,
1955         0x59F   => 230,
1956         0x5A0   => 230,
1957         0x5A1   => 230,
1958         0x5A8   => 230,
1959         0x5A9   => 230,
1960         0x5AB   => 230,
1961         0x5AC   => 230,
1962         0x5AF   => 230,
1963         0x5C4   => 230,
1964         0x610   => 230,
1965         0x611   => 230,
1966         0x612   => 230,
1967         0x613   => 230,
1968         0x614   => 230,
1969         0x615   => 230,
1970         0x653   => 230,
1971         0x654   => 230,
1972         0x657   => 230,
1973         0x658   => 230,
1974         0x6D6   => 230,
1975         0x6D7   => 230,
1976         0x6D8   => 230,
1977         0x6D9   => 230,
1978         0x6DA   => 230,
1979         0x6DB   => 230,
1980         0x6DC   => 230,
1981         0x6DF   => 230,
1982         0x6E0   => 230,
1983         0x6E1   => 230,
1984         0x6E2   => 230,
1985         0x6E4   => 230,
1986         0x6E7   => 230,
1987         0x6E8   => 230,
1988         0x6EB   => 230,
1989         0x6EC   => 230,
1990         0x730   => 230,
1991         0x732   => 230,
1992         0x733   => 230,
1993         0x735   => 230,
1994         0x736   => 230,
1995         0x73A   => 230,
1996         0x73D   => 230,
1997         0x73F   => 230,
1998         0x740   => 230,
1999         0x741   => 230,
2000         0x743   => 230,
2001         0x745   => 230,
2002         0x747   => 230,
2003         0x749   => 230,
2004         0x74A   => 230,
2005         0x951   => 230,
2006         0x953   => 230,
2007         0x954   => 230,
2008         0xF82   => 230,
2009         0xF83   => 230,
2010         0xF86   => 230,
2011         0xF87   => 230,
2012         0x170D  => 230,
2013         0x193A  => 230,
2014         0x20D0  => 230,
2015         0x20D1  => 230,
2016         0x20D4  => 230,
2017         0x20D5  => 230,
2018         0x20D6  => 230,
2019         0x20D7  => 230,
2020         0x20DB  => 230,
2021         0x20DC  => 230,
2022         0x20E1  => 230,
2023         0x20E7  => 230,
2024         0x20E9  => 230,
2025         0xFE20  => 230,
2026         0xFE21  => 230,
2027         0xFE22  => 230,
2028         0xFE23  => 230,
2029         0x1D185 => 230,
2030         0x1D186 => 230,
2031         0x1D187 => 230,
2032         0x1D189 => 230,
2033         0x1D188 => 230,
2034         0x1D1AA => 230,
2035         0x1D1AB => 230,
2036         0x1D1AC => 230,
2037         0x1D1AD => 230,
2038         0x315   => 232,
2039         0x31A   => 232,
2040         0x302C  => 232,
2041         0x35F   => 233,
2042         0x362   => 233,
2043         0x35D   => 234,
2044         0x35E   => 234,
2045         0x360   => 234,
2046         0x361   => 234,
2047         0x345   => 240
2048     );
2049     // }}}
2050
2051     // {{{ properties
/**
 * ACE prefix. _encode() refuses input that already starts with it and
 * puts it in front of every encoded label; _decode() only accepts labels
 * that carry it.
 *
 * @var string
 * @access private
 */
private $_punycode_prefix = 'xn--';

/**
 * NOTE(review): presumably a sentinel for "not a valid UCS code point";
 * it is not referenced in the part of the class visible here - confirm.
 *
 * @access private
 */
private $_invalid_ucs = 0x80000000;

/**
 * Upper bound from which _encode() starts its search for the next code
 * point to encode.
 *
 * @var int
 * @access private
 */
private $_max_ucs = 0x10FFFF;

/**
 * Number base of the Punycode digits; also the step width of the
 * threshold loops in _encode(), _decode() and _adapt().
 *
 * @var int
 * @access private
 */
private $_base = 36;

/**
 * Lower clamp of the per-digit threshold t.
 *
 * @var int
 * @access private
 */
private $_tmin = 1;

/**
 * Upper clamp of the per-digit threshold t.
 *
 * @var int
 * @access private
 */
private $_tmax = 26;

/**
 * Skew term used by _adapt() when the new bias is computed.
 *
 * @var int
 * @access private
 */
private $_skew = 38;

/**
 * Divisor _adapt() applies to the very first delta.
 *
 * @var int
 * @access private
 */
private $_damp = 700;

/**
 * Bias _encode() and _decode() start out with.
 *
 * @var int
 * @access private
 */
private $_initial_bias = 72;

/**
 * Code point at which _encode() and _decode() start counting.
 *
 * @var int
 * @access private
 */
private $_initial_n = 0x80;

/**
 * $_sbase + $_lcount * $_vcount * $_tcount; filled in by the constructor.
 *
 * @var int
 * @access private
 */
private $_slast;

/**
 * Base value the constructor adds the syllable count to when it derives
 * $_slast. The same value (0xAC00) opens the Hangul syllable range checked
 * in _nameprep().
 *
 * @var int
 * @access private
 */
private $_sbase = 0xAC00;

/**
 * NOTE(review): presumably the first leading-consonant jamo used by the
 * Hangul helpers, which are outside the visible part of the class - confirm.
 *
 * @var int
 * @access private
 */
private $_lbase = 0x1100;

/**
 * NOTE(review): presumably the first vowel jamo used by the Hangul
 * helpers - confirm.
 *
 * @var int
 * @access private
 */
private $_vbase = 0x1161;

/**
 * NOTE(review): presumably the trailing-consonant base used by the Hangul
 * helpers - confirm.
 *
 * @var int
 * @access private
 */
private $_tbase = 0x11A7;

/**
 * First factor of the syllable count the constructor uses for $_slast.
 *
 * @var int
 * @access private
 */
private $_lcount = 19;

/**
 * Second factor of the syllable count the constructor uses for $_slast.
 *
 * @var int
 * @access private
 */
private $_vcount = 21;

/**
 * Third factor of the syllable count the constructor uses for $_slast.
 *
 * @var int
 * @access private
 */
private $_tcount = 28;

/**
 * vcount * tcount (21 * 28)
 *
 * @var int
 * @access private
 */
private $_ncount = 588;

/**
 * lcount * tcount * vcount (19 * 28 * 21)
 *
 * @var int
 * @access private
 */
private $_scount = 11172;

/**
 * Encoding of encode()'s input and decode()'s output when no one-time
 * encoding is passed: 'utf8' (default), 'ucs4_string' or 'ucs4_array'.
 * Changed through the 'encoding' option of {@link setParams()}.
 *
 * @var string
 * @access private
 */
private $_api_encoding = 'utf8';

/**
 * Whether overlong UTF-8 encodings are tolerated; off by default.
 * Changed through the 'overlong' option of {@link setParams()}.
 *
 * @var bool
 * @access private
 */
private $_allow_overlong = false;

/**
 * Strict mode switch; off by default. When on, encode() and decode()
 * throw for e-mail addresses and URLs, and _encode() throws for labels
 * without encodable characters.
 * Changed through the 'strict' option of {@link setParams()}.
 *
 * @var bool
 * @access private
 */
private $_strict_mode = false;

/**
 * Cached flag telling whether mbstring function overloading is switched
 * on for strlen. Stays null until the first instance is constructed and
 * is shared by all instances afterwards.
 *
 * @var bool|null
 * @see Net_IDNA_php5::_byteLength()
 */
private static $_mb_string_overload = null;
2206     // }}}
2207
2208
2209     // {{{ constructor
/**
 * Constructor
 *
 * Derives $_slast from the Hangul constants, applies the given options and
 * fills the class-wide mbstring overload cache if that has not happened yet.
 *
 * @param  array  $options  Option => value pairs that are handed to
 *                          setParams(); anything that is not an array is
 *                          ignored
 * @access public
 * @see    setParams()
 */
public function __construct($options = null)
{
    $syllables    = $this->_lcount * $this->_vcount * $this->_tcount;
    $this->_slast = $this->_sbase + $syllables;

    if (is_array($options)) {
        $this->setParams($options);
    }

    // The cache is static: once any instance has filled it we are done
    if (null !== self::$_mb_string_overload) {
        return;
    }

    if (!extension_loaded('mbstring')) {
        self::$_mb_string_overload = false;
        return;
    }

    // Bit 0x02 of mbstring.func_overload is the one being tested
    $overload_flags = ini_get('mbstring.func_overload');
    self::$_mb_string_overload = (($overload_flags & 0x02) === 0x02);
}
2231     // }}}
2232
2233
/**
 * Sets one or more option values. Available options and values:
 *
 * [encoding - Encoding of encode()'s input and decode()'s output: one of
 *             'utf8' (default), 'ucs4_string' or 'ucs4_array'; any other
 *             value raises an Exception]
 * [overlong - Unicode does not allow unnecessarily long encodings of chars,
 *             to allow this, set this parameter to true, else to false;
 *             default is false.]
 * [strict -   true: strict mode, good for registration purposes - Causes errors
 *             on failures; false: loose mode, ideal for "wildlife" applications
 *             by silently ignoring errors and returning the original input instead]
 *
 * Option names and encoding values are compared strictly. The loose switch
 * used previously accepted e.g. boolean true as an encoding (and stored it
 * as such) and, on PHP < 8, treated the integer key 0 as 'encoding'.
 *
 * Options are applied in order. Processing stops at the first unknown option
 * name; options handled before it stay applied.
 *
 * @param    mixed     $option      Parameter to set (string: single parameter; array of Parameter => Value pairs)
 * @param    mixed     $value       Value to use (if parameter 1 is a string)
 * @return   boolean                true on success, false if an unknown option name was given
 * @throws   Exception              if the value given for 'encoding' is not supported
 * @access   public
 */
public function setParams($option, $value = false)
{
    if (!is_array($option)) {
        $option = array($option => $value);
    }

    $encodings = array('utf8', 'ucs4_string', 'ucs4_array');

    foreach ($option as $k => $v) {
        if ('encoding' === $k) {
            if (!in_array($v, $encodings, true)) {
                // Avoid an "Array to string conversion" notice in the message
                $shown = is_scalar($v) ? $v : gettype($v);
                throw new Exception('Set Parameter: Unknown parameter '.$shown.' for option '.$k);
            }
            $this->_api_encoding = $v;
        } elseif ('overlong' === $k) {
            $this->_allow_overlong = ($v) ? true : false;
        } elseif ('strict' === $k) {
            $this->_strict_mode = ($v) ? true : false;
        } else {
            return false;
        }
    }

    return true;
}
2288
/**
 * Encode a given domain name.
 *
 * The input is split at the anchor characters '.', '/', ':', '?' and '@'
 * (the ideographic and fullwidth dots U+3002, U+FF0E and U+FF61 are turned
 * into a plain '.' first). Every part between two anchors is run through
 * _encode(); parts _encode() declines are passed through as UTF-8. The
 * anchors themselves are copied to the output, including an anchor that is
 * the very first character of the input (it used to be dropped).
 *
 * In strict mode any anchor character raises an Exception, so only a single
 * label can be encoded.
 *
 * @param    mixed      $decoded            Domain name (UTF-8 string, UCS-4 string or UCS-4 array)
 * @param    string     $one_time_encoding  Input encoding for this call only ('utf8',
 *                                          'ucs4_string' or 'ucs4_array'); when empty the
 *                                          object-wide setting is used, see {@link setParams()}
 * @return   string                         Encoded domain name (ACE string); '' for empty input
 * @throws   Exception                      on an unsupported input format, on anchor characters
 *                                          in strict mode, and whatever _encode() throws
 * @access   public
 */
public function encode($decoded, $one_time_encoding = false)
{
    // Forcing conversion of input to UCS4 array
    // If one time encoding is given, use this, else the objects property
    switch (($one_time_encoding) ? $one_time_encoding : $this->_api_encoding) {
    case 'utf8':
        $decoded = $this->_utf8_to_ucs4($decoded);
        break;
    case 'ucs4_string':
        $decoded = $this->_ucs4_string_to_ucs4($decoded);
        break;
    case 'ucs4_array':
        // Already in the internal format
        break;
    default:
        throw new Exception('Unsupported input format');
    }

    // No input, no output, what else did you expect?
    if (empty($decoded)) {
        return '';
    }

    // Index of the first code point of the part currently collected
    $last_begin = 0;
    // Output string
    $output = '';

    foreach ($decoded as $k => $v) {
        switch ($v) {
        // Make sure to use just the plain dot
        case 0x3002:
        case 0xFF0E:
        case 0xFF61:
            $decoded[$k] = 0x2E;
            // It's right, no break here
            // The codepoints above have to be converted to dots anyway

        // Stumbling across an anchoring character
        case 0x2E:
        case 0x2F:
        case 0x3A:
        case 0x3F:
        case 0x40:
            // Neither email addresses nor URLs allowed in strict mode
            if ($this->_strict_mode) {
                throw new Exception('Neither email addresses nor URLs are allowed in strict mode.');
            }

            // Encode what has been collected in front of the anchor. The part
            // is empty for a leading or doubled anchor; _encode() declines
            // that and nothing is added to the output.
            $part    = array_slice($decoded, $last_begin, $k - $last_begin);
            $encoded = $this->_encode($part);
            // Compare against false explicitly: "0" is a valid encoded label
            $output .= (false !== $encoded) ? $encoded : $this->_ucs4_to_utf8($part);

            // Always keep the anchor itself, even as the first character
            $output .= chr($decoded[$k]);
            $last_begin = $k + 1;
            break;
        }
    }

    // Catch the rest of the string (all of it if there was no anchor)
    $rest    = ($last_begin) ? array_slice($decoded, $last_begin) : $decoded;
    $encoded = $this->_encode($rest);

    return $output . ((false !== $encoded) ? $encoded : $this->_ucs4_to_utf8($rest));
}
2377
/**
 * Decode a given ACE domain name.
 *
 * Three kinds of input are told apart:
 *  - something with an '@' after the first character is treated as an
 *    e-mail address; everything behind the first '@' is decoded label by
 *    label
 *  - something containing ':', '.' or '/' is treated as a domain name or
 *    URL; if parse_url() finds a host only that host is decoded and the URL
 *    is put back together with its delimiters, otherwise the whole input is
 *    decoded label by label
 *  - anything else is handed to _decode() as a single label
 *
 * Labels _decode() declines are left untouched in the first two cases. In
 * the third case the declined result (false) is what gets returned or
 * converted.
 *
 * @param    string     $input              Domain name (ACE string)
 * @param    string     $one_time_encoding  Output encoding for this call only ('utf8',
 *                                          'ucs4_string' or 'ucs4_array'); when empty the
 *                                          object-wide setting is used, see {@link setParams()}
 * @return   mixed                          Decoded domain name (UTF-8 string, UCS-4 string or
 *                                          UCS-4 array, depending on the encoding)
 * @throws   Exception                      on an unknown encoding, on e-mail addresses and URLs
 *                                          in strict mode, and whatever _decode() throws
 * @access   public
 */
public function decode($input, $one_time_encoding = false)
{
    // Optionally set
    if ($one_time_encoding) {
        switch ($one_time_encoding) {
        case 'utf8':
        case 'ucs4_string':
        case 'ucs4_array':
            break;
        default:
            throw new Exception('Unknown encoding '.$one_time_encoding);
        }
    }

    // Make sure to drop any newline characters around
    $input = trim($input);

    // Dot separated part that has to be decoded label by label (null: the
    // input is a single label) and the text surrounding it
    $host   = null;
    $before = '';
    $after  = '';

    // Negotiate input and try to determine, whether it is a plain string,
    // an email address or something like a complete URL
    if (strpos($input, '@')) { // Maybe it is an email address ('@' not in first position)
        // No no in strict mode
        if ($this->_strict_mode) {
            throw new Exception('Only simple domain name parts can be handled in strict mode');
        }
        list($email_pref, $host) = explode('@', $input, 2);
        $before = $email_pref . '@';
    } elseif (preg_match('![:\./]!', $input)) { // Or a complete domain name (with or without paths / parameters)
        // No no in strict mode
        if ($this->_strict_mode) {
            throw new Exception('Only simple domain name parts can be handled in strict mode');
        }
        $parsed = parse_url($input);
        if (isset($parsed['host'])) {
            $host = $parsed['host'];

            // parse_url() strips the delimiters and does not return the
            // parts in URL order, so the URL has to be reassembled by hand
            if (isset($parsed['scheme'])) {
                $before .= $parsed['scheme'];
                $before .= (strtolower($parsed['scheme']) == 'mailto') ? ':' : '://';
            } elseif (strpos($input, '//') === 0) {
                // Scheme-relative URL
                $before .= '//';
            }
            if (isset($parsed['user'])) {
                $before .= $parsed['user'];
                if (isset($parsed['pass'])) {
                    $before .= ':' . $parsed['pass'];
                }
                $before .= '@';
            }
            if (isset($parsed['port'])) {
                $after .= ':' . $parsed['port'];
            }
            if (isset($parsed['path'])) {
                $after .= $parsed['path'];
            }
            if (isset($parsed['query'])) {
                $after .= '?' . $parsed['query'];
            }
            if (isset($parsed['fragment'])) {
                $after .= '#' . $parsed['fragment'];
            }
        } else { // parse_url seems to have failed, try without it
            $host = $input;
        }
    }

    if (null === $host) {
        // Otherwise we consider it being a pure domain name string
        $return = $this->_decode($input);
    } else {
        $arr = explode('.', $host);
        foreach ($arr as $k => $v) {
            $conv = $this->_decode($v);
            if ($conv) {
                $arr[$k] = $conv;
            }
        }
        $return = $before . join('.', $arr) . $after;
    }

    // The output is UTF-8 by default, other output formats need conversion here
    // If one time encoding is given, use this, else the objects property
    switch (($one_time_encoding) ? $one_time_encoding : $this->_api_encoding) {
    case 'utf8':
        return $return;
    case 'ucs4_string':
        return $this->_ucs4_to_ucs4_string($this->_utf8_to_ucs4($return));
    case 'ucs4_array':
        return $this->_utf8_to_ucs4($return);
    default:
        throw new Exception('Unsupported output format');
    }
}
2462
2463
2464     // {{{ private
/**
 * The actual encoding algorithm.
 *
 * Runs NAMEPREP on the given code points and converts the result into a
 * Punycode label carrying the ACE prefix. If NAMEPREP leaves nothing but
 * ASCII, that ASCII text is returned without prefix.
 *
 * All ASCII code points (below 0x80) are copied to the output as basic code
 * points. Copying only a subset of them, as was done before, left the
 * remaining ASCII characters (e.g. '_') unaccounted for and made the main
 * loop below run forever.
 *
 * NOTE: the "is there anything to encode" test uses 0x7a as its limit, so
 * '{', '|', '}', '~' and DEL count as encodable as well. This is kept as it
 * is to not alter which inputs are rejected in strict mode.
 *
 * @param    array      $decoded     Unicode code points of a single label
 * @return   mixed                   Encoded label (string), or false if there is
 *                                   nothing to encode (loose mode only) or NAMEPREP
 *                                   left nothing over
 * @throws   Exception               if the input already starts with the Punycode
 *                                   prefix, if it has no encodable characters in
 *                                   strict mode, or if NAMEPREP rejects it
 * @access   private
 */
private function _encode($decoded)
{
    // We cannot encode a domain name containing the Punycode prefix
    $extract    = self::_byteLength($this->_punycode_prefix);
    $check_pref = $this->_utf8_to_ucs4($this->_punycode_prefix);
    $check_deco = array_slice($decoded, 0, $extract);

    if ($check_pref == $check_deco) {
        throw new Exception('This is already a punycode string');
    }

    // We will not try to encode strings consisting of basic code points only
    $encodable = false;
    foreach ($decoded as $v) {
        if ($v > 0x7a) {
            $encodable = true;
            break;
        }
    }
    if (!$encodable) {
        if ($this->_strict_mode) {
            throw new Exception('The given string does not contain encodable chars');
        }
        return false;
    }

    // Do NAMEPREP; its exceptions are serious and simply propagate
    $decoded  = $this->_nameprep($decoded);
    $deco_len = count($decoded);

    // Empty array
    if (!$deco_len) {
        return false;
    }

    // How many chars have been consumed
    $codecount = 0;
    $encoded   = '';

    // Copy all basic code points to output
    for ($i = 0; $i < $deco_len; ++$i) {
        if ($decoded[$i] < 0x80) {
            $encoded .= chr($decoded[$i]);
            $codecount++;
        }
    }

    // All codepoints were basic ones
    if ($codecount == $deco_len) {
        return $encoded;
    }

    // Start with the prefix; copy it to output
    $encoded = $this->_punycode_prefix . $encoded;

    // If we have basic code points in output, add an hyphen to the end
    if ($codecount) {
        $encoded .= '-';
    }

    // Now find and encode all non-basic code points
    $is_first  = true;
    $cur_code  = $this->_initial_n;
    $bias      = $this->_initial_bias;
    $delta     = 0;

    while ($codecount < $deco_len) {
        // Find the smallest code point >= the current code point
        $next_code = $this->_max_ucs;
        for ($i = 0; $i < $deco_len; $i++) {
            if ($decoded[$i] >= $cur_code && $decoded[$i] <= $next_code) {
                $next_code = $decoded[$i];
            }
        }

        $delta += ($next_code - $cur_code) * ($codecount + 1);
        $cur_code = $next_code;

        // Scan input again and encode all characters whose code point is $cur_code
        for ($i = 0; $i < $deco_len; $i++) {
            if ($decoded[$i] < $cur_code) {
                $delta++;
            } else if ($decoded[$i] == $cur_code) {
                // Write $delta as a variable length integer
                $q = $delta;
                for ($k = $this->_base; 1; $k += $this->_base) {
                    $t = ($k <= $bias)?
                        $this->_tmin :
                        (($k >= $bias + $this->_tmax)? $this->_tmax : $k - $bias);

                    if ($q < $t) {
                        break;
                    }

                    $encoded .= $this->_encodeDigit($t + (($q - $t) % ($this->_base - $t)));
                    // Integer division on purpose: a float quotient yields the
                    // same digits but needs implicit float to int conversions
                    $q = (int) (($q - $t) / ($this->_base - $t));
                }

                $encoded .= $this->_encodeDigit($q);
                $bias = $this->_adapt($delta, $codecount + 1, $is_first);
                $codecount++;
                $delta = 0;
                $is_first = false;
            }
        }

        $delta++;
        $cur_code++;
    }

    return $encoded;
}
2596
/**
 * The actual decoding algorithm.
 *
 * Turns a single Punycode label carrying the ACE prefix back into UTF-8.
 * The bytes between the prefix and the last '-' are taken over literally;
 * the digits behind that delimiter are decoded into code points which are
 * inserted at the positions they encode.
 *
 * String offsets use square brackets; the curly brace form used before is
 * deprecated as of PHP 7.4 and a parse error as of PHP 8.0.
 *
 * @param    string     $encoded     Single label (ACE string)
 * @return   mixed                   Decoded label (UTF-8 string), or false if the
 *                                   label does not start with the Punycode prefix,
 *                                   has nothing behind it, ends in the middle of a
 *                                   number or contains a byte that is no Punycode
 *                                   digit
 * @throws   Exception               whatever the UCS-4 to UTF-8 conversion throws
 * @access   private
 */
private function _decode($encoded)
{
    $prefix_pattern = '!^' . preg_quote($this->_punycode_prefix, '!') . '!';

    // We do need to find the Punycode prefix
    if (!preg_match($prefix_pattern, $encoded)) {
        return false;
    }

    $encode_test = preg_replace($prefix_pattern, '', $encoded);

    // If nothing left after removing the prefix, it is hopeless
    if (!$encode_test) {
        return false;
    }

    $prefix_len = self::_byteLength($this->_punycode_prefix);

    // Find last occurence of the delimiter
    $delim_pos = strrpos($encoded, '-');

    // Everything between prefix and delimiter is literal
    $decoded = array();
    if ($delim_pos > $prefix_len) {
        for ($k = $prefix_len; $k < $delim_pos; ++$k) {
            $decoded[] = ord($encoded[$k]);
        }
    }

    $deco_len = count($decoded);
    $enco_len = self::_byteLength($encoded);

    // Wandering through the strings; init
    $is_first = true;
    $bias     = $this->_initial_bias;
    $idx      = 0;
    $char     = $this->_initial_n;

    for ($enco_idx = ($delim_pos)? ($delim_pos + 1) : 0; $enco_idx < $enco_len; ++$deco_len) {
        for ($old_idx = $idx, $w = 1, $k = $this->_base; 1 ; $k += $this->_base) {
            // A number that is not terminated before the label ends is malformed
            if ($enco_idx >= $enco_len) {
                return false;
            }

            // Only [0-9A-Za-z] are digits; checked here so that the result
            // does not depend on what _decodeDigit() does with other bytes
            $byte = ord($encoded[$enco_idx]);
            $is_digit = (0x30 <= $byte && $byte <= 0x39)
                || (0x41 <= $byte && $byte <= 0x5A)
                || (0x61 <= $byte && $byte <= 0x7A);
            if (!$is_digit) {
                return false;
            }

            $digit = $this->_decodeDigit($encoded[$enco_idx++]);
            $idx += $digit * $w;

            $t = ($k <= $bias) ?
                $this->_tmin :
                (($k >= $bias + $this->_tmax)? $this->_tmax : ($k - $bias));

            if ($digit < $t) {
                break;
            }

            $w = (int)($w * ($this->_base - $t));
        }

        $bias      = $this->_adapt($idx - $old_idx, $deco_len + 1, $is_first);
        $is_first  = false;
        $char     += (int) ($idx / ($deco_len + 1));
        $idx      %= ($deco_len + 1);

        if ($deco_len > 0) {
            // Make room for the decoded char
            for ($i = $deco_len; $i > $idx; $i--) {
                $decoded[$i] = $decoded[($i - 1)];
            }
        }

        $decoded[$idx++] = $char;
    }

    return $this->_ucs4_to_utf8($decoded);
}
2676
/**
 * Adapt the bias according to the current code point and position.
 *
 * @param    int        $delta       Delta that has just been encoded / decoded
 * @param    int        $npoints     Number of code points handled so far,
 *                                   including the current one
 * @param    bool       $is_first    Whether this is the first adaption for the
 *                                   label; the delta is then divided by $_damp
 *                                   instead of by 2
 * @return   int                     The new bias
 * @access   private
 */
private function _adapt($delta, $npoints, $is_first)
{
    $divisor = $is_first ? $this->_damp : 2;

    $delta  = (int) ($delta / $divisor);
    $delta += (int) ($delta / $npoints);

    $span  = $this->_base - $this->_tmin;
    $limit = ($span * $this->_tmax) / 2;

    // Scale the delta down until it is within the limit; every round adds
    // one $_base to the offset of the result
    $k = 0;
    while ($delta > $limit) {
        $delta = (int) ($delta / $span);
        $k    += $this->_base;
    }

    return (int) ($k + ($span + 1) * $delta / ($delta + $this->_skew));
}
2693
/**
 * Encoding a certain digit.
 *
 * Values below 26 become 'a'..'z' (value + 97), all other values are
 * shifted by 22, which turns 26..35 into '0'..'9'.
 *
 * @param    int        $d           Digit value
 * @return   string                  The character standing for that value
 * @access   private
 */
private function _encodeDigit($d)
{
    if ($d < 26) {
        return chr($d + 97);
    }

    return chr($d + 22);
}
2703
/**
 * Decode a certain digit.
 *
 * 'a'..'z' and 'A'..'Z' stand for 0..25, '0'..'9' for 26..35.
 *
 * The ranges are checked on both ends. The former one-sided tests
 * ($cp - 48 < 10 and so on) only work with unsigned arithmetic; with PHP's
 * signed integers they turned e.g. '-' into 23 and ':' into -7.
 *
 * Anything that is not a digit raises an Exception. Handing back $_base as
 * a marker is no option, because _decode() does not look for it and would
 * keep on reading behind the end of the label.
 *
 * @param    string     $cp          A single character
 * @return   int                     Its digit value (0..35)
 * @throws   Exception               if the character is not a Punycode digit
 * @access   private
 */
private function _decodeDigit($cp)
{
    $cp = ord($cp);

    // '0'..'9'
    if (0x30 <= $cp && $cp <= 0x39) {
        return $cp - 22;
    }

    // 'A'..'Z'
    if (0x41 <= $cp && $cp <= 0x5A) {
        return $cp - 65;
    }

    // 'a'..'z'
    if (0x61 <= $cp && $cp <= 0x7A) {
        return $cp - 97;
    }

    throw new Exception('Invalid Punycode digit (byte value ' . $cp . ')');
}
2714
/**
 * Do Nameprep according to RFC3491 and RFC3454.
 *
 * Pass 1 walks the input: code points listed in $_np_map_nothing are
 * dropped, prohibited code points raise an exception, code points in the
 * Hangul syllable range are decomposed and code points with an entry in
 * $_np_replacemaps are replaced by their (canonically ordered) mapping.
 * Pass 2 walks the result and tries to re-compose sequences via _combine().
 *
 * @param    array      $input       Unicode code points (integers)
 * @return   array                   Unicode code points, Nameprep'd
 * @throws   Exception               When a prohibited code point is found
 * @access   private
 */
private function _nameprep($input)
{
    $output = array();

    // Walking through the input array, performing the required steps on each of
    // the input chars and putting the result into the output array.
    // While mapping required chars we apply the canonical ordering.
    foreach ($input as $v) {
        // Map to nothing == skip that code point
        if (in_array($v, self::$_np_map_nothing)) {
            continue;
        }

        // Try to find prohibited input
        if (in_array($v, self::$_np_prohibit) || in_array($v, self::$_general_prohibited)) {
            throw new Exception('NAMEPREP: Prohibited input U+' . sprintf('%08X', $v));
        }

        foreach (self::$_np_prohibit_ranges as $range) {
            if ($range[0] <= $v && $v <= $range[1]) {
                throw new Exception('NAMEPREP: Prohibited input U+' . sprintf('%08X', $v));
            }
        }

        // Hangul syllable decomposition
        if (0xAC00 <= $v && $v <= 0xD7AF) {
            foreach ($this->_hangulDecompose($v) as $out) {
                $output[] = $out;
            }
        } else if (isset(self::$_np_replacemaps[$v])) { // There's a decomposition mapping for that code point
            foreach ($this->_applyCannonicalOrdering(self::$_np_replacemaps[$v]) as $out) {
                $output[] = $out;
            }
        } else {
            $output[] = $v;
        }
    }

    // Combine code points

    $last_class   = 0;
    $last_starter = 0;
    $out_len      = count($output);

    for ($i = 0; $i < $out_len; ++$i) {
        $class = $this->_getCombiningClass($output[$i]);

        if ((!$last_class || $last_class != $class) && $class) {
            // Try to match the sequence from the last starter up to (not including) $i
            $seq_len = $i - $last_starter;
            $out = $this->_combine(array_slice($output, $last_starter, $seq_len));

            // On match: Replace the last starter with the composed character and remove
            // the now redundant non-starter(s)
            if ($out) {
                $output[$last_starter] = $out;

                // _combine() yields either a single code point (integer) or an array.
                // count() must not be called on the integer: it raises a warning on
                // PHP 7.2+ and a TypeError on PHP 8. A scalar counts as one element,
                // which is what count() returned for it on older PHP versions.
                $out_count = is_array($out) ? count($out) : 1;

                if ($out_count != $seq_len) {
                    // Close the gap by shifting everything after $i one slot to the left
                    for ($j = $i + 1; $j < $out_len; ++$j) {
                        $output[$j - 1] = $output[$j];
                    }

                    // NOTE(review): after the shift the stale duplicate sits at index
                    // $out_len - 1, while this unsets index $out_len - looks off by one;
                    // confirm against test vectors before changing.
                    unset($output[$out_len]);
                }

                // Rewind the for loop by one, since there can be more possible compositions
                $i--;
                $out_len--;
                $last_class = ($i == $last_starter)? 0 : $this->_getCombiningClass($output[$i - 1]);

                continue;
            }
        }

        // The current class is 0, i.e. this code point is a starter
        if (!$class) {
            $last_starter = $i;
        }

        $last_class = $class;
    }

    return $output;
}
2808
/**
 * Decomposes a Hangul syllable into its conjoining jamo
 * (see http://www.unicode.org/unicode/reports/tr15/#Hangul).
 *
 * @param    integer    $char        32bit UCS4 code point
 * @return   array                   Leading consonant, vowel and (if present) trailing
 *                                   consonant; or the unchanged code point as a
 *                                   one-element array when it is outside the syllable block
 * @access   private
 */
private function _hangulDecompose($char)
{
    $offset = $char - $this->_sbase;

    // Outside the precomposed syllable block: hand the code point back untouched
    if ($offset < 0 || $offset >= $this->_scount) {
        return array($char);
    }

    $lead  = (int)($this->_lbase +  $offset / $this->_ncount);
    $vowel = (int)($this->_vbase + ($offset % $this->_ncount) / $this->_tcount);
    $trail = $this->_tbase + $offset % $this->_tcount;

    // A trailing value equal to the base means "no trailing consonant"
    if ($trail == $this->_tbase) {
        return array($lead, $vowel);
    }

    return array($lead, $vowel, $trail);
}
2837
/**
 * Composes Hangul syllables from conjoining jamo
 * (see http://www.unicode.org/unicode/reports/tr15/#Hangul).
 *
 * Walks the sequence once, merging a leading consonant followed by a vowel
 * into an LV syllable, and an LV syllable followed by a trailing consonant
 * into an LVT syllable. Everything else is copied through unchanged.
 *
 * @param    array      $input       Decomposed UCS4 sequence
 * @return   array                   UCS4 sequence with syllables composed
 * @access   private
 */
private function _hangulCompose($input)
{
    $inp_len = count($input);

    if (!$inp_len) {
        return array();
    }

    $result   = array();
    $last     = $input[0];
    $result[] = $last; // copy first char from input to output

    for ($i = 1; $i < $inp_len; ++$i) {
        $char = $input[$i];

        // Find out whether the two current characters are L and V
        $lindex = $last - $this->_lbase;

        if (0 <= $lindex && $lindex < $this->_lcount) {
            $vindex = $char - $this->_vbase;

            if (0 <= $vindex && $vindex < $this->_vcount) {
                // create syllable of form LV
                $last    = ($this->_sbase + ($lindex * $this->_vcount + $vindex) * $this->_tcount);
                $out_off = count($result) - 1;
                $result[$out_off] = $last; // reset last

                // discard char
                continue;
            }
        }

        // Find out whether the two current characters are LV and T
        $sindex = $last - $this->_sbase;

        if (0 <= $sindex && $sindex < $this->_scount && ($sindex % $this->_tcount) == 0) {
            $tindex = $char - $this->_tbase;

            // Index 0 is the "no trailing consonant" slot (see _hangulDecompose) and
            // index _tcount would already land in the next LV syllable, so both ends
            // of the range are exclusive.
            if (0 < $tindex && $tindex < $this->_tcount) {
                // create syllable of form LVT
                $last += $tindex;
                $out_off = count($result) - 1;
                $result[$out_off] = $last; // reset last

                // discard char
                continue;
            }
        }

        // if neither case was true, just add the character
        $last = $char;
        $result[] = $char;
    }

    return $result;
}
2902
/**
 * Looks up the combining class of a code point in the
 * $_np_norm_combcls table.
 *
 * @param    integer    $char        Wide char to check (32bit integer)
 * @return   integer                 Combining class from the table, 0 when not listed
 * @access   private
 */
private function _getCombiningClass($char)
{
    if (isset(self::$_np_norm_combcls[$char])) {
        return self::$_np_norm_combcls[$char];
    }

    return 0;
}
2914
/**
 * Applies the canonical ordering to a decomposed UCS4 sequence.
 *
 * Repeats left-to-right passes over the sequence; whenever a code point
 * with a non-zero combining class follows one with a higher class it is
 * moved leftwards until its left neighbour's class is not higher than its
 * own. Passes stop once a complete pass moved nothing.
 *
 * @param    array      $input       Decomposed UCS4 sequence
 * @return   array                   Ordered UCS4 sequence
 * @access   private
 */
private function _applyCannonicalOrdering($input)
{
    $count = count($input);

    do {
        $moved    = false;
        $previous = $this->_getCombiningClass($input[0]);

        for ($pos = 1; $pos < $count; ++$pos) {
            $current = $this->_getCombiningClass($input[$pos]);

            // Already in order (or a starter): just advance
            if ($current == 0 || $previous <= $current) {
                $previous = $current;
                continue;
            }

            // Move item leftward until it fits
            for ($slot = $pos; $slot > 0; --$slot) {
                if ($this->_getCombiningClass($input[$slot - 1]) <= $current) {
                    break;
                }

                $held              = $input[$slot];
                $input[$slot]      = $input[$slot - 1];
                $input[$slot - 1]  = $held;
                $moved             = true;
            }

            // $previous is kept: the old character now sits at $pos and is
            // compared against the next one
        }
    } while ($moved);

    return $input;
}
2957
/**
 * Tries to compose a sequence of starter and non-starter(s).
 *
 * For sequences whose length is not 1 a Hangul composition is attempted
 * first; if that changes the length, the composed array is returned.
 * Otherwise $_np_replacemaps is searched for an entry whose mapping equals
 * the sequence, and the key of the first such entry is returned.
 *
 * @param    array      $input       UCS4 Decomposed sequence
 * @return   mixed                   Array (Hangul result), integer (key of the matching
 *                                   replacement map entry) or false when nothing matches
 * @access   private
 */
private function _combine($input)
{
    $length = count($input);

    // Is it a Hangul syllable?
    if ($length != 1) {
        $composed = $this->_hangulCompose($input);

        // This place is probably wrong
        if (count($composed) != $length) {
            return $composed;
        }
    }

    foreach (self::$_np_replacemaps as $source => $target) {
        // Cheap rejections first: different first code point or different length
        if ($target[0] != $input[0] || count($target) != $length) {
            continue;
        }

        $matches = false;

        foreach ($input as $idx => $cp) {
            $matches = ($cp == $target[$idx]);

            if (!$matches) {
                break;
            }
        }

        if ($matches) {
            return $source;
        }
    }

    return false;
}
3006
/**
 * This converts an UTF-8 encoded string to its UCS-4 (array) representation.
 * By talking about UCS-4 we mean arrays of 32bit integers representing
 * each of the "chars". This is due to PHP not being able to handle strings with
 * bit depth different from 8. This applies to the reverse method _ucs4_to_utf8(), too.
 * The following UTF-8 encodings are supported:
 *
 * bytes bits  representation
 * 1        7  0xxxxxxx
 * 2       11  110xxxxx 10xxxxxx
 * 3       16  1110xxxx 10xxxxxx 10xxxxxx
 * 4       21  11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
 * 5       26  111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
 * 6       31  1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
 *
 * Each x represents a bit that can be used to store character data.
 *
 * @param    string     $input       UTF-8 encoded byte string
 * @return   array                   Code points (integers)
 * @throws   Exception               On malformed, out-of-range or truncated input
 * @access   private
 */
private function _utf8_to_ucs4($input)
{
    $output  = array();
    $out_len = 0;
    $inp_len = self::_byteLength($input);
    $mode    = 'next'; // 'next': expecting a start byte, 'add': expecting continuation bytes
    $test    = 'none'; // 'range': first continuation byte still needs the range check

    for ($k = 0; $k < $inp_len; ++$k) {
        // Extract byte from input string ([] instead of {}: the curly brace
        // offset syntax no longer exists in PHP 8)
        $v = ord($input[$k]);

        if ($v < 128) {
            // An ASCII byte in the middle of a multi-byte sequence is malformed
            if ('add' == $mode) {
                throw new Exception('Conversion from UTF-8 to UCS-4 failed: malformed input at byte '.$k);
            }

            // We found an ASCII char - put into output as is
            $output[$out_len] = $v;
            ++$out_len;
            continue;
        }

        if ('next' == $mode) { // Try to find the next start byte; determine the width of the Unicode char
            $start_byte = $v;
            $mode = 'add';
            $test = 'range';

            if ($v >> 5 == 6) { // &110xxxxx 10xxxxx
                $next_byte = 0; // Tells, how many times subsequent bitmasks must rotate 6bits to the left
                $v = ($v - 192) << 6;
            } elseif ($v >> 4 == 14) { // &1110xxxx 10xxxxxx 10xxxxxx
                $next_byte = 1;
                $v = ($v - 224) << 12;
            } elseif ($v >> 3 == 30) { // &11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
                $next_byte = 2;
                $v = ($v - 240) << 18;
            } elseif ($v >> 2 == 62) { // &111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
                $next_byte = 3;
                $v = ($v - 248) << 24;
            } elseif ($v >> 1 == 126) { // &1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
                $next_byte = 4;
                $v = ($v - 252) << 30;
            } else {
                throw new Exception('This might be UTF-8, but I don\'t understand it at byte '.$k);
            }

            // Store the high bits; the continuation bytes are added onto this slot
            $output[$out_len] = (int) $v;
            ++$out_len;
            continue;
        }

        if ('add' == $mode) {
            if (!$this->_allow_overlong && $test == 'range') {
                // Only the first continuation byte is range checked
                $test = 'none';
                if (($v < 0xA0 && $start_byte == 0xE0) || ($v < 0x90 && $start_byte == 0xF0) || ($v > 0x8F && $start_byte == 0xF4)) {
                    throw new Exception('Bogus UTF-8 character detected (out of legal range) at byte '.$k);
                }
            }

            if ($v >> 6 == 2) { // Bit mask must be 10xxxxxx
                $v = ($v - 128) << ($next_byte * 6);
                $output[($out_len - 1)] += $v;
                --$next_byte;
            } else {
                throw new Exception('Conversion from UTF-8 to UCS-4 failed: malformed input at byte '.$k);
            }

            if ($next_byte < 0) {
                $mode = 'next';
            }
        }
    } // for

    // Input ended while continuation bytes were still expected: the last
    // code point is incomplete and must not be returned as if it were valid
    if ('add' == $mode) {
        throw new Exception('Conversion from UTF-8 to UCS-4 failed: malformed input at byte '.$k);
    }

    return $output;
}
3097
/**
 * Convert UCS-4 array into UTF-8 string.
 *
 * Each value is encoded with 1 to 6 bytes depending on its magnitude.
 *
 * @param    array      $input       Code points (integers)
 * @return   string                  UTF-8 encoded byte string
 * @throws   Exception               When a value is negative or does not fit into 31 bits;
 *                                   the message names the offending array index
 * @access   private
 */
private function _ucs4_to_utf8($input)
{
    $output = '';

    // The key is needed for the error message below
    foreach ($input as $k => $v) {
        if ($v < 0) {
            // Negative values would otherwise fall into the 7bit branch and emit a garbage byte
            throw new Exception('Conversion from UCS-4 to UTF-8 failed: malformed input at byte ' . $k);
        }

        if ($v < 128) {
            // 7bit are transferred literally
            $output .= chr($v);
        } else if ($v < (1 << 11)) {
            // 2 bytes
            $output .= chr(192 + ($v >> 6))
                . chr(128 + ($v & 63));
        } else if ($v < (1 << 16)) {
            // 3 bytes
            $output .= chr(224 + ($v >> 12))
                . chr(128 + (($v >> 6) & 63))
                . chr(128 + ($v & 63));
        } else if ($v < (1 << 21)) {
            // 4 bytes
            $output .= chr(240 + ($v >> 18))
                . chr(128 + (($v >> 12) & 63))
                . chr(128 + (($v >>  6) & 63))
                . chr(128 + ($v & 63));
        } else if ($v < (1 << 26)) {
            // 5 bytes
            $output .= chr(248 + ($v >> 24))
                . chr(128 + (($v >> 18) & 63))
                . chr(128 + (($v >> 12) & 63))
                . chr(128 + (($v >>  6) & 63))
                . chr(128 + ($v & 63));
        } else if ($v < (1 << 31)) {
            // 6 bytes
            $output .= chr(252 + ($v >> 30))
                . chr(128 + (($v >> 24) & 63))
                . chr(128 + (($v >> 18) & 63))
                . chr(128 + (($v >> 12) & 63))
                . chr(128 + (($v >>  6) & 63))
                . chr(128 + ($v & 63));
        } else {
            throw new Exception('Conversion from UCS-4 to UTF-8 failed: malformed input at byte ' . $k);
        }
    }

    return $output;
}
3151
/**
 * Convert UCS-4 array into UCS-4 string.
 *
 * Every value becomes four bytes, most significant byte first, which is
 * the layout _ucs4_string_to_ucs4() reads back.
 *
 * @param    array      $input       Code points (integers)
 * @return   string                  Byte string, 4 bytes per code point
 * @access   private
 */
private function _ucs4_to_ucs4_string($input)
{
    $output = '';

    // Take array values and split output to 4 bytes per value.
    // Shift first, then mask with 255 (&11111111): ">>" binds tighter than "&",
    // so masking before shifting without parentheses would always pick the low byte.
    foreach ($input as $v) {
        $output .= chr(($v >> 24) & 255)
            . chr(($v >> 16) & 255)
            . chr(($v >> 8) & 255)
            . chr($v & 255);
    }

    return $output;
}
3168
/**
 * Convert UCS-4 string into UCS-4 array.
 *
 * Reads the input in groups of four bytes, most significant byte first,
 * and produces one integer per group.
 *
 * @param    string     $input       Byte string, 4 bytes per code point
 * @return   array                   Code points (integers)
 * @throws   Exception               When the input length is not a multiple of 4
 * @access   private
 */
private function _ucs4_string_to_ucs4($input)
{
    $output = array();

    $inp_len = self::_byteLength($input);

    // Input length must be divisible by 4
    if ($inp_len % 4) {
        throw new Exception('Input UCS4 string is broken');
    }

    // Empty input - return empty output
    if (!$inp_len) {
        return $output;
    }

    for ($i = 0, $out_len = -1; $i < $inp_len; ++$i) {
        // Start a new output value every 4 input bytes. The comparison is spelled
        // out because "!$i % 4" parses as "(!$i) % 4", which is only true for $i == 0.
        if ($i % 4 == 0) {
            $out_len++;
            $output[$out_len] = 0;
        }

        // [] instead of {}: the curly brace offset syntax no longer exists in PHP 8
        $output[$out_len] += ord($input[$i]) << (8 * (3 - ($i % 4)));
    }

    return $output;
}
3199
/**
 * Echo hex representation of UCS4 sequence.
 *
 * Prints one line per element in the form "[key] => HEX", optionally
 * followed by the bit pattern in parentheses.
 *
 * @param    array      $input       UCS4 sequence
 * @param    boolean    $include_bit Include bitmask in output
 * @return   void
 * @static
 * @access   private
 */
private static function _showHex($input, $include_bit = false)
{
    foreach ($input as $k => $v) {
        echo '[', $k, '] => ', sprintf('%X', $v);

        if ($include_bit) {
            // _showBitmask() is a private static of this class, so it has to be
            // reached through self:: rather than through another class name
            echo ' (', self::_showBitmask($v), ')';
        }

        echo "\n";
    }
}
3221
/**
 * Renders a value as a string of '0' and '1' characters.
 *
 * The width follows the magnitude of the value: 8 digits by default,
 * 16 digits from 1 << 8 upwards and 32 digits from 1 << 16 upwards.
 * The most significant bit comes first.
 *
 * @param    integer    $octet       Value to render
 * @return   string                  Bit pattern of the value
 * @static
 * @access   private
 */
private static function _showBitmask($octet)
{
    // Index of the highest bit to print
    $top = 7;

    if ($octet >= (1 << 16)) {
        $top = 31;
    } else if ($octet >= (1 << 8)) {
        $top = 15;
    }

    $bits = '';
    $pos  = $top;

    while ($pos >= 0) {
        if ($octet & (1 << $pos)) {
            $bits .= '1';
        } else {
            $bits .= '0';
        }

        --$pos;
    }

    return $bits;
}
3247
/**
 * Returns the number of bytes in a string, independent of mbstring
 * function overloading.
 *
 * When the overload flag is set, mb_strlen() is asked for the length in
 * the '8bit' encoding; otherwise plain strlen() is used.
 *
 * @param string $string the string for which to get the length.
 *
 * @return integer the length of the string in bytes.
 *
 * @see Net_IDNA_php5::$_mb_string_overload
 */
private static function _byteLength($string)
{
    return self::$_mb_string_overload
        ? mb_strlen($string, '8bit')
        : strlen((string)$string);
}
3265
3266     // }}}}
3267 }
3268
3269 ?>