5 /* vim: set expandtab tabstop=4 shiftwidth=4 softtabstop=4 foldmethod=marker: */
7 // +----------------------------------------------------------------------+
8 // | This library is free software; you can redistribute it and/or modify |
9 // | it under the terms of the GNU Lesser General Public License as |
10 // | published by the Free Software Foundation; either version 2.1 of the |
11 // | License, or (at your option) any later version. |
13 // | This library is distributed in the hope that it will be useful, but |
14 // | WITHOUT ANY WARRANTY; without even the implied warranty of |
15 // | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
16 // | Lesser General Public License for more details. |
18 // | You should have received a copy of the GNU Lesser General Public |
19 // | License along with this library; if not, write to the Free Software |
20 // | Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 |
22 // +----------------------------------------------------------------------+
29 * Encode/decode Internationalized Domain Names.
31 * This class converts internationalized domain names (see RFC 3490 for
32 * details), as accepted by various registries worldwide, between their
33 * original (localized) form and the encoded form that is used in the
34 * DNS (Domain Name System).
36 * The class provides two public methods, encode() and decode(), which do exactly
37 * what you would expect them to do. You are allowed to use complete domain names,
38 * simple strings and complete email addresses as well. That means that you may
39 * use any of the following notations:
43 * - xn--brse-5qa.xn--knrz-1ra.info
45 * Unicode input may be given as either a UTF-8 string, a UCS-4 string or a UCS-4
46 * array. Unicode output is available in the same formats.
47 * You can select your preferred format via {@link set_paramter()}.
49 * ACE input and output are always expected to be ASCII.
51 * @author Markus Nix <mnix@docuverse.de>
52 * @author Matthias Sommerfeld <mso@phlylabs.de>
53 * @author Stefan Neufeind <pear.neufeind@speedpartner.de>
55 * @version $Id: php5.php 284682 2009-07-24 04:27:35Z clockwerx $
62 * These Unicode codepoints are
63 * mapped to nothing; see RFC 3454 for details.
69 private static $_np_map_nothing = array(
100 * Prohibited codepoints
106 private static $_general_prohibited = array(
174 * Codepoints prohibited by Nameprep
179 private static $_np_prohibit = array(
267 * Codepoint ranges prohibited by nameprep
273 private static $_np_prohibit_ranges = array(
275 array(0x2060, 0x206F ),
276 array(0x1D173, 0x1D17A ),
277 array(0xE000, 0xF8FF ),
278 array(0xF0000, 0xFFFFD ),
279 array(0x100000, 0x10FFFD),
280 array(0xFDD0, 0xFDEF ),
281 array(0xD800, 0xDFFF ),
282 array(0x2FF0, 0x2FFB ),
283 array(0xE0020, 0xE007F )
287 * Replacement mappings (casemapping, replacement sequences, ...)
293 private static $_np_replacemaps = array(
320 0xB5 => array(0x3BC),
351 0xDF => array(0x73, 0x73),
352 0x100 => array(0x101),
353 0x102 => array(0x103),
354 0x104 => array(0x105),
355 0x106 => array(0x107),
356 0x108 => array(0x109),
357 0x10A => array(0x10B),
358 0x10C => array(0x10D),
359 0x10E => array(0x10F),
360 0x110 => array(0x111),
361 0x112 => array(0x113),
362 0x114 => array(0x115),
363 0x116 => array(0x117),
364 0x118 => array(0x119),
365 0x11A => array(0x11B),
366 0x11C => array(0x11D),
367 0x11E => array(0x11F),
368 0x120 => array(0x121),
369 0x122 => array(0x123),
370 0x124 => array(0x125),
371 0x126 => array(0x127),
372 0x128 => array(0x129),
373 0x12A => array(0x12B),
374 0x12C => array(0x12D),
375 0x12E => array(0x12F),
376 0x130 => array(0x69, 0x307),
377 0x132 => array(0x133),
378 0x134 => array(0x135),
379 0x136 => array(0x137),
380 0x139 => array(0x13A),
381 0x13B => array(0x13C),
382 0x13D => array(0x13E),
383 0x13F => array(0x140),
384 0x141 => array(0x142),
385 0x143 => array(0x144),
386 0x145 => array(0x146),
387 0x147 => array(0x148),
388 0x149 => array(0x2BC, 0x6E),
389 0x14A => array(0x14B),
390 0x14C => array(0x14D),
391 0x14E => array(0x14F),
392 0x150 => array(0x151),
393 0x152 => array(0x153),
394 0x154 => array(0x155),
395 0x156 => array(0x157),
396 0x158 => array(0x159),
397 0x15A => array(0x15B),
398 0x15C => array(0x15D),
399 0x15E => array(0x15F),
400 0x160 => array(0x161),
401 0x162 => array(0x163),
402 0x164 => array(0x165),
403 0x166 => array(0x167),
404 0x168 => array(0x169),
405 0x16A => array(0x16B),
406 0x16C => array(0x16D),
407 0x16E => array(0x16F),
408 0x170 => array(0x171),
409 0x172 => array(0x173),
410 0x174 => array(0x175),
411 0x176 => array(0x177),
412 0x178 => array(0xFF),
413 0x179 => array(0x17A),
414 0x17B => array(0x17C),
415 0x17D => array(0x17E),
416 0x17F => array(0x73),
417 0x181 => array(0x253),
418 0x182 => array(0x183),
419 0x184 => array(0x185),
420 0x186 => array(0x254),
421 0x187 => array(0x188),
422 0x189 => array(0x256),
423 0x18A => array(0x257),
424 0x18B => array(0x18C),
425 0x18E => array(0x1DD),
426 0x18F => array(0x259),
427 0x190 => array(0x25B),
428 0x191 => array(0x192),
429 0x193 => array(0x260),
430 0x194 => array(0x263),
431 0x196 => array(0x269),
432 0x197 => array(0x268),
433 0x198 => array(0x199),
434 0x19C => array(0x26F),
435 0x19D => array(0x272),
436 0x19F => array(0x275),
437 0x1A0 => array(0x1A1),
438 0x1A2 => array(0x1A3),
439 0x1A4 => array(0x1A5),
440 0x1A6 => array(0x280),
441 0x1A7 => array(0x1A8),
442 0x1A9 => array(0x283),
443 0x1AC => array(0x1AD),
444 0x1AE => array(0x288),
445 0x1AF => array(0x1B0),
446 0x1B1 => array(0x28A),
447 0x1B2 => array(0x28B),
448 0x1B3 => array(0x1B4),
449 0x1B5 => array(0x1B6),
450 0x1B7 => array(0x292),
451 0x1B8 => array(0x1B9),
452 0x1BC => array(0x1BD),
453 0x1C4 => array(0x1C6),
454 0x1C5 => array(0x1C6),
455 0x1C7 => array(0x1C9),
456 0x1C8 => array(0x1C9),
457 0x1CA => array(0x1CC),
458 0x1CB => array(0x1CC),
459 0x1CD => array(0x1CE),
460 0x1CF => array(0x1D0),
461 0x1D1 => array(0x1D2),
462 0x1D3 => array(0x1D4),
463 0x1D5 => array(0x1D6),
464 0x1D7 => array(0x1D8),
465 0x1D9 => array(0x1DA),
466 0x1DB => array(0x1DC),
467 0x1DE => array(0x1DF),
468 0x1E0 => array(0x1E1),
469 0x1E2 => array(0x1E3),
470 0x1E4 => array(0x1E5),
471 0x1E6 => array(0x1E7),
472 0x1E8 => array(0x1E9),
473 0x1EA => array(0x1EB),
474 0x1EC => array(0x1ED),
475 0x1EE => array(0x1EF),
476 0x1F0 => array(0x6A, 0x30C),
477 0x1F1 => array(0x1F3),
478 0x1F2 => array(0x1F3),
479 0x1F4 => array(0x1F5),
480 0x1F6 => array(0x195),
481 0x1F7 => array(0x1BF),
482 0x1F8 => array(0x1F9),
483 0x1FA => array(0x1FB),
484 0x1FC => array(0x1FD),
485 0x1FE => array(0x1FF),
486 0x200 => array(0x201),
487 0x202 => array(0x203),
488 0x204 => array(0x205),
489 0x206 => array(0x207),
490 0x208 => array(0x209),
491 0x20A => array(0x20B),
492 0x20C => array(0x20D),
493 0x20E => array(0x20F),
494 0x210 => array(0x211),
495 0x212 => array(0x213),
496 0x214 => array(0x215),
497 0x216 => array(0x217),
498 0x218 => array(0x219),
499 0x21A => array(0x21B),
500 0x21C => array(0x21D),
501 0x21E => array(0x21F),
502 0x220 => array(0x19E),
503 0x222 => array(0x223),
504 0x224 => array(0x225),
505 0x226 => array(0x227),
506 0x228 => array(0x229),
507 0x22A => array(0x22B),
508 0x22C => array(0x22D),
509 0x22E => array(0x22F),
510 0x230 => array(0x231),
511 0x232 => array(0x233),
512 0x345 => array(0x3B9),
513 0x37A => array(0x20, 0x3B9),
514 0x386 => array(0x3AC),
515 0x388 => array(0x3AD),
516 0x389 => array(0x3AE),
517 0x38A => array(0x3AF),
518 0x38C => array(0x3CC),
519 0x38E => array(0x3CD),
520 0x38F => array(0x3CE),
521 0x390 => array(0x3B9, 0x308, 0x301),
522 0x391 => array(0x3B1),
523 0x392 => array(0x3B2),
524 0x393 => array(0x3B3),
525 0x394 => array(0x3B4),
526 0x395 => array(0x3B5),
527 0x396 => array(0x3B6),
528 0x397 => array(0x3B7),
529 0x398 => array(0x3B8),
530 0x399 => array(0x3B9),
531 0x39A => array(0x3BA),
532 0x39B => array(0x3BB),
533 0x39C => array(0x3BC),
534 0x39D => array(0x3BD),
535 0x39E => array(0x3BE),
536 0x39F => array(0x3BF),
537 0x3A0 => array(0x3C0),
538 0x3A1 => array(0x3C1),
539 0x3A3 => array(0x3C3),
540 0x3A4 => array(0x3C4),
541 0x3A5 => array(0x3C5),
542 0x3A6 => array(0x3C6),
543 0x3A7 => array(0x3C7),
544 0x3A8 => array(0x3C8),
545 0x3A9 => array(0x3C9),
546 0x3AA => array(0x3CA),
547 0x3AB => array(0x3CB),
548 0x3B0 => array(0x3C5, 0x308, 0x301),
549 0x3C2 => array(0x3C3),
550 0x3D0 => array(0x3B2),
551 0x3D1 => array(0x3B8),
552 0x3D2 => array(0x3C5),
553 0x3D3 => array(0x3CD),
554 0x3D4 => array(0x3CB),
555 0x3D5 => array(0x3C6),
556 0x3D6 => array(0x3C0),
557 0x3D8 => array(0x3D9),
558 0x3DA => array(0x3DB),
559 0x3DC => array(0x3DD),
560 0x3DE => array(0x3DF),
561 0x3E0 => array(0x3E1),
562 0x3E2 => array(0x3E3),
563 0x3E4 => array(0x3E5),
564 0x3E6 => array(0x3E7),
565 0x3E8 => array(0x3E9),
566 0x3EA => array(0x3EB),
567 0x3EC => array(0x3ED),
568 0x3EE => array(0x3EF),
569 0x3F0 => array(0x3BA),
570 0x3F1 => array(0x3C1),
571 0x3F2 => array(0x3C3),
572 0x3F4 => array(0x3B8),
573 0x3F5 => array(0x3B5),
574 0x400 => array(0x450),
575 0x401 => array(0x451),
576 0x402 => array(0x452),
577 0x403 => array(0x453),
578 0x404 => array(0x454),
579 0x405 => array(0x455),
580 0x406 => array(0x456),
581 0x407 => array(0x457),
582 0x408 => array(0x458),
583 0x409 => array(0x459),
584 0x40A => array(0x45A),
585 0x40B => array(0x45B),
586 0x40C => array(0x45C),
587 0x40D => array(0x45D),
588 0x40E => array(0x45E),
589 0x40F => array(0x45F),
590 0x410 => array(0x430),
591 0x411 => array(0x431),
592 0x412 => array(0x432),
593 0x413 => array(0x433),
594 0x414 => array(0x434),
595 0x415 => array(0x435),
596 0x416 => array(0x436),
597 0x417 => array(0x437),
598 0x418 => array(0x438),
599 0x419 => array(0x439),
600 0x41A => array(0x43A),
601 0x41B => array(0x43B),
602 0x41C => array(0x43C),
603 0x41D => array(0x43D),
604 0x41E => array(0x43E),
605 0x41F => array(0x43F),
606 0x420 => array(0x440),
607 0x421 => array(0x441),
608 0x422 => array(0x442),
609 0x423 => array(0x443),
610 0x424 => array(0x444),
611 0x425 => array(0x445),
612 0x426 => array(0x446),
613 0x427 => array(0x447),
614 0x428 => array(0x448),
615 0x429 => array(0x449),
616 0x42A => array(0x44A),
617 0x42B => array(0x44B),
618 0x42C => array(0x44C),
619 0x42D => array(0x44D),
620 0x42E => array(0x44E),
621 0x42F => array(0x44F),
622 0x460 => array(0x461),
623 0x462 => array(0x463),
624 0x464 => array(0x465),
625 0x466 => array(0x467),
626 0x468 => array(0x469),
627 0x46A => array(0x46B),
628 0x46C => array(0x46D),
629 0x46E => array(0x46F),
630 0x470 => array(0x471),
631 0x472 => array(0x473),
632 0x474 => array(0x475),
633 0x476 => array(0x477),
634 0x478 => array(0x479),
635 0x47A => array(0x47B),
636 0x47C => array(0x47D),
637 0x47E => array(0x47F),
638 0x480 => array(0x481),
639 0x48A => array(0x48B),
640 0x48C => array(0x48D),
641 0x48E => array(0x48F),
642 0x490 => array(0x491),
643 0x492 => array(0x493),
644 0x494 => array(0x495),
645 0x496 => array(0x497),
646 0x498 => array(0x499),
647 0x49A => array(0x49B),
648 0x49C => array(0x49D),
649 0x49E => array(0x49F),
650 0x4A0 => array(0x4A1),
651 0x4A2 => array(0x4A3),
652 0x4A4 => array(0x4A5),
653 0x4A6 => array(0x4A7),
654 0x4A8 => array(0x4A9),
655 0x4AA => array(0x4AB),
656 0x4AC => array(0x4AD),
657 0x4AE => array(0x4AF),
658 0x4B0 => array(0x4B1),
659 0x4B2 => array(0x4B3),
660 0x4B4 => array(0x4B5),
661 0x4B6 => array(0x4B7),
662 0x4B8 => array(0x4B9),
663 0x4BA => array(0x4BB),
664 0x4BC => array(0x4BD),
665 0x4BE => array(0x4BF),
666 0x4C1 => array(0x4C2),
667 0x4C3 => array(0x4C4),
668 0x4C5 => array(0x4C6),
669 0x4C7 => array(0x4C8),
670 0x4C9 => array(0x4CA),
671 0x4CB => array(0x4CC),
672 0x4CD => array(0x4CE),
673 0x4D0 => array(0x4D1),
674 0x4D2 => array(0x4D3),
675 0x4D4 => array(0x4D5),
676 0x4D6 => array(0x4D7),
677 0x4D8 => array(0x4D9),
678 0x4DA => array(0x4DB),
679 0x4DC => array(0x4DD),
680 0x4DE => array(0x4DF),
681 0x4E0 => array(0x4E1),
682 0x4E2 => array(0x4E3),
683 0x4E4 => array(0x4E5),
684 0x4E6 => array(0x4E7),
685 0x4E8 => array(0x4E9),
686 0x4EA => array(0x4EB),
687 0x4EC => array(0x4ED),
688 0x4EE => array(0x4EF),
689 0x4F0 => array(0x4F1),
690 0x4F2 => array(0x4F3),
691 0x4F4 => array(0x4F5),
692 0x4F8 => array(0x4F9),
693 0x500 => array(0x501),
694 0x502 => array(0x503),
695 0x504 => array(0x505),
696 0x506 => array(0x507),
697 0x508 => array(0x509),
698 0x50A => array(0x50B),
699 0x50C => array(0x50D),
700 0x50E => array(0x50F),
701 0x531 => array(0x561),
702 0x532 => array(0x562),
703 0x533 => array(0x563),
704 0x534 => array(0x564),
705 0x535 => array(0x565),
706 0x536 => array(0x566),
707 0x537 => array(0x567),
708 0x538 => array(0x568),
709 0x539 => array(0x569),
710 0x53A => array(0x56A),
711 0x53B => array(0x56B),
712 0x53C => array(0x56C),
713 0x53D => array(0x56D),
714 0x53E => array(0x56E),
715 0x53F => array(0x56F),
716 0x540 => array(0x570),
717 0x541 => array(0x571),
718 0x542 => array(0x572),
719 0x543 => array(0x573),
720 0x544 => array(0x574),
721 0x545 => array(0x575),
722 0x546 => array(0x576),
723 0x547 => array(0x577),
724 0x548 => array(0x578),
725 0x549 => array(0x579),
726 0x54A => array(0x57A),
727 0x54B => array(0x57B),
728 0x54C => array(0x57C),
729 0x54D => array(0x57D),
730 0x54E => array(0x57E),
731 0x54F => array(0x57F),
732 0x550 => array(0x580),
733 0x551 => array(0x581),
734 0x552 => array(0x582),
735 0x553 => array(0x583),
736 0x554 => array(0x584),
737 0x555 => array(0x585),
738 0x556 => array(0x586),
739 0x587 => array(0x565, 0x582),
740 0x1E00 => array(0x1E01),
741 0x1E02 => array(0x1E03),
742 0x1E04 => array(0x1E05),
743 0x1E06 => array(0x1E07),
744 0x1E08 => array(0x1E09),
745 0x1E0A => array(0x1E0B),
746 0x1E0C => array(0x1E0D),
747 0x1E0E => array(0x1E0F),
748 0x1E10 => array(0x1E11),
749 0x1E12 => array(0x1E13),
750 0x1E14 => array(0x1E15),
751 0x1E16 => array(0x1E17),
752 0x1E18 => array(0x1E19),
753 0x1E1A => array(0x1E1B),
754 0x1E1C => array(0x1E1D),
755 0x1E1E => array(0x1E1F),
756 0x1E20 => array(0x1E21),
757 0x1E22 => array(0x1E23),
758 0x1E24 => array(0x1E25),
759 0x1E26 => array(0x1E27),
760 0x1E28 => array(0x1E29),
761 0x1E2A => array(0x1E2B),
762 0x1E2C => array(0x1E2D),
763 0x1E2E => array(0x1E2F),
764 0x1E30 => array(0x1E31),
765 0x1E32 => array(0x1E33),
766 0x1E34 => array(0x1E35),
767 0x1E36 => array(0x1E37),
768 0x1E38 => array(0x1E39),
769 0x1E3A => array(0x1E3B),
770 0x1E3C => array(0x1E3D),
771 0x1E3E => array(0x1E3F),
772 0x1E40 => array(0x1E41),
773 0x1E42 => array(0x1E43),
774 0x1E44 => array(0x1E45),
775 0x1E46 => array(0x1E47),
776 0x1E48 => array(0x1E49),
777 0x1E4A => array(0x1E4B),
778 0x1E4C => array(0x1E4D),
779 0x1E4E => array(0x1E4F),
780 0x1E50 => array(0x1E51),
781 0x1E52 => array(0x1E53),
782 0x1E54 => array(0x1E55),
783 0x1E56 => array(0x1E57),
784 0x1E58 => array(0x1E59),
785 0x1E5A => array(0x1E5B),
786 0x1E5C => array(0x1E5D),
787 0x1E5E => array(0x1E5F),
788 0x1E60 => array(0x1E61),
789 0x1E62 => array(0x1E63),
790 0x1E64 => array(0x1E65),
791 0x1E66 => array(0x1E67),
792 0x1E68 => array(0x1E69),
793 0x1E6A => array(0x1E6B),
794 0x1E6C => array(0x1E6D),
795 0x1E6E => array(0x1E6F),
796 0x1E70 => array(0x1E71),
797 0x1E72 => array(0x1E73),
798 0x1E74 => array(0x1E75),
799 0x1E76 => array(0x1E77),
800 0x1E78 => array(0x1E79),
801 0x1E7A => array(0x1E7B),
802 0x1E7C => array(0x1E7D),
803 0x1E7E => array(0x1E7F),
804 0x1E80 => array(0x1E81),
805 0x1E82 => array(0x1E83),
806 0x1E84 => array(0x1E85),
807 0x1E86 => array(0x1E87),
808 0x1E88 => array(0x1E89),
809 0x1E8A => array(0x1E8B),
810 0x1E8C => array(0x1E8D),
811 0x1E8E => array(0x1E8F),
812 0x1E90 => array(0x1E91),
813 0x1E92 => array(0x1E93),
814 0x1E94 => array(0x1E95),
815 0x1E96 => array(0x68, 0x331),
816 0x1E97 => array(0x74, 0x308),
817 0x1E98 => array(0x77, 0x30A),
818 0x1E99 => array(0x79, 0x30A),
819 0x1E9A => array(0x61, 0x2BE),
820 0x1E9B => array(0x1E61),
821 0x1EA0 => array(0x1EA1),
822 0x1EA2 => array(0x1EA3),
823 0x1EA4 => array(0x1EA5),
824 0x1EA6 => array(0x1EA7),
825 0x1EA8 => array(0x1EA9),
826 0x1EAA => array(0x1EAB),
827 0x1EAC => array(0x1EAD),
828 0x1EAE => array(0x1EAF),
829 0x1EB0 => array(0x1EB1),
830 0x1EB2 => array(0x1EB3),
831 0x1EB4 => array(0x1EB5),
832 0x1EB6 => array(0x1EB7),
833 0x1EB8 => array(0x1EB9),
834 0x1EBA => array(0x1EBB),
835 0x1EBC => array(0x1EBD),
836 0x1EBE => array(0x1EBF),
837 0x1EC0 => array(0x1EC1),
838 0x1EC2 => array(0x1EC3),
839 0x1EC4 => array(0x1EC5),
840 0x1EC6 => array(0x1EC7),
841 0x1EC8 => array(0x1EC9),
842 0x1ECA => array(0x1ECB),
843 0x1ECC => array(0x1ECD),
844 0x1ECE => array(0x1ECF),
845 0x1ED0 => array(0x1ED1),
846 0x1ED2 => array(0x1ED3),
847 0x1ED4 => array(0x1ED5),
848 0x1ED6 => array(0x1ED7),
849 0x1ED8 => array(0x1ED9),
850 0x1EDA => array(0x1EDB),
851 0x1EDC => array(0x1EDD),
852 0x1EDE => array(0x1EDF),
853 0x1EE0 => array(0x1EE1),
854 0x1EE2 => array(0x1EE3),
855 0x1EE4 => array(0x1EE5),
856 0x1EE6 => array(0x1EE7),
857 0x1EE8 => array(0x1EE9),
858 0x1EEA => array(0x1EEB),
859 0x1EEC => array(0x1EED),
860 0x1EEE => array(0x1EEF),
861 0x1EF0 => array(0x1EF1),
862 0x1EF2 => array(0x1EF3),
863 0x1EF4 => array(0x1EF5),
864 0x1EF6 => array(0x1EF7),
865 0x1EF8 => array(0x1EF9),
866 0x1F08 => array(0x1F00),
867 0x1F09 => array(0x1F01),
868 0x1F0A => array(0x1F02),
869 0x1F0B => array(0x1F03),
870 0x1F0C => array(0x1F04),
871 0x1F0D => array(0x1F05),
872 0x1F0E => array(0x1F06),
873 0x1F0F => array(0x1F07),
874 0x1F18 => array(0x1F10),
875 0x1F19 => array(0x1F11),
876 0x1F1A => array(0x1F12),
877 0x1F1B => array(0x1F13),
878 0x1F1C => array(0x1F14),
879 0x1F1D => array(0x1F15),
880 0x1F28 => array(0x1F20),
881 0x1F29 => array(0x1F21),
882 0x1F2A => array(0x1F22),
883 0x1F2B => array(0x1F23),
884 0x1F2C => array(0x1F24),
885 0x1F2D => array(0x1F25),
886 0x1F2E => array(0x1F26),
887 0x1F2F => array(0x1F27),
888 0x1F38 => array(0x1F30),
889 0x1F39 => array(0x1F31),
890 0x1F3A => array(0x1F32),
891 0x1F3B => array(0x1F33),
892 0x1F3C => array(0x1F34),
893 0x1F3D => array(0x1F35),
894 0x1F3E => array(0x1F36),
895 0x1F3F => array(0x1F37),
896 0x1F48 => array(0x1F40),
897 0x1F49 => array(0x1F41),
898 0x1F4A => array(0x1F42),
899 0x1F4B => array(0x1F43),
900 0x1F4C => array(0x1F44),
901 0x1F4D => array(0x1F45),
902 0x1F50 => array(0x3C5, 0x313),
903 0x1F52 => array(0x3C5, 0x313, 0x300),
904 0x1F54 => array(0x3C5, 0x313, 0x301),
905 0x1F56 => array(0x3C5, 0x313, 0x342),
906 0x1F59 => array(0x1F51),
907 0x1F5B => array(0x1F53),
908 0x1F5D => array(0x1F55),
909 0x1F5F => array(0x1F57),
910 0x1F68 => array(0x1F60),
911 0x1F69 => array(0x1F61),
912 0x1F6A => array(0x1F62),
913 0x1F6B => array(0x1F63),
914 0x1F6C => array(0x1F64),
915 0x1F6D => array(0x1F65),
916 0x1F6E => array(0x1F66),
917 0x1F6F => array(0x1F67),
918 0x1F80 => array(0x1F00, 0x3B9),
919 0x1F81 => array(0x1F01, 0x3B9),
920 0x1F82 => array(0x1F02, 0x3B9),
921 0x1F83 => array(0x1F03, 0x3B9),
922 0x1F84 => array(0x1F04, 0x3B9),
923 0x1F85 => array(0x1F05, 0x3B9),
924 0x1F86 => array(0x1F06, 0x3B9),
925 0x1F87 => array(0x1F07, 0x3B9),
926 0x1F88 => array(0x1F00, 0x3B9),
927 0x1F89 => array(0x1F01, 0x3B9),
928 0x1F8A => array(0x1F02, 0x3B9),
929 0x1F8B => array(0x1F03, 0x3B9),
930 0x1F8C => array(0x1F04, 0x3B9),
931 0x1F8D => array(0x1F05, 0x3B9),
932 0x1F8E => array(0x1F06, 0x3B9),
933 0x1F8F => array(0x1F07, 0x3B9),
934 0x1F90 => array(0x1F20, 0x3B9),
935 0x1F91 => array(0x1F21, 0x3B9),
936 0x1F92 => array(0x1F22, 0x3B9),
937 0x1F93 => array(0x1F23, 0x3B9),
938 0x1F94 => array(0x1F24, 0x3B9),
939 0x1F95 => array(0x1F25, 0x3B9),
940 0x1F96 => array(0x1F26, 0x3B9),
941 0x1F97 => array(0x1F27, 0x3B9),
942 0x1F98 => array(0x1F20, 0x3B9),
943 0x1F99 => array(0x1F21, 0x3B9),
944 0x1F9A => array(0x1F22, 0x3B9),
945 0x1F9B => array(0x1F23, 0x3B9),
946 0x1F9C => array(0x1F24, 0x3B9),
947 0x1F9D => array(0x1F25, 0x3B9),
948 0x1F9E => array(0x1F26, 0x3B9),
949 0x1F9F => array(0x1F27, 0x3B9),
950 0x1FA0 => array(0x1F60, 0x3B9),
951 0x1FA1 => array(0x1F61, 0x3B9),
952 0x1FA2 => array(0x1F62, 0x3B9),
953 0x1FA3 => array(0x1F63, 0x3B9),
954 0x1FA4 => array(0x1F64, 0x3B9),
955 0x1FA5 => array(0x1F65, 0x3B9),
956 0x1FA6 => array(0x1F66, 0x3B9),
957 0x1FA7 => array(0x1F67, 0x3B9),
958 0x1FA8 => array(0x1F60, 0x3B9),
959 0x1FA9 => array(0x1F61, 0x3B9),
960 0x1FAA => array(0x1F62, 0x3B9),
961 0x1FAB => array(0x1F63, 0x3B9),
962 0x1FAC => array(0x1F64, 0x3B9),
963 0x1FAD => array(0x1F65, 0x3B9),
964 0x1FAE => array(0x1F66, 0x3B9),
965 0x1FAF => array(0x1F67, 0x3B9),
966 0x1FB2 => array(0x1F70, 0x3B9),
967 0x1FB3 => array(0x3B1, 0x3B9),
968 0x1FB4 => array(0x3AC, 0x3B9),
969 0x1FB6 => array(0x3B1, 0x342),
970 0x1FB7 => array(0x3B1, 0x342, 0x3B9),
971 0x1FB8 => array(0x1FB0),
972 0x1FB9 => array(0x1FB1),
973 0x1FBA => array(0x1F70),
974 0x1FBB => array(0x1F71),
975 0x1FBC => array(0x3B1, 0x3B9),
976 0x1FBE => array(0x3B9),
977 0x1FC2 => array(0x1F74, 0x3B9),
978 0x1FC3 => array(0x3B7, 0x3B9),
979 0x1FC4 => array(0x3AE, 0x3B9),
980 0x1FC6 => array(0x3B7, 0x342),
981 0x1FC7 => array(0x3B7, 0x342, 0x3B9),
982 0x1FC8 => array(0x1F72),
983 0x1FC9 => array(0x1F73),
984 0x1FCA => array(0x1F74),
985 0x1FCB => array(0x1F75),
986 0x1FCC => array(0x3B7, 0x3B9),
987 0x1FD2 => array(0x3B9, 0x308, 0x300),
988 0x1FD3 => array(0x3B9, 0x308, 0x301),
989 0x1FD6 => array(0x3B9, 0x342),
990 0x1FD7 => array(0x3B9, 0x308, 0x342),
991 0x1FD8 => array(0x1FD0),
992 0x1FD9 => array(0x1FD1),
993 0x1FDA => array(0x1F76),
994 0x1FDB => array(0x1F77),
995 0x1FE2 => array(0x3C5, 0x308, 0x300),
996 0x1FE3 => array(0x3C5, 0x308, 0x301),
997 0x1FE4 => array(0x3C1, 0x313),
998 0x1FE6 => array(0x3C5, 0x342),
999 0x1FE7 => array(0x3C5, 0x308, 0x342),
1000 0x1FE8 => array(0x1FE0),
1001 0x1FE9 => array(0x1FE1),
1002 0x1FEA => array(0x1F7A),
1003 0x1FEB => array(0x1F7B),
1004 0x1FEC => array(0x1FE5),
1005 0x1FF2 => array(0x1F7C, 0x3B9),
1006 0x1FF3 => array(0x3C9, 0x3B9),
1007 0x1FF4 => array(0x3CE, 0x3B9),
1008 0x1FF6 => array(0x3C9, 0x342),
1009 0x1FF7 => array(0x3C9, 0x342, 0x3B9),
1010 0x1FF8 => array(0x1F78),
1011 0x1FF9 => array(0x1F79),
1012 0x1FFA => array(0x1F7C),
1013 0x1FFB => array(0x1F7D),
1014 0x1FFC => array(0x3C9, 0x3B9),
1015 0x20A8 => array(0x72, 0x73),
1016 0x2102 => array(0x63),
1017 0x2103 => array(0xB0, 0x63),
1018 0x2107 => array(0x25B),
1019 0x2109 => array(0xB0, 0x66),
1020 0x210B => array(0x68),
1021 0x210C => array(0x68),
1022 0x210D => array(0x68),
1023 0x2110 => array(0x69),
1024 0x2111 => array(0x69),
1025 0x2112 => array(0x6C),
1026 0x2115 => array(0x6E),
1027 0x2116 => array(0x6E, 0x6F),
1028 0x2119 => array(0x70),
1029 0x211A => array(0x71),
1030 0x211B => array(0x72),
1031 0x211C => array(0x72),
1032 0x211D => array(0x72),
1033 0x2120 => array(0x73, 0x6D),
1034 0x2121 => array(0x74, 0x65, 0x6C),
1035 0x2122 => array(0x74, 0x6D),
1036 0x2124 => array(0x7A),
1037 0x2126 => array(0x3C9),
1038 0x2128 => array(0x7A),
1039 0x212A => array(0x6B),
1040 0x212B => array(0xE5),
1041 0x212C => array(0x62),
1042 0x212D => array(0x63),
1043 0x2130 => array(0x65),
1044 0x2131 => array(0x66),
1045 0x2133 => array(0x6D),
1046 0x213E => array(0x3B3),
1047 0x213F => array(0x3C0),
1048 0x2145 => array(0x64),
1049 0x2160 => array(0x2170),
1050 0x2161 => array(0x2171),
1051 0x2162 => array(0x2172),
1052 0x2163 => array(0x2173),
1053 0x2164 => array(0x2174),
1054 0x2165 => array(0x2175),
1055 0x2166 => array(0x2176),
1056 0x2167 => array(0x2177),
1057 0x2168 => array(0x2178),
1058 0x2169 => array(0x2179),
1059 0x216A => array(0x217A),
1060 0x216B => array(0x217B),
1061 0x216C => array(0x217C),
1062 0x216D => array(0x217D),
1063 0x216E => array(0x217E),
1064 0x216F => array(0x217F),
1065 0x24B6 => array(0x24D0),
1066 0x24B7 => array(0x24D1),
1067 0x24B8 => array(0x24D2),
1068 0x24B9 => array(0x24D3),
1069 0x24BA => array(0x24D4),
1070 0x24BB => array(0x24D5),
1071 0x24BC => array(0x24D6),
1072 0x24BD => array(0x24D7),
1073 0x24BE => array(0x24D8),
1074 0x24BF => array(0x24D9),
1075 0x24C0 => array(0x24DA),
1076 0x24C1 => array(0x24DB),
1077 0x24C2 => array(0x24DC),
1078 0x24C3 => array(0x24DD),
1079 0x24C4 => array(0x24DE),
1080 0x24C5 => array(0x24DF),
1081 0x24C6 => array(0x24E0),
1082 0x24C7 => array(0x24E1),
1083 0x24C8 => array(0x24E2),
1084 0x24C9 => array(0x24E3),
1085 0x24CA => array(0x24E4),
1086 0x24CB => array(0x24E5),
1087 0x24CC => array(0x24E6),
1088 0x24CD => array(0x24E7),
1089 0x24CE => array(0x24E8),
1090 0x24CF => array(0x24E9),
1091 0x3371 => array(0x68, 0x70, 0x61),
1092 0x3373 => array(0x61, 0x75),
1093 0x3375 => array(0x6F, 0x76),
1094 0x3380 => array(0x70, 0x61),
1095 0x3381 => array(0x6E, 0x61),
1096 0x3382 => array(0x3BC, 0x61),
1097 0x3383 => array(0x6D, 0x61),
1098 0x3384 => array(0x6B, 0x61),
1099 0x3385 => array(0x6B, 0x62),
1100 0x3386 => array(0x6D, 0x62),
1101 0x3387 => array(0x67, 0x62),
1102 0x338A => array(0x70, 0x66),
1103 0x338B => array(0x6E, 0x66),
1104 0x338C => array(0x3BC, 0x66),
1105 0x3390 => array(0x68, 0x7A),
1106 0x3391 => array(0x6B, 0x68, 0x7A),
1107 0x3392 => array(0x6D, 0x68, 0x7A),
1108 0x3393 => array(0x67, 0x68, 0x7A),
1109 0x3394 => array(0x74, 0x68, 0x7A),
1110 0x33A9 => array(0x70, 0x61),
1111 0x33AA => array(0x6B, 0x70, 0x61),
1112 0x33AB => array(0x6D, 0x70, 0x61),
1113 0x33AC => array(0x67, 0x70, 0x61),
1114 0x33B4 => array(0x70, 0x76),
1115 0x33B5 => array(0x6E, 0x76),
1116 0x33B6 => array(0x3BC, 0x76),
1117 0x33B7 => array(0x6D, 0x76),
1118 0x33B8 => array(0x6B, 0x76),
1119 0x33B9 => array(0x6D, 0x76),
1120 0x33BA => array(0x70, 0x77),
1121 0x33BB => array(0x6E, 0x77),
1122 0x33BC => array(0x3BC, 0x77),
1123 0x33BD => array(0x6D, 0x77),
1124 0x33BE => array(0x6B, 0x77),
1125 0x33BF => array(0x6D, 0x77),
1126 0x33C0 => array(0x6B, 0x3C9),
1127 0x33C1 => array(0x6D, 0x3C9), /*
1128 0x33C2 => array(0x61, 0x2E, 0x6D, 0x2E), */
1129 0x33C3 => array(0x62, 0x71),
1130 0x33C6 => array(0x63, 0x2215, 0x6B, 0x67),
1131 0x33C7 => array(0x63, 0x6F, 0x2E),
1132 0x33C8 => array(0x64, 0x62),
1133 0x33C9 => array(0x67, 0x79),
1134 0x33CB => array(0x68, 0x70),
1135 0x33CD => array(0x6B, 0x6B),
1136 0x33CE => array(0x6B, 0x6D),
1137 0x33D7 => array(0x70, 0x68),
1138 0x33D9 => array(0x70, 0x70, 0x6D),
1139 0x33DA => array(0x70, 0x72),
1140 0x33DC => array(0x73, 0x76),
1141 0x33DD => array(0x77, 0x62),
1142 0xFB00 => array(0x66, 0x66),
1143 0xFB01 => array(0x66, 0x69),
1144 0xFB02 => array(0x66, 0x6C),
1145 0xFB03 => array(0x66, 0x66, 0x69),
1146 0xFB04 => array(0x66, 0x66, 0x6C),
1147 0xFB05 => array(0x73, 0x74),
1148 0xFB06 => array(0x73, 0x74),
1149 0xFB13 => array(0x574, 0x576),
1150 0xFB14 => array(0x574, 0x565),
1151 0xFB15 => array(0x574, 0x56B),
1152 0xFB16 => array(0x57E, 0x576),
1153 0xFB17 => array(0x574, 0x56D),
1154 0xFF21 => array(0xFF41),
1155 0xFF22 => array(0xFF42),
1156 0xFF23 => array(0xFF43),
1157 0xFF24 => array(0xFF44),
1158 0xFF25 => array(0xFF45),
1159 0xFF26 => array(0xFF46),
1160 0xFF27 => array(0xFF47),
1161 0xFF28 => array(0xFF48),
1162 0xFF29 => array(0xFF49),
1163 0xFF2A => array(0xFF4A),
1164 0xFF2B => array(0xFF4B),
1165 0xFF2C => array(0xFF4C),
1166 0xFF2D => array(0xFF4D),
1167 0xFF2E => array(0xFF4E),
1168 0xFF2F => array(0xFF4F),
1169 0xFF30 => array(0xFF50),
1170 0xFF31 => array(0xFF51),
1171 0xFF32 => array(0xFF52),
1172 0xFF33 => array(0xFF53),
1173 0xFF34 => array(0xFF54),
1174 0xFF35 => array(0xFF55),
1175 0xFF36 => array(0xFF56),
1176 0xFF37 => array(0xFF57),
1177 0xFF38 => array(0xFF58),
1178 0xFF39 => array(0xFF59),
1179 0xFF3A => array(0xFF5A),
1180 0x10400 => array(0x10428),
1181 0x10401 => array(0x10429),
1182 0x10402 => array(0x1042A),
1183 0x10403 => array(0x1042B),
1184 0x10404 => array(0x1042C),
1185 0x10405 => array(0x1042D),
1186 0x10406 => array(0x1042E),
1187 0x10407 => array(0x1042F),
1188 0x10408 => array(0x10430),
1189 0x10409 => array(0x10431),
1190 0x1040A => array(0x10432),
1191 0x1040B => array(0x10433),
1192 0x1040C => array(0x10434),
1193 0x1040D => array(0x10435),
1194 0x1040E => array(0x10436),
1195 0x1040F => array(0x10437),
1196 0x10410 => array(0x10438),
1197 0x10411 => array(0x10439),
1198 0x10412 => array(0x1043A),
1199 0x10413 => array(0x1043B),
1200 0x10414 => array(0x1043C),
1201 0x10415 => array(0x1043D),
1202 0x10416 => array(0x1043E),
1203 0x10417 => array(0x1043F),
1204 0x10418 => array(0x10440),
1205 0x10419 => array(0x10441),
1206 0x1041A => array(0x10442),
1207 0x1041B => array(0x10443),
1208 0x1041C => array(0x10444),
1209 0x1041D => array(0x10445),
1210 0x1041E => array(0x10446),
1211 0x1041F => array(0x10447),
1212 0x10420 => array(0x10448),
1213 0x10421 => array(0x10449),
1214 0x10422 => array(0x1044A),
1215 0x10423 => array(0x1044B),
1216 0x10424 => array(0x1044C),
1217 0x10425 => array(0x1044D),
1218 0x1D400 => array(0x61),
1219 0x1D401 => array(0x62),
1220 0x1D402 => array(0x63),
1221 0x1D403 => array(0x64),
1222 0x1D404 => array(0x65),
1223 0x1D405 => array(0x66),
1224 0x1D406 => array(0x67),
1225 0x1D407 => array(0x68),
1226 0x1D408 => array(0x69),
1227 0x1D409 => array(0x6A),
1228 0x1D40A => array(0x6B),
1229 0x1D40B => array(0x6C),
1230 0x1D40C => array(0x6D),
1231 0x1D40D => array(0x6E),
1232 0x1D40E => array(0x6F),
1233 0x1D40F => array(0x70),
1234 0x1D410 => array(0x71),
1235 0x1D411 => array(0x72),
1236 0x1D412 => array(0x73),
1237 0x1D413 => array(0x74),
1238 0x1D414 => array(0x75),
1239 0x1D415 => array(0x76),
1240 0x1D416 => array(0x77),
1241 0x1D417 => array(0x78),
1242 0x1D418 => array(0x79),
1243 0x1D419 => array(0x7A),
1244 0x1D434 => array(0x61),
1245 0x1D435 => array(0x62),
1246 0x1D436 => array(0x63),
1247 0x1D437 => array(0x64),
1248 0x1D438 => array(0x65),
1249 0x1D439 => array(0x66),
1250 0x1D43A => array(0x67),
1251 0x1D43B => array(0x68),
1252 0x1D43C => array(0x69),
1253 0x1D43D => array(0x6A),
1254 0x1D43E => array(0x6B),
1255 0x1D43F => array(0x6C),
1256 0x1D440 => array(0x6D),
1257 0x1D441 => array(0x6E),
1258 0x1D442 => array(0x6F),
1259 0x1D443 => array(0x70),
1260 0x1D444 => array(0x71),
1261 0x1D445 => array(0x72),
1262 0x1D446 => array(0x73),
1263 0x1D447 => array(0x74),
1264 0x1D448 => array(0x75),
1265 0x1D449 => array(0x76),
1266 0x1D44A => array(0x77),
1267 0x1D44B => array(0x78),
1268 0x1D44C => array(0x79),
1269 0x1D44D => array(0x7A),
1270 0x1D468 => array(0x61),
1271 0x1D469 => array(0x62),
1272 0x1D46A => array(0x63),
1273 0x1D46B => array(0x64),
1274 0x1D46C => array(0x65),
1275 0x1D46D => array(0x66),
1276 0x1D46E => array(0x67),
1277 0x1D46F => array(0x68),
1278 0x1D470 => array(0x69),
1279 0x1D471 => array(0x6A),
1280 0x1D472 => array(0x6B),
1281 0x1D473 => array(0x6C),
1282 0x1D474 => array(0x6D),
1283 0x1D475 => array(0x6E),
1284 0x1D476 => array(0x6F),
1285 0x1D477 => array(0x70),
1286 0x1D478 => array(0x71),
1287 0x1D479 => array(0x72),
1288 0x1D47A => array(0x73),
1289 0x1D47B => array(0x74),
1290 0x1D47C => array(0x75),
1291 0x1D47D => array(0x76),
1292 0x1D47E => array(0x77),
1293 0x1D47F => array(0x78),
1294 0x1D480 => array(0x79),
1295 0x1D481 => array(0x7A),
1296 0x1D49C => array(0x61),
1297 0x1D49E => array(0x63),
1298 0x1D49F => array(0x64),
1299 0x1D4A2 => array(0x67),
1300 0x1D4A5 => array(0x6A),
1301 0x1D4A6 => array(0x6B),
1302 0x1D4A9 => array(0x6E),
1303 0x1D4AA => array(0x6F),
1304 0x1D4AB => array(0x70),
1305 0x1D4AC => array(0x71),
1306 0x1D4AE => array(0x73),
1307 0x1D4AF => array(0x74),
1308 0x1D4B0 => array(0x75),
1309 0x1D4B1 => array(0x76),
1310 0x1D4B2 => array(0x77),
1311 0x1D4B3 => array(0x78),
1312 0x1D4B4 => array(0x79),
1313 0x1D4B5 => array(0x7A),
1314 0x1D4D0 => array(0x61),
1315 0x1D4D1 => array(0x62),
1316 0x1D4D2 => array(0x63),
1317 0x1D4D3 => array(0x64),
1318 0x1D4D4 => array(0x65),
1319 0x1D4D5 => array(0x66),
1320 0x1D4D6 => array(0x67),
1321 0x1D4D7 => array(0x68),
1322 0x1D4D8 => array(0x69),
1323 0x1D4D9 => array(0x6A),
1324 0x1D4DA => array(0x6B),
1325 0x1D4DB => array(0x6C),
1326 0x1D4DC => array(0x6D),
1327 0x1D4DD => array(0x6E),
1328 0x1D4DE => array(0x6F),
1329 0x1D4DF => array(0x70),
1330 0x1D4E0 => array(0x71),
1331 0x1D4E1 => array(0x72),
1332 0x1D4E2 => array(0x73),
1333 0x1D4E3 => array(0x74),
1334 0x1D4E4 => array(0x75),
1335 0x1D4E5 => array(0x76),
1336 0x1D4E6 => array(0x77),
1337 0x1D4E7 => array(0x78),
1338 0x1D4E8 => array(0x79),
1339 0x1D4E9 => array(0x7A),
1340 0x1D504 => array(0x61),
1341 0x1D505 => array(0x62),
1342 0x1D507 => array(0x64),
1343 0x1D508 => array(0x65),
1344 0x1D509 => array(0x66),
1345 0x1D50A => array(0x67),
1346 0x1D50D => array(0x6A),
1347 0x1D50E => array(0x6B),
1348 0x1D50F => array(0x6C),
1349 0x1D510 => array(0x6D),
1350 0x1D511 => array(0x6E),
1351 0x1D512 => array(0x6F),
1352 0x1D513 => array(0x70),
1353 0x1D514 => array(0x71),
1354 0x1D516 => array(0x73),
1355 0x1D517 => array(0x74),
1356 0x1D518 => array(0x75),
1357 0x1D519 => array(0x76),
1358 0x1D51A => array(0x77),
1359 0x1D51B => array(0x78),
1360 0x1D51C => array(0x79),
1361 0x1D538 => array(0x61),
1362 0x1D539 => array(0x62),
1363 0x1D53B => array(0x64),
1364 0x1D53C => array(0x65),
1365 0x1D53D => array(0x66),
1366 0x1D53E => array(0x67),
1367 0x1D540 => array(0x69),
1368 0x1D541 => array(0x6A),
1369 0x1D542 => array(0x6B),
1370 0x1D543 => array(0x6C),
1371 0x1D544 => array(0x6D),
1372 0x1D546 => array(0x6F),
1373 0x1D54A => array(0x73),
1374 0x1D54B => array(0x74),
1375 0x1D54C => array(0x75),
1376 0x1D54D => array(0x76),
1377 0x1D54E => array(0x77),
1378 0x1D54F => array(0x78),
1379 0x1D550 => array(0x79),
1380 0x1D56C => array(0x61),
1381 0x1D56D => array(0x62),
1382 0x1D56E => array(0x63),
1383 0x1D56F => array(0x64),
1384 0x1D570 => array(0x65),
1385 0x1D571 => array(0x66),
1386 0x1D572 => array(0x67),
1387 0x1D573 => array(0x68),
1388 0x1D574 => array(0x69),
1389 0x1D575 => array(0x6A),
1390 0x1D576 => array(0x6B),
1391 0x1D577 => array(0x6C),
1392 0x1D578 => array(0x6D),
1393 0x1D579 => array(0x6E),
1394 0x1D57A => array(0x6F),
1395 0x1D57B => array(0x70),
1396 0x1D57C => array(0x71),
1397 0x1D57D => array(0x72),
1398 0x1D57E => array(0x73),
1399 0x1D57F => array(0x74),
1400 0x1D580 => array(0x75),
1401 0x1D581 => array(0x76),
1402 0x1D582 => array(0x77),
1403 0x1D583 => array(0x78),
1404 0x1D584 => array(0x79),
1405 0x1D585 => array(0x7A),
1406 0x1D5A0 => array(0x61),
1407 0x1D5A1 => array(0x62),
1408 0x1D5A2 => array(0x63),
1409 0x1D5A3 => array(0x64),
1410 0x1D5A4 => array(0x65),
1411 0x1D5A5 => array(0x66),
1412 0x1D5A6 => array(0x67),
1413 0x1D5A7 => array(0x68),
1414 0x1D5A8 => array(0x69),
1415 0x1D5A9 => array(0x6A),
1416 0x1D5AA => array(0x6B),
1417 0x1D5AB => array(0x6C),
1418 0x1D5AC => array(0x6D),
1419 0x1D5AD => array(0x6E),
1420 0x1D5AE => array(0x6F),
1421 0x1D5AF => array(0x70),
1422 0x1D5B0 => array(0x71),
1423 0x1D5B1 => array(0x72),
1424 0x1D5B2 => array(0x73),
1425 0x1D5B3 => array(0x74),
1426 0x1D5B4 => array(0x75),
1427 0x1D5B5 => array(0x76),
1428 0x1D5B6 => array(0x77),
1429 0x1D5B7 => array(0x78),
1430 0x1D5B8 => array(0x79),
1431 0x1D5B9 => array(0x7A),
1432 0x1D5D4 => array(0x61),
1433 0x1D5D5 => array(0x62),
1434 0x1D5D6 => array(0x63),
1435 0x1D5D7 => array(0x64),
1436 0x1D5D8 => array(0x65),
1437 0x1D5D9 => array(0x66),
1438 0x1D5DA => array(0x67),
1439 0x1D5DB => array(0x68),
1440 0x1D5DC => array(0x69),
1441 0x1D5DD => array(0x6A),
1442 0x1D5DE => array(0x6B),
1443 0x1D5DF => array(0x6C),
1444 0x1D5E0 => array(0x6D),
1445 0x1D5E1 => array(0x6E),
1446 0x1D5E2 => array(0x6F),
1447 0x1D5E3 => array(0x70),
1448 0x1D5E4 => array(0x71),
1449 0x1D5E5 => array(0x72),
1450 0x1D5E6 => array(0x73),
1451 0x1D5E7 => array(0x74),
1452 0x1D5E8 => array(0x75),
1453 0x1D5E9 => array(0x76),
1454 0x1D5EA => array(0x77),
1455 0x1D5EB => array(0x78),
1456 0x1D5EC => array(0x79),
1457 0x1D5ED => array(0x7A),
1458 0x1D608 => array(0x61),
1459 0x1D609 => array(0x62),
1460 0x1D60A => array(0x63),
1461 0x1D60B => array(0x64),
1462 0x1D60C => array(0x65),
1463 0x1D60D => array(0x66),
1464 0x1D60E => array(0x67),
1465 0x1D60F => array(0x68),
1466 0x1D610 => array(0x69),
1467 0x1D611 => array(0x6A),
1468 0x1D612 => array(0x6B),
1469 0x1D613 => array(0x6C),
1470 0x1D614 => array(0x6D),
1471 0x1D615 => array(0x6E),
1472 0x1D616 => array(0x6F),
1473 0x1D617 => array(0x70),
1474 0x1D618 => array(0x71),
1475 0x1D619 => array(0x72),
1476 0x1D61A => array(0x73),
1477 0x1D61B => array(0x74),
1478 0x1D61C => array(0x75),
1479 0x1D61D => array(0x76),
1480 0x1D61E => array(0x77),
1481 0x1D61F => array(0x78),
1482 0x1D620 => array(0x79),
1483 0x1D621 => array(0x7A),
1484 0x1D63C => array(0x61),
1485 0x1D63D => array(0x62),
1486 0x1D63E => array(0x63),
1487 0x1D63F => array(0x64),
1488 0x1D640 => array(0x65),
1489 0x1D641 => array(0x66),
1490 0x1D642 => array(0x67),
1491 0x1D643 => array(0x68),
1492 0x1D644 => array(0x69),
1493 0x1D645 => array(0x6A),
1494 0x1D646 => array(0x6B),
1495 0x1D647 => array(0x6C),
1496 0x1D648 => array(0x6D),
1497 0x1D649 => array(0x6E),
1498 0x1D64A => array(0x6F),
1499 0x1D64B => array(0x70),
1500 0x1D64C => array(0x71),
1501 0x1D64D => array(0x72),
1502 0x1D64E => array(0x73),
1503 0x1D64F => array(0x74),
1504 0x1D650 => array(0x75),
1505 0x1D651 => array(0x76),
1506 0x1D652 => array(0x77),
1507 0x1D653 => array(0x78),
1508 0x1D654 => array(0x79),
1509 0x1D655 => array(0x7A),
1510 0x1D670 => array(0x61),
1511 0x1D671 => array(0x62),
1512 0x1D672 => array(0x63),
1513 0x1D673 => array(0x64),
1514 0x1D674 => array(0x65),
1515 0x1D675 => array(0x66),
1516 0x1D676 => array(0x67),
1517 0x1D677 => array(0x68),
1518 0x1D678 => array(0x69),
1519 0x1D679 => array(0x6A),
1520 0x1D67A => array(0x6B),
1521 0x1D67B => array(0x6C),
1522 0x1D67C => array(0x6D),
1523 0x1D67D => array(0x6E),
1524 0x1D67E => array(0x6F),
1525 0x1D67F => array(0x70),
1526 0x1D680 => array(0x71),
1527 0x1D681 => array(0x72),
1528 0x1D682 => array(0x73),
1529 0x1D683 => array(0x74),
1530 0x1D684 => array(0x75),
1531 0x1D685 => array(0x76),
1532 0x1D686 => array(0x77),
1533 0x1D687 => array(0x78),
1534 0x1D688 => array(0x79),
1535 0x1D689 => array(0x7A),
1536 0x1D6A8 => array(0x3B1),
1537 0x1D6A9 => array(0x3B2),
1538 0x1D6AA => array(0x3B3),
1539 0x1D6AB => array(0x3B4),
1540 0x1D6AC => array(0x3B5),
1541 0x1D6AD => array(0x3B6),
1542 0x1D6AE => array(0x3B7),
1543 0x1D6AF => array(0x3B8),
1544 0x1D6B0 => array(0x3B9),
1545 0x1D6B1 => array(0x3BA),
1546 0x1D6B2 => array(0x3BB),
1547 0x1D6B3 => array(0x3BC),
1548 0x1D6B4 => array(0x3BD),
1549 0x1D6B5 => array(0x3BE),
1550 0x1D6B6 => array(0x3BF),
1551 0x1D6B7 => array(0x3C0),
1552 0x1D6B8 => array(0x3C1),
1553 0x1D6B9 => array(0x3B8),
1554 0x1D6BA => array(0x3C3),
1555 0x1D6BB => array(0x3C4),
1556 0x1D6BC => array(0x3C5),
1557 0x1D6BD => array(0x3C6),
1558 0x1D6BE => array(0x3C7),
1559 0x1D6BF => array(0x3C8),
1560 0x1D6C0 => array(0x3C9),
1561 0x1D6D3 => array(0x3C3),
1562 0x1D6E2 => array(0x3B1),
1563 0x1D6E3 => array(0x3B2),
1564 0x1D6E4 => array(0x3B3),
1565 0x1D6E5 => array(0x3B4),
1566 0x1D6E6 => array(0x3B5),
1567 0x1D6E7 => array(0x3B6),
1568 0x1D6E8 => array(0x3B7),
1569 0x1D6E9 => array(0x3B8),
1570 0x1D6EA => array(0x3B9),
1571 0x1D6EB => array(0x3BA),
1572 0x1D6EC => array(0x3BB),
1573 0x1D6ED => array(0x3BC),
1574 0x1D6EE => array(0x3BD),
1575 0x1D6EF => array(0x3BE),
1576 0x1D6F0 => array(0x3BF),
1577 0x1D6F1 => array(0x3C0),
1578 0x1D6F2 => array(0x3C1),
1579 0x1D6F3 => array(0x3B8),
1580 0x1D6F4 => array(0x3C3),
1581 0x1D6F5 => array(0x3C4),
1582 0x1D6F6 => array(0x3C5),
1583 0x1D6F7 => array(0x3C6),
1584 0x1D6F8 => array(0x3C7),
1585 0x1D6F9 => array(0x3C8),
1586 0x1D6FA => array(0x3C9),
1587 0x1D70D => array(0x3C3),
1588 0x1D71C => array(0x3B1),
1589 0x1D71D => array(0x3B2),
1590 0x1D71E => array(0x3B3),
1591 0x1D71F => array(0x3B4),
1592 0x1D720 => array(0x3B5),
1593 0x1D721 => array(0x3B6),
1594 0x1D722 => array(0x3B7),
1595 0x1D723 => array(0x3B8),
1596 0x1D724 => array(0x3B9),
1597 0x1D725 => array(0x3BA),
1598 0x1D726 => array(0x3BB),
1599 0x1D727 => array(0x3BC),
1600 0x1D728 => array(0x3BD),
1601 0x1D729 => array(0x3BE),
1602 0x1D72A => array(0x3BF),
1603 0x1D72B => array(0x3C0),
1604 0x1D72C => array(0x3C1),
1605 0x1D72D => array(0x3B8),
1606 0x1D72E => array(0x3C3),
1607 0x1D72F => array(0x3C4),
1608 0x1D730 => array(0x3C5),
1609 0x1D731 => array(0x3C6),
1610 0x1D732 => array(0x3C7),
1611 0x1D733 => array(0x3C8),
1612 0x1D734 => array(0x3C9),
1613 0x1D747 => array(0x3C3),
1614 0x1D756 => array(0x3B1),
1615 0x1D757 => array(0x3B2),
1616 0x1D758 => array(0x3B3),
1617 0x1D759 => array(0x3B4),
1618 0x1D75A => array(0x3B5),
1619 0x1D75B => array(0x3B6),
1620 0x1D75C => array(0x3B7),
1621 0x1D75D => array(0x3B8),
1622 0x1D75E => array(0x3B9),
1623 0x1D75F => array(0x3BA),
1624 0x1D760 => array(0x3BB),
1625 0x1D761 => array(0x3BC),
1626 0x1D762 => array(0x3BD),
1627 0x1D763 => array(0x3BE),
1628 0x1D764 => array(0x3BF),
1629 0x1D765 => array(0x3C0),
1630 0x1D766 => array(0x3C1),
1631 0x1D767 => array(0x3B8),
1632 0x1D768 => array(0x3C3),
1633 0x1D769 => array(0x3C4),
1634 0x1D76A => array(0x3C5),
1635 0x1D76B => array(0x3C6),
1636 0x1D76C => array(0x3C7),
1637 0x1D76D => array(0x3C8),
1638 0x1D76E => array(0x3C9),
1639 0x1D781 => array(0x3C3),
1640 0x1D790 => array(0x3B1),
1641 0x1D791 => array(0x3B2),
1642 0x1D792 => array(0x3B3),
1643 0x1D793 => array(0x3B4),
1644 0x1D794 => array(0x3B5),
1645 0x1D795 => array(0x3B6),
1646 0x1D796 => array(0x3B7),
1647 0x1D797 => array(0x3B8),
1648 0x1D798 => array(0x3B9),
1649 0x1D799 => array(0x3BA),
1650 0x1D79A => array(0x3BB),
1651 0x1D79B => array(0x3BC),
1652 0x1D79C => array(0x3BD),
1653 0x1D79D => array(0x3BE),
1654 0x1D79E => array(0x3BF),
1655 0x1D79F => array(0x3C0),
1656 0x1D7A0 => array(0x3C1),
1657 0x1D7A1 => array(0x3B8),
1658 0x1D7A2 => array(0x3C3),
1659 0x1D7A3 => array(0x3C4),
1660 0x1D7A4 => array(0x3C5),
1661 0x1D7A5 => array(0x3C6),
1662 0x1D7A6 => array(0x3C7),
1663 0x1D7A7 => array(0x3C8),
1664 0x1D7A8 => array(0x3C9),
1665 0x1D7BB => array(0x3C3),
1666 0x3F9 => array(0x3C3),
1667 0x1D2C => array(0x61),
1668 0x1D2D => array(0xE6),
1669 0x1D2E => array(0x62),
1670 0x1D30 => array(0x64),
1671 0x1D31 => array(0x65),
1672 0x1D32 => array(0x1DD),
1673 0x1D33 => array(0x67),
1674 0x1D34 => array(0x68),
1675 0x1D35 => array(0x69),
1676 0x1D36 => array(0x6A),
1677 0x1D37 => array(0x6B),
1678 0x1D38 => array(0x6C),
1679 0x1D39 => array(0x6D),
1680 0x1D3A => array(0x6E),
1681 0x1D3C => array(0x6F),
1682 0x1D3D => array(0x223),
1683 0x1D3E => array(0x70),
1684 0x1D3F => array(0x72),
1685 0x1D40 => array(0x74),
1686 0x1D41 => array(0x75),
1687 0x1D42 => array(0x77),
1688 0x213B => array(0x66, 0x61, 0x78),
1689 0x3250 => array(0x70, 0x74, 0x65),
1690 0x32CC => array(0x68, 0x67),
1691 0x32CE => array(0x65, 0x76),
1692 0x32CF => array(0x6C, 0x74, 0x64),
1693 0x337A => array(0x69, 0x75),
1694 0x33DE => array(0x76, 0x2215, 0x6D),
1695 0x33DF => array(0x61, 0x2215, 0x6D)
1699 * Normalization Combining Classes; Code Points not listed
1700 * got Combining Class 0.
1706 private static $_np_norm_combcls = array(
2056 private $_punycode_prefix = 'xn--';
2061 private $_invalid_ucs = 0x80000000;
2066 private $_max_ucs = 0x10FFFF;
2072 private $_base = 36;
2084 private $_tmax = 26;
2090 private $_skew = 38;
2096 private $_damp = 700;
2102 private $_initial_bias = 72;
2108 private $_initial_n = 0x80;
2119 private $_sbase = 0xAC00;
2124 private $_lbase = 0x1100;
2129 private $_vbase = 0x1161;
2134 private $_tbase = 0x11a7;
2140 private $_lcount = 19;
2146 private $_vcount = 21;
2152 private $_tcount = 28;
2160 private $_ncount = 588;
2163 * lcount * tcount * vcount
2168 private $_scount = 11172;
2171 * Default encoding for encode()'s input and decode()'s output is UTF-8;
2172 * Other possible encodings are ucs4_string and ucs4_array
2173 * See {@link setParams()} for how to select these
2178 private $_api_encoding = 'utf8';
2181 * Overlong UTF-8 encodings are forbidden
2186 private $_allow_overlong = false;
2189 * Behave strict or not
2194 private $_strict_mode = false;
2197 * Cached value indicating whether or not mbstring function overloading is
2200 * This is cached for optimal performance.
2203 * @see Net_IDNA_php5::_byteLength()
2205 private static $_mb_string_overload = null;
/**
 * Constructor.
 *
 * Derives the end of the syllable range that starts at $_sbase, applies any
 * options passed in, and fills the class-wide mbstring-overload cache on
 * first use.
 *
 * @param array $options Option => value pairs forwarded to setParams();
 *                       anything that is not an array is ignored
 */
public function __construct($options = null)
{
    // _sbase + (_lcount * _vcount * _tcount)
    $syllable_span = $this->_lcount * $this->_vcount * $this->_tcount;
    $this->_slast  = $this->_sbase + $syllable_span;

    if (is_array($options)) {
        $this->setParams($options);
    }

    // populate mbstring overloading cache if not set
    if (null === self::$_mb_string_overload) {
        $overloaded = false;

        // Bit 0x02 of mbstring.func_overload is only inspected when the
        // extension is loaded at all.
        if (extension_loaded('mbstring')) {
            $overloaded = (ini_get('mbstring.func_overload') & 0x02) === 0x02;
        }

        self::$_mb_string_overload = $overloaded;
    }
}
2235 * Sets a new option value. Available options and values:
2237 * [utf8 - Use either UTF-8 or ISO-8859-1 as input (true for UTF-8, false
2238 * otherwise); The output is always UTF-8]
2239 * [overlong - Unicode does not allow unnecessarily long encodings of chars,
2240 * to allow this, set this parameter to true, else to false;
2241 * default is false.]
2242 * [strict - true: strict mode, good for registration purposes - Causes errors
2243 * on failures; false: loose mode, ideal for "wildlife" applications
2244 * by silently ignoring errors and returning the original input instead]
2246 * @param mixed $option Parameter to set (string: single parameter; array of Parameter => Value pairs)
2247 * @param string $value Value to use (if parameter 1 is a string)
2248 * @return boolean true on success, false otherwise
2251 public function setParams($option, $value = false)
2253 if (!is_array($option)) {
2254 $option = array($option => $value);
2257 foreach ($option as $k => $v) {
2264 $this->_api_encoding = $v;
2268 throw new Exception('Set Parameter: Unknown parameter '.$v.' for option '.$k);
2274 $this->_allow_overlong = ($v) ? true : false;
2278 $this->_strict_mode = ($v) ? true : false;
2290 * Encode a given UTF-8 domain name.
2292 * @param string $decoded Domain name (UTF-8 or UCS-4)
2293 * [@param string $one_time_encoding Desired input encoding, see {@link setParams()}]
2294 * @return string Encoded Domain name (ACE string)
2295 * @return mixed processed string
2299 public function encode($decoded, $one_time_encoding = false)
2301 // Forcing conversion of input to UCS4 array
2302 // If one time encoding is given, use this, else the objects property
2303 switch (($one_time_encoding) ? $one_time_encoding : $this->_api_encoding) {
2305 $decoded = $this->_utf8_to_ucs4($decoded);
2308 $decoded = $this->_ucs4_string_to_ucs4($decoded);
2309 case 'ucs4_array': // No break; before this line. Catch case, but do nothing
2312 throw new Exception('Unsupported input format');
2315 // No input, no output, what else did you expect?
2316 if (empty($decoded)) return '';
2318 // Anchors for iteration
2323 foreach ($decoded as $k => $v) {
2324 // Make sure to use just the plain dot
2329 $decoded[$k] = 0x2E;
2330 // It's right, no break here
2331 // The codepoints above have to be converted to dots anyway
2333 // Stumbling across an anchoring character
2339 // Neither email addresses nor URLs allowed in strict mode
2340 if ($this->_strict_mode) {
2341 throw new Exception('Neither email addresses nor URLs are allowed in strict mode.');
2346 $encoded = $this->_encode(array_slice($decoded, $last_begin, (($k)-$last_begin)));
2348 $output .= $encoded;
2350 $output .= $this->_ucs4_to_utf8(array_slice($decoded, $last_begin, (($k)-$last_begin)));
2352 $output .= chr($decoded[$k]);
2354 $last_begin = $k + 1;
2358 // Catch the rest of the string
2360 $inp_len = sizeof($decoded);
2362 $encoded = $this->_encode(array_slice($decoded, $last_begin, (($inp_len)-$last_begin)));
2364 $output .= $encoded;
2366 $output .= $this->_ucs4_to_utf8(array_slice($decoded, $last_begin, (($inp_len)-$last_begin)));
2370 if ($output = $this->_encode($decoded)) {
2373 return $this->_ucs4_to_utf8($decoded);
2379 * Decode a given ACE domain name.
2381 * @param string $input Domain name (ACE string)
2382 * @param string $one_time_encoding Desired output encoding, see {@link setParams()}
2383 * @return string Decoded Domain name (UTF-8 or UCS-4)
2387 public function decode($input, $one_time_encoding = false)
2390 if ($one_time_encoding) {
2391 switch ($one_time_encoding) {
2397 throw new Exception('Unknown encoding '.$one_time_encoding);
2401 // Make sure to drop any newline characters around
2402 $input = trim($input);
2404 // Negotiate input and try to determine, whether it is a plain string,
2405 // an email address or something like a complete URL
2406 if (strpos($input, '@')) { // Maybe it is an email address
2407 // No no in strict mode
2408 if ($this->_strict_mode) {
2409 throw new Exception('Only simple domain name parts can be handled in strict mode');
2411 list($email_pref, $input) = explode('@', $input, 2);
2412 $arr = explode('.', $input);
2413 foreach ($arr as $k => $v) {
2414 $conv = $this->_decode($v);
2415 if ($conv) $arr[$k] = $conv;
2417 $return = $email_pref . '@' . join('.', $arr);
2418 } elseif (preg_match('![:\./]!', $input)) { // Or a complete domain name (with or without paths / parameters)
2419 // No no in strict mode
2420 if ($this->_strict_mode) {
2421 throw new Exception('Only simple domain name parts can be handled in strict mode');
2423 $parsed = parse_url($input);
2424 if (isset($parsed['host'])) {
2425 $arr = explode('.', $parsed['host']);
2426 foreach ($arr as $k => $v) {
2427 $conv = $this->_decode($v);
2428 if ($conv) $arr[$k] = $conv;
2430 $parsed['host'] = join('.', $arr);
2431 if (isset($parsed['scheme'])) {
2432 $parsed['scheme'] .= (strtolower($parsed['scheme']) == 'mailto') ? ':' : '://';
2434 $return = join('', $parsed);
2435 } else { // parse_url seems to have failed, try without it
2436 $arr = explode('.', $input);
2437 foreach ($arr as $k => $v) {
2438 $conv = $this->_decode($v);
2439 if ($conv) $arr[$k] = $conv;
2441 $return = join('.', $arr);
2443 } else { // Otherwise we consider it being a pure domain name string
2444 $return = $this->_decode($input);
2446 // The output is UTF-8 by default, other output formats need conversion here
2447 // If one time encoding is given, use this, else the objects property
2448 switch (($one_time_encoding) ? $one_time_encoding : $this->_api_encoding) {
2453 return $this->_ucs4_to_ucs4_string($this->_utf8_to_ucs4($return));
2456 return $this->_utf8_to_ucs4($return);
2459 throw new Exception('Unsupported output format');
2466 * The actual encoding algorithm.
2472 private function _encode($decoded)
2474 // We cannot encode a domain name containing the Punycode prefix
2475 $extract = self::_byteLength($this->_punycode_prefix);
2476 $check_pref = $this->_utf8_to_ucs4($this->_punycode_prefix);
2477 $check_deco = array_slice($decoded, 0, $extract);
2479 if ($check_pref == $check_deco) {
2480 throw new Exception('This is already a punycode string');
2482 // We will not try to encode strings consisting of basic code points only
2484 foreach ($decoded as $k => $v) {
2491 if ($this->_strict_mode) {
2492 throw new Exception('The given string does not contain encodable chars');
2500 $decoded = $this->_nameprep($decoded);
2501 } catch (Exception $e) {
2502 // hmm, serious - rethrow
2506 $deco_len = count($decoded);
2513 // How many chars have been consumed
2516 // Start with the prefix; copy it to output
2517 $encoded = $this->_punycode_prefix;
2520 // Copy all basic code points to output
2521 for ($i = 0; $i < $deco_len; ++$i) {
2522 $test = $decoded[$i];
2523 // Will match [0-9a-zA-Z-]
2524 if ((0x2F < $test && $test < 0x40)
2525 || (0x40 < $test && $test < 0x5B)
2526 || (0x60 < $test && $test <= 0x7B)
2527 || (0x2D == $test)) {
2528 $encoded .= chr($decoded[$i]);
2533 // All codepoints were basic ones
2534 if ($codecount == $deco_len) {
2538 // Start with the prefix; copy it to output
2539 $encoded = $this->_punycode_prefix . $encoded;
2541 // If we have basic code points in output, add an hyphen to the end
2546 // Now find and encode all non-basic code points
2548 $cur_code = $this->_initial_n;
2549 $bias = $this->_initial_bias;
2552 while ($codecount < $deco_len) {
2553 // Find the smallest code point >= the current code point and
2554 // remember the last occurrence of it in the input
2555 for ($i = 0, $next_code = $this->_max_ucs; $i < $deco_len; $i++) {
2556 if ($decoded[$i] >= $cur_code && $decoded[$i] <= $next_code) {
2557 $next_code = $decoded[$i];
2561 $delta += ($next_code - $cur_code) * ($codecount + 1);
2562 $cur_code = $next_code;
2564 // Scan input again and encode all characters whose code point is $cur_code
2565 for ($i = 0; $i < $deco_len; $i++) {
2566 if ($decoded[$i] < $cur_code) {
2568 } else if ($decoded[$i] == $cur_code) {
2569 for ($q = $delta, $k = $this->_base; 1; $k += $this->_base) {
2572 (($k >= $bias + $this->_tmax)? $this->_tmax : $k - $bias);
2578 $encoded .= $this->_encodeDigit(ceil($t + (($q - $t) % ($this->_base - $t))));
2579 $q = ($q - $t) / ($this->_base - $t);
2582 $encoded .= $this->_encodeDigit($q);
2583 $bias = $this->_adapt($delta, $codecount + 1, $is_first);
2598 * The actual decoding algorithm.
2604 private function _decode($encoded)
2606 // We do need to find the Punycode prefix
2607 if (!preg_match('!^' . preg_quote($this->_punycode_prefix, '!') . '!', $encoded)) {
2611 $encode_test = preg_replace('!^' . preg_quote($this->_punycode_prefix, '!') . '!', '', $encoded);
2613 // If nothing left after removing the prefix, it is hopeless
2614 if (!$encode_test) {
2618 // Find last occurrence of the delimiter
2619 $delim_pos = strrpos($encoded, '-');
2621 if ($delim_pos > self::_byteLength($this->_punycode_prefix)) {
2622 for ($k = self::_byteLength($this->_punycode_prefix); $k < $delim_pos; ++$k) {
2623 $decoded[] = ord($encoded{$k});
2629 $deco_len = count($decoded);
2630 $enco_len = self::_byteLength($encoded);
2632 // Wandering through the strings; init
2634 $bias = $this->_initial_bias;
2636 $char = $this->_initial_n;
2638 for ($enco_idx = ($delim_pos)? ($delim_pos + 1) : 0; $enco_idx < $enco_len; ++$deco_len) {
2639 for ($old_idx = $idx, $w = 1, $k = $this->_base; 1 ; $k += $this->_base) {
2640 $digit = $this->_decodeDigit($encoded{$enco_idx++});
2641 $idx += $digit * $w;
2643 $t = ($k <= $bias) ?
2645 (($k >= $bias + $this->_tmax)? $this->_tmax : ($k - $bias));
2651 $w = (int)($w * ($this->_base - $t));
2654 $bias = $this->_adapt($idx - $old_idx, $deco_len + 1, $is_first);
2656 $char += (int) ($idx / ($deco_len + 1));
2657 $idx %= ($deco_len + 1);
2659 if ($deco_len > 0) {
2660 // Make room for the decoded char
2661 for ($i = $deco_len; $i > $idx; $i--) {
2662 $decoded[$i] = $decoded[($i - 1)];
2666 $decoded[$idx++] = $char;
2670 return $this->_ucs4_to_utf8($decoded);
2671 } catch (Exception $e) {
/**
 * Adapt the bias according to the current code point and position.
 *
 * @param int   $delta    Delta accumulated since the previous adaptation
 * @param int   $npoints  Number of code points handled so far
 * @param mixed $is_first Truthy for the first adaptation (divides by $_damp),
 *                        otherwise the delta is halved
 * @return int The new bias
 */
private function _adapt($delta, $npoints, $is_first)
{
    $divisor = $is_first ? $this->_damp : 2;

    $delta  = (int) ($delta / $divisor);
    $delta += (int) ($delta / $npoints);

    // Both values are loop invariant.
    $span  = $this->_base - $this->_tmin;
    $limit = ($span * $this->_tmax) / 2;

    $k = 0;
    while ($delta > $limit) {
        $delta = (int) ($delta / $span);
        $k    += $this->_base;
    }

    return (int) ($k + ($span + 1) * $delta / ($delta + $this->_skew));
}
/**
 * Encoding a certain digit.
 *
 * @param int|float $d Digit value
 * @return string Single character: chr($d + 97) when $d is below 26,
 *                chr($d + 22) otherwise
 */
private function _encodeDigit($d)
{
    // 97 is 'a' (digits 0..25); 22 + 26 is '0' (digits 26 and up).
    $offset = ($d < 26) ? 97 : 22;

    return chr($d + $offset);
}
2705 * Decode a certain digit.
2709 private function _decodeDigit($cp)
2712 return ($cp - 48 < 10)? $cp - 22 : (($cp - 65 < 26)? $cp - 65 : (($cp - 97 < 26)? $cp - 97 : $this->_base));
2716 * Do Nameprep according to RFC3491 and RFC3454.
2718 * @param array $input Unicode Characters
2719 * @return array Unicode Characters, Nameprep'd
2723 private function _nameprep($input)
2727 // Walking through the input array, performing the required steps on each of
2728 // the input chars and putting the result into the output array
2729 // While mapping required chars we apply the canonical ordering
2731 foreach ($input as $v) {
2732 // Map to nothing == skip that code point
2733 if (in_array($v, self::$_np_map_nothing)) {
2737 // Try to find prohibited input
2738 if (in_array($v, self::$_np_prohibit) || in_array($v, self::$_general_prohibited)) {
2739 throw new Exception('NAMEPREP: Prohibited input U+' . sprintf('%08X', $v));
2742 foreach (self::$_np_prohibit_ranges as $range) {
2743 if ($range[0] <= $v && $v <= $range[1]) {
2744 throw new Exception('NAMEPREP: Prohibited input U+' . sprintf('%08X', $v));
2748 // Hangul syllable decomposition
2749 if (0xAC00 <= $v && $v <= 0xD7AF) {
2750 foreach ($this->_hangulDecompose($v) as $out) {
2753 } else if (isset(self::$_np_replacemaps[$v])) { // There's a decomposition mapping for that code point
2754 foreach ($this->_applyCannonicalOrdering(self::$_np_replacemaps[$v]) as $out) {
2762 // Combine code points
2766 $out_len = count($output);
2768 for ($i = 0; $i < $out_len; ++$i) {
2769 $class = $this->_getCombiningClass($output[$i]);
2771 if ((!$last_class || $last_class != $class) && $class) {
2773 $seq_len = $i - $last_starter;
2774 $out = $this->_combine(array_slice($output, $last_starter, $seq_len));
2776 // On match: Replace the last starter with the composed character and remove
2777 // the now redundant non-starter(s)
2779 $output[$last_starter] = $out;
2781 if (count($out) != $seq_len) {
2782 for ($j = $i + 1; $j < $out_len; ++$j) {
2783 $output[$j - 1] = $output[$j];
2786 unset($output[$out_len]);
2789 // Rewind the for loop by one, since there can be more possible compositions
2792 $last_class = ($i == $last_starter)? 0 : $this->_getCombiningClass($output[$i - 1]);
2798 // The current class is 0
2803 $last_class = $class;
/**
 * Decomposes a Hangul syllable
 * (see http://www.unicode.org/unicode/reports/tr15/#Hangul).
 *
 * @param integer $char 32bit UCS4 code point
 * @return array Either the decomposed parts (two or three code points) or the
 *               original 32bit value as a one value array
 */
private function _hangulDecompose($char)
{
    $offset = $char - $this->_sbase;

    // Outside [_sbase, _sbase + _scount): hand the code point back untouched.
    if ($offset < 0 || $offset >= $this->_scount) {
        return array($char);
    }

    $lead  = (int) ($this->_lbase + $offset / $this->_ncount);
    $vowel = (int) ($this->_vbase + ($offset % $this->_ncount) / $this->_tcount);
    $trail = $this->_tbase + $offset % $this->_tcount;

    $parts = array($lead, $vowel);

    // A trailing part equal to _tbase means "no trailing part".
    if ($trail != $this->_tbase) {
        $parts[] = $trail;
    }

    return $parts;
}
/**
 * Composes a Hangul syllable
 * (see http://www.unicode.org/unicode/reports/tr15/#Hangul).
 *
 * Walks the sequence once, merging an L + V pair into an LV syllable and an
 * LV + T pair into an LVT syllable. Anything else is copied through.
 *
 * @param array $input Decomposed UCS4 sequence
 * @return array UCS4 sequence with syllables composed
 */
private function _hangulCompose($input)
{
    $inp_len = count($input);

    if (!$inp_len) {
        return array();
    }

    $result   = array();
    $last     = $input[0];
    $result[] = $last; // copy first char from input to output

    for ($i = 1; $i < $inp_len; ++$i) {
        $char = $input[$i];

        // Find out, whether two current characters are L and V
        $lindex = $last - $this->_lbase;

        if (0 <= $lindex && $lindex < $this->_lcount) {
            $vindex = $char - $this->_vbase;

            if (0 <= $vindex && $vindex < $this->_vcount) {
                // create syllable of form LV
                $last = ($this->_sbase + ($lindex * $this->_vcount + $vindex) * $this->_tcount);
                $result[count($result) - 1] = $last; // reset last

                // discard char
                continue;
            }
        }

        // Find out, whether two current characters are LV and T
        $sindex = $last - $this->_sbase;

        if (0 <= $sindex && $sindex < $this->_scount && ($sindex % $this->_tcount) == 0) {
            $tindex = $char - $this->_tbase;

            // Both bounds are exclusive: index 0 is _tbase itself, which
            // stands for "no trailing part" and must not be swallowed, and
            // index _tcount would push $last into the following LV syllable.
            if (0 < $tindex && $tindex < $this->_tcount) {
                // create syllable of form LVT
                $last += $tindex;
                $result[count($result) - 1] = $last; // reset last

                // discard char
                continue;
            }
        }

        // if neither case was true, just add the character
        $last     = $char;
        $result[] = $char;
    }

    return $result;
}
/**
 * Returns the combining class of a certain wide char.
 *
 * @param integer $char Wide char to check (32bit integer)
 * @return integer Combining class if listed in $_np_norm_combcls, else 0
 */
private function _getCombiningClass($char)
{
    if (isset(self::$_np_norm_combcls[$char])) {
        return self::$_np_norm_combcls[$char];
    }

    return 0;
}
2916 * Applies the canonical ordering of a decomposed UCS4 sequence.
2918 * @param array $input Decomposed UCS4 sequence
2919 * @return array Ordered UCS4 sequence
2922 private function _applyCannonicalOrdering($input)
2925 $size = count($input);
2929 $last = $this->_getCombiningClass($input[0]);
2931 for ($i = 0; $i < $size - 1; ++$i) {
2932 $next = $this->_getCombiningClass($input[$i + 1]);
2934 if ($next != 0 && $last > $next) {
2935 // Move item leftward until it fits
2936 for ($j = $i + 1; $j > 0; --$j) {
2937 if ($this->_getCombiningClass($input[$j - 1]) <= $next) {
2942 $input[$j] = $input[$j - 1];
2943 $input[$j - 1] = $t;
2947 // Reentering the loop looking at the old character again
2959 * Do composition of a sequence of starter and non-starter.
2961 * @param array $input UCS4 Decomposed sequence
2962 * @return array Ordered UCS4 sequence
2965 private function _combine($input)
2967 $inp_len = count($input);
2969 // Is it a Hangul syllable?
2970 if (1 != $inp_len) {
2971 $hangul = $this->_hangulCompose($input);
2973 // This place is probably wrong
2974 if (count($hangul) != $inp_len) {
2979 foreach (self::$_np_replacemaps as $np_src => $np_target) {
2980 if ($np_target[0] != $input[0]) {
2984 if (count($np_target) != $inp_len) {
2990 foreach ($input as $k2 => $v2) {
2991 if ($v2 == $np_target[$k2]) {
3008 * This converts an UTF-8 encoded string to its UCS-4 (array) representation
3009 * By talking about UCS-4 we mean arrays of 32bit integers representing
3010 * each of the "chars". This is due to PHP not being able to handle strings with
3011 * bit depth different from 8. This applies to the reverse method _ucs4_to_utf8(), too.
3012 * The following UTF-8 encodings are supported:
3014 * bytes bits representation
3016 * 2 11 110xxxxx 10xxxxxx
3017 * 3 16 1110xxxx 10xxxxxx 10xxxxxx
3018 * 4 21 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
3019 * 5 26 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
3020 * 6 31 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
3022 * Each x represents a bit that can be used to store character data.
3026 private function _utf8_to_ucs4($input)
3030 $inp_len = self::_byteLength($input, '8bit');
3033 for ($k = 0; $k < $inp_len; ++$k) {
3034 $v = ord($input{$k}); // Extract byte from input string
3036 if ($v < 128) { // We found an ASCII char - put into stirng as is
3037 $output[$out_len] = $v;
3039 if ('add' == $mode) {
3040 throw new Exception('Conversion from UTF-8 to UCS-4 failed: malformed input at byte '.$k);
3045 if ('next' == $mode) { // Try to find the next start byte; determine the width of the Unicode char
3049 if ($v >> 5 == 6) { // &110xxxxx 10xxxxx
3050 $next_byte = 0; // Tells, how many times subsequent bitmasks must rotate 6bits to the left
3051 $v = ($v - 192) << 6;
3052 } elseif ($v >> 4 == 14) { // &1110xxxx 10xxxxxx 10xxxxxx
3054 $v = ($v - 224) << 12;
3055 } elseif ($v >> 3 == 30) { // &11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
3057 $v = ($v - 240) << 18;
3058 } elseif ($v >> 2 == 62) { // &111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
3060 $v = ($v - 248) << 24;
3061 } elseif ($v >> 1 == 126) { // &1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
3063 $v = ($v - 252) << 30;
3065 throw new Exception('This might be UTF-8, but I don\'t understand it at byte '.$k);
3068 if ('add' == $mode) {
3069 $output[$out_len] = (int) $v;
3074 if ('add' == $mode) {
3075 if (!$this->_allow_overlong && $test == 'range') {
3077 if (($v < 0xA0 && $start_byte == 0xE0) || ($v < 0x90 && $start_byte == 0xF0) || ($v > 0x8F && $start_byte == 0xF4)) {
3078 throw new Exception('Bogus UTF-8 character detected (out of legal range) at byte '.$k);
3082 if ($v >> 6 == 2) { // Bit mask must be 10xxxxxx
3083 $v = ($v - 128) << ($next_byte * 6);
3084 $output[($out_len - 1)] += $v;
3087 throw new Exception('Conversion from UTF-8 to UCS-4 failed: malformed input at byte '.$k);
3090 if ($next_byte < 0) {
/**
 * Convert UCS-4 array into UTF-8 string.
 *
 * Values below 128 are emitted as a single byte; larger values use the
 * 2- to 6-byte forms.
 *
 * @param array $input Code points as integers
 * @return string UTF-8 byte string
 * @throws Exception When a value is not below 1 << 31
 */
private function _ucs4_to_utf8($input)
{
    $output = '';

    // The key is bound so the error message below can name the offending
    // element.
    foreach ($input as $k => $v) {
        if ($v < 128) {
            // 7bit are transferred literally
            $output .= chr($v);
        } else if ($v < (1 << 11)) {
            // 2 bytes
            $output .= chr(192 + ($v >> 6))
                . chr(128 + ($v & 63));
        } else if ($v < (1 << 16)) {
            // 3 bytes
            $output .= chr(224 + ($v >> 12))
                . chr(128 + (($v >> 6) & 63))
                . chr(128 + ($v & 63));
        } else if ($v < (1 << 21)) {
            // 4 bytes
            $output .= chr(240 + ($v >> 18))
                . chr(128 + (($v >> 12) & 63))
                . chr(128 + (($v >> 6) & 63))
                . chr(128 + ($v & 63));
        } else if ($v < (1 << 26)) {
            // 5 bytes
            $output .= chr(248 + ($v >> 24))
                . chr(128 + (($v >> 18) & 63))
                . chr(128 + (($v >> 12) & 63))
                . chr(128 + (($v >> 6) & 63))
                . chr(128 + ($v & 63));
        } else if ($v < (1 << 31)) {
            // 6 bytes
            $output .= chr(252 + ($v >> 30))
                . chr(128 + (($v >> 24) & 63))
                . chr(128 + (($v >> 18) & 63))
                . chr(128 + (($v >> 12) & 63))
                . chr(128 + (($v >> 6) & 63))
                . chr(128 + ($v & 63));
        } else {
            throw new Exception('Conversion from UCS-4 to UTF-8 failed: malformed input at byte ' . $k);
        }
    }

    return $output;
}
/**
 * Convert UCS-4 array into UCS-4 string
 *
 * Every code point is written as four bytes, most significant byte first.
 * This is the byte order _ucs4_string_to_ucs4() expects when reading the
 * string back.
 *
 * @param array $input Code points as integers
 *
 * @return string Binary string holding four bytes per code point
 */
private function _ucs4_to_ucs4_string($input)
{
    $output = '';

    // Take array values and split output to 4 bytes per value.
    // Shift first, then mask with 255 (&11111111): the former expression
    // masked before shifting, which always selected the lowest byte, and it
    // appended decimal numbers instead of raw bytes.
    foreach ($input as $v) {
        $output .= chr(($v >> 24) & 255)
            . chr(($v >> 16) & 255)
            . chr(($v >> 8) & 255)
            . chr($v & 255);
    }

    return $output;
}
/**
 * Convert UCS-4 string into UCS-4 array
 *
 * The string is read in groups of four bytes, most significant byte first,
 * and each group becomes one integer in the result.
 *
 * @param string $input Binary string holding four bytes per code point
 *
 * @return array Code points as integers; empty array for empty input
 * @throws Exception If the byte length of the input is not a multiple of 4
 */
private function _ucs4_string_to_ucs4($input)
{
    $output = array();

    $inp_len = self::_byteLength($input);
    // Input length must be dividable by 4
    if ($inp_len % 4) {
        throw new Exception('Input UCS4 string is broken');
    }

    // Empty input - return empty output
    if (!$inp_len) {
        return $output;
    }

    for ($i = 0, $out_len = -1; $i < $inp_len; ++$i) {
        // Increment output position every 4 input bytes
        if (!($i % 4)) {
            $out_len++;
            $output[$out_len] = 0;
        }

        // Square brackets instead of the curly brace offset syntax, which is
        // no longer accepted by PHP 8. The byte at offset 0 of a group lands
        // in bits 24-31, the byte at offset 3 in bits 0-7.
        $output[$out_len] += ord($input[$i]) << (8 * (3 - ($i % 4)));
    }

    return $output;
}
/**
 * Echo hex representation of UCS4 sequence.
 *
 * Prints one line per element in the form "[key] => HEX", optionally
 * followed by the bit pattern of the value in parentheses.
 *
 * @param array   $input       UCS4 sequence
 * @param boolean $include_bit Include bitmask in output
 *
 * @return void
 */
private static function _showHex($input, $include_bit = false)
{
    foreach ($input as $k => $v) {
        echo '[', $k, '] => ', sprintf('%X', $v);

        if ($include_bit) {
            // _showBitmask() is a private static member of this class, so
            // it is addressed through self:: rather than a foreign class name.
            echo ' (', self::_showBitmask($v), ')';
        }

        echo "\n";
    }
}
/**
 * Gives you a bit representation of given Byte (8 bits), Word (16 bits) or DWord (32 bits)
 * Output width is automagically determined
 *
 * @param int $octet Value to render
 *
 * @return string Sequence of '0' and '1' characters, highest bit first
 */
private static function _showBitmask($octet)
{
    // Index of the highest bit to print: a byte unless the value needs more room
    $top_bit = 7;
    if ($octet >= (1 << 16)) {
        $top_bit = 31;
    } elseif ($octet >= (1 << 8)) {
        $top_bit = 15;
    }

    // Walk from the highest bit down to bit 0
    $bits = '';
    $pos  = $top_bit;
    while ($pos >= 0) {
        if ($octet & (1 << $pos)) {
            $bits .= '1';
        } else {
            $bits .= '0';
        }
        --$pos;
    }

    return $bits;
}
/**
 * Gets the length of a string in bytes even if mbstring function
 * overloading is turned on
 *
 * @param string $string the string for which to get the length.
 *
 * @return integer the length of the string in bytes.
 *
 * @see Net_IDNA_php5::$_mb_string_overload
 */
private static function _byteLength($string)
{
    // With the overload flag set the byte count is taken from mb_strlen()
    // using the '8bit' encoding; otherwise strlen() is used directly.
    return self::$_mb_string_overload
        ? mb_strlen($string, '8bit')
        : strlen((binary)$string);
}