5 * XML Utilities package
11 * Copyright (c) 2003-2008 Stephan Schmidt <schst@php.net>
12 * All rights reserved.
14 * Redistribution and use in source and binary forms, with or without
15 * modification, are permitted provided that the following conditions
18 * * Redistributions of source code must retain the above copyright
19 * notice, this list of conditions and the following disclaimer.
20 * * Redistributions in binary form must reproduce the above copyright
21 * notice, this list of conditions and the following disclaimer in the
22 * documentation and/or other materials provided with the distribution.
23 * * The name of the author may not be used to endorse or promote products
24 * derived from this software without specific prior written permission.
26 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
27 * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
28 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
30 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
34 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
40 * @author Stephan Schmidt <schst@php.net>
41 * @copyright 2003-2008 Stephan Schmidt <schst@php.net>
42 * @license http://opensource.org/licenses/bsd-license New BSD License
44 * @link http://pear.php.net/package/XML_Util
48 * Error code for invalid chars in XML name
50 define('XML_UTIL_ERROR_INVALID_CHARS', 51);
53 * Error code for invalid start character in XML name
55 define('XML_UTIL_ERROR_INVALID_START', 52);
58 * Error code for non-scalar tag content
60 define('XML_UTIL_ERROR_NON_SCALAR_CONTENT', 60);
63 * Error code for missing tag name
65 define('XML_UTIL_ERROR_NO_TAG_NAME', 61);
68 * Replace XML entities
70 define('XML_UTIL_REPLACE_ENTITIES', 1);
73 * Embed content in a CData Section
75 define('XML_UTIL_CDATA_SECTION', 5);
78 * Do not replace entities
80 define('XML_UTIL_ENTITIES_NONE', 0);
83 * Replace all XML entities
84 * This setting will replace <, >, ", ' and &
86 define('XML_UTIL_ENTITIES_XML', 1);
89 * Replace only required XML entities
90 * This setting will replace <, " and &
92 define('XML_UTIL_ENTITIES_XML_REQUIRED', 2);
95 * Replace HTML entities
96 * @link http://www.php.net/htmlentities
98 define('XML_UTIL_ENTITIES_HTML', 3);
101 * Do not collapse any empty tags.
103 define('XML_UTIL_COLLAPSE_NONE', 0);
106 * Collapse all empty tags.
108 define('XML_UTIL_COLLAPSE_ALL', 1);
111 * Collapse only empty XHTML tags that have no end tag.
113 define('XML_UTIL_COLLAPSE_XHTML_ONLY', 2);
116 * Utility class for working with XML documents
120 * @author Stephan Schmidt <schst@php.net>
121 * @copyright 2003-2008 Stephan Schmidt <schst@php.net>
122 * @license http://opensource.org/licenses/bsd-license New BSD License
123 * @version Release: 1.4.2
124 * @link http://pear.php.net/package/XML_Util
131 * @return string $version API version
133 public static function apiVersion()
139 * Replace XML entities
141 * With the optional second parameter, you may select, which
142 * entities should be replaced.
145 * require_once 'XML/Util.php';
147 * // replace XML entities:
148 * $string = XML_Util::replaceEntities('This string contains < & >.');
151 * With the optional third parameter, you may pass the character encoding
153 * require_once 'XML/Util.php';
155 * // replace XML entities in UTF-8:
156 * $string = XML_Util::replaceEntities(
157 * 'This string contains < & > as well as ä, ö, ß, à and ê',
158 * XML_UTIL_ENTITIES_HTML,
163 * @param string $string string where XML special chars
165 * @param int $replaceEntities setting for entities in attribute values
166 * (one of XML_UTIL_ENTITIES_XML,
167 * XML_UTIL_ENTITIES_XML_REQUIRED,
168 * XML_UTIL_ENTITIES_HTML)
169 * @param string $encoding encoding value (if any)...
170 * must be a valid encoding as determined
171 * by the htmlentities() function
173 * @return string string with replaced chars
174 * @see reverseEntities()
176 public static function replaceEntities(
177 $string, $replaceEntities = XML_UTIL_ENTITIES_XML, $encoding = 'ISO-8859-1'
179 switch ($replaceEntities) {
180 case XML_UTIL_ENTITIES_XML:
192 case XML_UTIL_ENTITIES_XML_REQUIRED:
202 case XML_UTIL_ENTITIES_HTML:
203 return htmlentities($string, ENT_COMPAT, $encoding);
210 * Reverse XML entities
212 * With the optional second parameter, you may select, which
213 * entities should be reversed.
216 * require_once 'XML/Util.php';
218 * // reverse XML entities:
219 * $string = XML_Util::reverseEntities('This string contains < & >.');
222 * With the optional third parameter, you may pass the character encoding
224 * require_once 'XML/Util.php';
226 * // reverse XML entities in UTF-8:
227 * $string = XML_Util::reverseEntities(
228 * 'This string contains < & > as well as'
229 * . ' ä, ö, ß, à and ê',
230 * XML_UTIL_ENTITIES_HTML,
235 * @param string $string string where XML special chars
237 * @param int $replaceEntities setting for entities in attribute values
238 * (one of XML_UTIL_ENTITIES_XML,
239 * XML_UTIL_ENTITIES_XML_REQUIRED,
240 * XML_UTIL_ENTITIES_HTML)
241 * @param string $encoding encoding value (if any)...
242 * must be a valid encoding as determined
243 * by the html_entity_decode() function
245 * @return string string with replaced chars
246 * @see replaceEntities()
248 public static function reverseEntities(
249 $string, $replaceEntities = XML_UTIL_ENTITIES_XML, $encoding = 'ISO-8859-1'
251 switch ($replaceEntities) {
252 case XML_UTIL_ENTITIES_XML:
264 case XML_UTIL_ENTITIES_XML_REQUIRED:
274 case XML_UTIL_ENTITIES_HTML:
275 return html_entity_decode($string, ENT_COMPAT, $encoding);
282 * Build an xml declaration
285 * require_once 'XML/Util.php';
287 * // get an XML declaration:
288 * $xmlDecl = XML_Util::getXMLDeclaration('1.0', 'UTF-8', true);
291 * @param string $version xml version
292 * @param string $encoding character encoding
293 * @param bool $standalone document is standalone (or not)
295 * @return string xml declaration
296 * @uses attributesToString() to serialize the attributes of the
299 public static function getXMLDeclaration(
300 $version = '1.0', $encoding = null, $standalone = null
303 'version' => $version,
306 if ($encoding !== null) {
307 $attributes['encoding'] = $encoding;
309 // add standalone, if specified
310 if ($standalone !== null) {
311 $attributes['standalone'] = $standalone ? 'yes' : 'no';
316 XML_Util::attributesToString($attributes, false)
321 * Build a document type declaration
324 * require_once 'XML/Util.php';
326 * // get a doctype declaration:
327 * $xmlDecl = XML_Util::getDocTypeDeclaration('rootTag','myDocType.dtd');
330 * @param string $root name of the root tag
331 * @param string $uri uri of the doctype definition
332 * (or array with uri and public id)
333 * @param string $internalDtd internal dtd entries
335 * @return string doctype declaration
338 public static function getDocTypeDeclaration(
339 $root, $uri = null, $internalDtd = null
341 if (is_array($uri)) {
342 $ref = sprintf(' PUBLIC "%s" "%s"', $uri['id'], $uri['uri']);
343 } elseif (!empty($uri)) {
344 $ref = sprintf(' SYSTEM "%s"', $uri);
349 if (empty($internalDtd)) {
350 return sprintf('<!DOCTYPE %s%s>', $root, $ref);
352 return sprintf("<!DOCTYPE %s%s [\n%s\n]>", $root, $ref, $internalDtd);
357 * Create string representation of an attribute list
360 * require_once 'XML/Util.php';
362 * // build an attribute string
368 * $attList = XML_Util::attributesToString($att);
371 * @param array $attributes attribute array
372 * @param bool|array $sort sort attribute list alphabetically,
373 * may also be an assoc array containing
374 * the keys 'sort', 'multiline', 'indent',
375 * 'linebreak' and 'entities'
376 * @param bool $multiline use linebreaks, if more than
377 * one attribute is given
378 * @param string $indent string used for indentation of
379 * multiline attributes
380 * @param string $linebreak string used for linebreaks of
381 * multiline attributes
382 * @param int $entities setting for entities in attribute values
383 * (one of XML_UTIL_ENTITIES_NONE,
384 * XML_UTIL_ENTITIES_XML,
385 * XML_UTIL_ENTITIES_XML_REQUIRED,
386 * XML_UTIL_ENTITIES_HTML)
388 * @return string string representation of the attributes
389 * @uses replaceEntities() to replace XML entities in attribute values
390 * @todo allow sort also to be an options array
392 public static function attributesToString(
393 $attributes, $sort = true, $multiline = false,
394 $indent = ' ', $linebreak = "\n", $entities = XML_UTIL_ENTITIES_XML
397 * second parameter may be an array
399 if (is_array($sort)) {
400 if (isset($sort['multiline'])) {
401 $multiline = $sort['multiline'];
403 if (isset($sort['indent'])) {
404 $indent = $sort['indent'];
406 if (isset($sort['linebreak'])) {
407 $multiline = $sort['linebreak'];
409 if (isset($sort['entities'])) {
410 $entities = $sort['entities'];
412 if (isset($sort['sort'])) {
413 $sort = $sort['sort'];
419 if (is_array($attributes) && !empty($attributes)) {
423 if (!$multiline || count($attributes) == 1) {
424 foreach ($attributes as $key => $value) {
425 if ($entities != XML_UTIL_ENTITIES_NONE) {
426 if ($entities === XML_UTIL_CDATA_SECTION) {
427 $entities = XML_UTIL_ENTITIES_XML;
429 $value = XML_Util::replaceEntities($value, $entities);
431 $string .= ' ' . $key . '="' . $value . '"';
435 foreach ($attributes as $key => $value) {
436 if ($entities != XML_UTIL_ENTITIES_NONE) {
437 $value = XML_Util::replaceEntities($value, $entities);
440 $string .= ' ' . $key . '="' . $value . '"';
443 $string .= $linebreak . $indent . $key . '="' . $value . '"';
452 * Collapses empty tags.
454 * @param string $xml XML
455 * @param int $mode Whether to collapse all empty tags (XML_UTIL_COLLAPSE_ALL)
456 * or only XHTML (XML_UTIL_COLLAPSE_XHTML_ONLY) ones.
460 public static function collapseEmptyTags($xml, $mode = XML_UTIL_COLLAPSE_ALL)
462 if (preg_match('~<([^>])+/>~s', $xml, $matches)) {
463 // it's already an empty tag
467 case XML_UTIL_COLLAPSE_ALL:
471 '(https?://[^:\s]+:\w+)' . // <http://foo.com:bar ($1)
472 '|(\w+:\w+)' . // <foo:bar ($2)
473 '|(\w+)' . // <foo ($3)
475 '([^>]*)' . // attributes ($4)
477 '<\/(\1|\2|\3)>' . // 1, 2, or 3 again ($5)
482 '${1}${2}${3}' . // tag (only one should have been populated)
483 '${4}' . // attributes
486 return (preg_replace($preg1, $preg2, $xml)?:$xml);
488 case XML_UTIL_COLLAPSE_XHTML_ONLY:
491 '/<(area|base(?:font)?|br|col|frame|hr|img|input|isindex|link|meta|'
492 . 'param)([^>]*)><\/\\1>/s',
498 case XML_UTIL_COLLAPSE_NONE:
508 * This method will call XML_Util::createTagFromArray(), which
512 * require_once 'XML/Util.php';
514 * // create an XML tag:
515 * $tag = XML_Util::createTag('myNs:myTag',
516 * array('foo' => 'bar'),
517 * 'This is inside the tag',
518 * 'http://www.w3c.org/myNs#');
521 * @param string $qname qualified tagname (including namespace)
522 * @param array $attributes array containing attributes
523 * @param mixed $content the content
524 * @param string $namespaceUri URI of the namespace
525 * @param int $replaceEntities whether to replace XML special chars in
526 * content, embed it in a CData section
528 * @param bool $multiline whether to create a multiline tag where
529 * each attribute gets written to a single line
530 * @param string $indent string used to indent attributes
531 * (_auto indents attributes so they start
532 * at the same column)
533 * @param string $linebreak string used for linebreaks
534 * @param bool $sortAttributes Whether to sort the attributes or not
535 * @param int $collapseTagMode How to handle a content-less, and thus collapsible, tag
537 * @return string XML tag
538 * @see createTagFromArray()
539 * @uses createTagFromArray() to create the tag
541 public static function createTag(
542 $qname, $attributes = array(), $content = null,
543 $namespaceUri = null, $replaceEntities = XML_UTIL_REPLACE_ENTITIES,
544 $multiline = false, $indent = '_auto', $linebreak = "\n",
545 $sortAttributes = true, $collapseTagMode = XML_UTIL_COLLAPSE_ALL
549 'attributes' => $attributes
553 if ($content !== null) {
554 $tag['content'] = $content;
558 if ($namespaceUri !== null) {
559 $tag['namespaceUri'] = $namespaceUri;
562 return XML_Util::createTagFromArray(
563 $tag, $replaceEntities, $multiline,
564 $indent, $linebreak, $sortAttributes,
570 * Create a tag from an array.
571 * This method expects an array in the following format
574 * // qualified name of the tag
577 * // namespace prefix (optional, if qname is specified or no namespace)
578 * 'namespace' => $namespace
580 * // local part of the tagname (optional, if qname is specified)
581 * 'localpart' => $localpart,
583 * // array containing all attributes (optional)
584 * 'attributes' => array(),
586 * // tag content (optional)
587 * 'content' => $content,
589 * // namespaceUri for the given namespace (optional)
590 * 'namespaceUri' => $namespaceUri
595 * require_once 'XML/Util.php';
598 * 'qname' => 'foo:bar',
599 * 'namespaceUri' => 'http://foo.com',
600 * 'attributes' => array('key' => 'value', 'argh' => 'fruit&vegetable'),
601 * 'content' => 'I\'m inside the tag',
603 * // creating a tag with qualified name and namespaceUri
604 * $string = XML_Util::createTagFromArray($tag);
607 * @param array $tag tag definition
608 * @param int $replaceEntities whether to replace XML special chars in
609 * content, embed it in a CData section
611 * @param bool $multiline whether to create a multiline tag where each
612 * attribute gets written to a single line
613 * @param string $indent string used to indent attributes
614 * (_auto indents attributes so they start
615 * at the same column)
616 * @param string $linebreak string used for linebreaks
617 * @param bool $sortAttributes Whether to sort the attributes or not
618 * @param int $collapseTagMode How to handle a content-less, and thus collapsible, tag
620 * @return string XML tag
623 * @uses attributesToString() to serialize the attributes of the tag
624 * @uses splitQualifiedName() to get local part and namespace of a qualified name
625 * @uses createCDataSection()
626 * @uses collapseEmptyTags()
629 public static function createTagFromArray(
630 $tag, $replaceEntities = XML_UTIL_REPLACE_ENTITIES,
631 $multiline = false, $indent = '_auto', $linebreak = "\n",
632 $sortAttributes = true, $collapseTagMode = XML_UTIL_COLLAPSE_ALL
634 if (isset($tag['content']) && !is_scalar($tag['content'])) {
635 return XML_Util::raiseError(
636 'Supplied non-scalar value as tag content',
637 XML_UTIL_ERROR_NON_SCALAR_CONTENT
641 if (!isset($tag['qname']) && !isset($tag['localPart'])) {
642 return XML_Util::raiseError(
643 'You must either supply a qualified name '
644 . '(qname) or local tag name (localPart).',
645 XML_UTIL_ERROR_NO_TAG_NAME
649 // if no attributes have been set, use empty attributes
650 if (!isset($tag['attributes']) || !is_array($tag['attributes'])) {
651 $tag['attributes'] = array();
654 if (isset($tag['namespaces'])) {
655 foreach ($tag['namespaces'] as $ns => $uri) {
656 $tag['attributes']['xmlns:' . $ns] = $uri;
660 if (!isset($tag['qname'])) {
661 // qualified name is not given
663 // check for namespace
664 if (isset($tag['namespace']) && !empty($tag['namespace'])) {
665 $tag['qname'] = $tag['namespace'] . ':' . $tag['localPart'];
667 $tag['qname'] = $tag['localPart'];
669 } elseif (isset($tag['namespaceUri']) && !isset($tag['namespace'])) {
670 // namespace URI is set, but no namespace
672 $parts = XML_Util::splitQualifiedName($tag['qname']);
674 $tag['localPart'] = $parts['localPart'];
675 if (isset($parts['namespace'])) {
676 $tag['namespace'] = $parts['namespace'];
680 if (isset($tag['namespaceUri']) && !empty($tag['namespaceUri'])) {
681 // is a namespace given
682 if (isset($tag['namespace']) && !empty($tag['namespace'])) {
683 $tag['attributes']['xmlns:' . $tag['namespace']]
684 = $tag['namespaceUri'];
686 // define this Uri as the default namespace
687 $tag['attributes']['xmlns'] = $tag['namespaceUri'];
691 if (!array_key_exists('content', $tag)) {
692 $tag['content'] = '';
695 // check for multiline attributes
696 if ($multiline === true) {
697 if ($indent === '_auto') {
698 $indent = str_repeat(' ', (strlen($tag['qname'])+2));
702 // create attribute list
703 $attList = XML_Util::attributesToString(
705 $sortAttributes, $multiline, $indent, $linebreak
708 switch ($replaceEntities) {
709 case XML_UTIL_ENTITIES_NONE:
711 case XML_UTIL_CDATA_SECTION:
712 $tag['content'] = XML_Util::createCDataSection($tag['content']);
715 $tag['content'] = XML_Util::replaceEntities(
716 $tag['content'], $replaceEntities
721 '<%s%s>%s</%s>', $tag['qname'], $attList, $tag['content'],
725 return self::collapseEmptyTags($tag, $collapseTagMode);
729 * Create a start element
732 * require_once 'XML/Util.php';
734 * // create an XML start element:
735 * $tag = XML_Util::createStartElement('myNs:myTag',
736 * array('foo' => 'bar') ,'http://www.w3c.org/myNs#');
739 * @param string $qname qualified tagname (including namespace)
740 * @param array $attributes array containing attributes
741 * @param string $namespaceUri URI of the namespace
742 * @param bool $multiline whether to create a multiline tag where each
743 * attribute gets written to a single line
744 * @param string $indent string used to indent attributes (_auto indents
745 * attributes so they start at the same column)
746 * @param string $linebreak string used for linebreaks
747 * @param bool $sortAttributes Whether to sort the attributes or not
749 * @return string XML start element
750 * @see createEndElement(), createTag()
752 public static function createStartElement(
753 $qname, $attributes = array(), $namespaceUri = null,
754 $multiline = false, $indent = '_auto', $linebreak = "\n",
755 $sortAttributes = true
757 // if no attributes have been set, use empty attributes
758 if (!isset($attributes) || !is_array($attributes)) {
759 $attributes = array();
762 if ($namespaceUri != null) {
763 $parts = XML_Util::splitQualifiedName($qname);
766 // check for multiline attributes
767 if ($multiline === true) {
768 if ($indent === '_auto') {
769 $indent = str_repeat(' ', (strlen($qname)+2));
773 if ($namespaceUri != null) {
774 // is a namespace given
775 if (isset($parts['namespace']) && !empty($parts['namespace'])) {
776 $attributes['xmlns:' . $parts['namespace']] = $namespaceUri;
778 // define this Uri as the default namespace
779 $attributes['xmlns'] = $namespaceUri;
783 // create attribute list
784 $attList = XML_Util::attributesToString(
785 $attributes, $sortAttributes,
786 $multiline, $indent, $linebreak
788 $element = sprintf('<%s%s>', $qname, $attList);
793 * Create an end element
796 * require_once 'XML/Util.php';
798 * // create an XML end element:
799 * $tag = XML_Util::createEndElement('myNs:myTag');
802 * @param string $qname qualified tagname (including namespace)
804 * @return string XML end element
805 * @see createStartElement(), createTag()
807 public static function createEndElement($qname)
809 $element = sprintf('</%s>', $qname);
814 * Create an XML comment
817 * require_once 'XML/Util.php';
819 * // create an XML comment:
820 * $tag = XML_Util::createComment('I am a comment');
823 * @param string $content content of the comment
825 * @return string XML comment
827 public static function createComment($content)
829 $comment = sprintf('<!-- %s -->', $content);
834 * Create a CData section
837 * require_once 'XML/Util.php';
839 * // create a CData section
840 * $tag = XML_Util::createCDataSection('I am content.');
843 * @param string $data data of the CData section
845 * @return string CData section with content
847 public static function createCDataSection($data)
851 preg_replace('/\]\]>/', ']]]]><![CDATA[>', strval($data))
856 * Split qualified name and return namespace and local part
859 * require_once 'XML/Util.php';
861 * // split qualified tag
862 * $parts = XML_Util::splitQualifiedName('xslt:stylesheet');
864 * the returned array will contain two elements:
867 * 'namespace' => 'xslt',
868 * 'localPart' => 'stylesheet'
872 * @param string $qname qualified tag name
873 * @param string $defaultNs default namespace (optional)
875 * @return array array containing namespace and local part
877 public static function splitQualifiedName($qname, $defaultNs = null)
879 if (strstr($qname, ':')) {
880 $tmp = explode(':', $qname);
882 'namespace' => $tmp[0],
883 'localPart' => $tmp[1]
887 'namespace' => $defaultNs,
888 'localPart' => $qname
893 * Check, whether string is valid XML name
895 * <p>XML names are used for tagname, attribute names and various
896 * other, lesser known entities.</p>
897 * <p>An XML name may only consist of alphanumeric characters,
898 * dashes, underscores and periods, and has to start with a letter
899 * or an underscore.</p>
902 * require_once 'XML/Util.php';
905 * $result = XML_Util::isValidName('invalidTag?');
906 * if (is_a($result, 'PEAR_Error')) {
907 * print 'Invalid XML name: ' . $result->getMessage();
911 * @param string $string string that should be checked
913 * @return mixed true, if string is a valid XML name, PEAR error otherwise
915 * @todo support for other charsets
916 * @todo PEAR CS - unable to avoid 85-char limit on second preg_match
918 public static function isValidName($string)
920 // check for invalid chars
921 if (!preg_match('/^[[:alpha:]_]\\z/', $string{0})) {
922 return XML_Util::raiseError(
923 'XML names may only start with letter or underscore',
924 XML_UTIL_ERROR_INVALID_START
928 // check for invalid chars
930 '/^([[:alpha:]_]([[:alnum:]\-\.]*)?:)?'
931 . '[[:alpha:]_]([[:alnum:]\_\-\.]+)?\\z/',
935 return XML_Util::raiseError(
936 'XML names may only contain alphanumeric '
937 . 'chars, period, hyphen, colon and underscores',
938 XML_UTIL_ERROR_INVALID_CHARS
946 * Replacement for PEAR::raiseError
948 * Avoids the necessity to always require
951 * @param string $msg error message
952 * @param int $code error code
955 * @todo PEAR CS - should this use include_once instead?
957 public static function raiseError($msg, $code)
959 include_once 'PEAR.php';
960 return PEAR::raiseError($msg, $code);