]> git.mxchange.org Git - friendica.git/blob - src/Util/XML.php
[ActivityPub] Add support for more attachments structures
[friendica.git] / src / Util / XML.php
1 <?php
2 /**
3  * @copyright Copyright (C) 2020, Friendica
4  *
5  * @license GNU AGPL version 3 or any later version
6  *
7  * This program is free software: you can redistribute it and/or modify
8  * it under the terms of the GNU Affero General Public License as
9  * published by the Free Software Foundation, either version 3 of the
10  * License, or (at your option) any later version.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15  * GNU Affero General Public License for more details.
16  *
17  * You should have received a copy of the GNU Affero General Public License
18  * along with this program.  If not, see <https://www.gnu.org/licenses/>.
19  *
20  */
21
22 namespace Friendica\Util;
23
24 use DOMXPath;
25 use Friendica\Core\Logger;
26 use Friendica\Core\System;
27 use SimpleXMLElement;
28
/**
 * This class contains methods to work with XML data
 */
class XML
{
	/**
	 * Creates an XML structure out of a given array
	 *
	 * Only the first entry of $array is used when called from outside: its key
	 * becomes the root element and its value (scalar or nested array) the content.
	 * Nested arrays may use the special key "@attributes" (array of attributes
	 * for the enclosing element) and integer keys (scalar text of the enclosing
	 * element).
	 *
	 * @param array  $array         The array of the XML structure that will be generated
	 * @param object $xml           The created XML will be returned by reference
	 *                              (a \DOMDocument for the outermost call)
	 * @param bool   $remove_header Should the XML header be removed or not?
	 * @param array  $namespaces    List of namespaces (prefix => URI, "" for the default namespace)
	 * @param bool   $root          internally used parameter. Mustn't be used from outside.
	 *
	 * @return string|null The created XML (null for an empty array and for the internal recursive calls)
	 */
	public static function fromArray($array, &$xml, $remove_header = false, $namespaces = [], $root = true)
	{
		if ($root) {
			// The loop body returns, so only the first array entry is processed
			foreach ($array as $key => $value) {
				// The namespace declarations are kept apart from the tag name:
				// they belong to the opening tag only, never to the closing tag.
				$xmlns = '';
				foreach ($namespaces as $nskey => $nsvalue) {
					$xmlns .= ' xmlns' . ($nskey == '' ? '' : ':') . $nskey . '="' . $nsvalue . '"';
				}

				if (is_array($value)) {
					$root_element = new \SimpleXMLElement('<' . $key . $xmlns . '/>');
					self::fromArray($value, $root_element, $remove_header, $namespaces, false);
				} else {
					$root_element = new \SimpleXMLElement('<' . $key . $xmlns . '>' . self::escape($value) . '</' . $key . '>');
				}

				$dom = dom_import_simplexml($root_element)->ownerDocument;
				$dom->formatOutput = true;
				$xml = $dom;

				$xml_text = $dom->saveXML();

				if ($remove_header) {
					// Strip the "<?xml ... ?>" declaration whatever its length is
					$header_end = (strpos($xml_text, '<?xml') === 0) ? strpos($xml_text, '?>') : false;
					$xml_text = trim(($header_end === false) ? $xml_text : substr($xml_text, $header_end + 2));
				}

				return $xml_text;
			}
		}

		$element = null;
		foreach ($array as $key => $value) {
			// Until a child has been added, attributes and text go to the parent element
			if (!isset($element) && isset($xml)) {
				$element = $xml;
			}

			if (is_integer($key)) {
				// Integer keys carry the text content of the current element
				/// @todo: handle nested array values
				if (isset($element) && is_scalar($value)) {
					$element[0] = $value;
				}
				continue;
			}

			$element_parts = explode(":", $key);
			if ((count($element_parts) > 1) && isset($namespaces[$element_parts[0]])) {
				$namespace = $namespaces[$element_parts[0]];
			} elseif (isset($namespaces[""])) {
				$namespace = $namespaces[""];
			} else {
				$namespace = null;
			}

			// Remove undefined namespaces from the key
			if ((count($element_parts) > 1) && is_null($namespace)) {
				$key = $element_parts[1];
			}

			if (substr($key, 0, 11) == "@attributes") {
				if (!isset($element) || !is_array($value)) {
					continue;
				}

				foreach ($value as $attr_key => $attr_value) {
					// Attributes only get a namespace when they carry a known prefix
					$attr_parts = explode(":", $attr_key);
					if ((count($attr_parts) > 1) && isset($namespaces[$attr_parts[0]])) {
						$attr_namespace = $namespaces[$attr_parts[0]];
					} else {
						$attr_namespace = null;
					}

					$element->addAttribute($attr_key, $attr_value, $attr_namespace);
				}

				continue;
			}

			if (is_array($value)) {
				$element = $xml->addChild($key, null, $namespace);
				self::fromArray($value, $element, $remove_header, $namespaces, false);
			} else {
				$element = $xml->addChild($key, self::escape($value), $namespace);
			}
		}
	}

	/**
	 * Copies an XML object
	 *
	 * The source element is added below the target as a new child named
	 * $elementname; children of the source are copied recursively. Only element
	 * names and text are copied.
	 *
	 * @param object $source      The XML source
	 * @param object $target      The XML target
	 * @param string $elementname Name of the XML element of the target
	 * @return void
	 */
	public static function copy(&$source, &$target, $elementname)
	{
		if (count($source->children()) == 0) {
			// Leaf element: copy its (escaped) text
			$target->addChild($elementname, self::escape($source));
		} else {
			$child = $target->addChild($elementname);
			foreach ($source->children() as $childfield => $childentry) {
				self::copy($childentry, $child, $childfield);
			}
		}
	}

	/**
	 * Create an XML element
	 *
	 * The element is created for the given document but not inserted into it.
	 * Element value and attribute values are passed through escape().
	 *
	 * @param \DOMDocument $doc        XML root
	 * @param string       $element    XML element name
	 * @param string       $value      XML value
	 * @param array        $attributes array containing the attributes
	 *
	 * @return \DOMElement XML element object
	 */
	public static function createElement(\DOMDocument $doc, $element, $value = "", $attributes = [])
	{
		$node = $doc->createElement($element, self::escape($value));

		foreach ($attributes as $attr_name => $attr_value) {
			$attribute = $doc->createAttribute($attr_name);
			$attribute->value = self::escape($attr_value);
			$node->appendChild($attribute);
		}

		return $node;
	}

	/**
	 * Create an XML element and append it to the parent object
	 *
	 * @param \DOMDocument $doc        XML root
	 * @param object       $parent     parent object
	 * @param string       $element    XML element name
	 * @param string       $value      XML value
	 * @param array        $attributes array containing the attributes
	 * @return void
	 */
	public static function addElement(\DOMDocument $doc, $parent, $element, $value = "", $attributes = [])
	{
		$parent->appendChild(self::createElement($doc, $element, $value, $attributes));
	}

	/**
	 * Convert an XML document to a normalised, case-corrected array
	 *   used by webfinger
	 *
	 * All keys are converted to lower case, all values are trimmed. The result
	 * of the outermost call is wrapped in an array keyed by the root element name.
	 *
	 * @param object  $xml_element     The XML document
	 * @param integer $recursion_depth recursion counter for internal use - default 0
	 *
	 * @return array|string|null The array from the xml element or the string,
	 *                           null when the nesting is deeper than 512 levels
	 */
	public static function elementToArray($xml_element, &$recursion_depth = 0)
	{
		// If we're getting too deep, bail out
		if ($recursion_depth > 512) {
			return null;
		}

		// Keep the element itself: its text and its name are needed later on
		$xml_element_copy = '';
		if ($xml_element instanceof \SimpleXMLElement) {
			$xml_element_copy = $xml_element;
			$xml_element = get_object_vars($xml_element);
		}

		if (!is_array($xml_element)) {
			return trim(strval($xml_element));
		}

		if (count($xml_element) <= 0) {
			// No children and no attributes: the element is represented by its text
			return trim(strval($xml_element_copy));
		}

		$result_array = [];
		foreach ($xml_element as $key => $value) {
			$recursion_depth++;
			$result_array[strtolower($key)] = self::elementToArray($value, $recursion_depth);
			$recursion_depth--;
		}

		// The outermost call wraps the result in the root element name.
		// A plain array given from outside has no name and is returned as it is.
		if (($recursion_depth == 0) && ($xml_element_copy instanceof \SimpleXMLElement)) {
			$result_array = [
				strtolower($xml_element_copy->getName()) => $result_array,
			];
		}

		return $result_array;
	}

	/**
	 * Convert the given XML text to an array in the XML structure.
	 *
	 * Xml::toArray() will convert the given XML text to an array in the XML structure.
	 * Link: http://www.bin-co.com/php/scripts/xml2array/
	 * Portions significantly re-written by mike@macgirvin.com for Friendica
	 * (namespaces, lowercase tags, get_attribute default changed, more...)
	 *
	 * Examples: $array =  Xml::toArray(file_get_contents('feed.xml'));
	 *        $array =  Xml::toArray(file_get_contents('feed.xml'), true, 1, 'attribute');
	 *
	 * @param string  $contents         The XML text
	 * @param boolean $namespaces       True or false include namespace information
	 *                                  in the returned array as array elements.
	 * @param integer $get_attributes   1 or 0. If this is 1 the function will get the attributes as well as the tag values -
	 *                                  this results in a different array structure in the return value.
	 * @param string  $priority         Can be 'tag' or 'attribute'. This will change the way the resulting
	 *                                  array structure. For 'tag', the tags are given more importance.
	 *
	 * @return array The parsed XML in an array form (empty on any error).
	 *               Use print_r() to see the resulting array structure.
	 * @throws \Exception
	 */
	public static function toArray($contents, $namespaces = true, $get_attributes = 1, $priority = 'attribute')
	{
		if (!$contents) {
			return [];
		}

		if (!function_exists('xml_parser_create')) {
			Logger::log('Xml::toArray: parser function missing');
			return [];
		}

		libxml_use_internal_errors(true);
		libxml_clear_errors();

		if ($namespaces) {
			$parser = @xml_parser_create_ns("UTF-8", ':');
		} else {
			$parser = @xml_parser_create();
		}

		if (!$parser) {
			Logger::log('Xml::toArray: xml_parser_create: no resource');
			return [];
		}

		xml_parser_set_option($parser, XML_OPTION_TARGET_ENCODING, "UTF-8");
		// http://minutillo.com/steve/weblog/2004/6/17/php-xml-and-character-encodings-a-tale-of-sadness-rage-and-data-loss
		xml_parser_set_option($parser, XML_OPTION_CASE_FOLDING, 0);
		xml_parser_set_option($parser, XML_OPTION_SKIP_WHITE, 1);
		@xml_parse_into_struct($parser, trim($contents), $xml_values);
		@xml_parser_free($parser);

		if (!$xml_values) {
			Logger::log('Xml::toArray: libxml: parse error: ' . $contents, Logger::DATA);
			foreach (libxml_get_errors() as $err) {
				Logger::log('libxml: parse: ' . $err->code . " at " . $err->line . ":" . $err->column . " : " . $err->message, Logger::DATA);
			}
			libxml_clear_errors();
			// Same result as on every other error path
			return [];
		}

		//Initializations
		$xml_array = [];
		$parent = []; // References to the enclosing arrays, indexed by nesting level

		$current = &$xml_array; // Reference

		// Go through the tags.
		$repeated_tag_index = []; // Multiple tags with same name will be turned into an array
		foreach ($xml_values as $data) {
			$tag        = $data['tag'];
			$type       = $data['type'];
			$level      = $data['level'];
			$attributes = $data['attributes'] ?? null;
			$value      = $data['value'] ?? null;

			$result = [];
			$attributes_data = [];

			if (isset($value)) {
				if ($priority == 'tag') {
					$result = $value;
				} else {
					$result['value'] = $value; // Put the value in a assoc array if we are in the 'Attribute' mode
				}
			}

			//Set the attributes too.
			if (isset($attributes) && $get_attributes) {
				foreach ($attributes as $attr => $val) {
					if ($priority == 'tag') {
						$attributes_data[$attr] = $val;
					} else {
						$result['@attributes'][$attr] = $val; // Set all the attributes in a array called '@attributes'
					}
				}
			}

			// See tag status and do the needed.
			if ($namespaces && strpos($tag, ':')) {
				// Everything in front of the last colon is the namespace
				$namespc = substr($tag, 0, strrpos($tag, ':'));
				$tag = strtolower(substr($tag, strlen($namespc)+1));
				$result['@namespace'] = $namespc;
			}
			$tag = strtolower($tag);

			if ($type == "open") {   // The starting of the tag '<tag>'
				$parent[$level-1] = &$current;
				if (!is_array($current) || (!in_array($tag, array_keys($current)))) { // Insert New tag
					$current[$tag] = $result;
					if ($attributes_data) {
						$current[$tag. '_attr'] = $attributes_data;
					}
					$repeated_tag_index[$tag.'_'.$level] = 1;

					$current = &$current[$tag];
				} else { // There was another element with the same tag name

					if (isset($current[$tag][0])) { // If there is a 0th element it is already an array
						$current[$tag][$repeated_tag_index[$tag.'_'.$level]] = $result;
						$repeated_tag_index[$tag.'_'.$level]++;
					} else { // This section will make the value an array if multiple tags with the same name appear together
						$current[$tag] = [$current[$tag], $result]; // This will combine the existing item and the new item together to make an array
						$repeated_tag_index[$tag.'_'.$level] = 2;

						if (isset($current[$tag.'_attr'])) { // The attribute of the last(0th) tag must be moved as well
							$current[$tag]['0_attr'] = $current[$tag.'_attr'];
							unset($current[$tag.'_attr']);
						}
					}
					$last_item_index = $repeated_tag_index[$tag.'_'.$level]-1;
					$current = &$current[$tag][$last_item_index];
				}
			} elseif ($type == "complete") { // Tags that ends in 1 line '<tag />'
				//See if the key is already taken.
				if (!isset($current[$tag])) { //New Key
					$current[$tag] = $result;
					$repeated_tag_index[$tag.'_'.$level] = 1;
					if ($priority == 'tag' && $attributes_data) {
						$current[$tag. '_attr'] = $attributes_data;
					}
				} else { // If taken, put all things inside a list(array)
					if (isset($current[$tag][0]) && is_array($current[$tag])) { // If it is already an array...

						// ...push the new element into that array.
						$current[$tag][$repeated_tag_index[$tag.'_'.$level]] = $result;

						if ($priority == 'tag' && $get_attributes && $attributes_data) {
							$current[$tag][$repeated_tag_index[$tag.'_'.$level] . '_attr'] = $attributes_data;
						}
						$repeated_tag_index[$tag.'_'.$level]++;
					} else { // If it is not an array...
						$current[$tag] = [$current[$tag], $result]; //...Make it an array using using the existing value and the new value
						$repeated_tag_index[$tag.'_'.$level] = 1;
						if ($priority == 'tag' && $get_attributes) {
							if (isset($current[$tag.'_attr'])) { // The attribute of the last(0th) tag must be moved as well

								$current[$tag]['0_attr'] = $current[$tag.'_attr'];
								unset($current[$tag.'_attr']);
							}

							if ($attributes_data) {
								$current[$tag][$repeated_tag_index[$tag.'_'.$level] . '_attr'] = $attributes_data;
							}
						}
						$repeated_tag_index[$tag.'_'.$level]++; // 0 and 1 indexes are already taken
					}
				}
			} elseif ($type == 'close') { // End of tag '</tag>'
				$current = &$parent[$level-1];
			}
		}

		return $xml_array;
	}

	/**
	 * Delete a node in a XML object
	 *
	 * Every element matching the XPath expression "//<node>" is removed.
	 *
	 * @param \DOMDocument $doc  XML document
	 * @param string       $node Node name
	 * @return void
	 */
	public static function deleteNode(\DOMDocument $doc, $node)
	{
		$xpath = new \DOMXPath($doc);
		$list = $xpath->query("//".$node);
		if ($list === false) {
			// Malformed expression: there is nothing that could be removed
			return;
		}

		foreach ($list as $child) {
			$child->parentNode->removeChild($child);
		}
	}

	/**
	 * Parse XML string
	 *
	 * @param string  $s            The XML text
	 * @param boolean $suppress_log Don't log anything when the string can't be parsed
	 * @return \SimpleXMLElement|false The parsed document, false on parse errors
	 */
	public static function parseString(string $s, bool $suppress_log = false)
	{
		libxml_use_internal_errors(true);
		// Drop errors of earlier parser runs, only the errors of this string should be logged
		libxml_clear_errors();

		$x = @simplexml_load_string($s);
		// Strict comparison is needed: an empty but valid document like "<a/>"
		// is a SimpleXMLElement that evaluates to false in a boolean context.
		if ($x === false) {
			if (!$suppress_log) {
				Logger::error('Error(s) while parsing XML string.', ['callstack' => System::callstack()]);
				foreach (libxml_get_errors() as $err) {
					Logger::info('libxml error', ['code' => $err->code, 'position' => $err->line . ":" . $err->column, 'message' => $err->message]);
				}
				Logger::debug('Erroring XML string', ['xml' => $s]);
			}
			libxml_clear_errors();
		}
		return $x;
	}

	/**
	 * Fetch the value of the first node that matches an XPath expression
	 *
	 * @param \DOMXPath     $xpath   XPath object of the document
	 * @param string        $element XPath expression
	 * @param \DOMNode|null $context Optional context node for relative expressions
	 * @return string The node value, an empty string when nothing matches
	 */
	public static function getFirstNodeValue(\DOMXPath $xpath, $element, $context = null)
	{
		$result = $xpath->evaluate($element, $context);
		if (!is_object($result)) {
			return '';
		}

		$first_item = $result->item(0);
		if (!is_object($first_item)) {
			return '';
		}

		return $first_item->nodeValue;
	}

	/**
	 * Fetch the attributes of the first node that matches an XPath expression
	 *
	 * @param \DOMXPath     $xpath   XPath object of the document
	 * @param string        $element XPath expression
	 * @param \DOMNode|null $context Optional context node for relative expressions
	 * @return \DOMNamedNodeMap|null|false The "attributes" property of the first node, false when nothing matches
	 */
	public static function getFirstAttributes(\DOMXPath $xpath, $element, $context = null)
	{
		$result = $xpath->query($element, $context);
		if (!is_object($result)) {
			return false;
		}

		$first_item = $result->item(0);
		if (!is_object($first_item)) {
			return false;
		}

		return $first_item->attributes;
	}

	/**
	 * escape text ($str) for XML transport
	 *
	 * @param string $str
	 * @return string Escaped and trimmed text.
	 */
	public static function escape($str)
	{
		// The cast keeps null and other scalars working without notices on recent PHP versions
		return trim(htmlspecialchars((string) $str, ENT_QUOTES, 'UTF-8'));
	}

	/**
	 * undo an escape
	 *
	 * @param string $s xml escaped text
	 * @return string unescaped text
	 */
	public static function unescape($s)
	{
		return htmlspecialchars_decode($s, ENT_QUOTES);
	}

	/**
	 * apply escape() to all values of array $val, recursively
	 *
	 * Booleans are converted to the strings "true" and "false".
	 *
	 * @param array|bool|string $val
	 * @return array|string
	 */
	public static function arrayEscape($val)
	{
		if (is_bool($val)) {
			return $val ? 'true' : 'false';
		} elseif (is_array($val)) {
			// A string callable like 'XML::arrayEscape' would be resolved in the
			// global namespace, so the class is referenced via self::class.
			return array_map([self::class, 'arrayEscape'], $val);
		}

		return self::escape((string) $val);
	}
}