]> git.mxchange.org Git - friendica.git/blob - library/HTMLPurifier/Strategy/MakeWellFormed.php
"Vier-Theme" Some more design stuff.
[friendica.git] / library / HTMLPurifier / Strategy / MakeWellFormed.php
1 <?php
2
/**
 * Takes tokens and makes them well-formed (balances end tags, etc.)
 *
 * The strategy walks the token stream once while keeping a stack of the
 * currently open elements. Whenever the stream has to be corrected (a tag
 * inserted, removed or replaced) the change is written back into the stream
 * and the same index is processed again ("punting"), so every token that ends
 * up in the output has passed through the normal processing path.
 */
class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
{

    /**
     * Array stream of tokens being processed.
     */
    protected $tokens;

    /**
     * Current index in $tokens.
     */
    protected $t;

    /**
     * Current nesting of elements.
     */
    protected $stack;

    /**
     * Injectors active in this stream processing.
     */
    protected $injectors;

    /**
     * Current instance of HTMLPurifier_Config.
     */
    protected $config;

    /**
     * Current instance of HTMLPurifier_Context.
     */
    protected $context;

    /**
     * Balances the start and end tags of a token stream.
     *
     * While running, the method registers 'CurrentNesting', 'InputIndex',
     * 'InputTokens' and 'CurrentToken' on $context (by reference, so that
     * injectors observe live values) and destroys them again before returning.
     *
     * @param $tokens Array of tokens to process
     * @param $config Instance of HTMLPurifier_Config
     * @param $context Instance of HTMLPurifier_Context
     * @return Array of tokens with balanced tags
     * @throws HTMLPurifier_Exception if a tag token of an unexpected kind is
     *         encountered (indicates an internal inconsistency)
     */
    public function execute($tokens, $config, $context) {

        $definition = $config->getHTMLDefinition();

        // local variables
        $generator = new HTMLPurifier_Generator($config, $context);
        $escape_invalid_tags = $config->get('Core.EscapeInvalidTags');
        $e = $context->get('ErrorCollector', true);
        $t = false; // token index
        $i = false; // injector index
        $token      = false; // the current token
        $reprocess  = false; // whether or not to reprocess the same token
        $stack = array();

        // member variables; bound by reference so that the helper methods
        // (processToken, insertBefore, remove, swap) operate on the very same
        // values as the loop below
        $this->stack   =& $stack;
        $this->t       =& $t;
        $this->tokens  =& $tokens;
        $this->config  = $config;
        $this->context = $context;

        // context variables
        $context->register('CurrentNesting', $stack);
        $context->register('InputIndex',     $t);
        $context->register('InputTokens',    $tokens);
        $context->register('CurrentToken',   $token);

        // -- begin INJECTOR --

        $this->injectors = array();

        $injectors = $config->getBatch('AutoFormat');
        $def_injectors = $definition->info_injector;
        $custom_injectors = $injectors['Custom'];
        unset($injectors['Custom']); // special case
        foreach ($injectors as $injector => $b) {
            // XXX: Fix with a legitimate lookup table of enabled filters
            if (strpos($injector, '.') !== false) continue;
            $injector = "HTMLPurifier_Injector_$injector";
            if (!$b) continue;
            $this->injectors[] = new $injector;
        }
        foreach ($def_injectors as $injector) {
            // assumed to be objects
            $this->injectors[] = $injector;
        }
        foreach ($custom_injectors as $injector) {
            if (!$injector) continue;
            if (is_string($injector)) {
                $injector = "HTMLPurifier_Injector_$injector";
                $injector = new $injector;
            }
            $this->injectors[] = $injector;
        }

        // give the injectors references to the definition and context
        // variables for performance reasons
        //
        // The list is walked by position rather than with foreach: removing
        // an entry shifts all later entries down by one, so the position is
        // only advanced when the injector at that position is kept. (A foreach
        // would keep handing out the positions of its initial snapshot, and a
        // second removal would then hit the wrong entry or none at all.)
        $ix = 0;
        while (isset($this->injectors[$ix])) {
            $injector = $this->injectors[$ix];
            $error = $injector->prepare($config, $context);
            if (!$error) {
                $ix++;
                continue;
            }
            array_splice($this->injectors, $ix, 1); // rm the injector
            trigger_error("Cannot enable {$injector->name} injector because $error is not allowed", E_USER_WARNING);
        }

        // -- end INJECTOR --

        // a note on punting:
        //      In order to reduce code duplication, whenever some code needs
        //      to make HTML changes in order to make things "correct", the
        //      new HTML gets sent through the purifier, regardless of its
        //      status. This means that if we add a start token, because it
        //      was totally necessary, we don't have to update nesting; we just
        //      punt ($reprocess = true; continue;) and it does that for us.

        // isset is in loop because $tokens size changes during loop exec
        for (
            $t = 0;
            $t == 0 || isset($tokens[$t - 1]);
            // only increment if we don't need to reprocess
            $reprocess ? $reprocess = false : $t++
        ) {

            // check for a rewind; $i is the index of the injector that ran
            // last in one of the foreach loops further down
            if (is_int($i) && $i >= 0) {
                // possibility: disable rewinding if the current token has a
                // rewind set on it already. This would offer protection from
                // infinite loop, but might hinder some advanced rewinding.
                $rewind_to = $this->injectors[$i]->getRewind();
                if (is_int($rewind_to) && $rewind_to < $t) {
                    if ($rewind_to < 0) $rewind_to = 0;
                    while ($t > $rewind_to) {
                        $t--;
                        $prev = $tokens[$t];
                        // indicate that other injectors should not process this token,
                        // but we need to reprocess it
                        unset($prev->skip[$i]);
                        $prev->rewind = $i;
                        // walking backwards over a tag undoes its effect on the nesting
                        if ($prev instanceof HTMLPurifier_Token_Start) array_pop($this->stack);
                        elseif ($prev instanceof HTMLPurifier_Token_End) $this->stack[] = $prev->start;
                    }
                }
                $i = false;
            }

            // handle case of document end
            if (!isset($tokens[$t])) {
                // kill processing if stack is empty
                if (empty($this->stack)) break;

                // peek
                $top_nesting = array_pop($this->stack);
                $this->stack[] = $top_nesting;

                // send error
                if ($e && !isset($top_nesting->armor['MakeWellFormed_TagClosedError'])) {
                    $e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag closed by document end', $top_nesting);
                }

                // append, don't splice, since this is the end
                $tokens[] = new HTMLPurifier_Token_End($top_nesting->name);

                // punt!
                $reprocess = true;
                continue;
            }

            $token = $tokens[$t];

            // quick-check: if it's not a tag, no need to process
            if (empty($token->is_tag)) {
                if ($token instanceof HTMLPurifier_Token_Text) {
                    // only the first eligible injector runs; the token is
                    // then reprocessed so that the remaining ones get a turn
                    foreach ($this->injectors as $i => $injector) {
                        if (isset($token->skip[$i])) continue;
                        if ($token->rewind !== null && $token->rewind !== $i) continue;
                        $injector->handleText($token);
                        $this->processToken($token, $i);
                        $reprocess = true;
                        break;
                    }
                }
                // another possibility is a comment
                continue;
            }

            if (isset($definition->info[$token->name])) {
                $type = $definition->info[$token->name]->child->type;
            } else {
                $type = false; // Type is unknown, treat accordingly
            }

            // quick tag checks: anything that's *not* an end tag
            $ok = false;
            if ($type === 'empty' && $token instanceof HTMLPurifier_Token_Start) {
                // claims to be a start tag but is empty
                $token = new HTMLPurifier_Token_Empty($token->name, $token->attr);
                $ok = true;
            } elseif ($type && $type !== 'empty' && $token instanceof HTMLPurifier_Token_Empty) {
                // claims to be empty but really is a start tag
                $this->swap(new HTMLPurifier_Token_End($token->name));
                $this->insertBefore(new HTMLPurifier_Token_Start($token->name, $token->attr));
                // punt (since we had to modify the input stream in a non-trivial way)
                $reprocess = true;
                continue;
            } elseif ($token instanceof HTMLPurifier_Token_Empty) {
                // real empty token
                $ok = true;
            } elseif ($token instanceof HTMLPurifier_Token_Start) {
                // start tag

                // ...unless they also have to close their parent
                if (!empty($this->stack)) {

                    $parent = array_pop($this->stack);
                    $this->stack[] = $parent;

                    // the parent must be closed first if it does not allow
                    // this element as a child; parents without a definition
                    // are never auto-closed
                    if (isset($definition->info[$parent->name])) {
                        $parent_elements = $definition->info[$parent->name]->child->getAllowedElements($config);
                        $autoclose = !isset($parent_elements[$token->name]);
                    } else {
                        $parent_elements = array();
                        $autoclose = false;
                    }

                    if (
                        $autoclose &&
                        isset($definition->info[$token->name]) &&
                        $definition->info[$token->name]->wrap
                    ) {
                        // Check if an element can be wrapped by another
                        // element to make it valid in a context (for
                        // example, <ul><ul> needs a <li> in between)
                        $wrapname = $definition->info[$token->name]->wrap;
                        // if the wrapper itself has no definition it cannot be
                        // used; fall through to the regular auto-close handling
                        if (isset($definition->info[$wrapname])) {
                            $elements = $definition->info[$wrapname]->child->getAllowedElements($config);
                            if (isset($elements[$token->name]) && isset($parent_elements[$wrapname])) {
                                $newtoken = new HTMLPurifier_Token_Start($wrapname);
                                $this->insertBefore($newtoken);
                                $reprocess = true;
                                continue;
                            }
                        }
                    }

                    // a formatting parent is re-opened after the new element
                    $carryover = false;
                    if ($autoclose && $definition->info[$parent->name]->formatting) {
                        $carryover = true;
                    }

                    if ($autoclose) {
                        // errors need to be updated
                        $new_token = new HTMLPurifier_Token_End($parent->name);
                        $new_token->start = $parent;
                        if ($carryover) {
                            $element = clone $parent;
                            // the clone is armored so that closing it later
                            // does not produce another error message
                            $element->armor['MakeWellFormed_TagClosedError'] = true;
                            $element->carryover = true;
                            $this->processToken(array($new_token, $token, $element));
                        } else {
                            $this->insertBefore($new_token);
                        }
                        if ($e && !isset($parent->armor['MakeWellFormed_TagClosedError'])) {
                            if (!$carryover) {
                                $e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag auto closed', $parent);
                            } else {
                                $e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag carryover', $parent);
                            }
                        }
                        $reprocess = true;
                        continue;
                    }

                }
                $ok = true;
            }

            if ($ok) {
                foreach ($this->injectors as $i => $injector) {
                    if (isset($token->skip[$i])) continue;
                    if ($token->rewind !== null && $token->rewind !== $i) continue;
                    $injector->handleElement($token);
                    $this->processToken($token, $i);
                    $reprocess = true;
                    break;
                }
                if (!$reprocess) {
                    // ah, nothing interesting happened; do normal processing
                    $this->swap($token);
                    if ($token instanceof HTMLPurifier_Token_Start) {
                        $this->stack[] = $token;
                    } elseif ($token instanceof HTMLPurifier_Token_End) {
                        throw new HTMLPurifier_Exception('Improper handling of end tag in start code; possible error in MakeWellFormed');
                    }
                }
                continue;
            }

            // sanity check: we should be dealing with a closing tag
            if (!$token instanceof HTMLPurifier_Token_End) {
                throw new HTMLPurifier_Exception('Unaccounted for tag token in input stream, bug in HTML Purifier');
            }

            // make sure that we have something open
            if (empty($this->stack)) {
                if ($escape_invalid_tags) {
                    if ($e) $e->send(E_WARNING, 'Strategy_MakeWellFormed: Unnecessary end tag to text');
                    $this->swap(new HTMLPurifier_Token_Text(
                        $generator->generateFromToken($token)
                    ));
                } else {
                    $this->remove();
                    if ($e) $e->send(E_WARNING, 'Strategy_MakeWellFormed: Unnecessary end tag removed');
                }
                $reprocess = true;
                continue;
            }

            // first, check for the simplest case: everything closes neatly.
            // Eventually, everything passes through here; if there are problems
            // we modify the input stream accordingly and then punt, so that
            // the tokens get processed again.
            $current_parent = array_pop($this->stack);
            if ($current_parent->name == $token->name) {
                $token->start = $current_parent;
                foreach ($this->injectors as $i => $injector) {
                    if (isset($token->skip[$i])) continue;
                    if ($token->rewind !== null && $token->rewind !== $i) continue;
                    $injector->handleEnd($token);
                    $this->processToken($token, $i);
                    // the token will be reprocessed, so the element has to
                    // count as open again until then
                    $this->stack[] = $current_parent;
                    $reprocess = true;
                    break;
                }
                continue;
            }

            // okay, so we're trying to close the wrong tag

            // undo the previous pop
            $this->stack[] = $current_parent;

            // scroll back the entire nest, trying to find our tag.
            // (feature could be to specify how far you'd like to go)
            $size = count($this->stack);
            // -2 because -1 is the last element, but we already checked that
            $skipped_tags = false;
            for ($j = $size - 2; $j >= 0; $j--) {
                if ($this->stack[$j]->name == $token->name) {
                    $skipped_tags = array_slice($this->stack, $j);
                    break;
                }
            }

            // we didn't find the tag, so remove
            if ($skipped_tags === false) {
                if ($escape_invalid_tags) {
                    $this->swap(new HTMLPurifier_Token_Text(
                        $generator->generateFromToken($token)
                    ));
                    if ($e) $e->send(E_WARNING, 'Strategy_MakeWellFormed: Stray end tag to text');
                } else {
                    $this->remove();
                    if ($e) $e->send(E_WARNING, 'Strategy_MakeWellFormed: Stray end tag removed');
                }
                $reprocess = true;
                continue;
            }

            // do errors, in REVERSE $j order: a,b,c with </a></b></c>
            $c = count($skipped_tags);
            if ($e) {
                for ($j = $c - 1; $j > 0; $j--) {
                    // notice we exclude $j == 0, i.e. the current ending tag, from
                    // the errors...
                    if (!isset($skipped_tags[$j]->armor['MakeWellFormed_TagClosedError'])) {
                        $e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag closed by element end', $skipped_tags[$j]);
                    }
                }
            }

            // insert tags, in FORWARD $j order: c,b,a with </a></b></c>
            $replace = array($token);
            for ($j = 1; $j < $c; $j++) {
                // ...as well as from the insertions
                $new_token = new HTMLPurifier_Token_End($skipped_tags[$j]->name);
                $new_token->start = $skipped_tags[$j];
                array_unshift($replace, $new_token);
                // formatting elements are re-opened after the current end tag
                if (isset($definition->info[$new_token->name]) && $definition->info[$new_token->name]->formatting) {
                    $element = clone $skipped_tags[$j];
                    $element->carryover = true;
                    $element->armor['MakeWellFormed_TagClosedError'] = true;
                    $replace[] = $element;
                }
            }
            $this->processToken($replace);
            $reprocess = true;
            continue;
        }

        $context->destroy('CurrentNesting');
        $context->destroy('InputTokens');
        $context->destroy('InputIndex');
        $context->destroy('CurrentToken');

        unset($this->injectors, $this->stack, $this->tokens, $this->t);
        return $tokens;
    }

    /**
     * Processes arbitrary token values for complicated substitution patterns.
     * In general:
     *
     * If $token is an array, it is a list of tokens to substitute for the
     * current token. These tokens then get individually processed. If there
     * is a leading integer in the list, that integer determines how many
     * tokens from the stream should be removed.
     *
     * If $token is a regular token, it is swapped with the current token.
     *
     * If $token is false, the current token is deleted.
     *
     * If $token is an integer, that number of tokens (with the first token
     * being the current one) will be deleted.
     *
     * @param $token Token substitution value
     * @param $injector Index of the injector that performed the substitution;
     *        the default of -1 means this is not an injector related
     *        operation.
     * @throws HTMLPurifier_Exception if $token has none of the forms above,
     *         or requests the deletion of zero tokens
     */
    protected function processToken($token, $injector = -1) {

        // normalize forms of token
        if (is_object($token)) $token = array(1, $token);
        if (is_int($token))    $token = array($token);
        if ($token === false)  $token = array(1);
        if (!is_array($token)) throw new HTMLPurifier_Exception('Invalid token type from injector');
        if (!is_int($token[0])) array_unshift($token, 1);
        if ($token[0] === 0) throw new HTMLPurifier_Exception('Deleting zero tokens is not valid');

        // $token is now an array with the following form:
        // array(number nodes to delete, new node 1, new node 2, ...)

        $delete = array_shift($token);
        $old = array_splice($this->tokens, $this->t, $delete, $token);

        if ($injector > -1) {
            // determine appropriate skips: every replacement inherits the
            // skips of the first replaced token and is additionally marked as
            // already handled by the injector that produced it
            $oldskip = isset($old[0]) ? $old[0]->skip : array();
            foreach ($token as $object) {
                $object->skip = $oldskip;
                $object->skip[$injector] = true;
            }
        }

    }

    /**
     * Inserts a token before the current token. Cursor now points to this token.
     *
     * @param $token Token to insert
     */
    private function insertBefore($token) {
        array_splice($this->tokens, $this->t, 0, array($token));
    }

    /**
     * Removes current token. Cursor now points to new token occupying previously
     * occupied space.
     */
    private function remove() {
        array_splice($this->tokens, $this->t, 1);
    }

    /**
     * Swap current token with new token. Cursor points to new token (no change).
     *
     * @param $token Token to put in place of the current one
     */
    private function swap($token) {
        $this->tokens[$this->t] = $token;
    }

}
474
475 // vim: et sw=4 sts=4