]> git.mxchange.org Git - quix0rs-gnu-social.git/blob - extlib/Michelf/Markdown.php
neo-quitter unuglification by marcus, merge-request 44
[quix0rs-gnu-social.git] / extlib / Michelf / Markdown.php
1 <?php
2 #
3 # Markdown  -  A text-to-HTML conversion tool for web writers
4 #
5 # PHP Markdown  
6 # Copyright (c) 2004-2014 Michel Fortin  
7 # <http://michelf.com/projects/php-markdown/>
8 #
9 # Original Markdown  
10 # Copyright (c) 2004-2006 John Gruber  
11 # <http://daringfireball.net/projects/markdown/>
12 #
13 namespace Michelf;
14
15
16 #
17 # Markdown Parser Class
18 #
19
20 class Markdown implements MarkdownInterface {
21
22         ### Version ###
23
24         const  MARKDOWNLIB_VERSION  =  "1.4.1";
25
26         ### Simple Function Interface ###
27
28         public static function defaultTransform($text) {
29         #
30         # Initialize the parser and return the result of its transform method.
31         # This will work fine for derived classes too.
32         #
33                 # Take parser class on which this function was called.
34                 $parser_class = \get_called_class();
35
36                 # try to take parser from the static parser list
37                 static $parser_list;
38                 $parser =& $parser_list[$parser_class];
39
40                 # create the parser it not already set
41                 if (!$parser)
42                         $parser = new $parser_class;
43
44                 # Transform text using parser.
45                 return $parser->transform($text);
46         }
47
48         ### Configuration Variables ###
49
50         # Change to ">" for HTML output.
51         public $empty_element_suffix = " />";
52         public $tab_width = 4;
53         
54         # Change to `true` to disallow markup or entities.
55         public $no_markup = false;
56         public $no_entities = false;
57         
58         # Predefined urls and titles for reference links and images.
59         public $predef_urls = array();
60         public $predef_titles = array();
61
62         # Optional filter function for URLs
63         public $url_filter_func = null;
64
65
66         ### Parser Implementation ###
67
68         # Regex to match balanced [brackets].
69         # Needed to insert a maximum bracket depth while converting to PHP.
70         protected $nested_brackets_depth = 6;
71         protected $nested_brackets_re;
72         
73         protected $nested_url_parenthesis_depth = 4;
74         protected $nested_url_parenthesis_re;
75
76         # Table of hash values for escaped characters:
77         protected $escape_chars = '\`*_{}[]()>#+-.!';
78         protected $escape_chars_re;
79
80
81         public function __construct() {
82         #
83         # Constructor function. Initialize appropriate member variables.
84         #
85                 $this->_initDetab();
86                 $this->prepareItalicsAndBold();
87         
88                 $this->nested_brackets_re = 
89                         str_repeat('(?>[^\[\]]+|\[', $this->nested_brackets_depth).
90                         str_repeat('\])*', $this->nested_brackets_depth);
91         
92                 $this->nested_url_parenthesis_re = 
93                         str_repeat('(?>[^()\s]+|\(', $this->nested_url_parenthesis_depth).
94                         str_repeat('(?>\)))*', $this->nested_url_parenthesis_depth);
95                 
96                 $this->escape_chars_re = '['.preg_quote($this->escape_chars).']';
97                 
98                 # Sort document, block, and span gamut in ascendent priority order.
99                 asort($this->document_gamut);
100                 asort($this->block_gamut);
101                 asort($this->span_gamut);
102         }
103
104
105         # Internal hashes used during transformation.
106         protected $urls = array();
107         protected $titles = array();
108         protected $html_hashes = array();
109         
110         # Status flag to avoid invalid nesting.
111         protected $in_anchor = false;
112         
113         
114         protected function setup() {
115         #
116         # Called before the transformation process starts to setup parser 
117         # states.
118         #
119                 # Clear global hashes.
120                 $this->urls = $this->predef_urls;
121                 $this->titles = $this->predef_titles;
122                 $this->html_hashes = array();
123                 
124                 $this->in_anchor = false;
125         }
126         
127         protected function teardown() {
128         #
129         # Called after the transformation process to clear any variable 
130         # which may be taking up memory unnecessarly.
131         #
132                 $this->urls = array();
133                 $this->titles = array();
134                 $this->html_hashes = array();
135         }
136
137
138         public function transform($text) {
139         #
140         # Main function. Performs some preprocessing on the input text
141         # and pass it through the document gamut.
142         #
143                 $this->setup();
144         
145                 # Remove UTF-8 BOM and marker character in input, if present.
146                 $text = preg_replace('{^\xEF\xBB\xBF|\x1A}', '', $text);
147
148                 # Standardize line endings:
149                 #   DOS to Unix and Mac to Unix
150                 $text = preg_replace('{\r\n?}', "\n", $text);
151
152                 # Make sure $text ends with a couple of newlines:
153                 $text .= "\n\n";
154
155                 # Convert all tabs to spaces.
156                 $text = $this->detab($text);
157
158                 # Turn block-level HTML blocks into hash entries
159                 $text = $this->hashHTMLBlocks($text);
160
161                 # Strip any lines consisting only of spaces and tabs.
162                 # This makes subsequent regexen easier to write, because we can
163                 # match consecutive blank lines with /\n+/ instead of something
164                 # contorted like /[ ]*\n+/ .
165                 $text = preg_replace('/^[ ]+$/m', '', $text);
166
167                 # Run document gamut methods.
168                 foreach ($this->document_gamut as $method => $priority) {
169                         $text = $this->$method($text);
170                 }
171                 
172                 $this->teardown();
173
174                 return $text . "\n";
175         }
176         
177         protected $document_gamut = array(
178                 # Strip link definitions, store in hashes.
179                 "stripLinkDefinitions" => 20,
180                 
181                 "runBasicBlockGamut"   => 30,
182                 );
183
184
185         protected function stripLinkDefinitions($text) {
186         #
187         # Strips link definitions from text, stores the URLs and titles in
188         # hash references.
189         #
190                 $less_than_tab = $this->tab_width - 1;
191
192                 # Link defs are in the form: ^[id]: url "optional title"
193                 $text = preg_replace_callback('{
194                                                         ^[ ]{0,'.$less_than_tab.'}\[(.+)\][ ]?: # id = $1
195                                                           [ ]*
196                                                           \n?                           # maybe *one* newline
197                                                           [ ]*
198                                                         (?:
199                                                           <(.+?)>                       # url = $2
200                                                         |
201                                                           (\S+?)                        # url = $3
202                                                         )
203                                                           [ ]*
204                                                           \n?                           # maybe one newline
205                                                           [ ]*
206                                                         (?:
207                                                                 (?<=\s)                 # lookbehind for whitespace
208                                                                 ["(]
209                                                                 (.*?)                   # title = $4
210                                                                 [")]
211                                                                 [ ]*
212                                                         )?      # title is optional
213                                                         (?:\n+|\Z)
214                         }xm',
215                         array($this, '_stripLinkDefinitions_callback'),
216                         $text);
217                 return $text;
218         }
219         protected function _stripLinkDefinitions_callback($matches) {
220                 $link_id = strtolower($matches[1]);
221                 $url = $matches[2] == '' ? $matches[3] : $matches[2];
222                 $this->urls[$link_id] = $url;
223                 $this->titles[$link_id] =& $matches[4];
224                 return ''; # String that will replace the block
225         }
226
227
228         protected function hashHTMLBlocks($text) {
229                 if ($this->no_markup)  return $text;
230
231                 $less_than_tab = $this->tab_width - 1;
232
233                 # Hashify HTML blocks:
234                 # We only want to do this for block-level HTML tags, such as headers,
235                 # lists, and tables. That's because we still want to wrap <p>s around
236                 # "paragraphs" that are wrapped in non-block-level tags, such as anchors,
237                 # phrase emphasis, and spans. The list of tags we're looking for is
238                 # hard-coded:
239                 #
240                 # *  List "a" is made of tags which can be both inline or block-level.
241                 #    These will be treated block-level when the start tag is alone on 
242                 #    its line, otherwise they're not matched here and will be taken as 
243                 #    inline later.
244                 # *  List "b" is made of tags which are always block-level;
245                 #
246                 $block_tags_a_re = 'ins|del';
247                 $block_tags_b_re = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|'.
248                                                    'script|noscript|style|form|fieldset|iframe|math|svg|'.
249                                                    'article|section|nav|aside|hgroup|header|footer|'.
250                                                    'figure';
251
252                 # Regular expression for the content of a block tag.
253                 $nested_tags_level = 4;
254                 $attr = '
255                         (?>                             # optional tag attributes
256                           \s                    # starts with whitespace
257                           (?>
258                                 [^>"/]+         # text outside quotes
259                           |
260                                 /+(?!>)         # slash not followed by ">"
261                           |
262                                 "[^"]*"         # text inside double quotes (tolerate ">")
263                           |
264                                 \'[^\']*\'      # text inside single quotes (tolerate ">")
265                           )*
266                         )?      
267                         ';
268                 $content =
269                         str_repeat('
270                                 (?>
271                                   [^<]+                 # content without tag
272                                 |
273                                   <\2                   # nested opening tag
274                                         '.$attr.'       # attributes
275                                         (?>
276                                           />
277                                         |
278                                           >', $nested_tags_level).      # end of opening tag
279                                           '.*?'.                                        # last level nested tag content
280                         str_repeat('
281                                           </\2\s*>      # closing nested tag
282                                         )
283                                   |                             
284                                         <(?!/\2\s*>     # other tags with a different name
285                                   )
286                                 )*',
287                                 $nested_tags_level);
288                 $content2 = str_replace('\2', '\3', $content);
289
290                 # First, look for nested blocks, e.g.:
291                 #       <div>
292                 #               <div>
293                 #               tags for inner block must be indented.
294                 #               </div>
295                 #       </div>
296                 #
297                 # The outermost tags must start at the left margin for this to match, and
298                 # the inner nested divs must be indented.
299                 # We need to do this before the next, more liberal match, because the next
300                 # match will start at the first `<div>` and stop at the first `</div>`.
301                 $text = preg_replace_callback('{(?>
302                         (?>
303                                 (?<=\n)                 # Starting on its own line
304                                 |                               # or
305                                 \A\n?                   # the at beginning of the doc
306                         )
307                         (                                               # save in $1
308
309                           # Match from `\n<tag>` to `</tag>\n`, handling nested tags 
310                           # in between.
311                                         
312                                                 [ ]{0,'.$less_than_tab.'}
313                                                 <('.$block_tags_b_re.')# start tag = $2
314                                                 '.$attr.'>                      # attributes followed by > and \n
315                                                 '.$content.'            # content, support nesting
316                                                 </\2>                           # the matching end tag
317                                                 [ ]*                            # trailing spaces/tabs
318                                                 (?=\n+|\Z)      # followed by a newline or end of document
319
320                         | # Special version for tags of group a.
321
322                                                 [ ]{0,'.$less_than_tab.'}
323                                                 <('.$block_tags_a_re.')# start tag = $3
324                                                 '.$attr.'>[ ]*\n        # attributes followed by >
325                                                 '.$content2.'           # content, support nesting
326                                                 </\3>                           # the matching end tag
327                                                 [ ]*                            # trailing spaces/tabs
328                                                 (?=\n+|\Z)      # followed by a newline or end of document
329                                         
330                         | # Special case just for <hr />. It was easier to make a special 
331                           # case than to make the other regex more complicated.
332                         
333                                                 [ ]{0,'.$less_than_tab.'}
334                                                 <(hr)                           # start tag = $2
335                                                 '.$attr.'                       # attributes
336                                                 /?>                                     # the matching end tag
337                                                 [ ]*
338                                                 (?=\n{2,}|\Z)           # followed by a blank line or end of document
339                         
340                         | # Special case for standalone HTML comments:
341                         
342                                         [ ]{0,'.$less_than_tab.'}
343                                         (?s:
344                                                 <!-- .*? -->
345                                         )
346                                         [ ]*
347                                         (?=\n{2,}|\Z)           # followed by a blank line or end of document
348                         
349                         | # PHP and ASP-style processor instructions (<? and <%)
350                         
351                                         [ ]{0,'.$less_than_tab.'}
352                                         (?s:
353                                                 <([?%])                 # $2
354                                                 .*?
355                                                 \2>
356                                         )
357                                         [ ]*
358                                         (?=\n{2,}|\Z)           # followed by a blank line or end of document
359                                         
360                         )
361                         )}Sxmi',
362                         array($this, '_hashHTMLBlocks_callback'),
363                         $text);
364
365                 return $text;
366         }
367         protected function _hashHTMLBlocks_callback($matches) {
368                 $text = $matches[1];
369                 $key  = $this->hashBlock($text);
370                 return "\n\n$key\n\n";
371         }
372         
373         
374         protected function hashPart($text, $boundary = 'X') {
375         #
376         # Called whenever a tag must be hashed when a function insert an atomic 
377         # element in the text stream. Passing $text to through this function gives
378         # a unique text-token which will be reverted back when calling unhash.
379         #
380         # The $boundary argument specify what character should be used to surround
381         # the token. By convension, "B" is used for block elements that needs not
382         # to be wrapped into paragraph tags at the end, ":" is used for elements
383         # that are word separators and "X" is used in the general case.
384         #
385                 # Swap back any tag hash found in $text so we do not have to `unhash`
386                 # multiple times at the end.
387                 $text = $this->unhash($text);
388                 
389                 # Then hash the block.
390                 static $i = 0;
391                 $key = "$boundary\x1A" . ++$i . $boundary;
392                 $this->html_hashes[$key] = $text;
393                 return $key; # String that will replace the tag.
394         }
395
396
397         protected function hashBlock($text) {
398         #
399         # Shortcut function for hashPart with block-level boundaries.
400         #
401                 return $this->hashPart($text, 'B');
402         }
403
404
405         protected $block_gamut = array(
406         #
407         # These are all the transformations that form block-level
408         # tags like paragraphs, headers, and list items.
409         #
410                 "doHeaders"         => 10,
411                 "doHorizontalRules" => 20,
412                 
413                 "doLists"           => 40,
414                 "doCodeBlocks"      => 50,
415                 "doBlockQuotes"     => 60,
416                 );
417
418         protected function runBlockGamut($text) {
419         #
420         # Run block gamut tranformations.
421         #
422                 # We need to escape raw HTML in Markdown source before doing anything 
423                 # else. This need to be done for each block, and not only at the 
424                 # begining in the Markdown function since hashed blocks can be part of
425                 # list items and could have been indented. Indented blocks would have 
426                 # been seen as a code block in a previous pass of hashHTMLBlocks.
427                 $text = $this->hashHTMLBlocks($text);
428                 
429                 return $this->runBasicBlockGamut($text);
430         }
431         
432         protected function runBasicBlockGamut($text) {
433         #
434         # Run block gamut tranformations, without hashing HTML blocks. This is 
435         # useful when HTML blocks are known to be already hashed, like in the first
436         # whole-document pass.
437         #
438                 foreach ($this->block_gamut as $method => $priority) {
439                         $text = $this->$method($text);
440                 }
441                 
442                 # Finally form paragraph and restore hashed blocks.
443                 $text = $this->formParagraphs($text);
444
445                 return $text;
446         }
447         
448         
449         protected function doHorizontalRules($text) {
450                 # Do Horizontal Rules:
451                 return preg_replace(
452                         '{
453                                 ^[ ]{0,3}       # Leading space
454                                 ([-*_])         # $1: First marker
455                                 (?>                     # Repeated marker group
456                                         [ ]{0,2}        # Zero, one, or two spaces.
457                                         \1                      # Marker character
458                                 ){2,}           # Group repeated at least twice
459                                 [ ]*            # Tailing spaces
460                                 $                       # End of line.
461                         }mx',
462                         "\n".$this->hashBlock("<hr$this->empty_element_suffix")."\n", 
463                         $text);
464         }
465
466
467         protected $span_gamut = array(
468         #
469         # These are all the transformations that occur *within* block-level
470         # tags like paragraphs, headers, and list items.
471         #
472                 # Process character escapes, code spans, and inline HTML
473                 # in one shot.
474                 "parseSpan"           => -30,
475
476                 # Process anchor and image tags. Images must come first,
477                 # because ![foo][f] looks like an anchor.
478                 "doImages"            =>  10,
479                 "doAnchors"           =>  20,
480                 
481                 # Make links out of things like `<http://example.com/>`
482                 # Must come after doAnchors, because you can use < and >
483                 # delimiters in inline links like [this](<url>).
484                 "doAutoLinks"         =>  30,
485                 "encodeAmpsAndAngles" =>  40,
486
487                 "doItalicsAndBold"    =>  50,
488                 "doHardBreaks"        =>  60,
489                 );
490
491         protected function runSpanGamut($text) {
492         #
493         # Run span gamut tranformations.
494         #
495                 foreach ($this->span_gamut as $method => $priority) {
496                         $text = $this->$method($text);
497                 }
498
499                 return $text;
500         }
501         
502         
503         protected function doHardBreaks($text) {
504                 # Do hard breaks:
505                 return preg_replace_callback('/ {2,}\n/', 
506                         array($this, '_doHardBreaks_callback'), $text);
507         }
508         protected function _doHardBreaks_callback($matches) {
509                 return $this->hashPart("<br$this->empty_element_suffix\n");
510         }
511
512
513         protected function doAnchors($text) {
514         #
515         # Turn Markdown link shortcuts into XHTML <a> tags.
516         #
517                 if ($this->in_anchor) return $text;
518                 $this->in_anchor = true;
519                 
520                 #
521                 # First, handle reference-style links: [link text] [id]
522                 #
523                 $text = preg_replace_callback('{
524                         (                                       # wrap whole match in $1
525                           \[
526                                 ('.$this->nested_brackets_re.') # link text = $2
527                           \]
528
529                           [ ]?                          # one optional space
530                           (?:\n[ ]*)?           # one optional newline followed by spaces
531
532                           \[
533                                 (.*?)           # id = $3
534                           \]
535                         )
536                         }xs',
537                         array($this, '_doAnchors_reference_callback'), $text);
538
539                 #
540                 # Next, inline-style links: [link text](url "optional title")
541                 #
542                 $text = preg_replace_callback('{
543                         (                               # wrap whole match in $1
544                           \[
545                                 ('.$this->nested_brackets_re.') # link text = $2
546                           \]
547                           \(                    # literal paren
548                                 [ \n]*
549                                 (?:
550                                         <(.+?)> # href = $3
551                                 |
552                                         ('.$this->nested_url_parenthesis_re.')  # href = $4
553                                 )
554                                 [ \n]*
555                                 (                       # $5
556                                   ([\'"])       # quote char = $6
557                                   (.*?)         # Title = $7
558                                   \6            # matching quote
559                                   [ \n]*        # ignore any spaces/tabs between closing quote and )
560                                 )?                      # title is optional
561                           \)
562                         )
563                         }xs',
564                         array($this, '_doAnchors_inline_callback'), $text);
565
566                 #
567                 # Last, handle reference-style shortcuts: [link text]
568                 # These must come last in case you've also got [link text][1]
569                 # or [link text](/foo)
570                 #
571                 $text = preg_replace_callback('{
572                         (                                       # wrap whole match in $1
573                           \[
574                                 ([^\[\]]+)              # link text = $2; can\'t contain [ or ]
575                           \]
576                         )
577                         }xs',
578                         array($this, '_doAnchors_reference_callback'), $text);
579
580                 $this->in_anchor = false;
581                 return $text;
582         }
583         protected function _doAnchors_reference_callback($matches) {
584                 $whole_match =  $matches[1];
585                 $link_text   =  $matches[2];
586                 $link_id     =& $matches[3];
587
588                 if ($link_id == "") {
589                         # for shortcut links like [this][] or [this].
590                         $link_id = $link_text;
591                 }
592                 
593                 # lower-case and turn embedded newlines into spaces
594                 $link_id = strtolower($link_id);
595                 $link_id = preg_replace('{[ ]?\n}', ' ', $link_id);
596
597                 if (isset($this->urls[$link_id])) {
598                         $url = $this->urls[$link_id];
599                         $url = $this->encodeURLAttribute($url);
600                         
601                         $result = "<a href=\"$url\"";
602                         if ( isset( $this->titles[$link_id] ) ) {
603                                 $title = $this->titles[$link_id];
604                                 $title = $this->encodeAttribute($title);
605                                 $result .=  " title=\"$title\"";
606                         }
607                 
608                         $link_text = $this->runSpanGamut($link_text);
609                         $result .= ">$link_text</a>";
610                         $result = $this->hashPart($result);
611                 }
612                 else {
613                         $result = $whole_match;
614                 }
615                 return $result;
616         }
617         protected function _doAnchors_inline_callback($matches) {
618                 $whole_match    =  $matches[1];
619                 $link_text              =  $this->runSpanGamut($matches[2]);
620                 $url                    =  $matches[3] == '' ? $matches[4] : $matches[3];
621                 $title                  =& $matches[7];
622
623                 // if the URL was of the form <s p a c e s> it got caught by the HTML
624                 // tag parser and hashed. Need to reverse the process before using the URL.
625                 $unhashed = $this->unhash($url);
626                 if ($unhashed != $url)
627                         $url = preg_replace('/^<(.*)>$/', '\1', $unhashed);
628
629                 $url = $this->encodeURLAttribute($url);
630
631                 $result = "<a href=\"$url\"";
632                 if (isset($title)) {
633                         $title = $this->encodeAttribute($title);
634                         $result .=  " title=\"$title\"";
635                 }
636                 
637                 $link_text = $this->runSpanGamut($link_text);
638                 $result .= ">$link_text</a>";
639
640                 return $this->hashPart($result);
641         }
642
643
644         protected function doImages($text) {
645         #
646         # Turn Markdown image shortcuts into <img> tags.
647         #
648                 #
649                 # First, handle reference-style labeled images: ![alt text][id]
650                 #
651                 $text = preg_replace_callback('{
652                         (                               # wrap whole match in $1
653                           !\[
654                                 ('.$this->nested_brackets_re.')         # alt text = $2
655                           \]
656
657                           [ ]?                          # one optional space
658                           (?:\n[ ]*)?           # one optional newline followed by spaces
659
660                           \[
661                                 (.*?)           # id = $3
662                           \]
663
664                         )
665                         }xs', 
666                         array($this, '_doImages_reference_callback'), $text);
667
668                 #
669                 # Next, handle inline images:  ![alt text](url "optional title")
670                 # Don't forget: encode * and _
671                 #
672                 $text = preg_replace_callback('{
673                         (                               # wrap whole match in $1
674                           !\[
675                                 ('.$this->nested_brackets_re.')         # alt text = $2
676                           \]
677                           \s?                   # One optional whitespace character
678                           \(                    # literal paren
679                                 [ \n]*
680                                 (?:
681                                         <(\S*)> # src url = $3
682                                 |
683                                         ('.$this->nested_url_parenthesis_re.')  # src url = $4
684                                 )
685                                 [ \n]*
686                                 (                       # $5
687                                   ([\'"])       # quote char = $6
688                                   (.*?)         # title = $7
689                                   \6            # matching quote
690                                   [ \n]*
691                                 )?                      # title is optional
692                           \)
693                         )
694                         }xs',
695                         array($this, '_doImages_inline_callback'), $text);
696
697                 return $text;
698         }
699         protected function _doImages_reference_callback($matches) {
700                 $whole_match = $matches[1];
701                 $alt_text    = $matches[2];
702                 $link_id     = strtolower($matches[3]);
703
704                 if ($link_id == "") {
705                         $link_id = strtolower($alt_text); # for shortcut links like ![this][].
706                 }
707
708                 $alt_text = $this->encodeAttribute($alt_text);
709                 if (isset($this->urls[$link_id])) {
710                         $url = $this->encodeURLAttribute($this->urls[$link_id]);
711                         $result = "<img src=\"$url\" alt=\"$alt_text\"";
712                         if (isset($this->titles[$link_id])) {
713                                 $title = $this->titles[$link_id];
714                                 $title = $this->encodeAttribute($title);
715                                 $result .=  " title=\"$title\"";
716                         }
717                         $result .= $this->empty_element_suffix;
718                         $result = $this->hashPart($result);
719                 }
720                 else {
721                         # If there's no such link ID, leave intact:
722                         $result = $whole_match;
723                 }
724
725                 return $result;
726         }
727         protected function _doImages_inline_callback($matches) {
728                 $whole_match    = $matches[1];
729                 $alt_text               = $matches[2];
730                 $url                    = $matches[3] == '' ? $matches[4] : $matches[3];
731                 $title                  =& $matches[7];
732
733                 $alt_text = $this->encodeAttribute($alt_text);
734                 $url = $this->encodeURLAttribute($url);
735                 $result = "<img src=\"$url\" alt=\"$alt_text\"";
736                 if (isset($title)) {
737                         $title = $this->encodeAttribute($title);
738                         $result .=  " title=\"$title\""; # $title already quoted
739                 }
740                 $result .= $this->empty_element_suffix;
741
742                 return $this->hashPart($result);
743         }
744
745
746         protected function doHeaders($text) {
747                 # Setext-style headers:
748                 #         Header 1
749                 #         ========
750                 #  
751                 #         Header 2
752                 #         --------
753                 #
754                 $text = preg_replace_callback('{ ^(.+?)[ ]*\n(=+|-+)[ ]*\n+ }mx',
755                         array($this, '_doHeaders_callback_setext'), $text);
756
757                 # atx-style headers:
758                 #       # Header 1
759                 #       ## Header 2
760                 #       ## Header 2 with closing hashes ##
761                 #       ...
762                 #       ###### Header 6
763                 #
764                 $text = preg_replace_callback('{
765                                 ^(\#{1,6})      # $1 = string of #\'s
766                                 [ ]*
767                                 (.+?)           # $2 = Header text
768                                 [ ]*
769                                 \#*                     # optional closing #\'s (not counted)
770                                 \n+
771                         }xm',
772                         array($this, '_doHeaders_callback_atx'), $text);
773
774                 return $text;
775         }
776         protected function _doHeaders_callback_setext($matches) {
777                 # Terrible hack to check we haven't found an empty list item.
778                 if ($matches[2] == '-' && preg_match('{^-(?: |$)}', $matches[1]))
779                         return $matches[0];
780                 
781                 $level = $matches[2]{0} == '=' ? 1 : 2;
782                 $block = "<h$level>".$this->runSpanGamut($matches[1])."</h$level>";
783                 return "\n" . $this->hashBlock($block) . "\n\n";
784         }
785         protected function _doHeaders_callback_atx($matches) {
786                 $level = strlen($matches[1]);
787                 $block = "<h$level>".$this->runSpanGamut($matches[2])."</h$level>";
788                 return "\n" . $this->hashBlock($block) . "\n\n";
789         }
790
791
792         protected function doLists($text) {
793         #
794         # Form HTML ordered (numbered) and unordered (bulleted) lists.
795         #
796                 $less_than_tab = $this->tab_width - 1;
797
798                 # Re-usable patterns to match list item bullets and number markers:
799                 $marker_ul_re  = '[*+-]';
800                 $marker_ol_re  = '\d+[\.]';
801
802                 $markers_relist = array(
803                         $marker_ul_re => $marker_ol_re,
804                         $marker_ol_re => $marker_ul_re,
805                         );
806
807                 foreach ($markers_relist as $marker_re => $other_marker_re) {
808                         # Re-usable pattern to match any entirel ul or ol list:
809                         $whole_list_re = '
810                                 (                                                               # $1 = whole list
811                                   (                                                             # $2
812                                         ([ ]{0,'.$less_than_tab.'})     # $3 = number of spaces
813                                         ('.$marker_re.')                        # $4 = first list item marker
814                                         [ ]+
815                                   )
816                                   (?s:.+?)
817                                   (                                                             # $5
818                                           \z
819                                         |
820                                           \n{2,}
821                                           (?=\S)
822                                           (?!                                           # Negative lookahead for another list item marker
823                                                 [ ]*
824                                                 '.$marker_re.'[ ]+
825                                           )
826                                         |
827                                           (?=                                           # Lookahead for another kind of list
828                                             \n
829                                                 \3                                              # Must have the same indentation
830                                                 '.$other_marker_re.'[ ]+
831                                           )
832                                   )
833                                 )
834                         '; // mx
835                         
836                         # We use a different prefix before nested lists than top-level lists.
837                         # See extended comment in _ProcessListItems().
838                 
839                         if ($this->list_level) {
840                                 $text = preg_replace_callback('{
841                                                 ^
842                                                 '.$whole_list_re.'
843                                         }mx',
844                                         array($this, '_doLists_callback'), $text);
845                         }
846                         else {
847                                 $text = preg_replace_callback('{
848                                                 (?:(?<=\n)\n|\A\n?) # Must eat the newline
849                                                 '.$whole_list_re.'
850                                         }mx',
851                                         array($this, '_doLists_callback'), $text);
852                         }
853                 }
854
855                 return $text;
856         }
857         protected function _doLists_callback($matches) {
858                 # Re-usable patterns to match list item bullets and number markers:
859                 $marker_ul_re  = '[*+-]';
860                 $marker_ol_re  = '\d+[\.]';
861                 $marker_any_re = "(?:$marker_ul_re|$marker_ol_re)";
862                 
863                 $list = $matches[1];
864                 $list_type = preg_match("/$marker_ul_re/", $matches[4]) ? "ul" : "ol";
865                 
866                 $marker_any_re = ( $list_type == "ul" ? $marker_ul_re : $marker_ol_re );
867                 
868                 $list .= "\n";
869                 $result = $this->processListItems($list, $marker_any_re);
870                 
871                 $result = $this->hashBlock("<$list_type>\n" . $result . "</$list_type>");
872                 return "\n". $result ."\n\n";
873         }
874
        # Current list nesting depth: processListItems() increments it on entry
        # and decrements it before returning; doLists() picks a different
        # pattern prefix when it is non-zero.
        protected $list_level = 0;
876
877         protected function processListItems($list_str, $marker_any_re) {
878         #
879         #       Process the contents of a single ordered or unordered list, splitting it
880         #       into individual list items.
881         #
882                 # The $this->list_level global keeps track of when we're inside a list.
883                 # Each time we enter a list, we increment it; when we leave a list,
884                 # we decrement. If it's zero, we're not in a list anymore.
885                 #
886                 # We do this because when we're not inside a list, we want to treat
887                 # something like this:
888                 #
889                 #               I recommend upgrading to version
890                 #               8. Oops, now this line is treated
891                 #               as a sub-list.
892                 #
893                 # As a single paragraph, despite the fact that the second line starts
894                 # with a digit-period-space sequence.
895                 #
896                 # Whereas when we're inside a list (or sub-list), that line will be
897                 # treated as the start of a sub-list. What a kludge, huh? This is
898                 # an aspect of Markdown's syntax that's hard to parse perfectly
899                 # without resorting to mind-reading. Perhaps the solution is to
900                 # change the syntax rules such that sub-lists must start with a
901                 # starting cardinal number; e.g. "1." or "a.".
902                 
903                 $this->list_level++;
904
905                 # trim trailing blank lines:
906                 $list_str = preg_replace("/\n{2,}\\z/", "\n", $list_str);
907
908                 $list_str = preg_replace_callback('{
909                         (\n)?                                                   # leading line = $1
910                         (^[ ]*)                                                 # leading whitespace = $2
911                         ('.$marker_any_re.'                             # list marker and space = $3
912                                 (?:[ ]+|(?=\n)) # space only required if item is not empty
913                         )
914                         ((?s:.*?))                                              # list item text   = $4
915                         (?:(\n+(?=\n))|\n)                              # tailing blank line = $5
916                         (?= \n* (\z | \2 ('.$marker_any_re.') (?:[ ]+|(?=\n))))
917                         }xm',
918                         array($this, '_processListItems_callback'), $list_str);
919
920                 $this->list_level--;
921                 return $list_str;
922         }
923         protected function _processListItems_callback($matches) {
924                 $item = $matches[4];
925                 $leading_line =& $matches[1];
926                 $leading_space =& $matches[2];
927                 $marker_space = $matches[3];
928                 $tailing_blank_line =& $matches[5];
929
930                 if ($leading_line || $tailing_blank_line || 
931                         preg_match('/\n{2,}/', $item))
932                 {
933                         # Replace marker with the appropriate whitespace indentation
934                         $item = $leading_space . str_repeat(' ', strlen($marker_space)) . $item;
935                         $item = $this->runBlockGamut($this->outdent($item)."\n");
936                 }
937                 else {
938                         # Recursion for sub-lists:
939                         $item = $this->doLists($this->outdent($item));
940                         $item = preg_replace('/\n+$/', '', $item);
941                         $item = $this->runSpanGamut($item);
942                 }
943
944                 return "<li>" . $item . "</li>\n";
945         }
946
947
948         protected function doCodeBlocks($text) {
949         #
950         #       Process Markdown `<pre><code>` blocks.
951         #
952                 $text = preg_replace_callback('{
953                                 (?:\n\n|\A\n?)
954                                 (                   # $1 = the code block -- one or more lines, starting with a space/tab
955                                   (?>
956                                         [ ]{'.$this->tab_width.'}  # Lines must start with a tab or a tab-width of spaces
957                                         .*\n+
958                                   )+
959                                 )
960                                 ((?=^[ ]{0,'.$this->tab_width.'}\S)|\Z) # Lookahead for non-space at line-start, or end of doc
961                         }xm',
962                         array($this, '_doCodeBlocks_callback'), $text);
963
964                 return $text;
965         }
966         protected function _doCodeBlocks_callback($matches) {
967                 $codeblock = $matches[1];
968
969                 $codeblock = $this->outdent($codeblock);
970                 $codeblock = htmlspecialchars($codeblock, ENT_NOQUOTES);
971
972                 # trim leading newlines and trailing newlines
973                 $codeblock = preg_replace('/\A\n+|\n+\z/', '', $codeblock);
974
975                 $codeblock = "<pre><code>$codeblock\n</code></pre>";
976                 return "\n\n".$this->hashBlock($codeblock)."\n\n";
977         }
978
979
980         protected function makeCodeSpan($code) {
981         #
982         # Create a code span markup for $code. Called from handleSpanToken.
983         #
984                 $code = htmlspecialchars(trim($code), ENT_NOQUOTES);
985                 return $this->hashPart("<code>$code</code>");
986         }
987
988
        # Regular expressions for emphasis tokens. In each table the key is
        # the marker currently open ('' when none is open) and the value is
        # the expression used for the next token in that state; the tables are
        # combined by prepareItalicsAndBold().

        # Single-character emphasis markers (* or _).
        protected $em_relist = array(
                ''  => '(?:(?<!\*)\*(?!\*)|(?<!_)_(?!_))(?![\.,:;]?\s)',
                '*' => '(?<![\s*])\*(?!\*)',
                '_' => '(?<![\s_])_(?!_)',
                );
        # Double-character strong markers (** or __).
        protected $strong_relist = array(
                ''   => '(?:(?<!\*)\*\*(?!\*)|(?<!_)__(?!_))(?![\.,:;]?\s)',
                '**' => '(?<![\s*])\*\*(?!\*)',
                '__' => '(?<![\s_])__(?!_)',
                );
        # Triple-character markers (*** or ___), combining emphasis and strong.
        protected $em_strong_relist = array(
                ''    => '(?:(?<!\*)\*\*\*(?!\*)|(?<!_)___(?!_))(?![\.,:;]?\s)',
                '***' => '(?<![\s*])\*\*\*(?!\*)',
                '___' => '(?<![\s_])___(?!_)',
                );
        # Filled by prepareItalicsAndBold(): complete token expressions, keyed
        # by the concatenation of the open em marker and open strong marker.
        protected $em_strong_prepared_relist;
1005         
1006         protected function prepareItalicsAndBold() {
1007         #
1008         # Prepare regular expressions for searching emphasis tokens in any
1009         # context.
1010         #
1011                 foreach ($this->em_relist as $em => $em_re) {
1012                         foreach ($this->strong_relist as $strong => $strong_re) {
1013                                 # Construct list of allowed token expressions.
1014                                 $token_relist = array();
1015                                 if (isset($this->em_strong_relist["$em$strong"])) {
1016                                         $token_relist[] = $this->em_strong_relist["$em$strong"];
1017                                 }
1018                                 $token_relist[] = $em_re;
1019                                 $token_relist[] = $strong_re;
1020                                 
1021                                 # Construct master expression from list.
1022                                 $token_re = '{('. implode('|', $token_relist) .')}';
1023                                 $this->em_strong_prepared_relist["$em$strong"] = $token_re;
1024                         }
1025                 }
1026         }
1027         
1028         protected function doItalicsAndBold($text) {
1029                 $token_stack = array('');
1030                 $text_stack = array('');
1031                 $em = '';
1032                 $strong = '';
1033                 $tree_char_em = false;
1034                 
1035                 while (1) {
1036                         #
1037                         # Get prepared regular expression for seraching emphasis tokens
1038                         # in current context.
1039                         #
1040                         $token_re = $this->em_strong_prepared_relist["$em$strong"];
1041                         
1042                         #
1043                         # Each loop iteration search for the next emphasis token. 
1044                         # Each token is then passed to handleSpanToken.
1045                         #
1046                         $parts = preg_split($token_re, $text, 2, PREG_SPLIT_DELIM_CAPTURE);
1047                         $text_stack[0] .= $parts[0];
1048                         $token =& $parts[1];
1049                         $text =& $parts[2];
1050                         
1051                         if (empty($token)) {
1052                                 # Reached end of text span: empty stack without emitting.
1053                                 # any more emphasis.
1054                                 while ($token_stack[0]) {
1055                                         $text_stack[1] .= array_shift($token_stack);
1056                                         $text_stack[0] .= array_shift($text_stack);
1057                                 }
1058                                 break;
1059                         }
1060                         
1061                         $token_len = strlen($token);
1062                         if ($tree_char_em) {
1063                                 # Reached closing marker while inside a three-char emphasis.
1064                                 if ($token_len == 3) {
1065                                         # Three-char closing marker, close em and strong.
1066                                         array_shift($token_stack);
1067                                         $span = array_shift($text_stack);
1068                                         $span = $this->runSpanGamut($span);
1069                                         $span = "<strong><em>$span</em></strong>";
1070                                         $text_stack[0] .= $this->hashPart($span);
1071                                         $em = '';
1072                                         $strong = '';
1073                                 } else {
1074                                         # Other closing marker: close one em or strong and
1075                                         # change current token state to match the other
1076                                         $token_stack[0] = str_repeat($token{0}, 3-$token_len);
1077                                         $tag = $token_len == 2 ? "strong" : "em";
1078                                         $span = $text_stack[0];
1079                                         $span = $this->runSpanGamut($span);
1080                                         $span = "<$tag>$span</$tag>";
1081                                         $text_stack[0] = $this->hashPart($span);
1082                                         $$tag = ''; # $$tag stands for $em or $strong
1083                                 }
1084                                 $tree_char_em = false;
1085                         } else if ($token_len == 3) {
1086                                 if ($em) {
1087                                         # Reached closing marker for both em and strong.
1088                                         # Closing strong marker:
1089                                         for ($i = 0; $i < 2; ++$i) {
1090                                                 $shifted_token = array_shift($token_stack);
1091                                                 $tag = strlen($shifted_token) == 2 ? "strong" : "em";
1092                                                 $span = array_shift($text_stack);
1093                                                 $span = $this->runSpanGamut($span);
1094                                                 $span = "<$tag>$span</$tag>";
1095                                                 $text_stack[0] .= $this->hashPart($span);
1096                                                 $$tag = ''; # $$tag stands for $em or $strong
1097                                         }
1098                                 } else {
1099                                         # Reached opening three-char emphasis marker. Push on token 
1100                                         # stack; will be handled by the special condition above.
1101                                         $em = $token{0};
1102                                         $strong = "$em$em";
1103                                         array_unshift($token_stack, $token);
1104                                         array_unshift($text_stack, '');
1105                                         $tree_char_em = true;
1106                                 }
1107                         } else if ($token_len == 2) {
1108                                 if ($strong) {
1109                                         # Unwind any dangling emphasis marker:
1110                                         if (strlen($token_stack[0]) == 1) {
1111                                                 $text_stack[1] .= array_shift($token_stack);
1112                                                 $text_stack[0] .= array_shift($text_stack);
1113                                         }
1114                                         # Closing strong marker:
1115                                         array_shift($token_stack);
1116                                         $span = array_shift($text_stack);
1117                                         $span = $this->runSpanGamut($span);
1118                                         $span = "<strong>$span</strong>";
1119                                         $text_stack[0] .= $this->hashPart($span);
1120                                         $strong = '';
1121                                 } else {
1122                                         array_unshift($token_stack, $token);
1123                                         array_unshift($text_stack, '');
1124                                         $strong = $token;
1125                                 }
1126                         } else {
1127                                 # Here $token_len == 1
1128                                 if ($em) {
1129                                         if (strlen($token_stack[0]) == 1) {
1130                                                 # Closing emphasis marker:
1131                                                 array_shift($token_stack);
1132                                                 $span = array_shift($text_stack);
1133                                                 $span = $this->runSpanGamut($span);
1134                                                 $span = "<em>$span</em>";
1135                                                 $text_stack[0] .= $this->hashPart($span);
1136                                                 $em = '';
1137                                         } else {
1138                                                 $text_stack[0] .= $token;
1139                                         }
1140                                 } else {
1141                                         array_unshift($token_stack, $token);
1142                                         array_unshift($text_stack, '');
1143                                         $em = $token;
1144                                 }
1145                         }
1146                 }
1147                 return $text_stack[0];
1148         }
1149
1150
1151         protected function doBlockQuotes($text) {
1152                 $text = preg_replace_callback('/
1153                           (                                                             # Wrap whole match in $1
1154                                 (?>
1155                                   ^[ ]*>[ ]?                    # ">" at the start of a line
1156                                         .+\n                                    # rest of the first line
1157                                   (.+\n)*                                       # subsequent consecutive lines
1158                                   \n*                                           # blanks
1159                                 )+
1160                           )
1161                         /xm',
1162                         array($this, '_doBlockQuotes_callback'), $text);
1163
1164                 return $text;
1165         }
1166         protected function _doBlockQuotes_callback($matches) {
1167                 $bq = $matches[1];
1168                 # trim one level of quoting - trim whitespace-only lines
1169                 $bq = preg_replace('/^[ ]*>[ ]?|^[ ]+$/m', '', $bq);
1170                 $bq = $this->runBlockGamut($bq);                # recurse
1171
1172                 $bq = preg_replace('/^/m', "  ", $bq);
1173                 # These leading spaces cause problem with <pre> content, 
1174                 # so we need to fix that:
1175                 $bq = preg_replace_callback('{(\s*<pre>.+?</pre>)}sx', 
1176                         array($this, '_doBlockQuotes_callback2'), $bq);
1177
1178                 return "\n". $this->hashBlock("<blockquote>\n$bq\n</blockquote>")."\n\n";
1179         }
1180         protected function _doBlockQuotes_callback2($matches) {
1181                 $pre = $matches[1];
1182                 $pre = preg_replace('/^  /m', '', $pre);
1183                 return $pre;
1184         }
1185
1186
1187         protected function formParagraphs($text) {
1188         #
1189         #       Params:
1190         #               $text - string to process with html <p> tags
1191         #
1192                 # Strip leading and trailing lines:
1193                 $text = preg_replace('/\A\n+|\n+\z/', '', $text);
1194
1195                 $grafs = preg_split('/\n{2,}/', $text, -1, PREG_SPLIT_NO_EMPTY);
1196
1197                 #
1198                 # Wrap <p> tags and unhashify HTML blocks
1199                 #
1200                 foreach ($grafs as $key => $value) {
1201                         if (!preg_match('/^B\x1A[0-9]+B$/', $value)) {
1202                                 # Is a paragraph.
1203                                 $value = $this->runSpanGamut($value);
1204                                 $value = preg_replace('/^([ ]*)/', "<p>", $value);
1205                                 $value .= "</p>";
1206                                 $grafs[$key] = $this->unhash($value);
1207                         }
1208                         else {
1209                                 # Is a block.
1210                                 # Modify elements of @grafs in-place...
1211                                 $graf = $value;
1212                                 $block = $this->html_hashes[$graf];
1213                                 $graf = $block;
1214 //                              if (preg_match('{
1215 //                                      \A
1216 //                                      (                                                       # $1 = <div> tag
1217 //                                        <div  \s+
1218 //                                        [^>]*
1219 //                                        \b
1220 //                                        markdown\s*=\s*  ([\'"])      #       $2 = attr quote char
1221 //                                        1
1222 //                                        \2
1223 //                                        [^>]*
1224 //                                        >
1225 //                                      )
1226 //                                      (                                                       # $3 = contents
1227 //                                      .*
1228 //                                      )
1229 //                                      (</div>)                                        # $4 = closing tag
1230 //                                      \z
1231 //                                      }xs', $block, $matches))
1232 //                              {
1233 //                                      list(, $div_open, , $div_content, $div_close) = $matches;
1234 //
1235 //                                      # We can't call Markdown(), because that resets the hash;
1236 //                                      # that initialization code should be pulled into its own sub, though.
1237 //                                      $div_content = $this->hashHTMLBlocks($div_content);
1238 //                                      
1239 //                                      # Run document gamut methods on the content.
1240 //                                      foreach ($this->document_gamut as $method => $priority) {
1241 //                                              $div_content = $this->$method($div_content);
1242 //                                      }
1243 //
1244 //                                      $div_open = preg_replace(
1245 //                                              '{\smarkdown\s*=\s*([\'"]).+?\1}', '', $div_open);
1246 //
1247 //                                      $graf = $div_open . "\n" . $div_content . "\n" . $div_close;
1248 //                              }
1249                                 $grafs[$key] = $graf;
1250                         }
1251                 }
1252
1253                 return implode("\n\n", $grafs);
1254         }
1255
1256
protected function encodeAttribute($text) {
#
# Escape $text so it can be placed inside a double-quoted HTML attribute.
# Single-quoted attributes are *not* covered: only the double quote
# character is turned into an entity here.
#
    # Ampersands and `<` are handled first, then the quote character.
    $escaped = $this->encodeAmpsAndAngles($text);
    return str_replace('"', '&quot;', $escaped);
}
1266
1267
protected function encodeURLAttribute($url, &$text = null) {
#
# Prepare a URL for a double-quoted HTML attribute (single-quoted
# attributes are not supported). The URL filter is applied first when one
# is set. $text receives the textual form of the URL, with a leading
# mailto: or tel: removed.
#
    if ($this->url_filter_func) {
        $url = call_user_func($this->url_filter_func, $url);
    }

    # E-mail links are entity-obfuscated; the helper fills $text with the
    # obfuscated output minus its first 7 characters ("mailto:").
    if (preg_match('{^mailto:}i', $url)) {
        return $this->encodeEntityObfuscatedAttribute($url, $text, 7);
    }

    # Decide on the scheme before encoding, then strip the 4-character
    # "tel:" prefix from the visible text when it applies.
    $is_tel = preg_match('{^tel:}i', $url);
    $url = $this->encodeAttribute($url);
    $text = $is_tel ? substr($url, 4) : $url;

    return $url;
}
1293         
1294         
protected function encodeAmpsAndAngles($text) {
#
# Encode ampersands and `<` characters that need it. Ampersands that
# already start something looking like a character entity are kept as
# they are, unless the no-entities mode is on, in which case every
# ampersand is encoded.
#
    if (!$this->no_entities) {
        # Ampersand-encoding based entirely on Nat Irons's Amputator
        # MT plugin: <http://bumppo.net/projects/amputator/>
        $entity_aware_re = '/&(?!#?[xX]?(?:[0-9a-fA-F]+|\w+);)/';
        $text = preg_replace($entity_aware_re, '&amp;', $text);
    } else {
        $text = str_replace('&', '&amp;', $text);
    }

    # Whatever `<` is left gets encoded as well.
    return str_replace('<', '&lt;', $text);
}
1314
1315
protected function doAutoLinks($text) {
#
# Replace <scheme:...> URLs and <address@domain.foo> e-mail addresses
# written between angle brackets, delegating each match to its callback.
#
    # URLs: http, https, ftp, dict and tel schemes.
    $url_re = '{<((https?|ftp|dict|tel):[^\'">\s]+)>}i';

    # Email addresses: <address@domain.foo>
    $email_re = '{
        <
        (?:mailto:)?
        (
            (?:
                [-!#$%&\'*+/=?^_`.{|}~\w\x80-\xFF]+
            |
                ".*?"
            )
            \@
            (?:
                [-a-z0-9\x80-\xFF]+(\.[-a-z0-9\x80-\xFF]+)*\.[a-z]+
            |
                \[[\d.a-fA-F:]+\]       # IPv4 & IPv6
            )
        )
        >
        }xi';

    $text = preg_replace_callback($url_re,
        array($this, '_doAutoLinks_url_callback'), $text);

    return preg_replace_callback($email_re,
        array($this, '_doAutoLinks_email_callback'), $text);
}
protected function _doAutoLinks_url_callback($matches) {
#
# Callback for URL autolinks; $matches[1] is the URL found between the
# angle brackets. Returns the hashed anchor element.
#
    $label = null;
    $href = $this->encodeURLAttribute($matches[1], $label);
    return $this->hashPart('<a href="' . $href . '">' . $label . '</a>');
}
protected function _doAutoLinks_email_callback($matches) {
#
# Callback for e-mail autolinks; $matches[1] is the address without any
# mailto: prefix. Returns the hashed anchor element.
#
    $label = null;
    $href = $this->encodeURLAttribute('mailto:' . $matches[1], $label);
    return $this->hashPart('<a href="' . $href . '">' . $label . '</a>');
}
1354
1355
protected function encodeEntityObfuscatedAttribute($text, &$tail = null, $head_length = 0) {
#
#   Input: some text to obfuscate, e.g. "mailto:foo@example.com"
#
#   Output: the same text but with most characters encoded as either a
#       decimal or hex entity, in the hopes of foiling most address
#       harvesting spam bots. E.g.:
#
#        &#109;&#x61;&#105;&#x6c;&#116;&#x6f;&#58;&#x66;o&#111;
#        &#x40;&#101;&#x78;&#97;&#x6d;&#112;&#x6c;&#101;&#46;&#x63;&#111;
#        &#x6d;
#
#   Note: the additional output $tail is assigned the same value as the
#   output, minus the number of characters specified by $head_length.
#   An empty $text yields an empty string for both.
#
#   Based on a filter by Matthew Wickline, posted to BBEdit-Talk.
#   With some optimizations by Milian Wolff. Forced encoding of HTML
#   attribute special characters by Allan Odgaard.
#
    if ($text == "") return $tail = "";

    # Work on individual bytes. str_split is used rather than a regex split
    # on `$`, because `$` also matches just before a trailing newline: the
    # last chunk would then hold two bytes and the newline would be lost
    # whenever that chunk is replaced by an entity.
    $chars = str_split($text);
    $seed = (int)abs(crc32($text) / strlen($text)); # Deterministic seed.

    foreach ($chars as $key => $char) {
        $ord = ord($char);
        # Ignore non-ascii chars.
        if ($ord < 128) {
            $r = ($seed * (1 + $key)) % 100; # Pseudo-random function.
            # roughly 10% raw, 45% hex, 45% dec
            # '@' *must* be encoded. I insist.
            # '"', '&' and '>' have to be encoded inside the attribute
            if ($r > 90 && strpos('@"&>', $char) === false) /* do nothing */;
            else if ($r < 45) $chars[$key] = '&#x'.dechex($ord).';';
            else              $chars[$key] = '&#'.$ord.';';
        }
    }

    $text = implode('', $chars);
    # The tail is cut on array elements, i.e. on original characters, so
    # an entity is never split in the middle.
    $tail = $head_length ? implode('', array_slice($chars, $head_length)) : $text;

    return $text;
}
1399
1400
protected function parseSpan($str) {
#
# Split $str into tokens: plain text is copied through unchanged, while
# escaped characters, code span markers and (unless no_markup is set)
# embedded HTML are handed to handleSpanToken.
#
    # Alternatives matching inline HTML; left out in no-markup mode.
    $markup_re = $this->no_markup ? '' : '
            |
                <!--    .*?     -->     # comment
            |
                <\?.*?\?> | <%.*?%>     # processing instruction
            |
                <[!$]?[-a-zA-Z0-9:_]+   # regular tags
                (?>
                    \s
                    (?>[^"\'>]+|"[^"]*"|\'[^\']*\')*
                )?
                >
            |
                <[-a-zA-Z0-9:_]+\s*/> # xml-style empty tag
            |
                </[-a-zA-Z0-9:_]+\s*> # closing tag
        ';

    $span_re = '{
            (
                \\\\'.$this->escape_chars_re.'
            |
                (?<![`\\\\])
                `+                      # code span marker
        '.$markup_re.'
            )
            }xs';

    $output = '';
    do {
        # Each pass looks for the next tag, opening code span marker or
        # escaped character: [0] is the text before it, [1] the token
        # itself and [2] everything after it.
        $parts = preg_split($span_re, $str, 2, PREG_SPLIT_DELIM_CAPTURE);

        # Text preceding the token goes straight to the output.
        $output .= $parts[0];

        $has_token = isset($parts[1]);
        if ($has_token) {
            # handleSpanToken receives the remaining text by reference and
            # may consume part of it (code spans do).
            $output .= $this->handleSpanToken($parts[1], $parts[2]);
            $str = $parts[2];
        }
    } while ($has_token);

    return $output;
}
1459         
1460         
protected function handleSpanToken($token, &$str) {
#
# Handle $token provided by parseSpan by determining its nature and
# returning the corresponding value that should replace it.
#
# $str is the text following the token. It is passed by reference: when
# a code span is recognized, the part consumed by the span is removed
# from it.
#
    # Square-bracket offsets: the curly-brace form is no longer accepted
    # by PHP 8.
    switch ($token[0]) {
        case "\\":
            # Backslash escape: emit the escaped character as a decimal
            # entity.
            return $this->hashPart("&#". ord($token[1]). ";");
        case "`":
            # Search for end marker in remaining text: the same run of
            # backticks, not followed by another backtick.
            if (preg_match('/^(.*?[^`])'.preg_quote($token).'(?!`)(.*)$/sm',
                $str, $matches))
            {
                $str = $matches[2];
                $codespan = $this->makeCodeSpan($matches[1]);
                return $this->hashPart($codespan);
            }
            return $token; // return as text since no ending marker found.
        default:
            # Anything else matched by parseSpan is hashed untouched.
            return $this->hashPart($token);
    }
}
1483
1484
protected function outdent($text) {
#
# Strip one indentation level from the start of every line: either one
# tab or up to `tab_width` spaces.
#
    $indent_re = '/^(\t|[ ]{1,'.$this->tab_width.'})/m';
    return preg_replace($indent_re, '', $text);
}
1491
1492
# String length function for detab. `_initDetab` will create a function to
# handle UTF-8 if the default function does not exist.
protected $utf8_strlen = 'mb_strlen';
1496         
protected function detab($text) {
#
# Expand tabs into the appropriate number of spaces.
#
    # Only lines containing at least one tab are rewritten; each of them
    # is rebuilt by _detab_callback.
    return preg_replace_callback('/^.*\t.*$/m',
        array($this, '_detab_callback'), $text);
}
protected function _detab_callback($matches) {
#
# Rebuild one line ($matches[0]): the pieces between tab characters are
# joined again with enough spaces to reach the next tab stop.
#
    $length_of = $this->utf8_strlen; # strlen function for UTF-8.
    $width = $this->tab_width;

    # The first piece starts the line; every following piece is preceded
    # by padding up to the next multiple of the tab width.
    $pieces = explode("\t", $matches[0]);
    $rebuilt = array_shift($pieces);
    foreach ($pieces as $piece) {
        $padding = $width - $length_of($rebuilt, 'UTF-8') % $width;
        $rebuilt .= str_repeat(" ", $padding) . $piece;
    }
    return $rebuilt;
}
protected function _initDetab() {
#
# Check that the `utf8_strlen` property (initially `mb_strlen`) holds
# something callable. If it does not, replace it with a closure that
# loosely counts the number of UTF-8 characters with a regular
# expression.
#
# A closure is used because create_function() no longer exists in PHP 8.
# is_callable() accepts both function names and closures, so calling
# this method again after the fallback was installed is harmless.
#
    if (is_callable($this->utf8_strlen)) return;

    # Callers may pass an encoding as second argument; it is ignored here.
    $this->utf8_strlen = function ($text) {
        # One match per single byte below 0xC0, or per lead byte followed
        # by its continuation bytes; the match count is the length.
        return preg_match_all(
            '/[\x00-\xBF]|[\xC0-\xFF][\x80-\xBF]*/',
            $text, $m);
    };
}
1539
1540
protected function unhash($text) {
#
# Put back the content that was replaced by hash tokens. A token is one
# character, \x1A, a run of digits, then the same character again; each
# token found is resolved by _unhash_callback.
#
    $token_re = '/(.)\x1A[0-9]+\1/';
    return preg_replace_callback($token_re,
        array($this, '_unhash_callback'), $text);
}
protected function _unhash_callback($matches) {
#
# Look up the content stored in `html_hashes` under the matched token.
#
    $token = $matches[0];
    return $this->html_hashes[$token];
}
1551
1552 }
1553
1554
#
# Temporary Markdown Extra Parser Implementation Class
#
# NOTE: DON'T USE THIS CLASS
# Currently the implementation of Extra resides here in this temporary class.
# This makes it easier to propagate the changes between the three different
# packaging styles of PHP Markdown. When this issue is resolved, this
# MarkdownExtra_TmpImpl class here will disappear and \Michelf\MarkdownExtra
# will contain the code. So please use \Michelf\MarkdownExtra and ignore this
# one.
#
1566
1567 abstract class _MarkdownExtra_TmpImpl extends \Michelf\Markdown {
1568
### Configuration Variables ###

# Prefix for footnote ids.
public $fn_id_prefix = "";

# Optional title attribute for footnote links and backlinks.
public $fn_link_title = "";
public $fn_backlink_title = "";

# Optional class attribute for footnote links and backlinks.
public $fn_link_class = "footnote-ref";
public $fn_backlink_class = "footnote-backref";

# Class name for table cell alignment (%% is replaced by left, center
# or right).
# For instance: 'go-%%' becomes 'go-left' or 'go-right' or 'go-center'
# If empty, the align attribute is used instead of a class name.
public $table_align_class_tmpl = '';

# Optional class prefix for fenced code blocks.
public $code_class_prefix = "";
# Class attribute for code blocks goes on the `code` tag;
# setting this to true will put attributes on the `pre` tag instead.
public $code_attr_on_pre = false;

# Predefined abbreviations, as word => description pairs (setup() loads
# them before each transformation).
public $predef_abbr = array();
1595
1596
1597         ### Parser Implementation ###
1598
public function __construct() {
#
# Constructor: registers the Extra-specific escape characters and
# transformations, then lets the parent constructor finish the setup.
#
    # Extra escapable characters must be added before the parent
    # constructor initializes the table.
    $this->escape_chars .= ':|';

    # Extra document, block and span transformations, keyed by method
    # name with their priority as value. The parent constructor will do
    # the sorting. `+=` keeps entries that are already registered.
    $extra_document_gamut = array(
        "doFencedCodeBlocks" => 5,
        "stripFootnotes"     => 15,
        "stripAbbreviations" => 25,
        "appendFootnotes"    => 50,
        );
    $extra_block_gamut = array(
        "doFencedCodeBlocks" => 5,
        "doTables"           => 15,
        "doDefLists"         => 45,
        );
    $extra_span_gamut = array(
        "doFootnotes"        => 5,
        "doAbbreviations"    => 70,
        );

    $this->document_gamut += $extra_document_gamut;
    $this->block_gamut    += $extra_block_gamut;
    $this->span_gamut     += $extra_span_gamut;

    parent::__construct();
}
1627         
1628         
# Extra variables used during extra transformations.
# setup() resets all of them at the start of a transformation.
protected $footnotes = array();
protected $footnotes_ordered = array();
protected $footnotes_ref_count = array();
protected $footnotes_numbers = array();
# NOTE: the name is misspelled ("desciptions"); setup() and teardown()
# refer to the property under this exact name.
protected $abbr_desciptions = array();
protected $abbr_word_re = '';

# Give the current footnote number.
protected $footnote_counter = 1;
1639         
1640         
protected function setup() {
#
# Setting up Extra-specific variables: footnote and abbreviation state
# is reset, then the predefined abbreviations are loaded.
#
    parent::setup();

    $this->footnotes = array();
    $this->footnotes_ordered = array();
    $this->footnotes_ref_count = array();
    $this->footnotes_numbers = array();
    $this->abbr_desciptions = array();
    $this->abbr_word_re = '';
    $this->footnote_counter = 1;

    foreach ($this->predef_abbr as $abbr_word => $abbr_desc) {
        # Alternatives are separated by `|`. The comparison is strict on
        # purpose: a plain truthiness test treats the pattern "0" as
        # empty and would glue the next word to it without separator.
        if ($this->abbr_word_re !== '')
            $this->abbr_word_re .= '|';
        $this->abbr_word_re .= preg_quote($abbr_word);
        $this->abbr_desciptions[$abbr_word] = trim($abbr_desc);
    }
}
1662         
protected function teardown() {
#
# Release the Extra-specific state, then let the parent class clear its
# own.
#
    # Abbreviation state.
    $this->abbr_word_re = '';
    $this->abbr_desciptions = array();

    # Footnote state.
    $this->footnotes_numbers = array();
    $this->footnotes_ref_count = array();
    $this->footnotes_ordered = array();
    $this->footnotes = array();

    parent::teardown();
}
1676         
1677         
1678         ### Extra Attribute Parser ###
1679
# Expression used to catch an attribute block (braces included); the
# capture group holds the text between the braces, without the trailing
# spaces.
protected $id_class_attr_catch_re = '\{((?:[ ]*[#.a-z][-_:a-zA-Z0-9=]+){1,})[ ]*\}';
# Same expression without capture group, for contexts where no capture
# is desired.
protected $id_class_attr_nocatch_re = '\{(?:[ ]*[#.a-z][-_:a-zA-Z0-9=]+){1,}[ ]*\}';
1684
protected function doExtraAttributes($tag_name, $attr) {
#
# Parse attributes caught by the $this->id_class_attr_catch_re expression
# and return the HTML-formatted list of attributes (each one preceded by
# a space), or an empty string when $attr is empty.
#
# Supported components are .class, #id and key=value. Only the first id
# is taken into account. key=value pairs are dropped when no_markup is
# set. $tag_name is currently not used.
#
    if (empty($attr)) return "";

    # Split on components
    preg_match_all('/[#.a-z][-_:a-zA-Z0-9=]+/', $attr, $matches);
    $elements = $matches[0];

    # handle classes and ids (only first id taken into account)
    $classes = array();
    $attributes = array();
    $id = false;
    foreach ($elements as $element) {
        # Square-bracket offsets: the curly-brace form is no longer
        # accepted by PHP 8.
        if ($element[0] == '.') {
            $classes[] = substr($element, 1);
        } else if ($element[0] == '#') {
            if ($id === false) $id = substr($element, 1);
        } else if (strpos($element, '=') > 0) {
            # Split on the first `=` only; the value may contain more.
            $parts = explode('=', $element, 2);
            $attributes[] = $parts[0] . '="' . $parts[1] . '"';
        }
    }

    # compose attributes as string
    $attr_str = "";
    if (!empty($id)) {
        $attr_str .= ' id="'.$id.'"';
    }
    if (!empty($classes)) {
        $attr_str .= ' class="'.implode(" ", $classes).'"';
    }
    if (!$this->no_markup && !empty($attributes)) {
        $attr_str .= ' '.implode(" ", $attributes);
    }
    return $attr_str;
}
1726
1727
protected function stripLinkDefinitions($text) {
#
# Remove link definitions from $text. Every definition found is passed
# to _stripLinkDefinitions_callback, which records it and replaces it
# with an empty string.
#
    $less_than_tab = $this->tab_width - 1;

    # Link defs are in the form: ^[id]: url "optional title"
    # optionally followed by an extra attribute block.
    $link_def_re = '{
            ^[ ]{0,'.$less_than_tab.'}\[(.+)\][ ]?: # id = $1
              [ ]*
              \n?               # maybe *one* newline
              [ ]*
            (?:
              <(.+?)>           # url = $2
            |
              (\S+?)            # url = $3
            )
              [ ]*
              \n?               # maybe one newline
              [ ]*
            (?:
                (?<=\s)         # lookbehind for whitespace
                ["(]
                (.*?)           # title = $4
                [")]
                [ ]*
            )?  # title is optional
    (?:[ ]* '.$this->id_class_attr_catch_re.' )?  # $5 = extra id & class attr
            (?:\n+|\Z)
        }xm';

    return preg_replace_callback($link_def_re,
        array($this, '_stripLinkDefinitions_callback'), $text);
}
protected function _stripLinkDefinitions_callback($matches) {
#
# Record one link definition (URL, title and extra attributes) under
# its lowercased id, and remove the definition from the text.
#
    $link_id = strtolower($matches[1]);

    # The URL is in $2 when written between angle brackets, in $3
    # otherwise.
    if ($matches[2] == '') {
        $url = $matches[3];
    } else {
        $url = $matches[2];
    }

    # Title ($4) and attribute block ($5) are optional, so these entries
    # may be missing from $matches; null stands in for them.
    $title = isset($matches[4]) ? $matches[4] : null;
    $attr = isset($matches[5]) ? $matches[5] : null;

    $this->urls[$link_id] = $url;
    $this->titles[$link_id] = $title;
    $this->ref_attr[$link_id] = $this->doExtraAttributes("", $attr);

    return ''; # String that will replace the block
}
1771
1772
1773         ### HTML Block Parser ###
1774         
# The lists below are regular expression alternations; they are
# inserted as-is into the patterns built by the HTML block parser.

# Tags that are always treated as block tags:
protected $block_tags_re = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|form|fieldset|iframe|hr|legend|article|section|nav|aside|hgroup|header|footer|figcaption|figure';

# Tags treated as block tags only if the opening tag is alone on its line:
protected $context_block_tags_re = 'script|noscript|style|ins|del|iframe|object|source|track|param|math|svg|canvas|audio|video';

# Tags where markdown="1" defaults to span mode:
protected $contain_span_tags_re = 'p|h[1-6]|li|dd|dt|td|th|legend|address';

# Tags which must not have their contents modified, no matter where
# they appear:
protected $clean_tags_re = 'script|style|math|svg';

# Tags that do not need to be closed.
protected $auto_close_tags_re = 'hr|img|param|source|track';
1790         
1791
protected function hashHTMLBlocks($text) {
#
# Hashify HTML blocks and "clean tags".
#
# Only block-level HTML tags, such as headers, lists and tables, are
# meant to be handled here: "paragraphs" wrapped in non-block-level tags
# (anchors, phrase emphasis, spans) must still get their <p> wrappers.
# The list of tags looked for is hard-coded.
#
# The work is done by _hashHTMLBlocks_inMarkdown, which calls
# _hashHTMLBlocks_inHTML when it encounters block tags. When the
# markdown="1" attribute is found within a tag, _hashHTMLBlocks_inHTML
# calls back _hashHTMLBlocks_inMarkdown to handle the Markdown syntax
# within the tag. These two functions are calling each other. It's
# recursive!
#
    # Nothing to hash when markup is disallowed.
    if ($this->no_markup) {
        return $text;
    }

    # The HTML-in-Markdown hasher returns (processed text, remaining
    # text); only the processed text is of interest here.
    $result = $this->_hashHTMLBlocks_inMarkdown($text);
    return $result[0];
}
1817         protected function _hashHTMLBlocks_inMarkdown($text, $indent = 0,
1818                                                                                 $enclosing_tag_re = '', $span = false)
1819         {
1820         #
1821         # Parse markdown text, calling _HashHTMLBlocks_InHTML for block tags.
1822         #
1823         # *   $indent is the number of space to be ignored when checking for code 
1824         #     blocks. This is important because if we don't take the indent into 
1825         #     account, something like this (which looks right) won't work as expected:
1826         #
1827         #     <div>
1828         #         <div markdown="1">
1829         #         Hello World.  <-- Is this a Markdown code block or text?
1830         #         </div>  <-- Is this a Markdown code block or a real tag?
1831         #     <div>
1832         #
1833         #     If you don't like this, just don't indent the tag on which
1834         #     you apply the markdown="1" attribute.
1835         #
1836         # *   If $enclosing_tag_re is not empty, stops at the first unmatched closing 
1837         #     tag with that name. Nested tags supported.
1838         #
1839         # *   If $span is true, text inside must treated as span. So any double 
1840         #     newline will be replaced by a single newline so that it does not create 
1841         #     paragraphs.
1842         #
1843         # Returns an array of that form: ( processed text , remaining text )
1844         #
1845                 if ($text === '') return array('', '');
1846
1847                 # Regex to check for the presense of newlines around a block tag.
1848                 $newline_before_re = '/(?:^\n?|\n\n)*$/';
1849                 $newline_after_re = 
1850                         '{
1851                                 ^                                               # Start of text following the tag.
1852                                 (?>[ ]*<!--.*?-->)?             # Optional comment.
1853                                 [ ]*\n                                  # Must be followed by newline.
1854                         }xs';
1855                 
1856                 # Regex to match any tag.
1857                 $block_tag_re =
1858                         '{
1859                                 (                                       # $2: Capture whole tag.
1860                                         </?                                     # Any opening or closing tag.
1861                                                 (?>                             # Tag name.
1862                                                         '.$this->block_tags_re.'                        |
1863                                                         '.$this->context_block_tags_re.'        |
1864                                                         '.$this->clean_tags_re.'                |
1865                                                         (?!\s)'.$enclosing_tag_re.'
1866                                                 )
1867                                                 (?:
1868                                                         (?=[\s"\'/a-zA-Z0-9])   # Allowed characters after tag name.
1869                                                         (?>
1870                                                                 ".*?"           |       # Double quotes (can contain `>`)
1871                                                                 \'.*?\'         |       # Single quotes (can contain `>`)
1872                                                                 .+?                             # Anything but quotes and `>`.
1873                                                         )*?
1874                                                 )?
1875                                         >                                       # End of tag.
1876                                 |
1877                                         <!--    .*?     -->     # HTML Comment
1878                                 |
1879                                         <\?.*?\?> | <%.*?%>     # Processing instruction
1880                                 |
1881                                         <!\[CDATA\[.*?\]\]>     # CData Block
1882                                 '. ( !$span ? ' # If not in span.
1883                                 |
1884                                         # Indented code block
1885                                         (?: ^[ ]*\n | ^ | \n[ ]*\n )
1886                                         [ ]{'.($indent+4).'}[^\n]* \n
1887                                         (?>
1888                                                 (?: [ ]{'.($indent+4).'}[^\n]* | [ ]* ) \n
1889                                         )*
1890                                 |
1891                                         # Fenced code block marker
1892                                         (?<= ^ | \n )
1893                                         [ ]{0,'.($indent+3).'}(?:~{3,}|`{3,})
1894                                                                         [ ]*
1895                                         (?:
1896                                         \.?[-_:a-zA-Z0-9]+ # standalone class name
1897                                         |
1898                                                 '.$this->id_class_attr_nocatch_re.' # extra attributes
1899                                         )?
1900                                         [ ]*
1901                                         (?= \n )
1902                                 ' : '' ). ' # End (if not is span).
1903                                 |
1904                                         # Code span marker
1905                                         # Note, this regex needs to go after backtick fenced
1906                                         # code blocks but it should also be kept outside of the
1907                                         # "if not in span" condition adding backticks to the parser
1908                                         `+
1909                                 )
1910                         }xs';
1911
1912                 
1913                 $depth = 0;             # Current depth inside the tag tree.
1914                 $parsed = "";   # Parsed text that will be returned.
1915
1916                 #
1917                 # Loop through every tag until we find the closing tag of the parent
1918                 # or loop until reaching the end of text if no parent tag specified.
1919                 #
1920                 do {
1921                         #
1922                         # Split the text using the first $tag_match pattern found.
1923                         # Text before  pattern will be first in the array, text after
1924                         # pattern will be at the end, and between will be any catches made 
1925                         # by the pattern.
1926                         #
1927                         $parts = preg_split($block_tag_re, $text, 2, 
1928                                                                 PREG_SPLIT_DELIM_CAPTURE);
1929                         
1930                         # If in Markdown span mode, add an empty-string span-level hash
1931                         # after each newline to prevent triggering any block element.
1932                         if ($span) {
1933                                 $void = $this->hashPart("", ':');
1934                                 $newline = "$void\n";
1935                                 $parts[0] = $void . str_replace("\n", $newline, $parts[0]) . $void;
1936                         }
1937                         
1938                         $parsed .= $parts[0]; # Text before current tag.
1939                         
1940                         # If end of $text has been reached. Stop loop.
1941                         if (count($parts) < 3) {
1942                                 $text = "";
1943                                 break;
1944                         }
1945                         
1946                         $tag  = $parts[1]; # Tag to handle.
1947                         $text = $parts[2]; # Remaining text after current tag.
1948                         $tag_re = preg_quote($tag); # For use in a regular expression.
1949                         
1950                         #
1951                         # Check for: Fenced code block marker.
1952                         # Note: need to recheck the whole tag to disambiguate backtick
1953                         # fences from code spans
1954                         #
1955                         if (preg_match('{^\n?([ ]{0,'.($indent+3).'})(~{3,}|`{3,})[ ]*(?:\.?[-_:a-zA-Z0-9]+|'.$this->id_class_attr_nocatch_re.')?[ ]*\n?$}', $tag, $capture)) {
1956                                 # Fenced code block marker: find matching end marker.
1957                                 $fence_indent = strlen($capture[1]); # use captured indent in re
1958                                 $fence_re = $capture[2]; # use captured fence in re
1959                                 if (preg_match('{^(?>.*\n)*?[ ]{'.($fence_indent).'}'.$fence_re.'[ ]*(?:\n|$)}', $text,
1960                                         $matches)) 
1961                                 {
1962                                         # End marker found: pass text unchanged until marker.
1963                                         $parsed .= $tag . $matches[0];
1964                                         $text = substr($text, strlen($matches[0]));
1965                                 }
1966                                 else {
1967                                         # No end marker: just skip it.
1968                                         $parsed .= $tag;
1969                                 }
1970                         }
1971                         #
1972                         # Check for: Indented code block.
1973                         #
1974                         else if ($tag{0} == "\n" || $tag{0} == " ") {
1975                                 # Indented code block: pass it unchanged, will be handled 
1976                                 # later.
1977                                 $parsed .= $tag;
1978                         }
1979                         #
1980                         # Check for: Code span marker
1981                         # Note: need to check this after backtick fenced code blocks
1982                         #
1983                         else if ($tag{0} == "`") {
1984                                 # Find corresponding end marker.
1985                                 $tag_re = preg_quote($tag);
1986                                 if (preg_match('{^(?>.+?|\n(?!\n))*?(?<!`)'.$tag_re.'(?!`)}',
1987                                         $text, $matches))
1988                                 {
1989                                         # End marker found: pass text unchanged until marker.
1990                                         $parsed .= $tag . $matches[0];
1991                                         $text = substr($text, strlen($matches[0]));
1992                                 }
1993                                 else {
1994                                         # Unmatched marker: just skip it.
1995                                         $parsed .= $tag;
1996                                 }
1997                         }
1998                         #
1999                         # Check for: Opening Block level tag or
2000                         #            Opening Context Block tag (like ins and del) 
2001                         #               used as a block tag (tag is alone on its line).
2002                         #
2003                         else if (preg_match('{^<(?:'.$this->block_tags_re.')\b}', $tag) ||
2004                                 (       preg_match('{^<(?:'.$this->context_block_tags_re.')\b}', $tag) &&
2005                                         preg_match($newline_before_re, $parsed) &&
2006                                         preg_match($newline_after_re, $text)    )
2007                                 )
2008                         {
2009                                 # Need to parse tag and following text using the HTML parser.
2010                                 list($block_text, $text) = 
2011                                         $this->_hashHTMLBlocks_inHTML($tag . $text, "hashBlock", true);
2012                                 
2013                                 # Make sure it stays outside of any paragraph by adding newlines.
2014                                 $parsed .= "\n\n$block_text\n\n";
2015                         }
2016                         #
2017                         # Check for: Clean tag (like script, math)
2018                         #            HTML Comments, processing instructions.
2019                         #
2020                         else if (preg_match('{^<(?:'.$this->clean_tags_re.')\b}', $tag) ||
2021                                 $tag{1} == '!' || $tag{1} == '?')
2022                         {
2023                                 # Need to parse tag and following text using the HTML parser.
2024                                 # (don't check for markdown attribute)
2025                                 list($block_text, $text) = 
2026                                         $this->_hashHTMLBlocks_inHTML($tag . $text, "hashClean", false);
2027                                 
2028                                 $parsed .= $block_text;
2029                         }
2030                         #
2031                         # Check for: Tag with same name as enclosing tag.
2032                         #
2033                         else if ($enclosing_tag_re !== '' &&
2034                                 # Same name as enclosing tag.
2035                                 preg_match('{^</?(?:'.$enclosing_tag_re.')\b}', $tag))
2036                         {
2037                                 #
2038                                 # Increase/decrease nested tag count.
2039                                 #
2040                                 if ($tag{1} == '/')                                             $depth--;
2041                                 else if ($tag{strlen($tag)-2} != '/')   $depth++;
2042
2043                                 if ($depth < 0) {
2044                                         #
2045                                         # Going out of parent element. Clean up and break so we
2046                                         # return to the calling function.
2047                                         #
2048                                         $text = $tag . $text;
2049                                         break;
2050                                 }
2051                                 
2052                                 $parsed .= $tag;
2053                         }
2054                         else {
2055                                 $parsed .= $tag;
2056                         }
2057                 } while ($depth >= 0);
2058                 
2059                 return array($parsed, $text);
2060         }
2061         protected function _hashHTMLBlocks_inHTML($text, $hash_method, $md_attr) {
2062         #
2063         # Parse HTML, calling _HashHTMLBlocks_InMarkdown for block tags.
2064         #
2065         # *   Calls $hash_method to convert any blocks.
2066         # *   Stops when the first opening tag closes.
2067         # *   $md_attr indicate if the use of the `markdown="1"` attribute is allowed.
2068         #     (it is not inside clean tags)
2069         #
2070         # Returns an array of that form: ( processed text , remaining text )
2071         #
2072                 if ($text === '') return array('', '');
2073                 
2074                 # Regex to match `markdown` attribute inside of a tag.
2075                 $markdown_attr_re = '
2076                         {
2077                                 \s*                     # Eat whitespace before the `markdown` attribute
2078                                 markdown
2079                                 \s*=\s*
2080                                 (?>
2081                                         (["\'])         # $1: quote delimiter           
2082                                         (.*?)           # $2: attribute value
2083                                         \1                      # matching delimiter    
2084                                 |
2085                                         ([^\s>]*)       # $3: unquoted attribute value
2086                                 )
2087                                 ()                              # $4: make $3 always defined (avoid warnings)
2088                         }xs';
2089                 
2090                 # Regex to match any tag.
2091                 $tag_re = '{
2092                                 (                                       # $2: Capture whole tag.
2093                                         </?                                     # Any opening or closing tag.
2094                                                 [\w:$]+                 # Tag name.
2095                                                 (?:
2096                                                         (?=[\s"\'/a-zA-Z0-9])   # Allowed characters after tag name.
2097                                                         (?>
2098                                                                 ".*?"           |       # Double quotes (can contain `>`)
2099                                                                 \'.*?\'         |       # Single quotes (can contain `>`)
2100                                                                 .+?                             # Anything but quotes and `>`.
2101                                                         )*?
2102                                                 )?
2103                                         >                                       # End of tag.
2104                                 |
2105                                         <!--    .*?     -->     # HTML Comment
2106                                 |
2107                                         <\?.*?\?> | <%.*?%>     # Processing instruction
2108                                 |
2109                                         <!\[CDATA\[.*?\]\]>     # CData Block
2110                                 )
2111                         }xs';
2112                 
2113                 $original_text = $text;         # Save original text in case of faliure.
2114                 
2115                 $depth          = 0;    # Current depth inside the tag tree.
2116                 $block_text     = "";   # Temporary text holder for current text.
2117                 $parsed         = "";   # Parsed text that will be returned.
2118
2119                 #
2120                 # Get the name of the starting tag.
2121                 # (This pattern makes $base_tag_name_re safe without quoting.)
2122                 #
2123                 if (preg_match('/^<([\w:$]*)\b/', $text, $matches))
2124                         $base_tag_name_re = $matches[1];
2125
2126                 #
2127                 # Loop through every tag until we find the corresponding closing tag.
2128                 #
2129                 do {
2130                         #
2131                         # Split the text using the first $tag_match pattern found.
2132                         # Text before  pattern will be first in the array, text after
2133                         # pattern will be at the end, and between will be any catches made 
2134                         # by the pattern.
2135                         #
2136                         $parts = preg_split($tag_re, $text, 2, PREG_SPLIT_DELIM_CAPTURE);
2137                         
2138                         if (count($parts) < 3) {
2139                                 #
2140                                 # End of $text reached with unbalenced tag(s).
2141                                 # In that case, we return original text unchanged and pass the
2142                                 # first character as filtered to prevent an infinite loop in the 
2143                                 # parent function.
2144                                 #
2145                                 return array($original_text{0}, substr($original_text, 1));
2146                         }
2147                         
2148                         $block_text .= $parts[0]; # Text before current tag.
2149                         $tag         = $parts[1]; # Tag to handle.
2150                         $text        = $parts[2]; # Remaining text after current tag.
2151                         
2152                         #
2153                         # Check for: Auto-close tag (like <hr/>)
2154                         #                        Comments and Processing Instructions.
2155                         #
2156                         if (preg_match('{^</?(?:'.$this->auto_close_tags_re.')\b}', $tag) ||
2157                                 $tag{1} == '!' || $tag{1} == '?')
2158                         {
2159                                 # Just add the tag to the block as if it was text.
2160                                 $block_text .= $tag;
2161                         }
2162                         else {
2163                                 #
2164                                 # Increase/decrease nested tag count. Only do so if
2165                                 # the tag's name match base tag's.
2166                                 #
2167                                 if (preg_match('{^</?'.$base_tag_name_re.'\b}', $tag)) {
2168                                         if ($tag{1} == '/')                                             $depth--;
2169                                         else if ($tag{strlen($tag)-2} != '/')   $depth++;
2170                                 }
2171                                 
2172                                 #
2173                                 # Check for `markdown="1"` attribute and handle it.
2174                                 #
2175                                 if ($md_attr && 
2176                                         preg_match($markdown_attr_re, $tag, $attr_m) &&
2177                                         preg_match('/^1|block|span$/', $attr_m[2] . $attr_m[3]))
2178                                 {
2179                                         # Remove `markdown` attribute from opening tag.
2180                                         $tag = preg_replace($markdown_attr_re, '', $tag);
2181                                         
2182                                         # Check if text inside this tag must be parsed in span mode.
2183                                         $this->mode = $attr_m[2] . $attr_m[3];
2184                                         $span_mode = $this->mode == 'span' || $this->mode != 'block' &&
2185                                                 preg_match('{^<(?:'.$this->contain_span_tags_re.')\b}', $tag);
2186                                         
2187                                         # Calculate indent before tag.
2188                                         if (preg_match('/(?:^|\n)( *?)(?! ).*?$/', $block_text, $matches)) {
2189                                                 $strlen = $this->utf8_strlen;
2190                                                 $indent = $strlen($matches[1], 'UTF-8');
2191                                         } else {
2192                                                 $indent = 0;
2193                                         }
2194                                         
2195                                         # End preceding block with this tag.
2196                                         $block_text .= $tag;
2197                                         $parsed .= $this->$hash_method($block_text);
2198                                         
2199                                         # Get enclosing tag name for the ParseMarkdown function.
2200                                         # (This pattern makes $tag_name_re safe without quoting.)
2201                                         preg_match('/^<([\w:$]*)\b/', $tag, $matches);
2202                                         $tag_name_re = $matches[1];
2203                                         
2204                                         # Parse the content using the HTML-in-Markdown parser.
2205                                         list ($block_text, $text)
2206                                                 = $this->_hashHTMLBlocks_inMarkdown($text, $indent, 
2207                                                         $tag_name_re, $span_mode);
2208                                         
2209                                         # Outdent markdown text.
2210                                         if ($indent > 0) {
2211                                                 $block_text = preg_replace("/^[ ]{1,$indent}/m", "", 
2212                                                                                                         $block_text);
2213                                         }
2214                                         
2215                                         # Append tag content to parsed text.
2216                                         if (!$span_mode)        $parsed .= "\n\n$block_text\n\n";
2217                                         else                            $parsed .= "$block_text";
2218                                         
2219                                         # Start over with a new block.
2220                                         $block_text = "";
2221                                 }
2222                                 else $block_text .= $tag;
2223                         }
2224                         
2225                 } while ($depth > 0);
2226                 
2227                 #
2228                 # Hash last block text that wasn't processed inside the loop.
2229                 #
2230                 $parsed .= $this->$hash_method($block_text);
2231                 
2232                 return array($parsed, $text);
2233         }
2234
2235
2236         protected function hashClean($text) {
2237         #
2238         # Called whenever a tag must be hashed when a function inserts a "clean" tag
2239         # in $text, it passes through this function and is automaticaly escaped, 
2240         # blocking invalid nested overlap.
2241         #
2242                 return $this->hashPart($text, 'C');
2243         }
2244
2245
2246         protected function doAnchors($text) {
2247         #
2248         # Turn Markdown link shortcuts into XHTML <a> tags.
2249         #
2250                 if ($this->in_anchor) return $text;
2251                 $this->in_anchor = true;
2252                 
2253                 #
2254                 # First, handle reference-style links: [link text] [id]
2255                 #
2256                 $text = preg_replace_callback('{
2257                         (                                       # wrap whole match in $1
2258                           \[
2259                                 ('.$this->nested_brackets_re.') # link text = $2
2260                           \]
2261
2262                           [ ]?                          # one optional space
2263                           (?:\n[ ]*)?           # one optional newline followed by spaces
2264
2265                           \[
2266                                 (.*?)           # id = $3
2267                           \]
2268                         )
2269                         }xs',
2270                         array($this, '_doAnchors_reference_callback'), $text);
2271
2272                 #
2273                 # Next, inline-style links: [link text](url "optional title")
2274                 #
2275                 $text = preg_replace_callback('{
2276                         (                               # wrap whole match in $1
2277                           \[
2278                                 ('.$this->nested_brackets_re.') # link text = $2
2279                           \]
2280                           \(                    # literal paren
2281                                 [ \n]*
2282                                 (?:
2283                                         <(.+?)> # href = $3
2284                                 |
2285                                         ('.$this->nested_url_parenthesis_re.')  # href = $4
2286                                 )
2287                                 [ \n]*
2288                                 (                       # $5
2289                                   ([\'"])       # quote char = $6
2290                                   (.*?)         # Title = $7
2291                                   \6            # matching quote
2292                                   [ \n]*        # ignore any spaces/tabs between closing quote and )
2293                                 )?                      # title is optional
2294                           \)
2295                           (?:[ ]? '.$this->id_class_attr_catch_re.' )?   # $8 = id/class attributes
2296                         )
2297                         }xs',
2298                         array($this, '_doAnchors_inline_callback'), $text);
2299
2300                 #
2301                 # Last, handle reference-style shortcuts: [link text]
2302                 # These must come last in case you've also got [link text][1]
2303                 # or [link text](/foo)
2304                 #
2305                 $text = preg_replace_callback('{
2306                         (                                       # wrap whole match in $1
2307                           \[
2308                                 ([^\[\]]+)              # link text = $2; can\'t contain [ or ]
2309                           \]
2310                         )
2311                         }xs',
2312                         array($this, '_doAnchors_reference_callback'), $text);
2313
2314                 $this->in_anchor = false;
2315                 return $text;
2316         }
2317         protected function _doAnchors_reference_callback($matches) {
2318                 $whole_match =  $matches[1];
2319                 $link_text   =  $matches[2];
2320                 $link_id     =& $matches[3];
2321
2322                 if ($link_id == "") {
2323                         # for shortcut links like [this][] or [this].
2324                         $link_id = $link_text;
2325                 }
2326                 
2327                 # lower-case and turn embedded newlines into spaces
2328                 $link_id = strtolower($link_id);
2329                 $link_id = preg_replace('{[ ]?\n}', ' ', $link_id);
2330
2331                 if (isset($this->urls[$link_id])) {
2332                         $url = $this->urls[$link_id];
2333                         $url = $this->encodeURLAttribute($url);
2334                         
2335                         $result = "<a href=\"$url\"";
2336                         if ( isset( $this->titles[$link_id] ) ) {
2337                                 $title = $this->titles[$link_id];
2338                                 $title = $this->encodeAttribute($title);
2339                                 $result .=  " title=\"$title\"";
2340                         }
2341                         if (isset($this->ref_attr[$link_id]))
2342                                 $result .= $this->ref_attr[$link_id];
2343                 
2344                         $link_text = $this->runSpanGamut($link_text);
2345                         $result .= ">$link_text</a>";
2346                         $result = $this->hashPart($result);
2347                 }
2348                 else {
2349                         $result = $whole_match;
2350                 }
2351                 return $result;
2352         }
2353         protected function _doAnchors_inline_callback($matches) {
2354                 $whole_match    =  $matches[1];
2355                 $link_text              =  $this->runSpanGamut($matches[2]);
2356                 $url                    =  $matches[3] == '' ? $matches[4] : $matches[3];
2357                 $title                  =& $matches[7];
2358                 $attr  = $this->doExtraAttributes("a", $dummy =& $matches[8]);
2359
2360                 // if the URL was of the form <s p a c e s> it got caught by the HTML
2361                 // tag parser and hashed. Need to reverse the process before using the URL.
2362                 $unhashed = $this->unhash($url);
2363                 if ($unhashed != $url)
2364                         $url = preg_replace('/^<(.*)>$/', '\1', $unhashed);
2365
2366                 $url = $this->encodeURLAttribute($url);
2367
2368                 $result = "<a href=\"$url\"";
2369                 if (isset($title)) {
2370                         $title = $this->encodeAttribute($title);
2371                         $result .=  " title=\"$title\"";
2372                 }
2373                 $result .= $attr;
2374                 
2375                 $link_text = $this->runSpanGamut($link_text);
2376                 $result .= ">$link_text</a>";
2377
2378                 return $this->hashPart($result);
2379         }
2380
2381
2382         protected function doImages($text) {
2383         #
2384         # Turn Markdown image shortcuts into <img> tags.
2385         #
2386                 #
2387                 # First, handle reference-style labeled images: ![alt text][id]
2388                 #
2389                 $text = preg_replace_callback('{
2390                         (                               # wrap whole match in $1
2391                           !\[
2392                                 ('.$this->nested_brackets_re.')         # alt text = $2
2393                           \]
2394
2395                           [ ]?                          # one optional space
2396                           (?:\n[ ]*)?           # one optional newline followed by spaces
2397
2398                           \[
2399                                 (.*?)           # id = $3
2400                           \]
2401
2402                         )
2403                         }xs', 
2404                         array($this, '_doImages_reference_callback'), $text);
2405
2406                 #
2407                 # Next, handle inline images:  ![alt text](url "optional title")
2408                 # Don't forget: encode * and _
2409                 #
2410                 $text = preg_replace_callback('{
2411                         (                               # wrap whole match in $1
2412                           !\[
2413                                 ('.$this->nested_brackets_re.')         # alt text = $2
2414                           \]
2415                           \s?                   # One optional whitespace character
2416                           \(                    # literal paren
2417                                 [ \n]*
2418                                 (?:
2419                                         <(\S*)> # src url = $3
2420                                 |
2421                                         ('.$this->nested_url_parenthesis_re.')  # src url = $4
2422                                 )
2423                                 [ \n]*
2424                                 (                       # $5
2425                                   ([\'"])       # quote char = $6
2426                                   (.*?)         # title = $7
2427                                   \6            # matching quote
2428                                   [ \n]*
2429                                 )?                      # title is optional
2430                           \)
2431                           (?:[ ]? '.$this->id_class_attr_catch_re.' )?   # $8 = id/class attributes
2432                         )
2433                         }xs',
2434                         array($this, '_doImages_inline_callback'), $text);
2435
2436                 return $text;
2437         }
2438         protected function _doImages_reference_callback($matches) {
2439                 $whole_match = $matches[1];
2440                 $alt_text    = $matches[2];
2441                 $link_id     = strtolower($matches[3]);
2442
2443                 if ($link_id == "") {
2444                         $link_id = strtolower($alt_text); # for shortcut links like ![this][].
2445                 }
2446
2447                 $alt_text = $this->encodeAttribute($alt_text);
2448                 if (isset($this->urls[$link_id])) {
2449                         $url = $this->encodeURLAttribute($this->urls[$link_id]);
2450                         $result = "<img src=\"$url\" alt=\"$alt_text\"";
2451                         if (isset($this->titles[$link_id])) {
2452                                 $title = $this->titles[$link_id];
2453                                 $title = $this->encodeAttribute($title);
2454                                 $result .=  " title=\"$title\"";
2455                         }
2456                         if (isset($this->ref_attr[$link_id]))
2457                                 $result .= $this->ref_attr[$link_id];
2458                         $result .= $this->empty_element_suffix;
2459                         $result = $this->hashPart($result);
2460                 }
2461                 else {
2462                         # If there's no such link ID, leave intact:
2463                         $result = $whole_match;
2464                 }
2465
2466                 return $result;
2467         }
2468         protected function _doImages_inline_callback($matches) {
2469                 $whole_match    = $matches[1];
2470                 $alt_text               = $matches[2];
2471                 $url                    = $matches[3] == '' ? $matches[4] : $matches[3];
2472                 $title                  =& $matches[7];
2473                 $attr  = $this->doExtraAttributes("img", $dummy =& $matches[8]);
2474
2475                 $alt_text = $this->encodeAttribute($alt_text);
2476                 $url = $this->encodeURLAttribute($url);
2477                 $result = "<img src=\"$url\" alt=\"$alt_text\"";
2478                 if (isset($title)) {
2479                         $title = $this->encodeAttribute($title);
2480                         $result .=  " title=\"$title\""; # $title already quoted
2481                 }
2482                 $result .= $attr;
2483                 $result .= $this->empty_element_suffix;
2484
2485                 return $this->hashPart($result);
2486         }
2487
2488
2489         protected function doHeaders($text) {
2490         #
2491         # Redefined to add id and class attribute support.
2492         #
2493                 # Setext-style headers:
2494                 #         Header 1  {#header1}
2495                 #         ========
2496                 #  
2497                 #         Header 2  {#header2 .class1 .class2}
2498                 #         --------
2499                 #
2500                 $text = preg_replace_callback(
2501                         '{
2502                                 (^.+?)                                                          # $1: Header text
2503                                 (?:[ ]+ '.$this->id_class_attr_catch_re.' )?     # $3 = id/class attributes
2504                                 [ ]*\n(=+|-+)[ ]*\n+                            # $3: Header footer
2505                         }mx',
2506                         array($this, '_doHeaders_callback_setext'), $text);
2507
2508                 # atx-style headers:
2509                 #       # Header 1        {#header1}
2510                 #       ## Header 2       {#header2}
2511                 #       ## Header 2 with closing hashes ##  {#header3.class1.class2}
2512                 #       ...
2513                 #       ###### Header 6   {.class2}
2514                 #
2515                 $text = preg_replace_callback('{
2516                                 ^(\#{1,6})      # $1 = string of #\'s
2517                                 [ ]*
2518                                 (.+?)           # $2 = Header text
2519                                 [ ]*
2520                                 \#*                     # optional closing #\'s (not counted)
2521                                 (?:[ ]+ '.$this->id_class_attr_catch_re.' )?     # $3 = id/class attributes
2522                                 [ ]*
2523                                 \n+
2524                         }xm',
2525                         array($this, '_doHeaders_callback_atx'), $text);
2526
2527                 return $text;
2528         }
2529         protected function _doHeaders_callback_setext($matches) {
2530                 if ($matches[3] == '-' && preg_match('{^- }', $matches[1]))
2531                         return $matches[0];
2532                 $level = $matches[3]{0} == '=' ? 1 : 2;
2533                 $attr  = $this->doExtraAttributes("h$level", $dummy =& $matches[2]);
2534                 $block = "<h$level$attr>".$this->runSpanGamut($matches[1])."</h$level>";
2535                 return "\n" . $this->hashBlock($block) . "\n\n";
2536         }
2537         protected function _doHeaders_callback_atx($matches) {
2538                 $level = strlen($matches[1]);
2539                 $attr  = $this->doExtraAttributes("h$level", $dummy =& $matches[3]);
2540                 $block = "<h$level$attr>".$this->runSpanGamut($matches[2])."</h$level>";
2541                 return "\n" . $this->hashBlock($block) . "\n\n";
2542         }
2543
2544
2545         protected function doTables($text) {
2546         #
2547         # Form HTML tables.
2548         #
2549                 $less_than_tab = $this->tab_width - 1;
2550                 #
2551                 # Find tables with leading pipe.
2552                 #
2553                 #       | Header 1 | Header 2
2554                 #       | -------- | --------
2555                 #       | Cell 1   | Cell 2
2556                 #       | Cell 3   | Cell 4
2557                 #
2558                 $text = preg_replace_callback('
2559                         {
2560                                 ^                                                       # Start of a line
2561                                 [ ]{0,'.$less_than_tab.'}       # Allowed whitespace.
2562                                 [|]                                                     # Optional leading pipe (present)
2563                                 (.+) \n                                         # $1: Header row (at least one pipe)
2564                                 
2565                                 [ ]{0,'.$less_than_tab.'}       # Allowed whitespace.
2566                                 [|] ([ ]*[-:]+[-| :]*) \n       # $2: Header underline
2567                                 
2568                                 (                                                       # $3: Cells
2569                                         (?>
2570                                                 [ ]*                            # Allowed whitespace.
2571                                                 [|] .* \n                       # Row content.
2572                                         )*
2573                                 )
2574                                 (?=\n|\Z)                                       # Stop at final double newline.
2575                         }xm',
2576                         array($this, '_doTable_leadingPipe_callback'), $text);
2577                 
2578                 #
2579                 # Find tables without leading pipe.
2580                 #
2581                 #       Header 1 | Header 2
2582                 #       -------- | --------
2583                 #       Cell 1   | Cell 2
2584                 #       Cell 3   | Cell 4
2585                 #
2586                 $text = preg_replace_callback('
2587                         {
2588                                 ^                                                       # Start of a line
2589                                 [ ]{0,'.$less_than_tab.'}       # Allowed whitespace.
2590                                 (\S.*[|].*) \n                          # $1: Header row (at least one pipe)
2591                                 
2592                                 [ ]{0,'.$less_than_tab.'}       # Allowed whitespace.
2593                                 ([-:]+[ ]*[|][-| :]*) \n        # $2: Header underline
2594                                 
2595                                 (                                                       # $3: Cells
2596                                         (?>
2597                                                 .* [|] .* \n            # Row content
2598                                         )*
2599                                 )
2600                                 (?=\n|\Z)                                       # Stop at final double newline.
2601                         }xm',
2602                         array($this, '_DoTable_callback'), $text);
2603
2604                 return $text;
2605         }
2606         protected function _doTable_leadingPipe_callback($matches) {
2607                 $head           = $matches[1];
2608                 $underline      = $matches[2];
2609                 $content        = $matches[3];
2610                 
2611                 # Remove leading pipe for each row.
2612                 $content        = preg_replace('/^ *[|]/m', '', $content);
2613                 
2614                 return $this->_doTable_callback(array($matches[0], $head, $underline, $content));
2615         }
2616         protected function _doTable_makeAlignAttr($alignname)
2617         {
2618                 if (empty($this->table_align_class_tmpl))
2619                         return " align=\"$alignname\"";
2620
2621                 $classname = str_replace('%%', $alignname, $this->table_align_class_tmpl);
2622                 return " class=\"$classname\"";
2623         }
2624         protected function _doTable_callback($matches) {
2625                 $head           = $matches[1];
2626                 $underline      = $matches[2];
2627                 $content        = $matches[3];
2628
2629                 # Remove any tailing pipes for each line.
2630                 $head           = preg_replace('/[|] *$/m', '', $head);
2631                 $underline      = preg_replace('/[|] *$/m', '', $underline);
2632                 $content        = preg_replace('/[|] *$/m', '', $content);
2633                 
2634                 # Reading alignement from header underline.
2635                 $separators     = preg_split('/ *[|] */', $underline);
2636                 foreach ($separators as $n => $s) {
2637                         if (preg_match('/^ *-+: *$/', $s))
2638                                 $attr[$n] = $this->_doTable_makeAlignAttr('right');
2639                         else if (preg_match('/^ *:-+: *$/', $s))
2640                                 $attr[$n] = $this->_doTable_makeAlignAttr('center');
2641                         else if (preg_match('/^ *:-+ *$/', $s))
2642                                 $attr[$n] = $this->_doTable_makeAlignAttr('left');
2643                         else
2644                                 $attr[$n] = '';
2645                 }
2646                 
2647                 # Parsing span elements, including code spans, character escapes, 
2648                 # and inline HTML tags, so that pipes inside those gets ignored.
2649                 $head           = $this->parseSpan($head);
2650                 $headers        = preg_split('/ *[|] */', $head);
2651                 $col_count      = count($headers);
2652                 $attr       = array_pad($attr, $col_count, '');
2653                 
2654                 # Write column headers.
2655                 $text = "<table>\n";
2656                 $text .= "<thead>\n";
2657                 $text .= "<tr>\n";
2658                 foreach ($headers as $n => $header)
2659                         $text .= "  <th$attr[$n]>".$this->runSpanGamut(trim($header))."</th>\n";
2660                 $text .= "</tr>\n";
2661                 $text .= "</thead>\n";
2662                 
2663                 # Split content by row.
2664                 $rows = explode("\n", trim($content, "\n"));
2665                 
2666                 $text .= "<tbody>\n";
2667                 foreach ($rows as $row) {
2668                         # Parsing span elements, including code spans, character escapes, 
2669                         # and inline HTML tags, so that pipes inside those gets ignored.
2670                         $row = $this->parseSpan($row);
2671                         
2672                         # Split row by cell.
2673                         $row_cells = preg_split('/ *[|] */', $row, $col_count);
2674                         $row_cells = array_pad($row_cells, $col_count, '');
2675                         
2676                         $text .= "<tr>\n";
2677                         foreach ($row_cells as $n => $cell)
2678                                 $text .= "  <td$attr[$n]>".$this->runSpanGamut(trim($cell))."</td>\n";
2679                         $text .= "</tr>\n";
2680                 }
2681                 $text .= "</tbody>\n";
2682                 $text .= "</table>";
2683                 
2684                 return $this->hashBlock($text) . "\n";
2685         }
2686
2687         
2688         protected function doDefLists($text) {
2689         #
2690         # Form HTML definition lists.
2691         #
2692                 $less_than_tab = $this->tab_width - 1;
2693
2694                 # Re-usable pattern to match any entire dl list:
2695                 $whole_list_re = '(?>
2696                         (                                                               # $1 = whole list
2697                           (                                                             # $2
2698                                 [ ]{0,'.$less_than_tab.'}
2699                                 ((?>.*\S.*\n)+)                         # $3 = defined term
2700                                 \n?
2701                                 [ ]{0,'.$less_than_tab.'}:[ ]+ # colon starting definition
2702                           )
2703                           (?s:.+?)
2704                           (                                                             # $4
2705                                   \z
2706                                 |
2707                                   \n{2,}
2708                                   (?=\S)
2709                                   (?!                                           # Negative lookahead for another term
2710                                         [ ]{0,'.$less_than_tab.'}
2711                                         (?: \S.*\n )+?                  # defined term
2712                                         \n?
2713                                         [ ]{0,'.$less_than_tab.'}:[ ]+ # colon starting definition
2714                                   )
2715                                   (?!                                           # Negative lookahead for another definition
2716                                         [ ]{0,'.$less_than_tab.'}:[ ]+ # colon starting definition
2717                                   )
2718                           )
2719                         )
2720                 )'; // mx
2721
2722                 $text = preg_replace_callback('{
2723                                 (?>\A\n?|(?<=\n\n))
2724                                 '.$whole_list_re.'
2725                         }mx',
2726                         array($this, '_doDefLists_callback'), $text);
2727
2728                 return $text;
2729         }
2730         protected function _doDefLists_callback($matches) {
2731                 # Re-usable patterns to match list item bullets and number markers:
2732                 $list = $matches[1];
2733                 
2734                 # Turn double returns into triple returns, so that we can make a
2735                 # paragraph for the last item in a list, if necessary:
2736                 $result = trim($this->processDefListItems($list));
2737                 $result = "<dl>\n" . $result . "\n</dl>";
2738                 return $this->hashBlock($result) . "\n\n";
2739         }
2740
2741
2742         protected function processDefListItems($list_str) {
2743         #
2744         #       Process the contents of a single definition list, splitting it
2745         #       into individual term and definition list items.
2746         #
2747                 $less_than_tab = $this->tab_width - 1;
2748                 
2749                 # trim trailing blank lines:
2750                 $list_str = preg_replace("/\n{2,}\\z/", "\n", $list_str);
2751
2752                 # Process definition terms.
2753                 $list_str = preg_replace_callback('{
2754                         (?>\A\n?|\n\n+)                                 # leading line
2755                         (                                                               # definition terms = $1
2756                                 [ ]{0,'.$less_than_tab.'}       # leading whitespace
2757                                 (?!\:[ ]|[ ])                           # negative lookahead for a definition
2758                                                                                         #   mark (colon) or more whitespace.
2759                                 (?> \S.* \n)+?                          # actual term (not whitespace). 
2760                         )                       
2761                         (?=\n?[ ]{0,3}:[ ])                             # lookahead for following line feed 
2762                                                                                         #   with a definition mark.
2763                         }xm',
2764                         array($this, '_processDefListItems_callback_dt'), $list_str);
2765
2766                 # Process actual definitions.
2767                 $list_str = preg_replace_callback('{
2768                         \n(\n+)?                                                # leading line = $1
2769                         (                                                               # marker space = $2
2770                                 [ ]{0,'.$less_than_tab.'}       # whitespace before colon
2771                                 \:[ ]+                                          # definition mark (colon)
2772                         )
2773                         ((?s:.+?))                                              # definition text = $3
2774                         (?= \n+                                                 # stop at next definition mark,
2775                                 (?:                                                     # next term or end of text
2776                                         [ ]{0,'.$less_than_tab.'} \:[ ] |
2777                                         <dt> | \z
2778                                 )                                               
2779                         )                                       
2780                         }xm',
2781                         array($this, '_processDefListItems_callback_dd'), $list_str);
2782
2783                 return $list_str;
2784         }
2785         protected function _processDefListItems_callback_dt($matches) {
2786                 $terms = explode("\n", trim($matches[1]));
2787                 $text = '';
2788                 foreach ($terms as $term) {
2789                         $term = $this->runSpanGamut(trim($term));
2790                         $text .= "\n<dt>" . $term . "</dt>";
2791                 }
2792                 return $text . "\n";
2793         }
2794         protected function _processDefListItems_callback_dd($matches) {
2795                 $leading_line   = $matches[1];
2796                 $marker_space   = $matches[2];
2797                 $def                    = $matches[3];
2798
2799                 if ($leading_line || preg_match('/\n{2,}/', $def)) {
2800                         # Replace marker with the appropriate whitespace indentation
2801                         $def = str_repeat(' ', strlen($marker_space)) . $def;
2802                         $def = $this->runBlockGamut($this->outdent($def . "\n\n"));
2803                         $def = "\n". $def ."\n";
2804                 }
2805                 else {
2806                         $def = rtrim($def);
2807                         $def = $this->runSpanGamut($this->outdent($def));
2808                 }
2809
2810                 return "\n<dd>" . $def . "</dd>\n";
2811         }
2812
2813
2814         protected function doFencedCodeBlocks($text) {
2815         #
2816         # Adding the fenced code block syntax to regular Markdown:
2817         #
2818         # ~~~
2819         # Code block
2820         # ~~~
2821         #
2822                 $less_than_tab = $this->tab_width;
2823                 
2824                 $text = preg_replace_callback('{
2825                                 (?:\n|\A)
2826                                 # 1: Opening marker
2827                                 (
2828                                         (?:~{3,}|`{3,}) # 3 or more tildes/backticks.
2829                                 )
2830                                 [ ]*
2831                                 (?:
2832                                         \.?([-_:a-zA-Z0-9]+) # 2: standalone class name
2833                                 |
2834                                         '.$this->id_class_attr_catch_re.' # 3: Extra attributes
2835                                 )?
2836                                 [ ]* \n # Whitespace and newline following marker.
2837                                 
2838                                 # 4: Content
2839                                 (
2840                                         (?>
2841                                                 (?!\1 [ ]* \n)  # Not a closing marker.
2842                                                 .*\n+
2843                                         )+
2844                                 )
2845                                 
2846                                 # Closing marker.
2847                                 \1 [ ]* (?= \n )
2848                         }xm',
2849                         array($this, '_doFencedCodeBlocks_callback'), $text);
2850
2851                 return $text;
2852         }
2853         protected function _doFencedCodeBlocks_callback($matches) {
2854                 $classname =& $matches[2];
2855                 $attrs     =& $matches[3];
2856                 $codeblock = $matches[4];
2857                 $codeblock = htmlspecialchars($codeblock, ENT_NOQUOTES);
2858                 $codeblock = preg_replace_callback('/^\n+/',
2859                         array($this, '_doFencedCodeBlocks_newlines'), $codeblock);
2860
2861                 if ($classname != "") {
2862                         if ($classname{0} == '.')
2863                                 $classname = substr($classname, 1);
2864                         $attr_str = ' class="'.$this->code_class_prefix.$classname.'"';
2865                 } else {
2866                         $attr_str = $this->doExtraAttributes($this->code_attr_on_pre ? "pre" : "code", $attrs);
2867                 }
2868                 $pre_attr_str  = $this->code_attr_on_pre ? $attr_str : '';
2869                 $code_attr_str = $this->code_attr_on_pre ? '' : $attr_str;
2870                 $codeblock  = "<pre$pre_attr_str><code$code_attr_str>$codeblock</code></pre>";
2871                 
2872                 return "\n\n".$this->hashBlock($codeblock)."\n\n";
2873         }
2874         protected function _doFencedCodeBlocks_newlines($matches) {
2875                 return str_repeat("<br$this->empty_element_suffix", 
2876                         strlen($matches[0]));
2877         }
2878
2879
        #
        # Redefining emphasis markers so that emphasis by underscore does not
        # work in the middle of a word.
        #
        # Each list maps a string key to a regular-expression fragment, for
        # single, double and triple markers respectively.
        #
        # In the '' entries, an asterisk marker must not touch another
        # asterisk, an underscore marker must not be preceded by a letter,
        # digit or underscore nor followed by an underscore, and neither may
        # be followed by whitespace (optionally after one of . , : ;).
        # In the asterisk-keyed entries, the marker must not be preceded by
        # whitespace or an asterisk, nor followed by an asterisk.
        # In the underscore-keyed entries, the marker must not be preceded by
        # whitespace or an underscore, nor followed by a letter, digit or
        # underscore.
        #
        # NOTE(review): presumably the '' entry is the pattern for an opening
        # marker and the other keys give the pattern closing an emphasis that
        # was opened with that token -- confirm against the code that
        # consumes these lists.
        #
        protected $em_relist = array(
                ''  => '(?:(?<!\*)\*(?!\*)|(?<![a-zA-Z0-9_])_(?!_))(?![\.,:;]?\s)',
                '*' => '(?<![\s*])\*(?!\*)',
                '_' => '(?<![\s_])_(?![a-zA-Z0-9_])',
                );
        protected $strong_relist = array(
                ''   => '(?:(?<!\*)\*\*(?!\*)|(?<![a-zA-Z0-9_])__(?!_))(?![\.,:;]?\s)',
                '**' => '(?<![\s*])\*\*(?!\*)',
                '__' => '(?<![\s_])__(?![a-zA-Z0-9_])',
                );
        protected $em_strong_relist = array(
                ''    => '(?:(?<!\*)\*\*\*(?!\*)|(?<![a-zA-Z0-9_])___(?!_))(?![\.,:;]?\s)',
                '***' => '(?<![\s*])\*\*\*(?!\*)',
                '___' => '(?<![\s_])___(?![a-zA-Z0-9_])',
                );
2899
2900
2901         protected function formParagraphs($text) {
2902         #
2903         #       Params:
2904         #               $text - string to process with html <p> tags
2905         #
2906                 # Strip leading and trailing lines:
2907                 $text = preg_replace('/\A\n+|\n+\z/', '', $text);
2908                 
2909                 $grafs = preg_split('/\n{2,}/', $text, -1, PREG_SPLIT_NO_EMPTY);
2910
2911                 #
2912                 # Wrap <p> tags and unhashify HTML blocks
2913                 #
2914                 foreach ($grafs as $key => $value) {
2915                         $value = trim($this->runSpanGamut($value));
2916                         
2917                         # Check if this should be enclosed in a paragraph.
2918                         # Clean tag hashes & block tag hashes are left alone.
2919                         $is_p = !preg_match('/^B\x1A[0-9]+B|^C\x1A[0-9]+C$/', $value);
2920                         
2921                         if ($is_p) {
2922                                 $value = "<p>$value</p>";
2923                         }
2924                         $grafs[$key] = $value;
2925                 }
2926                 
2927                 # Join grafs in one text, then unhash HTML tags. 
2928                 $text = implode("\n\n", $grafs);
2929                 
2930                 # Finish by removing any tag hashes still present in $text.
2931                 $text = $this->unhash($text);
2932                 
2933                 return $text;
2934         }
2935         
2936         
2937         ### Footnotes
2938         
2939         protected function stripFootnotes($text) {
2940         #
2941         # Strips link definitions from text, stores the URLs and titles in
2942         # hash references.
2943         #
2944                 $less_than_tab = $this->tab_width - 1;
2945
2946                 # Link defs are in the form: [^id]: url "optional title"
2947                 $text = preg_replace_callback('{
2948                         ^[ ]{0,'.$less_than_tab.'}\[\^(.+?)\][ ]?:      # note_id = $1
2949                           [ ]*
2950                           \n?                                   # maybe *one* newline
2951                         (                                               # text = $2 (no blank lines allowed)
2952                                 (?:                                     
2953                                         .+                              # actual text
2954                                 |
2955                                         \n                              # newlines but 
2956                                         (?!\[.+?\][ ]?:\s)# negative lookahead for footnote or link definition marker.
2957                                         (?!\n+[ ]{0,3}\S)# ensure line is not blank and followed 
2958                                                                         # by non-indented content
2959                                 )*
2960                         )               
2961                         }xm',
2962                         array($this, '_stripFootnotes_callback'),
2963                         $text);
2964                 return $text;
2965         }
2966         protected function _stripFootnotes_callback($matches) {
2967                 $note_id = $this->fn_id_prefix . $matches[1];
2968                 $this->footnotes[$note_id] = $this->outdent($matches[2]);
2969                 return ''; # String that will replace the block
2970         }
2971
2972
2973         protected function doFootnotes($text) {
2974         #
2975         # Replace footnote references in $text [^id] with a special text-token 
2976         # which will be replaced by the actual footnote marker in appendFootnotes.
2977         #
2978                 if (!$this->in_anchor) {
2979                         $text = preg_replace('{\[\^(.+?)\]}', "F\x1Afn:\\1\x1A:", $text);
2980                 }
2981                 return $text;
2982         }
2983
2984         
2985         protected function appendFootnotes($text) {
2986         #
2987         # Append footnote list to text.
2988         #
2989                 $text = preg_replace_callback('{F\x1Afn:(.*?)\x1A:}', 
2990                         array($this, '_appendFootnotes_callback'), $text);
2991         
2992                 if (!empty($this->footnotes_ordered)) {
2993                         $text .= "\n\n";
2994                         $text .= "<div class=\"footnotes\">\n";
2995                         $text .= "<hr". $this->empty_element_suffix ."\n";
2996                         $text .= "<ol>\n\n";
2997
2998                         $attr = "";
2999                         if ($this->fn_backlink_class != "") {
3000                                 $class = $this->fn_backlink_class;
3001                                 $class = $this->encodeAttribute($class);
3002                                 $attr .= " class=\"$class\"";
3003                         }
3004                         if ($this->fn_backlink_title != "") {
3005                                 $title = $this->fn_backlink_title;
3006                                 $title = $this->encodeAttribute($title);
3007                                 $attr .= " title=\"$title\"";
3008                         }
3009                         $num = 0;
3010                         
3011                         while (!empty($this->footnotes_ordered)) {
3012                                 $footnote = reset($this->footnotes_ordered);
3013                                 $note_id = key($this->footnotes_ordered);
3014                                 unset($this->footnotes_ordered[$note_id]);
3015                                 $ref_count = $this->footnotes_ref_count[$note_id];
3016                                 unset($this->footnotes_ref_count[$note_id]);
3017                                 unset($this->footnotes[$note_id]);
3018                                 
3019                                 $footnote .= "\n"; # Need to append newline before parsing.
3020                                 $footnote = $this->runBlockGamut("$footnote\n");                                
3021                                 $footnote = preg_replace_callback('{F\x1Afn:(.*?)\x1A:}', 
3022                                         array($this, '_appendFootnotes_callback'), $footnote);
3023                                 
3024                                 $attr = str_replace("%%", ++$num, $attr);
3025                                 $note_id = $this->encodeAttribute($note_id);
3026
3027                                 # Prepare backlink, multiple backlinks if multiple references
3028                                 $backlink = "<a href=\"#fnref:$note_id\"$attr>&#8617;</a>";
3029                                 for ($ref_num = 2; $ref_num <= $ref_count; ++$ref_num) {
3030                                         $backlink .= " <a href=\"#fnref$ref_num:$note_id\"$attr>&#8617;</a>";
3031                                 }
3032                                 # Add backlink to last paragraph; create new paragraph if needed.
3033                                 if (preg_match('{</p>$}', $footnote)) {
3034                                         $footnote = substr($footnote, 0, -4) . "&#160;$backlink</p>";
3035                                 } else {
3036                                         $footnote .= "\n\n<p>$backlink</p>";
3037                                 }
3038                                 
3039                                 $text .= "<li id=\"fn:$note_id\">\n";
3040                                 $text .= $footnote . "\n";
3041                                 $text .= "</li>\n\n";
3042                         }
3043                         
3044                         $text .= "</ol>\n";
3045                         $text .= "</div>";
3046                 }
3047                 return $text;
3048         }
protected function _appendFootnotes_callback($matches) {
#
# Callback for the footnote-marker pattern. $matches[1] is the footnote id
# as written in the text. Returns a <sup> element linking to the footnote
# when a footnote with that id is pending, otherwise the literal "[^id]"
# text.
#
        $written_id = $matches[1];
        $node_id = $this->fn_id_prefix . $written_id;

        # No footnote stored under this id: hand the marker back as text.
        if (!isset($this->footnotes[$node_id])) {
                return "[^".$written_id."]";
        }

        if (isset($this->footnotes_numbers[$node_id])) {
                # Footnote already numbered by an earlier marker: bump its
                # reference count and use the new count in this marker's id.
                $ref_count_mark = $this->footnotes_ref_count[$node_id] += 1;
        } else {
                # First marker for this footnote: move its content to the
                # ordered list, start its reference count and assign it the
                # next footnote number. The first marker's id has no count.
                $this->footnotes_ordered[$node_id] = $this->footnotes[$node_id];
                $this->footnotes_ref_count[$node_id] = 1;
                $this->footnotes_numbers[$node_id] = $this->footnote_counter++;
                $ref_count_mark = '';
        }
        $num = $this->footnotes_numbers[$node_id];

        # Optional class and title attributes for the link.
        $attr = "";
        $link_class = $this->fn_link_class;
        if ($link_class != "") {
                $attr .= " class=\"" . $this->encodeAttribute($link_class) . "\"";
        }
        $link_title = $this->fn_link_title;
        if ($link_title != "") {
                $attr .= " title=\"" . $this->encodeAttribute($link_title) . "\"";
        }

        # "%%" in the configured attributes stands for the footnote number.
        $attr = str_replace("%%", $num, $attr);
        $node_id = $this->encodeAttribute($node_id);

        $anchor = "<a href=\"#fn:$node_id\"$attr>$num</a>";
        return "<sup id=\"fnref$ref_count_mark:$node_id\">" . $anchor . "</sup>";
}
3090                 
3091         
3092         ### Abbreviations ###
3093         
protected function stripAbbreviations($text) {
#
# Removes abbreviation definitions from $text and returns what is left.
# Every definition found is passed to _stripAbbreviations_callback, whose
# return value replaces the definition in the text.
#
        # A definition may be indented by at most tab_width - 1 spaces.
        $max_indent = $this->tab_width - 1;

        # Abbreviation defs are in the form: *[abbr]: description
        $definition_re = '{
                        ^[ ]{0,'.$max_indent.'}\*\[(.+?)\][ ]?:      # abbr_id = $1
                        (.*)                                    # text = $2 (no blank lines allowed)    
                        }xm';

        return preg_replace_callback(
                $definition_re,
                array($this, '_stripAbbreviations_callback'),
                $text
        );
}
protected function _stripAbbreviations_callback($matches) {
#
# Callback for stripAbbreviations: registers one abbreviation definition.
# $matches[1] is the abbreviation and $matches[2] its description. The
# regex-quoted abbreviation is appended to the alternation kept in
# abbr_word_re, and the trimmed description is stored in abbr_desciptions
# under the abbreviation. Returns the empty string, which replaces the
# definition in the text.
#
        $abbr_word = $matches[1];
        $abbr_desc = $matches[2];

        # Compare against the empty string instead of testing truthiness:
        # an alternation that is exactly "0" is falsy in PHP, so the "|"
        # separator would be skipped and two abbreviations fused together.
        if ((string) $this->abbr_word_re !== '')
                $this->abbr_word_re .= '|';
        $this->abbr_word_re .= preg_quote($abbr_word);
        $this->abbr_desciptions[$abbr_word] = trim($abbr_desc);
        return ''; # String that will replace the block
}
3118         
3119         
protected function doAbbreviations($text) {
#
# Find defined abbreviations in text and wrap them in <abbr> elements.
# Returns $text unchanged when no abbreviation has been registered or
# when the regex engine reports an error.
#
        # Explicit empty-string test: a truthiness check would treat the
        # pattern "0" (a lone abbreviation "0") as "no abbreviations".
        if ((string) $this->abbr_word_re === '') {
                return $text;
        }

        // cannot use the /x modifier because abbr_word_re may
        // contain significant spaces:
        $result = preg_replace_callback('{'.
                '(?<![\w\x1A])'.                # not preceded by a word char or \x1A
                '(?:'.$this->abbr_word_re.')'.  # any registered abbreviation
                '(?![\w\x1A])'.                 # not followed by a word char or \x1A
                '}',
                array($this, '_doAbbreviations_callback'), $text);

        # preg_replace_callback returns null on failure; keep the input
        # rather than replacing the whole text with null.
        return $result === null ? $text : $result;
}
protected function _doAbbreviations_callback($matches) {
#
# Callback for doAbbreviations: $matches[0] is the matched abbreviation.
# Returns the abbreviation wrapped in an <abbr> element (passed through
# hashPart), with a title attribute when a non-empty description is
# stored for it. Returns the matched text untouched when no description
# entry exists.
#
        $abbr = $matches[0];
        if (!isset($this->abbr_desciptions[$abbr])) {
                return $abbr;
        }

        # Compare against the empty string instead of using empty(), which
        # would also discard the legitimate description "0".
        $desc = (string) $this->abbr_desciptions[$abbr];
        if ($desc === '') {
                return $this->hashPart("<abbr>$abbr</abbr>");
        }

        $desc = $this->encodeAttribute($desc);
        return $this->hashPart("<abbr title=\"$desc\">$abbr</abbr>");
}
3150
3151 }