]> git.mxchange.org Git - quix0rs-gnu-social.git/blob - extlib/Michelf/Markdown.php
Updating Markdown class + use spl_autoload_register
[quix0rs-gnu-social.git] / extlib / Michelf / Markdown.php
1 <?php
2 #
3 # Markdown  -  A text-to-HTML conversion tool for web writers
4 #
5 # PHP Markdown  
6 # Copyright (c) 2004-2013 Michel Fortin  
7 # <http://michelf.com/projects/php-markdown/>
8 #
9 # Original Markdown  
10 # Copyright (c) 2004-2006 John Gruber  
11 # <http://daringfireball.net/projects/markdown/>
12 #
13 namespace Michelf;
14
15
16 #
17 # Markdown Parser Class
18 #
19
20 class Markdown implements MarkdownInterface {
21
22         ### Version ###
23
24         const  MARKDOWNLIB_VERSION  =  "1.4.0";
25
26         ### Simple Function Interface ###
27
public static function defaultTransform($text) {
#
# Simple function interface: transform $text with a parser instance that
# is created on first use and then reused. The instance is keyed by the
# class the method was called on, so a derived class gets a parser of its
# own type.
#
    # Cache of parser instances, indexed by class name; kept across calls.
    static $parser_list;

    # Class on which this static method was invoked (late static binding).
    $class_name = \get_called_class();

    # Lazily create the parser for that class.
    if (empty($parser_list[$class_name])) {
        $parser_list[$class_name] = new $class_name;
    }

    # Transform text using the cached parser.
    return $parser_list[$class_name]->transform($text);
}
47
48         ### Configuration Variables ###
49
# Suffix appended when an empty element such as <hr> or <br> is emitted.
# Change to ">" for HTML output.
public $empty_element_suffix = " />";
# Tab stop width; link definitions and HTML blocks may be indented by at
# most ($tab_width - 1) spaces.
public $tab_width = 4;

# Change to `true` to disallow markup or entities.
# When $no_markup is true, hashHTMLBlocks() returns its input untouched.
public $no_markup = false;
# NOTE(review): $no_entities is not read in this part of the file;
# presumably consulted by the ampersand/entity encoder -- confirm.
public $no_entities = false;

# Predefined urls and titles for reference links and images.
# setup() copies these into $urls / $titles before each transform.
public $predef_urls = array();
public $predef_titles = array();


### Parser Implementation ###

# Regex to match balanced [brackets].
# Needed to insert a maximum bracket depth while converting to PHP.
# The constructor builds $nested_brackets_re by repeating the opening and
# closing fragments $nested_brackets_depth times.
protected $nested_brackets_depth = 6;
protected $nested_brackets_re;

# Same scheme for balanced (parentheses) inside inline URLs.
protected $nested_url_parenthesis_depth = 4;
protected $nested_url_parenthesis_re;

# Characters that can be backslash-escaped. The constructor turns this
# list into the character class stored in $escape_chars_re.
protected $escape_chars = '\`*_{}[]()>#+-.!';
protected $escape_chars_re;
76
77
public function __construct() {
#
# Constructor. Calls the _initDetab() and prepareItalicsAndBold()
# initialisers (defined elsewhere in the class), builds the regex
# fragments derived from the depth and escape settings, and sorts the
# three gamut tables by priority.
#
    $this->_initDetab();
    $this->prepareItalicsAndBold();

    # Balanced [brackets]: N nested "opener" fragments followed by the
    # N matching "closer" fragments.
    $bracket_levels = $this->nested_brackets_depth;
    $bracket_open   = str_repeat('(?>[^\[\]]+|\[', $bracket_levels);
    $bracket_close  = str_repeat('\])*', $bracket_levels);
    $this->nested_brackets_re = $bracket_open . $bracket_close;

    # Balanced (parentheses) inside URLs, built the same way.
    $paren_levels = $this->nested_url_parenthesis_depth;
    $paren_open   = str_repeat('(?>[^()\s]+|\(', $paren_levels);
    $paren_close  = str_repeat('(?>\)))*', $paren_levels);
    $this->nested_url_parenthesis_re = $paren_open . $paren_close;

    # Character class matching any escapable character.
    $quoted_chars = preg_quote($this->escape_chars);
    $this->escape_chars_re = '[' . $quoted_chars . ']';

    # Sort document, block, and span gamut in ascending priority order.
    # asort() keeps the method-name keys attached to their priorities.
    asort($this->document_gamut);
    asort($this->block_gamut);
    asort($this->span_gamut);
}
100
101
# Internal hashes used during transformation.
# $urls / $titles: link id => url / title, seeded from the predefined
# tables by setup() and filled in by the link-definition stripper.
# $html_hashes: hash token => original text, written by hashPart().
protected $urls = array();
protected $titles = array();
protected $html_hashes = array();

# Status flag to avoid invalid nesting: doAnchors() sets it while it is
# running and returns its input unchanged if it is already set.
protected $in_anchor = false;
109         
110         
protected function setup() {
#
# Resets the parser state before a transformation starts: the anchor
# flag and HTML hash table are cleared, and the url/title tables are
# re-seeded from the predefined ones.
#
    $this->in_anchor   = false;
    $this->html_hashes = array();

    # Start every run from the predefined reference links.
    $this->titles = $this->predef_titles;
    $this->urls   = $this->predef_urls;
}
123         
protected function teardown() {
#
# Called after the transformation process: empties the url, title and
# HTML hash tables so they do not keep taking up memory unnecessarily.
#
    $this->urls = $this->titles = $this->html_hashes = array();
}
133
134
public function transform($text) {
#
# Main function. Normalises the input text, hashes raw HTML blocks,
# then passes the text through every method of the document gamut and
# returns the result followed by a newline.
#
    $this->setup();

    # Remove UTF-8 BOM and marker character in input, if present.
    $text = preg_replace('{^\xEF\xBB\xBF|\x1A}', '', $text);

    # Standardize line endings: DOS and Mac to Unix.
    $text = preg_replace('{\r\n?}', "\n", $text);

    # Make sure the text ends with a couple of newlines, then convert
    # all tabs to spaces.
    $text = $this->detab($text . "\n\n");

    # Turn block-level HTML blocks into hash entries.
    $text = $this->hashHTMLBlocks($text);

    # Blank out lines consisting only of spaces (tabs are already gone),
    # so later regexes can match consecutive blank lines with /\n+/.
    $text = preg_replace('/^[ ]+$/m', '', $text);

    # Run document gamut methods, in their sorted order.
    foreach (array_keys($this->document_gamut) as $method) {
        $text = $this->$method($text);
    }

    $this->teardown();

    return $text . "\n";
}
173         
# Document-level transformations, as method name => priority. The
# constructor sorts the table by priority and transform() calls each
# method in that order.
protected $document_gamut = array(
    # Strip link definitions, store in hashes.
    "stripLinkDefinitions" => 20,

    # Then run the block gamut without re-hashing HTML blocks.
    "runBasicBlockGamut"   => 30,
    );
180
181
protected function stripLinkDefinitions($text) {
#
# Removes link definitions from $text; each definition found is handed
# to _stripLinkDefinitions_callback, which records its URL and title.
# Returns the text without the definitions.
#
    # A definition may be indented by less than one tab stop.
    $max_indent = $this->tab_width - 1;
    $handler    = array($this, '_stripLinkDefinitions_callback');

    # Link defs are in the form: ^[id]: url "optional title"
    $definition_re = '{
            ^[ ]{0,'.$max_indent.'}\[(.+)\][ ]?: # id = $1
              [ ]*
              \n?               # maybe *one* newline
              [ ]*
            (?:
              <(.+?)>           # url = $2
            |
              (\S+?)            # url = $3
            )
              [ ]*
              \n?               # maybe one newline
              [ ]*
            (?:
                (?<=\s)         # lookbehind for whitespace
                ["(]
                (.*?)           # title = $4
                [")]
                [ ]*
            )?  # title is optional
            (?:\n+|\Z)
        }xm';

    return preg_replace_callback($definition_re, $handler, $text);
}
protected function _stripLinkDefinitions_callback($matches) {
#
# preg_replace_callback handler for stripLinkDefinitions: stores the
# definition's URL and title under its lower-cased id and removes the
# definition from the text.
#
    $id = strtolower($matches[1]);

    # The URL is in $2 when written as <url>, otherwise in $3.
    if ($matches[2] == '') {
        $this->urls[$id] = $matches[3];
    } else {
        $this->urls[$id] = $matches[2];
    }

    # The title group is optional; store null when it did not match so
    # that isset() on the entry stays false.
    $this->titles[$id] = isset($matches[4]) ? $matches[4] : null;

    return ''; # String that will replace the block
}
223
224
protected function hashHTMLBlocks($text) {
#
# Replaces each stand-alone block of raw HTML in $text (block-level
# elements, <hr>, HTML comments and <? ... ?> / <% ... %> processing
# instructions) with a hash token produced by _hashHTMLBlocks_callback.
# Returns $text unchanged when $no_markup is set.
#
    if ($this->no_markup)  return $text;

    $less_than_tab = $this->tab_width - 1;

    # Hashify HTML blocks:
    # We only want to do this for block-level HTML tags, such as headers,
    # lists, and tables. That's because we still want to wrap <p>s around
    # "paragraphs" that are wrapped in non-block-level tags, such as anchors,
    # phrase emphasis, and spans. The list of tags we're looking for is
    # hard-coded:
    #
    # *  List "a" is made of tags which can be both inline or block-level.
    #    These will be treated block-level when the start tag is alone on
    #    its line, otherwise they're not matched here and will be taken as
    #    inline later.
    # *  List "b" is made of tags which are always block-level;
    #
    $block_tags_a_re = 'ins|del';
    $block_tags_b_re = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|'.
                       'script|noscript|form|fieldset|iframe|math|svg|'.
                       'article|section|nav|aside|hgroup|header|footer|'.
                       'figure';

    # Regular expression for the content of a block tag.
    # Neither $attr nor $content contains a capturing group, so they do
    # not shift the group numbers used in the main pattern below.
    $nested_tags_level = 4;
    $attr = '
        (?>                 # optional tag attributes
          \s                # starts with whitespace
          (?>
            [^>"/]+         # text outside quotes
          |
            /+(?!>)         # slash not followed by ">"
          |
            "[^"]*"         # text inside double quotes (tolerate ">")
          |
            \'[^\']*\'      # text inside single quotes (tolerate ">")
          )*
        )?
        ';
    $content =
        str_repeat('
            (?>
              [^<]+         # content without tag
            |
              <\2           # nested opening tag
                '.$attr.'   # attributes
                (?>
                  />
                |
                  >', $nested_tags_level).  # end of opening tag
                  '.*?'.                    # last level nested tag content
        str_repeat('
                  </\2\s*>  # closing nested tag
                )
              |
                <(?!/\2\s*> # other tags with a different name
              )
            )*',
            $nested_tags_level);
    # Same content pattern for the group "a" alternative, whose tag name
    # is captured in $3 instead of $2.
    $content2 = str_replace('\2', '\3', $content);

    # First, look for nested blocks, e.g.:
    #   <div>
    #       <div>
    #       tags for inner block must be indented.
    #       </div>
    #   </div>
    #
    # The outermost tags must start at the left margin for this to match, and
    # the inner nested divs must be indented.
    # We need to do this before the next, more liberal match, because the next
    # match will start at the first `<div>` and stop at the first `</div>`.
    #
    # Capturing groups are numbered across ALL alternatives:
    #   $1 whole block, $2 group "b" tag, $3 group "a" tag, $4 hr,
    #   $5 processing-instruction delimiter (? or %).
    # The processing-instruction alternative must therefore close with \5;
    # a back reference to $2 would point at a group that is unset in that
    # alternative and could never match.
    $text = preg_replace_callback('{(?>
        (?>
            (?<=\n\n)       # Starting after a blank line
            |               # or
            \A\n?           # the beginning of the doc
        )
        (                       # save in $1

          # Match from `\n<tag>` to `</tag>\n`, handling nested tags
          # in between.

                    [ ]{0,'.$less_than_tab.'}
                    <('.$block_tags_b_re.')# start tag = $2
                    '.$attr.'>          # attributes followed by > and \n
                    '.$content.'        # content, support nesting
                    </\2>               # the matching end tag
                    [ ]*                # trailing spaces/tabs
                    (?=\n+|\Z)  # followed by a newline or end of document

        | # Special version for tags of group a.

                    [ ]{0,'.$less_than_tab.'}
                    <('.$block_tags_a_re.')# start tag = $3
                    '.$attr.'>[ ]*\n    # attributes followed by >
                    '.$content2.'       # content, support nesting
                    </\3>               # the matching end tag
                    [ ]*                # trailing spaces/tabs
                    (?=\n+|\Z)  # followed by a newline or end of document

        | # Special case just for <hr />. It was easier to make a special
          # case than to make the other regex more complicated.

                    [ ]{0,'.$less_than_tab.'}
                    <(hr)               # start tag = $4
                    '.$attr.'           # attributes
                    /?>                 # the matching end tag
                    [ ]*
                    (?=\n{2,}|\Z)       # followed by a blank line or end of document

        | # Special case for standalone HTML comments:

                [ ]{0,'.$less_than_tab.'}
                (?s:
                    <!-- .*? -->
                )
                [ ]*
                (?=\n{2,}|\Z)       # followed by a blank line or end of document

        | # PHP and ASP-style processor instructions (<? and <%)

                [ ]{0,'.$less_than_tab.'}
                (?s:
                    <([?%])         # $5
                    .*?
                    \5>
                )
                [ ]*
                (?=\n{2,}|\Z)       # followed by a blank line or end of document

        )
        )}Sxmi',
        array($this, '_hashHTMLBlocks_callback'),
        $text);

    return $text;
}
protected function _hashHTMLBlocks_callback($matches) {
#
# preg_replace_callback handler for hashHTMLBlocks: hashes the captured
# HTML block ($1) and returns its token surrounded by blank lines.
#
    $token = $this->hashBlock($matches[1]);
    return "\n\n" . $token . "\n\n";
}
369         
370         
protected function hashPart($text, $boundary = 'X') {
#
# Called whenever a function inserts an atomic element in the text
# stream. Stores $text in the $html_hashes table and returns the
# text-token that stands in for it until it is unhashed.
#
# The $boundary argument specifies what character surrounds the token.
# By convention, "B" is used for block elements that need not be wrapped
# into paragraph tags at the end, ":" is used for elements that are word
# separators and "X" is used in the general case.
#
    # Counter used to number the tokens; it persists between calls.
    static $i = 0;

    # Swap back any tag hash found in $text so we do not have to `unhash`
    # multiple times at the end.
    $plain = $this->unhash($text);

    # Then hash the block: <boundary> \x1A <number> <boundary>.
    $i++;
    $token = $boundary . "\x1A" . $i . $boundary;
    $this->html_hashes[$token] = $plain;

    return $token; # String that will replace the tag.
}
392
393
protected function hashBlock($text) {
#
# Shortcut for hashPart using the block-level boundary character "B".
#
    $block_boundary = 'B';
    return $this->hashPart($text, $block_boundary);
}
400
401
protected $block_gamut = array(
#
# These are all the transformations that form block-level
# tags like paragraphs, headers, and list items.
#
# Entries are method name => priority. The constructor sorts the table
# by priority and runBasicBlockGamut() calls each method in that order.
#
    "doHeaders"         => 10,
    "doHorizontalRules" => 20,

    "doLists"           => 40,
    "doCodeBlocks"      => 50,
    "doBlockQuotes"     => 60,
    );
414
protected function runBlockGamut($text) {
#
# Run block gamut transformations on $text, hashing raw HTML blocks
# first.
#
    # Raw HTML has to be hashed again for each block, not only once at
    # the beginning of the document: hashed blocks can be part of list
    # items and could have been indented, in which case an earlier pass
    # of hashHTMLBlocks would have seen them as code blocks.
    $hashed = $this->hashHTMLBlocks($text);

    return $this->runBasicBlockGamut($hashed);
}
428         
protected function runBasicBlockGamut($text) {
#
# Run block gamut transformations without hashing HTML blocks. This is
# useful when HTML blocks are known to be already hashed, like in the
# first whole-document pass.
#
    # Apply every block gamut method, in table order.
    foreach (array_keys($this->block_gamut) as $method) {
        $text = $this->$method($text);
    }

    # Finally form paragraphs and restore hashed blocks.
    return $this->formParagraphs($text);
}
444         
445         
protected function doHorizontalRules($text) {
#
# Replaces every horizontal-rule line (three or more of the same marker
# among - * _, optionally separated by spaces) with a hashed <hr> tag.
#
    # The tag is hashed once, up front; every rule found reuses the
    # same token.
    $rule_html   = "<hr" . $this->empty_element_suffix;
    $replacement = "\n" . $this->hashBlock($rule_html) . "\n";

    $rule_re = '{
            ^[ ]{0,3}   # Leading space
            ([-*_])     # $1: First marker
            (?>         # Repeated marker group
                [ ]{0,2}    # Zero, one, or two spaces.
                \1          # Marker character
            ){2,}       # Group repeated at least twice
            [ ]*        # Tailing spaces
            $           # End of line.
        }mx';

    return preg_replace($rule_re, $replacement, $text);
}
462
463
protected $span_gamut = array(
#
# These are all the transformations that occur *within* block-level
# tags like paragraphs, headers, and list items.
#
# Entries are method name => priority. The constructor sorts the table
# by priority and runSpanGamut() calls each method in that order.
#
    # Process character escapes, code spans, and inline HTML
    # in one shot.
    "parseSpan"           => -30,

    # Process anchor and image tags. Images must come first,
    # because ![foo][f] looks like an anchor.
    "doImages"            =>  10,
    "doAnchors"           =>  20,

    # Make links out of things like `<http://example.com/>`
    # Must come after doAnchors, because you can use < and >
    # delimiters in inline links like [this](<url>).
    "doAutoLinks"         =>  30,
    "encodeAmpsAndAngles" =>  40,

    "doItalicsAndBold"    =>  50,
    "doHardBreaks"        =>  60,
    );
487
protected function runSpanGamut($text) {
#
# Run span gamut transformations: passes $text through every method
# listed in $span_gamut, in table order, and returns the result.
#
    foreach (array_keys($this->span_gamut) as $method) {
        $text = $this->$method($text);
    }

    return $text;
}
498         
499         
protected function doHardBreaks($text) {
#
# Do hard breaks: two or more spaces before a newline are replaced by
# whatever _doHardBreaks_callback returns.
#
    $handler = array($this, '_doHardBreaks_callback');
    return preg_replace_callback('/ {2,}\n/', $handler, $text);
}
protected function _doHardBreaks_callback($matches) {
#
# preg_replace_callback handler for doHardBreaks: returns the hash token
# of a <br> tag followed by a newline. $matches is not used.
#
    $break_tag = "<br" . $this->empty_element_suffix . "\n";
    return $this->hashPart($break_tag);
}
508
509
protected function doAnchors($text) {
#
# Turn Markdown link shortcuts into XHTML <a> tags.
#
# Three passes are made, in this order: reference-style links, inline
# links, then bare [shortcut] references. While the passes run the
# $in_anchor flag is set; a call made while the flag is set returns
# $text unchanged.
#
    if ($this->in_anchor) return $text;
    $this->in_anchor = true;

    $reference_handler = array($this, '_doAnchors_reference_callback');
    $inline_handler    = array($this, '_doAnchors_inline_callback');

    #
    # First, handle reference-style links: [link text] [id]
    #
    $reference_re = '{
        (                   # wrap whole match in $1
          \[
            ('.$this->nested_brackets_re.') # link text = $2
          \]

          [ ]?              # one optional space
          (?:\n[ ]*)?       # one optional newline followed by spaces

          \[
            (.*?)       # id = $3
          \]
        )
        }xs';
    $text = preg_replace_callback($reference_re, $reference_handler, $text);

    #
    # Next, inline-style links: [link text](url "optional title")
    #
    $inline_re = '{
        (               # wrap whole match in $1
          \[
            ('.$this->nested_brackets_re.') # link text = $2
          \]
          \(            # literal paren
            [ \n]*
            (?:
                <(.+?)> # href = $3
            |
                ('.$this->nested_url_parenthesis_re.')  # href = $4
            )
            [ \n]*
            (           # $5
              ([\'"])   # quote char = $6
              (.*?)     # Title = $7
              \6        # matching quote
              [ \n]*    # ignore any spaces/tabs between closing quote and )
            )?          # title is optional
          \)
        )
        }xs';
    $text = preg_replace_callback($inline_re, $inline_handler, $text);

    #
    # Last, handle reference-style shortcuts: [link text]
    # These must come last in case you've also got [link text][1]
    # or [link text](/foo)
    #
    $shortcut_re = '{
        (                   # wrap whole match in $1
          \[
            ([^\[\]]+)      # link text = $2; can\'t contain [ or ]
          \]
        )
        }xs';
    $text = preg_replace_callback($shortcut_re, $reference_handler, $text);

    $this->in_anchor = false;
    return $text;
}
protected function _doAnchors_reference_callback($matches) {
#
# preg_replace_callback handler for reference-style links and shortcut
# links. Looks the link id up in $urls; returns the hashed <a> tag when
# the id is defined, or the matched text unchanged when it is not.
#
    $whole_match = $matches[1];
    $link_text   = $matches[2];

    # $3 is absent for [shortcut] links and empty for [this][]; in both
    # cases the link text itself serves as the id.
    $link_id = isset($matches[3]) ? $matches[3] : '';
    if ($link_id == "") {
        $link_id = $link_text;
    }

    # lower-case and turn embedded newlines into spaces
    $link_id = preg_replace('{[ ]?\n}', ' ', strtolower($link_id));

    # Unknown id: leave the source text as it was.
    if (!isset($this->urls[$link_id])) {
        return $whole_match;
    }

    $href = $this->encodeAttribute($this->urls[$link_id]);
    $tag  = "<a href=\"$href\"";

    if (isset($this->titles[$link_id])) {
        $title_attr = $this->encodeAttribute($this->titles[$link_id]);
        $tag .= " title=\"$title_attr\"";
    }

    # The link text goes through the span gamut once.
    $inner = $this->runSpanGamut($link_text);
    $tag  .= ">$inner</a>";

    return $this->hashPart($tag);
}
protected function _doAnchors_inline_callback($matches) {
#
# preg_replace_callback handler for inline links:
# [link text](url "optional title"). Returns the hash token of the
# generated <a> tag.
#
# $matches: $2 = link text, $3 = url written as <url>, $4 = bare url,
#           $7 = title (absent when no title was given).
#
    # The link text gets exactly one span-gamut pass, as in
    # _doAnchors_reference_callback. Running the gamut a second time on
    # the already-converted text is redundant and lets any span
    # transformation that is not idempotent alter the output.
    $link_text = $this->runSpanGamut($matches[2]);

    # The url is in $3 when wrapped in angle brackets, otherwise in $4.
    $url = $matches[3] == '' ? $matches[4] : $matches[3];
    $url = $this->encodeAttribute($url);

    $result = "<a href=\"$url\"";

    # The title group is optional; emit the attribute only if it matched.
    if (isset($matches[7])) {
        $title = $this->encodeAttribute($matches[7]);
        $result .=  " title=\"$title\"";
    }

    $result .= ">$link_text</a>";

    return $this->hashPart($result);
}
633
634
protected function doImages($text) {
#
# Turn Markdown image shortcuts into <img> tags.
#
# Two passes are made: reference-style images first, then inline
# images; each match is handed to the corresponding callback.
#
    $reference_handler = array($this, '_doImages_reference_callback');
    $inline_handler    = array($this, '_doImages_inline_callback');

    #
    # First, handle reference-style labeled images: ![alt text][id]
    #
    $reference_re = '{
        (               # wrap whole match in $1
          !\[
            ('.$this->nested_brackets_re.')     # alt text = $2
          \]

          [ ]?              # one optional space
          (?:\n[ ]*)?       # one optional newline followed by spaces

          \[
            (.*?)       # id = $3
          \]

        )
        }xs';
    $text = preg_replace_callback($reference_re, $reference_handler, $text);

    #
    # Next, handle inline images:  ![alt text](url "optional title")
    # Don't forget: encode * and _
    #
    $inline_re = '{
        (               # wrap whole match in $1
          !\[
            ('.$this->nested_brackets_re.')     # alt text = $2
          \]
          \s?           # One optional whitespace character
          \(            # literal paren
            [ \n]*
            (?:
                <(\S*)> # src url = $3
            |
                ('.$this->nested_url_parenthesis_re.')  # src url = $4
            )
            [ \n]*
            (           # $5
              ([\'"])   # quote char = $6
              (.*?)     # title = $7
              \6        # matching quote
              [ \n]*
            )?          # title is optional
          \)
        )
        }xs';
    $text = preg_replace_callback($inline_re, $inline_handler, $text);

    return $text;
}
690         protected function _doImages_reference_callback($matches) {
691                 $whole_match = $matches[1];
692                 $alt_text    = $matches[2];
693                 $link_id     = strtolower($matches[3]);
694
695                 if ($link_id == "") {
696                         $link_id = strtolower($alt_text); # for shortcut links like ![this][].
697                 }
698
699                 $alt_text = $this->encodeAttribute($alt_text);
700                 if (isset($this->urls[$link_id])) {
701                         $url = $this->encodeAttribute($this->urls[$link_id]);
702                         $result = "<img src=\"$url\" alt=\"$alt_text\"";
703                         if (isset($this->titles[$link_id])) {
704                                 $title = $this->titles[$link_id];
705                                 $title = $this->encodeAttribute($title);
706                                 $result .=  " title=\"$title\"";
707                         }
708                         $result .= $this->empty_element_suffix;
709                         $result = $this->hashPart($result);
710                 }
711                 else {
712                         # If there's no such link ID, leave intact:
713                         $result = $whole_match;
714                 }
715
716                 return $result;
717         }
718         protected function _doImages_inline_callback($matches) {
719                 $whole_match    = $matches[1];
720                 $alt_text               = $matches[2];
721                 $url                    = $matches[3] == '' ? $matches[4] : $matches[3];
722                 $title                  =& $matches[7];
723
724                 $alt_text = $this->encodeAttribute($alt_text);
725                 $url = $this->encodeAttribute($url);
726                 $result = "<img src=\"$url\" alt=\"$alt_text\"";
727                 if (isset($title)) {
728                         $title = $this->encodeAttribute($title);
729                         $result .=  " title=\"$title\""; # $title already quoted
730                 }
731                 $result .= $this->empty_element_suffix;
732
733                 return $this->hashPart($result);
734         }
735
736
737         protected function doHeaders($text) {
738                 # Setext-style headers:
739                 #         Header 1
740                 #         ========
741                 #  
742                 #         Header 2
743                 #         --------
744                 #
745                 $text = preg_replace_callback('{ ^(.+?)[ ]*\n(=+|-+)[ ]*\n+ }mx',
746                         array(&$this, '_doHeaders_callback_setext'), $text);
747
748                 # atx-style headers:
749                 #       # Header 1
750                 #       ## Header 2
751                 #       ## Header 2 with closing hashes ##
752                 #       ...
753                 #       ###### Header 6
754                 #
755                 $text = preg_replace_callback('{
756                                 ^(\#{1,6})      # $1 = string of #\'s
757                                 [ ]*
758                                 (.+?)           # $2 = Header text
759                                 [ ]*
760                                 \#*                     # optional closing #\'s (not counted)
761                                 \n+
762                         }xm',
763                         array(&$this, '_doHeaders_callback_atx'), $text);
764
765                 return $text;
766         }
767         protected function _doHeaders_callback_setext($matches) {
768                 # Terrible hack to check we haven't found an empty list item.
769                 if ($matches[2] == '-' && preg_match('{^-(?: |$)}', $matches[1]))
770                         return $matches[0];
771                 
772                 $level = $matches[2]{0} == '=' ? 1 : 2;
773                 $block = "<h$level>".$this->runSpanGamut($matches[1])."</h$level>";
774                 return "\n" . $this->hashBlock($block) . "\n\n";
775         }
776         protected function _doHeaders_callback_atx($matches) {
777                 $level = strlen($matches[1]);
778                 $block = "<h$level>".$this->runSpanGamut($matches[2])."</h$level>";
779                 return "\n" . $this->hashBlock($block) . "\n\n";
780         }
781
782
        protected function doLists($text) {
        #
        # Form HTML ordered (numbered) and unordered (bulleted) lists.
        #
        # Every whole list found in $text is passed to _doLists_callback and
        # replaced by its return value. Two passes are made: one for lists
        # opening with a bullet marker, one for lists opening with a number
        # marker. Returns the transformed text.
        #
                # Widest indentation (in spaces) a list marker may have.
                $less_than_tab = $this->tab_width - 1;

                # Re-usable patterns to match list item bullets and number markers:
                $marker_ul_re  = '[*+-]';
                $marker_ol_re  = '\d+[\.]';
                # NOTE(review): $marker_any_re is not referenced again in this method.
                $marker_any_re = "(?:$marker_ul_re|$marker_ol_re)";

                # Maps each marker pattern to the pattern of the *other* list kind;
                # a list ends where a list of the other kind begins at the same
                # indentation.
                $markers_relist = array(
                        $marker_ul_re => $marker_ol_re,
                        $marker_ol_re => $marker_ul_re,
                        );

                foreach ($markers_relist as $marker_re => $other_marker_re) {
                        # Re-usable pattern to match any entire ul or ol list:
                        $whole_list_re = '
                                (                                                               # $1 = whole list
                                  (                                                             # $2
                                        ([ ]{0,'.$less_than_tab.'})     # $3 = number of spaces
                                        ('.$marker_re.')                        # $4 = first list item marker
                                        [ ]+
                                  )
                                  (?s:.+?)
                                  (                                                             # $5
                                          \z
                                        |
                                          \n{2,}
                                          (?=\S)
                                          (?!                                           # Negative lookahead for another list item marker
                                                [ ]*
                                                '.$marker_re.'[ ]+
                                          )
                                        |
                                          (?=                                           # Lookahead for another kind of list
                                            \n
                                                \3                                              # Must have the same indentation
                                                '.$other_marker_re.'[ ]+
                                          )
                                  )
                                )
                        '; // fragment only: embedded below in patterns compiled with the mx modifiers
                        
                        # We use a different prefix before nested lists than top-level lists.
                        # See extended comment in processListItems().
                
                        if ($this->list_level) {
                                # Inside a list: a sub-list may start at any line start.
                                $text = preg_replace_callback('{
                                                ^
                                                '.$whole_list_re.'
                                        }mx',
                                        array(&$this, '_doLists_callback'), $text);
                        }
                        else {
                                # Top level: a list must follow a blank line or open the text.
                                $text = preg_replace_callback('{
                                                (?:(?<=\n)\n|\A\n?) # Must eat the newline
                                                '.$whole_list_re.'
                                        }mx',
                                        array(&$this, '_doLists_callback'), $text);
                        }
                }

                return $text;
        }
849         protected function _doLists_callback($matches) {
850                 # Re-usable patterns to match list item bullets and number markers:
851                 $marker_ul_re  = '[*+-]';
852                 $marker_ol_re  = '\d+[\.]';
853                 $marker_any_re = "(?:$marker_ul_re|$marker_ol_re)";
854                 
855                 $list = $matches[1];
856                 $list_type = preg_match("/$marker_ul_re/", $matches[4]) ? "ul" : "ol";
857                 
858                 $marker_any_re = ( $list_type == "ul" ? $marker_ul_re : $marker_ol_re );
859                 
860                 $list .= "\n";
861                 $result = $this->processListItems($list, $marker_any_re);
862                 
863                 $result = $this->hashBlock("<$list_type>\n" . $result . "</$list_type>");
864                 return "\n". $result ."\n\n";
865         }
866
        # Current list nesting depth: processListItems() increments it on
        # entry and decrements it on exit, and doLists() reads it to choose
        # between the nested-list and top-level list patterns. Zero means
        # no list is being processed.
        protected $list_level = 0;
868
        protected function processListItems($list_str, $marker_any_re) {
        #
        #       Process the contents of a single ordered or unordered list, splitting it
        #       into individual list items.
        #
        #       Params:
        #               $list_str      - source text of the list
        #               $marker_any_re - regex fragment matching this list's item marker
        #
        #       Returns $list_str with every item replaced by the output of
        #       _processListItems_callback.
        #
                # The $this->list_level property keeps track of when we're inside a list.
                # Each time we enter a list, we increment it; when we leave a list,
                # we decrement. If it's zero, we're not in a list anymore.
                #
                # We do this because when we're not inside a list, we want to treat
                # something like this:
                #
                #               I recommend upgrading to version
                #               8. Oops, now this line is treated
                #               as a sub-list.
                #
                # As a single paragraph, despite the fact that the second line starts
                # with a digit-period-space sequence.
                #
                # Whereas when we're inside a list (or sub-list), that line will be
                # treated as the start of a sub-list. What a kludge, huh? This is
                # an aspect of Markdown's syntax that's hard to parse perfectly
                # without resorting to mind-reading. Perhaps the solution is to
                # change the syntax rules such that sub-lists must start with a
                # starting cardinal number; e.g. "1." or "a.".
                
                $this->list_level++;

                # trim trailing blank lines:
                $list_str = preg_replace("/\n{2,}\\z/", "\n", $list_str);

                # Each match is one list item. The closing lookahead requires the
                # item to be followed by the end of the string or by another marker
                # with the same leading whitespace (\2).
                $list_str = preg_replace_callback('{
                        (\n)?                                                   # leading line = $1
                        (^[ ]*)                                                 # leading whitespace = $2
                        ('.$marker_any_re.'                             # list marker and space = $3
                                (?:[ ]+|(?=\n)) # space only required if item is not empty
                        )
                        ((?s:.*?))                                              # list item text   = $4
                        (?:(\n+(?=\n))|\n)                              # tailing blank line = $5
                        (?= \n* (\z | \2 ('.$marker_any_re.') (?:[ ]+|(?=\n))))
                        }xm',
                        array(&$this, '_processListItems_callback'), $list_str);

                $this->list_level--;
                return $list_str;
        }
915         protected function _processListItems_callback($matches) {
916                 $item = $matches[4];
917                 $leading_line =& $matches[1];
918                 $leading_space =& $matches[2];
919                 $marker_space = $matches[3];
920                 $tailing_blank_line =& $matches[5];
921
922                 if ($leading_line || $tailing_blank_line || 
923                         preg_match('/\n{2,}/', $item))
924                 {
925                         # Replace marker with the appropriate whitespace indentation
926                         $item = $leading_space . str_repeat(' ', strlen($marker_space)) . $item;
927                         $item = $this->runBlockGamut($this->outdent($item)."\n");
928                 }
929                 else {
930                         # Recursion for sub-lists:
931                         $item = $this->doLists($this->outdent($item));
932                         $item = preg_replace('/\n+$/', '', $item);
933                         $item = $this->runSpanGamut($item);
934                 }
935
936                 return "<li>" . $item . "</li>\n";
937         }
938
939
940         protected function doCodeBlocks($text) {
941         #
942         #       Process Markdown `<pre><code>` blocks.
943         #
944                 $text = preg_replace_callback('{
945                                 (?:\n\n|\A\n?)
946                                 (                   # $1 = the code block -- one or more lines, starting with a space/tab
947                                   (?>
948                                         [ ]{'.$this->tab_width.'}  # Lines must start with a tab or a tab-width of spaces
949                                         .*\n+
950                                   )+
951                                 )
952                                 ((?=^[ ]{0,'.$this->tab_width.'}\S)|\Z) # Lookahead for non-space at line-start, or end of doc
953                         }xm',
954                         array(&$this, '_doCodeBlocks_callback'), $text);
955
956                 return $text;
957         }
958         protected function _doCodeBlocks_callback($matches) {
959                 $codeblock = $matches[1];
960
961                 $codeblock = $this->outdent($codeblock);
962                 $codeblock = htmlspecialchars($codeblock, ENT_NOQUOTES);
963
964                 # trim leading newlines and trailing newlines
965                 $codeblock = preg_replace('/\A\n+|\n+\z/', '', $codeblock);
966
967                 $codeblock = "<pre><code>$codeblock\n</code></pre>";
968                 return "\n\n".$this->hashBlock($codeblock)."\n\n";
969         }
970
971
972         protected function makeCodeSpan($code) {
973         #
974         # Create a code span markup for $code. Called from handleSpanToken.
975         #
976                 $code = htmlspecialchars(trim($code), ENT_NOQUOTES);
977                 return $this->hashPart("<code>$code</code>");
978         }
979
980
        # Emphasis token patterns, keyed by the emphasis marker that is
        # currently open. The '' entry applies when no emphasis is open and
        # matches a "*" or "_" that is followed by a non-space character;
        # the '*' and '_' entries match that same marker when it is preceded
        # by a non-space character.
        protected $em_relist = array(
                ''  => '(?:(?<!\*)\*(?!\*)|(?<!_)_(?!_))(?=\S|$)(?![\.,:;]\s)',
                '*' => '(?<=\S|^)(?<!\*)\*(?!\*)',
                '_' => '(?<=\S|^)(?<!_)_(?!_)',
                );
        # Same layout as $em_relist, for the two-character strong markers.
        protected $strong_relist = array(
                ''   => '(?:(?<!\*)\*\*(?!\*)|(?<!_)__(?!_))(?=\S|$)(?![\.,:;]\s)',
                '**' => '(?<=\S|^)(?<!\*)\*\*(?!\*)',
                '__' => '(?<=\S|^)(?<!_)__(?!_)',
                );
        # Same layout again, for the three-character markers that combine
        # emphasis and strong.
        protected $em_strong_relist = array(
                ''    => '(?:(?<!\*)\*\*\*(?!\*)|(?<!_)___(?!_))(?=\S|$)(?![\.,:;]\s)',
                '***' => '(?<=\S|^)(?<!\*)\*\*\*(?!\*)',
                '___' => '(?<=\S|^)(?<!_)___(?!_)',
                );
        # Complete token patterns built by prepareItalicsAndBold(), keyed by
        # the concatenation of an $em_relist key and a $strong_relist key.
        protected $em_strong_prepared_relist;
997         
998         protected function prepareItalicsAndBold() {
999         #
1000         # Prepare regular expressions for searching emphasis tokens in any
1001         # context.
1002         #
1003                 foreach ($this->em_relist as $em => $em_re) {
1004                         foreach ($this->strong_relist as $strong => $strong_re) {
1005                                 # Construct list of allowed token expressions.
1006                                 $token_relist = array();
1007                                 if (isset($this->em_strong_relist["$em$strong"])) {
1008                                         $token_relist[] = $this->em_strong_relist["$em$strong"];
1009                                 }
1010                                 $token_relist[] = $em_re;
1011                                 $token_relist[] = $strong_re;
1012                                 
1013                                 # Construct master expression from list.
1014                                 $token_re = '{('. implode('|', $token_relist) .')}';
1015                                 $this->em_strong_prepared_relist["$em$strong"] = $token_re;
1016                         }
1017                 }
1018         }
1019         
1020         protected function doItalicsAndBold($text) {
1021                 $token_stack = array('');
1022                 $text_stack = array('');
1023                 $em = '';
1024                 $strong = '';
1025                 $tree_char_em = false;
1026                 
1027                 while (1) {
1028                         #
1029                         # Get prepared regular expression for seraching emphasis tokens
1030                         # in current context.
1031                         #
1032                         $token_re = $this->em_strong_prepared_relist["$em$strong"];
1033                         
1034                         #
1035                         # Each loop iteration search for the next emphasis token. 
1036                         # Each token is then passed to handleSpanToken.
1037                         #
1038                         $parts = preg_split($token_re, $text, 2, PREG_SPLIT_DELIM_CAPTURE);
1039                         $text_stack[0] .= $parts[0];
1040                         $token =& $parts[1];
1041                         $text =& $parts[2];
1042                         
1043                         if (empty($token)) {
1044                                 # Reached end of text span: empty stack without emitting.
1045                                 # any more emphasis.
1046                                 while ($token_stack[0]) {
1047                                         $text_stack[1] .= array_shift($token_stack);
1048                                         $text_stack[0] .= array_shift($text_stack);
1049                                 }
1050                                 break;
1051                         }
1052                         
1053                         $token_len = strlen($token);
1054                         if ($tree_char_em) {
1055                                 # Reached closing marker while inside a three-char emphasis.
1056                                 if ($token_len == 3) {
1057                                         # Three-char closing marker, close em and strong.
1058                                         array_shift($token_stack);
1059                                         $span = array_shift($text_stack);
1060                                         $span = $this->runSpanGamut($span);
1061                                         $span = "<strong><em>$span</em></strong>";
1062                                         $text_stack[0] .= $this->hashPart($span);
1063                                         $em = '';
1064                                         $strong = '';
1065                                 } else {
1066                                         # Other closing marker: close one em or strong and
1067                                         # change current token state to match the other
1068                                         $token_stack[0] = str_repeat($token{0}, 3-$token_len);
1069                                         $tag = $token_len == 2 ? "strong" : "em";
1070                                         $span = $text_stack[0];
1071                                         $span = $this->runSpanGamut($span);
1072                                         $span = "<$tag>$span</$tag>";
1073                                         $text_stack[0] = $this->hashPart($span);
1074                                         $$tag = ''; # $$tag stands for $em or $strong
1075                                 }
1076                                 $tree_char_em = false;
1077                         } else if ($token_len == 3) {
1078                                 if ($em) {
1079                                         # Reached closing marker for both em and strong.
1080                                         # Closing strong marker:
1081                                         for ($i = 0; $i < 2; ++$i) {
1082                                                 $shifted_token = array_shift($token_stack);
1083                                                 $tag = strlen($shifted_token) == 2 ? "strong" : "em";
1084                                                 $span = array_shift($text_stack);
1085                                                 $span = $this->runSpanGamut($span);
1086                                                 $span = "<$tag>$span</$tag>";
1087                                                 $text_stack[0] .= $this->hashPart($span);
1088                                                 $$tag = ''; # $$tag stands for $em or $strong
1089                                         }
1090                                 } else {
1091                                         # Reached opening three-char emphasis marker. Push on token 
1092                                         # stack; will be handled by the special condition above.
1093                                         $em = $token{0};
1094                                         $strong = "$em$em";
1095                                         array_unshift($token_stack, $token);
1096                                         array_unshift($text_stack, '');
1097                                         $tree_char_em = true;
1098                                 }
1099                         } else if ($token_len == 2) {
1100                                 if ($strong) {
1101                                         # Unwind any dangling emphasis marker:
1102                                         if (strlen($token_stack[0]) == 1) {
1103                                                 $text_stack[1] .= array_shift($token_stack);
1104                                                 $text_stack[0] .= array_shift($text_stack);
1105                                         }
1106                                         # Closing strong marker:
1107                                         array_shift($token_stack);
1108                                         $span = array_shift($text_stack);
1109                                         $span = $this->runSpanGamut($span);
1110                                         $span = "<strong>$span</strong>";
1111                                         $text_stack[0] .= $this->hashPart($span);
1112                                         $strong = '';
1113                                 } else {
1114                                         array_unshift($token_stack, $token);
1115                                         array_unshift($text_stack, '');
1116                                         $strong = $token;
1117                                 }
1118                         } else {
1119                                 # Here $token_len == 1
1120                                 if ($em) {
1121                                         if (strlen($token_stack[0]) == 1) {
1122                                                 # Closing emphasis marker:
1123                                                 array_shift($token_stack);
1124                                                 $span = array_shift($text_stack);
1125                                                 $span = $this->runSpanGamut($span);
1126                                                 $span = "<em>$span</em>";
1127                                                 $text_stack[0] .= $this->hashPart($span);
1128                                                 $em = '';
1129                                         } else {
1130                                                 $text_stack[0] .= $token;
1131                                         }
1132                                 } else {
1133                                         array_unshift($token_stack, $token);
1134                                         array_unshift($text_stack, '');
1135                                         $em = $token;
1136                                 }
1137                         }
1138                 }
1139                 return $text_stack[0];
1140         }
1141
1142
1143         protected function doBlockQuotes($text) {
1144                 $text = preg_replace_callback('/
1145                           (                                                             # Wrap whole match in $1
1146                                 (?>
1147                                   ^[ ]*>[ ]?                    # ">" at the start of a line
1148                                         .+\n                                    # rest of the first line
1149                                   (.+\n)*                                       # subsequent consecutive lines
1150                                   \n*                                           # blanks
1151                                 )+
1152                           )
1153                         /xm',
1154                         array(&$this, '_doBlockQuotes_callback'), $text);
1155
1156                 return $text;
1157         }
1158         protected function _doBlockQuotes_callback($matches) {
1159                 $bq = $matches[1];
1160                 # trim one level of quoting - trim whitespace-only lines
1161                 $bq = preg_replace('/^[ ]*>[ ]?|^[ ]+$/m', '', $bq);
1162                 $bq = $this->runBlockGamut($bq);                # recurse
1163
1164                 $bq = preg_replace('/^/m', "  ", $bq);
1165                 # These leading spaces cause problem with <pre> content, 
1166                 # so we need to fix that:
1167                 $bq = preg_replace_callback('{(\s*<pre>.+?</pre>)}sx', 
1168                         array(&$this, '_doBlockQuotes_callback2'), $bq);
1169
1170                 return "\n". $this->hashBlock("<blockquote>\n$bq\n</blockquote>")."\n\n";
1171         }
1172         protected function _doBlockQuotes_callback2($matches) {
1173                 $pre = $matches[1];
1174                 $pre = preg_replace('/^  /m', '', $pre);
1175                 return $pre;
1176         }
1177
1178
1179         protected function formParagraphs($text) {
1180         #
1181         #       Params:
1182         #               $text - string to process with html <p> tags
1183         #
1184                 # Strip leading and trailing lines:
1185                 $text = preg_replace('/\A\n+|\n+\z/', '', $text);
1186
1187                 $grafs = preg_split('/\n{2,}/', $text, -1, PREG_SPLIT_NO_EMPTY);
1188
1189                 #
1190                 # Wrap <p> tags and unhashify HTML blocks
1191                 #
1192                 foreach ($grafs as $key => $value) {
1193                         if (!preg_match('/^B\x1A[0-9]+B$/', $value)) {
1194                                 # Is a paragraph.
1195                                 $value = $this->runSpanGamut($value);
1196                                 $value = preg_replace('/^([ ]*)/', "<p>", $value);
1197                                 $value .= "</p>";
1198                                 $grafs[$key] = $this->unhash($value);
1199                         }
1200                         else {
1201                                 # Is a block.
1202                                 # Modify elements of @grafs in-place...
1203                                 $graf = $value;
1204                                 $block = $this->html_hashes[$graf];
1205                                 $graf = $block;
1206 //                              if (preg_match('{
1207 //                                      \A
1208 //                                      (                                                       # $1 = <div> tag
1209 //                                        <div  \s+
1210 //                                        [^>]*
1211 //                                        \b
1212 //                                        markdown\s*=\s*  ([\'"])      #       $2 = attr quote char
1213 //                                        1
1214 //                                        \2
1215 //                                        [^>]*
1216 //                                        >
1217 //                                      )
1218 //                                      (                                                       # $3 = contents
1219 //                                      .*
1220 //                                      )
1221 //                                      (</div>)                                        # $4 = closing tag
1222 //                                      \z
1223 //                                      }xs', $block, $matches))
1224 //                              {
1225 //                                      list(, $div_open, , $div_content, $div_close) = $matches;
1226 //
1227 //                                      # We can't call Markdown(), because that resets the hash;
1228 //                                      # that initialization code should be pulled into its own sub, though.
1229 //                                      $div_content = $this->hashHTMLBlocks($div_content);
1230 //                                      
1231 //                                      # Run document gamut methods on the content.
1232 //                                      foreach ($this->document_gamut as $method => $priority) {
1233 //                                              $div_content = $this->$method($div_content);
1234 //                                      }
1235 //
1236 //                                      $div_open = preg_replace(
1237 //                                              '{\smarkdown\s*=\s*([\'"]).+?\1}', '', $div_open);
1238 //
1239 //                                      $graf = $div_open . "\n" . $div_content . "\n" . $div_close;
1240 //                              }
1241                                 $grafs[$key] = $graf;
1242                         }
1243                 }
1244
1245                 return implode("\n\n", $grafs);
1246         }
1247
1248
protected function encodeAttribute($text) {
#
# Escape $text for use inside a double-quoted HTML attribute value:
# ampersands and `<` are handled by encodeAmpsAndAngles, then every
# double quote becomes `&quot;`. Single quotes are left untouched, so
# the result is *not* safe inside a single-quoted attribute.
#
    return str_replace('"', '&quot;', $this->encodeAmpsAndAngles($text));
}
1258         
1259         
protected function encodeAmpsAndAngles($text) {
#
# Encode the ampersands and `<` characters of $text that must not reach
# the HTML output raw.
#
# With no-entities mode off, an ampersand that already starts something
# shaped like a character entity is kept as is; every other ampersand
# becomes `&amp;`. With no-entities mode on, every ampersand is encoded.
# All `<` characters are then turned into `&lt;`.
#
    if (!$this->no_entities) {
        # Ampersand-encoding based entirely on Nat Irons's Amputator
        # MT plugin: <http://bumppo.net/projects/amputator/>
        $encoded = preg_replace('/&(?!#?[xX]?(?:[0-9a-fA-F]+|\w+);)/',
            '&amp;', $text);
    } else {
        $encoded = str_replace('&', '&amp;', $text);
    }

    # Whatever `<` is left gets encoded too.
    return str_replace('<', '&lt;', $encoded);
}
1279
1280
protected function doAutoLinks($text) {
#
# Turn angle-bracketed autolinks into anchors. Three passes run in order:
# URLs (http, https, ftp and dict schemes), e-mail addresses (with an
# optional `mailto:` prefix), and `tel:` links. Each match is handed to
# the corresponding _doAutoLinks_*_callback method.
#
    $url_re = '{<((https?|ftp|dict):[^\'">\s]+)>}i';
    $text = preg_replace_callback($url_re,
        array($this, '_doAutoLinks_url_callback'), $text);

    # Email addresses: <address@domain.foo>
    $email_re = '{
                        <
                        (?:mailto:)?
                        (
                                (?:
                                        [-!#$%&\'*+/=?^_`.{|}~\w\x80-\xFF]+
                                |
                                        ".*?"
                                )
                                \@
                                (?:
                                        [-a-z0-9\x80-\xFF]+(\.[-a-z0-9\x80-\xFF]+)*\.[a-z]+
                                |
                                        \[[\d.a-fA-F:]+\]       # IPv4 & IPv6
                                )
                        )
                        >
                        }xi';
    $text = preg_replace_callback($email_re,
        array($this, '_doAutoLinks_email_callback'), $text);

    # Telephone links: <tel:...>
    $tel_re = '{<(tel:([^\'">\s]+))>}i';
    return preg_replace_callback($tel_re,
        array($this, '_doAutoLinks_tel_callback'), $text);
}
protected function _doAutoLinks_tel_callback($matches) {
#
# Build the anchor for a <tel:...> autolink. $matches[1] is the full
# `tel:` URL (used as href), $matches[2] the part after `tel:` (used as
# link text). The finished tag is hashed so later passes leave it alone.
#
    $href  = $this->encodeAttribute($matches[1]);
    $label = $this->encodeAttribute($matches[2]);
    return $this->hashPart('<a href="' . $href . '">' . $label . '</a>');
}
protected function _doAutoLinks_url_callback($matches) {
#
# Build the anchor for a <url> autolink. The encoded URL in $matches[1]
# serves both as href and as link text; the tag is returned hashed.
#
    $encoded = $this->encodeAttribute($matches[1]);
    return $this->hashPart('<a href="' . $encoded . '">' . $encoded . '</a>');
}
protected function _doAutoLinks_email_callback($matches) {
#
# Build the anchor for an e-mail autolink: the address captured in
# $matches[1] is turned into an obfuscated mailto link by
# encodeEmailAddress, and the result is returned hashed.
#
    return $this->hashPart($this->encodeEmailAddress($matches[1]));
}
1325
1326
protected function encodeEmailAddress($addr) {
#
#   Input: an email address, e.g. "foo@example.com"
#
#   Output: a mailto link whose href and text have most characters
#       replaced by decimal or hexadecimal entities, in the hope of
#       foiling address-harvesting spam bots. E.g.:
#
#         <p><a href="&#109;&#x61;&#105;&#x6c;&#116;&#x6f;&#58;&#x66;o&#111;
#        &#x40;&#101;&#x78;&#97;&#x6d;&#112;&#x6c;&#101;&#46;&#x63;&#111;
#        &#x6d;">&#x66;o&#111;&#x40;&#101;&#x78;&#97;&#x6d;&#112;&#x6c;
#        &#101;&#46;&#x63;&#111;&#x6d;</a></p>
#
#   The choice of encoding per character is driven by a seed derived from
#   the address itself, so the same address always encodes the same way.
#
#   Based on a filter by Matthew Wickline, posted to BBEdit-Talk.
#   With some optimizations by Milian Wolff.
#
    $addr = "mailto:" . $addr;
    $chars = preg_split('/(?<!^)(?!$)/', $addr);
    $seed = (int)abs(crc32($addr) / strlen($addr)); # Deterministic seed.

    foreach ($chars as $pos => $ch) {
        $code = ord($ch);

        # Non-ascii bytes are passed through unchanged.
        if ($code >= 128) {
            continue;
        }

        # Pseudo-random value in 0..99 for this position:
        # roughly 10% raw, 45% hex, 45% dec.
        $roll = ($seed * (1 + $pos)) % 100;

        # Leave the character raw -- except '@', which is always encoded.
        if ($roll > 90 && $ch != '@') {
            continue;
        }

        $chars[$pos] = ($roll < 45)
            ? '&#x'.dechex($code).';'
            : '&#'.$code.';';
    }

    $href  = implode('', $chars);
    $label = implode('', array_slice($chars, 7)); # text without `mailto:`

    return "<a href=\"$href\">$label</a>";
}
1366
1367
protected function parseSpan($str) {
#
# Tokenize $str at span level: text is copied through, while escaped
# characters, code span markers and (unless no_markup is set) embedded
# HTML tags, comments and processing instructions are handed to
# handleSpanToken and replaced by whatever it returns.
#
    $span_re = '{
                                (
                                        \\\\'.$this->escape_chars_re.'
                                |
                                        (?<![`\\\\])
                                        `+                                              # code span marker
                        '.( $this->no_markup ? '' : '
                                |
                                        <!--    .*?     -->             # comment
                                |
                                        <\?.*?\?> | <%.*?%>             # processing instruction
                                |
                                        <[!$]?[-a-zA-Z0-9:_]+   # regular tags
                                        (?>
                                                \s
                                                (?>[^"\'>]+|"[^"]*"|\'[^\']*\')*
                                        )?
                                        >
                                |
                                        <[-a-zA-Z0-9:_]+\s*/> # xml-style empty tag
                                |
                                        </[-a-zA-Z0-9:_]+\s*> # closing tag
                        ').'
                                )
                                }xs';

    $output = '';

    for (;;) {
        #
        # Split off the text up to the next token (tag, opening code
        # span marker or escaped character): [0] is the text before it,
        # [1] the token, [2] everything after.
        #
        $pieces = preg_split($span_re, $str, 2, PREG_SPLIT_DELIM_CAPTURE);

        # Plain text preceding the token goes straight to the output.
        if ($pieces[0] != "") {
            $output .= $pieces[0];
        }

        # No token found: the end of the string has been reached.
        if (!isset($pieces[1])) {
            break;
        }

        # The remaining text is passed by reference; handleSpanToken may
        # consume part of it (e.g. the body of a code span).
        $output .= $this->handleSpanToken($pieces[1], $pieces[2]);
        $str = $pieces[2];
    }

    return $output;
}
1426         
1427         
protected function handleSpanToken($token, &$str) {
#
# Decide what a token found by parseSpan is and return its replacement.
#
# $token is the delimiter matched by the span regex; $str is the text
# following it, taken by reference so that a code span can consume its
# content and closing marker from it.
#
# *   Backslash escape: the escaped character is hashed as a decimal
#     character entity.
# *   Backtick run: $str is searched for a closing run identical to
#     $token; on success the code span is hashed and $str is advanced
#     past it, otherwise the backticks are returned as plain text.
# *   Anything else: the token is hashed verbatim.
#
    # Offsets use [] -- the {} string-offset syntax is deprecated since
    # PHP 7.4 and is a parse error in PHP 8.
    switch ($token[0]) {
        case "\\":
            return $this->hashPart("&#". ord($token[1]). ";");
        case "`":
            # Search for end marker in remaining text.
            if (preg_match('/^(.*?[^`])'.preg_quote($token).'(?!`)(.*)$/sm',
                $str, $matches))
            {
                $str = $matches[2];
                $codespan = $this->makeCodeSpan($matches[1]);
                return $this->hashPart($codespan);
            }
            return $token; // return as text since no ending marker found.
        default:
            return $this->hashPart($token);
    }
}
1450
1451
protected function outdent($text) {
#
# Strip one indentation level from the start of every line: either a
# single tab or between one and tab_width spaces.
#
    $indent_re = '/^(\t|[ ]{1,'.$this->tab_width.'})/m';
    return preg_replace($indent_re, '', $text);
}
1458
1459
1460         # String length function for detab. `_initDetab` will create a function to 
1461         # handle UTF-8 if the default function does not exist.
1462         protected $utf8_strlen = 'mb_strlen';
1463         
protected function detab($text) {
#
# Expand tabs to spaces. Only lines that actually contain a tab are
# touched; each of them is rebuilt by _detab_callback, which pads every
# tab-separated piece out to the next tab stop.
#
    return preg_replace_callback('/^.*\t.*$/m',
        array($this, '_detab_callback'), $text);
}
protected function _detab_callback($matches) {
#
# Rebuild one tab-containing line ($matches[0]) with spaces: the line is
# cut at every tab and the pieces are glued back together, each preceded
# by enough spaces to reach the next multiple of tab_width.
#
    $measure = $this->utf8_strlen; # strlen function for UTF-8.

    $pieces  = explode("\t", $matches[0]);
    $rebuilt = array_shift($pieces); # The first piece needs no padding.

    foreach ($pieces as $piece) {
        # Distance from the current line length to the next tab stop.
        $padding = $this->tab_width -
            $measure($rebuilt, 'UTF-8') % $this->tab_width;
        $rebuilt .= str_repeat(" ", $padding) . $piece;
    }

    return $rebuilt;
}
protected function _initDetab() {
#
# Make sure the `utf8_strlen` property holds something callable.
#
# If the property names an existing function (initially `mb_strlen`),
# or already holds a non-string callable, nothing is changed. Otherwise
# it is replaced by a closure that loosely counts UTF-8 characters with
# a regular expression: each match is either a single byte in the range
# 0x00-0xBF or a lead byte followed by its continuation bytes.
#
# A closure is used instead of create_function(), which was deprecated
# in PHP 7.2 and removed in PHP 8.0.
#
    $current = $this->utf8_strlen;

    # function_exists() only accepts a function name; anything that is
    # not a string is assumed to be a callable installed earlier.
    if (!is_string($current) || function_exists($current)) return;

    $this->utf8_strlen = function ($text) {
        return preg_match_all(
            '/[\x00-\xBF]|[\xC0-\xFF][\x80-\xBF]*/',
            $text, $m);
    };
}
1506
1507
protected function unhash($text) {
#
# Put back the original content for every hash placeholder found in
# $text. A placeholder is one delimiter character, a \x1A byte, a run of
# digits, and the same delimiter character again; the lookup itself is
# done by _unhash_callback.
#
    $placeholder_re = '/(.)\x1A[0-9]+\1/';
    return preg_replace_callback($placeholder_re,
        array($this, '_unhash_callback'), $text);
}
protected function _unhash_callback($matches) {
#
# Look up the content stored in html_hashes under the complete
# placeholder that was matched.
#
    $placeholder = $matches[0];
    return $this->html_hashes[$placeholder];
}
1518
1519 }
1520
1521
1522 #
1523 # Temporary Markdown Extra Parser Implementation Class
1524 #
1525 # NOTE: DON'T USE THIS CLASS
1526 # Currently the implementation of Extra resides here in this temporary class.
1527 # This makes it easier to propagate the changes between the three different
1528 # packaging styles of PHP Markdown. When this issue is resolved, this
1529 # MarkdownExtra_TmpImpl class here will disappear and \Michelf\MarkdownExtra
1530 # will contain the code. So please use \Michelf\MarkdownExtra and ignore this
1531 # one.
1532 #
1533
1534 abstract class _MarkdownExtra_TmpImpl extends \Michelf\Markdown {
1535
1536         ### Configuration Variables ###
1537
1538         # Prefix for footnote ids.
1539         public $fn_id_prefix = "";
1540         
1541         # Optional title attribute for footnote links and backlinks.
1542         public $fn_link_title = "";
1543         public $fn_backlink_title = "";
1544         
1545         # Optional class attribute for footnote links and backlinks.
1546         public $fn_link_class = "footnote-ref";
1547         public $fn_backlink_class = "footnote-backref";
1548
1549         # Class name for table cell alignment (%% replaced left/center/right)
1550         # For instance: 'go-%%' becomes 'go-left' or 'go-right' or 'go-center'
1551         # If empty, the align attribute is used instead of a class name.
1552         public $table_align_class_tmpl = '';
1553
1554         # Optional class prefix for fenced code block.
1555         public $code_class_prefix = "";
1556         # Class attribute for code blocks goes on the `code` tag;
1557         # setting this to true will put attributes on the `pre` tag instead.
1558         public $code_attr_on_pre = false;
1559         
1560         # Predefined abbreviations.
1561         public $predef_abbr = array();
1562
1563
1564         ### Parser Implementation ###
1565
public function __construct() {
#
# Constructor. Registers the Markdown Extra additions and then lets the
# parent constructor finish the initialization.
#
    # The extra escapable characters must be in place before the parent
    # constructor initializes the table.
    $this->escape_chars = $this->escape_chars . ':|';

    # Extra document, block and span transformations, keyed by method
    # name with their priority as value. Entries already present are
    # kept as they are; the parent constructor will do the sorting.
    $extra_document = array(
        "doFencedCodeBlocks" => 5,
        "stripFootnotes"     => 15,
        "stripAbbreviations" => 25,
        "appendFootnotes"    => 50,
        );
    $extra_block = array(
        "doFencedCodeBlocks" => 5,
        "doTables"           => 15,
        "doDefLists"         => 45,
        );
    $extra_span = array(
        "doFootnotes"        => 5,
        "doAbbreviations"    => 70,
        );

    $this->document_gamut = $this->document_gamut + $extra_document;
    $this->block_gamut    = $this->block_gamut + $extra_block;
    $this->span_gamut     = $this->span_gamut + $extra_span;

    parent::__construct();
}
1594         
1595         
1596         # Extra variables used during extra transformations.
1597         protected $footnotes = array();
1598         protected $footnotes_ordered = array();
1599         protected $footnotes_ref_count = array();
1600         protected $footnotes_numbers = array();
1601         protected $abbr_desciptions = array();
1602         protected $abbr_word_re = '';
1603         
1604         # Give the current footnote number.
1605         protected $footnote_counter = 1;
1606         
1607         
protected function setup() {
#
# Prepare the Extra-specific state: after the parent's setup, all
# footnote and abbreviation bookkeeping is reset, then the predefined
# abbreviations are loaded.
#
    parent::setup();

    $this->footnotes_numbers = array();
    $this->footnotes_ref_count = array();
    $this->footnotes_ordered = array();
    $this->footnotes = array();
    $this->abbr_desciptions = array();
    $this->footnote_counter = 1;

    # Build the alternation of all predefined abbreviation words and
    # record the trimmed description of each.
    $word_re = '';
    foreach ($this->predef_abbr as $word => $description) {
        if ($word_re) {
            $word_re .= '|';
        }
        $word_re .= preg_quote($word);
        $this->abbr_desciptions[$word] = trim($description);
    }
    $this->abbr_word_re = $word_re;
}
1629         
protected function teardown() {
#
# Empty the Extra-specific footnote and abbreviation state, then let the
# parent class do its own teardown.
#
    $this->footnotes
        = $this->footnotes_ordered
        = $this->footnotes_ref_count
        = $this->footnotes_numbers
        = $this->abbr_desciptions
        = array();
    $this->abbr_word_re = '';

    parent::teardown();
}
1643         
1644         
1645         ### Extra Attribute Parser ###
1646
1647         # Expression to use to catch attributes (includes the braces)
1648         protected $id_class_attr_catch_re = '\{((?:[ ]*[#.][-_:a-zA-Z0-9]+){1,})[ ]*\}';
1649         # Expression to use when parsing in a context when no capture is desired
1650         protected $id_class_attr_nocatch_re = '\{(?:[ ]*[#.][-_:a-zA-Z0-9]+){1,}[ ]*\}';
1651
protected function doExtraAttributes($tag_name, $attr) {
#
# Turn the attribute text caught by $this->id_class_attr_catch_re into
# an HTML attribute string.
#
# $tag_name is accepted for interface compatibility but not used here.
# $attr is the text between the braces, e.g. "#intro .note .wide".
#
# Supported components are `.class` and `#id`. All classes are kept, in
# order; only the first id is used. Returns "" when $attr is empty,
# otherwise a string such as ` id="intro" class="note wide"` (with a
# leading space before each attribute).
#
    if (empty($attr)) return "";

    # Split on components
    preg_match_all('/[#.][-_:a-zA-Z0-9]+/', $attr, $matches);
    $elements = $matches[0];

    # handle classes and ids (only first id taken into account)
    $classes = array();
    $id = false;
    foreach ($elements as $element) {
        # Offsets use [] -- the {} string-offset syntax is deprecated
        # since PHP 7.4 and is a parse error in PHP 8.
        if ($element[0] == '.') {
            $classes[] = substr($element, 1);
        } else if ($element[0] == '#') {
            if ($id === false) $id = substr($element, 1);
        }
    }

    # compose attributes as string
    $attr_str = "";
    if (!empty($id)) {
        $attr_str .= ' id="'.$id.'"';
    }
    if (!empty($classes)) {
        $attr_str .= ' class="'.implode(" ", $classes).'"';
    }
    return $attr_str;
}
1686
1687
protected function stripLinkDefinitions($text) {
#
# Remove link definitions from $text. Each definition found is passed to
# _stripLinkDefinitions_callback, which records its URL, title and extra
# attributes; the definition itself is replaced by the callback's result.
#
# Link defs are in the form: ^[id]: url "optional title"
# optionally followed by an {#id .class} attribute block.
#
    $less_than_tab = $this->tab_width - 1;

    $definition_re = '{
                                                        ^[ ]{0,'.$less_than_tab.'}\[(.+)\][ ]?: # id = $1
                                                          [ ]*
                                                          \n?                           # maybe *one* newline
                                                          [ ]*
                                                        (?:
                                                          <(.+?)>                       # url = $2
                                                        |
                                                          (\S+?)                        # url = $3
                                                        )
                                                          [ ]*
                                                          \n?                           # maybe one newline
                                                          [ ]*
                                                        (?:
                                                                (?<=\s)                 # lookbehind for whitespace
                                                                ["(]
                                                                (.*?)                   # title = $4
                                                                [")]
                                                                [ ]*
                                                        )?      # title is optional
                                        (?:[ ]* '.$this->id_class_attr_catch_re.' )?  # $5 = extra id & class attr
                                                        (?:\n+|\Z)
                        }xm';

    return preg_replace_callback($definition_re,
        array($this, '_stripLinkDefinitions_callback'),
        $text);
}
protected function _stripLinkDefinitions_callback($matches) {
#
# Record one link definition and remove it from the text.
#
# The lowercased id ($matches[1]) is the key under which the URL (the
# <bracketed> form in $matches[2], else the bare form in $matches[3]),
# the title ($matches[4]) and the rendered extra attributes
# ($matches[5]) are stored. Title and attributes are optional captures
# and may be missing from $matches, in which case null is used.
#
    $key = strtolower($matches[1]);

    $title = isset($matches[4]) ? $matches[4] : null;
    $extra = isset($matches[5]) ? $matches[5] : null;

    if ($matches[2] == '') {
        $this->urls[$key] = $matches[3];
    } else {
        $this->urls[$key] = $matches[2];
    }
    $this->titles[$key] = $title;
    $this->ref_attr[$key] = $this->doExtraAttributes("", $extra);

    return ''; # String that will replace the block
}
1731
1732
1733         ### HTML Block Parser ###
1734         
1735         # Tags that are always treated as block tags:
1736         protected $block_tags_re = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|form|fieldset|iframe|hr|legend|article|section|nav|aside|hgroup|header|footer|figcaption';
1737                                                    
1738         # Tags treated as block tags only if the opening tag is alone on its line:
1739         protected $context_block_tags_re = 'script|noscript|ins|del|iframe|object|source|track|param|math|svg|canvas|audio|video';
1740         
1741         # Tags where markdown="1" default to span mode:
1742         protected $contain_span_tags_re = 'p|h[1-6]|li|dd|dt|td|th|legend|address';
1743         
1744         # Tags which must not have their contents modified, no matter where 
1745         # they appear:
1746         protected $clean_tags_re = 'script|math|svg';
1747         
1748         # Tags that do not need to be closed.
1749         protected $auto_close_tags_re = 'hr|img|param|source|track';
1750         
1751
protected function hashHTMLBlocks($text) {
#
# Hashify HTML blocks and "clean tags".
#
# Only block-level HTML (headers, lists, tables, ...) is meant to be
# hashed, because "paragraphs" wrapped in non-block-level tags such as
# anchors, phrase emphasis and spans must still get their <p>s. The list
# of tags looked for is hard-coded.
#
# The work is done by _hashHTMLBlocks_inMarkdown, which calls
# _hashHTMLBlocks_inHTML when it encounters a block tag; when that one
# finds a markdown="1" attribute within a tag it calls back
# _hashHTMLBlocks_inMarkdown for the tag's content. The two functions
# call each other recursively.
#
# When no_markup is set, $text is returned untouched.
#
    if ($this->no_markup) {
        return $text;
    }

    # The hasher returns ( processed text , remaining text ); only the
    # processed text is of interest here.
    $result = $this->_hashHTMLBlocks_inMarkdown($text);
    return $result[0];
}
1777         protected function _hashHTMLBlocks_inMarkdown($text, $indent = 0,
1778                                                                                 $enclosing_tag_re = '', $span = false)
1779         {
1780         #
1781         # Parse markdown text, calling _HashHTMLBlocks_InHTML for block tags.
1782         #
1783         # *   $indent is the number of space to be ignored when checking for code 
1784         #     blocks. This is important because if we don't take the indent into 
1785         #     account, something like this (which looks right) won't work as expected:
1786         #
1787         #     <div>
1788         #         <div markdown="1">
1789         #         Hello World.  <-- Is this a Markdown code block or text?
1790         #         </div>  <-- Is this a Markdown code block or a real tag?
1791         #     <div>
1792         #
1793         #     If you don't like this, just don't indent the tag on which
1794         #     you apply the markdown="1" attribute.
1795         #
1796         # *   If $enclosing_tag_re is not empty, stops at the first unmatched closing 
1797         #     tag with that name. Nested tags supported.
1798         #
1799         # *   If $span is true, text inside must treated as span. So any double 
1800         #     newline will be replaced by a single newline so that it does not create 
1801         #     paragraphs.
1802         #
1803         # Returns an array of that form: ( processed text , remaining text )
1804         #
1805                 if ($text === '') return array('', '');
1806
1807                 # Regex to check for the presense of newlines around a block tag.
1808                 $newline_before_re = '/(?:^\n?|\n\n)*$/';
1809                 $newline_after_re = 
1810                         '{
1811                                 ^                                               # Start of text following the tag.
1812                                 (?>[ ]*<!--.*?-->)?             # Optional comment.
1813                                 [ ]*\n                                  # Must be followed by newline.
1814                         }xs';
1815                 
1816                 # Regex to match any tag.
1817                 $block_tag_re =
1818                         '{
1819                                 (                                       # $2: Capture whole tag.
1820                                         </?                                     # Any opening or closing tag.
1821                                                 (?>                             # Tag name.
1822                                                         '.$this->block_tags_re.'                        |
1823                                                         '.$this->context_block_tags_re.'        |
1824                                                         '.$this->clean_tags_re.'                |
1825                                                         (?!\s)'.$enclosing_tag_re.'
1826                                                 )
1827                                                 (?:
1828                                                         (?=[\s"\'/a-zA-Z0-9])   # Allowed characters after tag name.
1829                                                         (?>
1830                                                                 ".*?"           |       # Double quotes (can contain `>`)
1831                                                                 \'.*?\'         |       # Single quotes (can contain `>`)
1832                                                                 .+?                             # Anything but quotes and `>`.
1833                                                         )*?
1834                                                 )?
1835                                         >                                       # End of tag.
1836                                 |
1837                                         <!--    .*?     -->     # HTML Comment
1838                                 |
1839                                         <\?.*?\?> | <%.*?%>     # Processing instruction
1840                                 |
1841                                         <!\[CDATA\[.*?\]\]>     # CData Block
1842                                 '. ( !$span ? ' # If not in span.
1843                                 |
1844                                         # Indented code block
1845                                         (?: ^[ ]*\n | ^ | \n[ ]*\n )
1846                                         [ ]{'.($indent+4).'}[^\n]* \n
1847                                         (?>
1848                                                 (?: [ ]{'.($indent+4).'}[^\n]* | [ ]* ) \n
1849                                         )*
1850                                 |
1851                                         # Fenced code block marker
1852                                         (?<= ^ | \n )
1853                                         [ ]{0,'.($indent+3).'}(?:~{3,}|`{3,})
1854                                                                         [ ]*
1855                                         (?:
1856                                         \.?[-_:a-zA-Z0-9]+ # standalone class name
1857                                         |
1858                                                 '.$this->id_class_attr_nocatch_re.' # extra attributes
1859                                         )?
1860                                         [ ]*
1861                                         (?= \n )
1862                                 ' : '' ). ' # End (if not is span).
1863                                 |
1864                                         # Code span marker
1865                                         # Note, this regex needs to go after backtick fenced
1866                                         # code blocks but it should also be kept outside of the
1867                                         # "if not in span" condition adding backticks to the parser
1868                                         `+
1869                                 )
1870                         }xs';
1871
1872                 
1873                 $depth = 0;             # Current depth inside the tag tree.
1874                 $parsed = "";   # Parsed text that will be returned.
1875
1876                 #
1877                 # Loop through every tag until we find the closing tag of the parent
1878                 # or loop until reaching the end of text if no parent tag specified.
1879                 #
1880                 do {
1881                         #
1882                         # Split the text using the first $tag_match pattern found.
1883                         # Text before  pattern will be first in the array, text after
1884                         # pattern will be at the end, and between will be any catches made 
1885                         # by the pattern.
1886                         #
1887                         $parts = preg_split($block_tag_re, $text, 2, 
1888                                                                 PREG_SPLIT_DELIM_CAPTURE);
1889                         
1890                         # If in Markdown span mode, add a empty-string span-level hash 
1891                         # after each newline to prevent triggering any block element.
1892                         if ($span) {
1893                                 $void = $this->hashPart("", ':');
1894                                 $newline = "$void\n";
1895                                 $parts[0] = $void . str_replace("\n", $newline, $parts[0]) . $void;
1896                         }
1897                         
1898                         $parsed .= $parts[0]; # Text before current tag.
1899                         
1900                         # If end of $text has been reached. Stop loop.
1901                         if (count($parts) < 3) {
1902                                 $text = "";
1903                                 break;
1904                         }
1905                         
1906                         $tag  = $parts[1]; # Tag to handle.
1907                         $text = $parts[2]; # Remaining text after current tag.
1908                         $tag_re = preg_quote($tag); # For use in a regular expression.
1909                         
1910                         #
1911                         # Check for: Fenced code block marker.
1912                         # Note: need to recheck the whole tag to disambiguate backtick
1913                         # fences from code spans
1914                         #
1915                         if (preg_match('{^\n?([ ]{0,'.($indent+3).'})(~{3,}|`{3,})[ ]*(?:\.?[-_:a-zA-Z0-9]+|'.$this->id_class_attr_nocatch_re.')?[ ]*\n?$}', $tag, $capture)) {
1916                                 # Fenced code block marker: find matching end marker.
1917                                 $fence_indent = strlen($capture[1]); # use captured indent in re
1918                                 $fence_re = $capture[2]; # use captured fence in re
1919                                 if (preg_match('{^(?>.*\n)*?[ ]{'.($fence_indent).'}'.$fence_re.'[ ]*(?:\n|$)}', $text,
1920                                         $matches)) 
1921                                 {
1922                                         # End marker found: pass text unchanged until marker.
1923                                         $parsed .= $tag . $matches[0];
1924                                         $text = substr($text, strlen($matches[0]));
1925                                 }
1926                                 else {
1927                                         # No end marker: just skip it.
1928                                         $parsed .= $tag;
1929                                 }
1930                         }
1931                         #
1932                         # Check for: Indented code block.
1933                         #
1934                         else if ($tag{0} == "\n" || $tag{0} == " ") {
1935                                 # Indented code block: pass it unchanged, will be handled 
1936                                 # later.
1937                                 $parsed .= $tag;
1938                         }
1939                         #
1940                         # Check for: Code span marker
1941                         # Note: need to check this after backtick fenced code blocks
1942                         #
1943                         else if ($tag{0} == "`") {
1944                                 # Find corresponding end marker.
1945                                 $tag_re = preg_quote($tag);
1946                                 if (preg_match('{^(?>.+?|\n(?!\n))*?(?<!`)'.$tag_re.'(?!`)}',
1947                                         $text, $matches))
1948                                 {
1949                                         # End marker found: pass text unchanged until marker.
1950                                         $parsed .= $tag . $matches[0];
1951                                         $text = substr($text, strlen($matches[0]));
1952                                 }
1953                                 else {
1954                                         # Unmatched marker: just skip it.
1955                                         $parsed .= $tag;
1956                                 }
1957                         }
1958                         #
1959                         # Check for: Opening Block level tag or
1960                         #            Opening Context Block tag (like ins and del) 
1961                         #               used as a block tag (tag is alone on it's line).
1962                         #
1963                         else if (preg_match('{^<(?:'.$this->block_tags_re.')\b}', $tag) ||
1964                                 (       preg_match('{^<(?:'.$this->context_block_tags_re.')\b}', $tag) &&
1965                                         preg_match($newline_before_re, $parsed) &&
1966                                         preg_match($newline_after_re, $text)    )
1967                                 )
1968                         {
1969                                 # Need to parse tag and following text using the HTML parser.
1970                                 list($block_text, $text) = 
1971                                         $this->_hashHTMLBlocks_inHTML($tag . $text, "hashBlock", true);
1972                                 
1973                                 # Make sure it stays outside of any paragraph by adding newlines.
1974                                 $parsed .= "\n\n$block_text\n\n";
1975                         }
1976                         #
1977                         # Check for: Clean tag (like script, math)
1978                         #            HTML Comments, processing instructions.
1979                         #
1980                         else if (preg_match('{^<(?:'.$this->clean_tags_re.')\b}', $tag) ||
1981                                 $tag{1} == '!' || $tag{1} == '?')
1982                         {
1983                                 # Need to parse tag and following text using the HTML parser.
1984                                 # (don't check for markdown attribute)
1985                                 list($block_text, $text) = 
1986                                         $this->_hashHTMLBlocks_inHTML($tag . $text, "hashClean", false);
1987                                 
1988                                 $parsed .= $block_text;
1989                         }
1990                         #
1991                         # Check for: Tag with same name as enclosing tag.
1992                         #
1993                         else if ($enclosing_tag_re !== '' &&
1994                                 # Same name as enclosing tag.
1995                                 preg_match('{^</?(?:'.$enclosing_tag_re.')\b}', $tag))
1996                         {
1997                                 #
1998                                 # Increase/decrease nested tag count.
1999                                 #
2000                                 if ($tag{1} == '/')                                             $depth--;
2001                                 else if ($tag{strlen($tag)-2} != '/')   $depth++;
2002
2003                                 if ($depth < 0) {
2004                                         #
2005                                         # Going out of parent element. Clean up and break so we
2006                                         # return to the calling function.
2007                                         #
2008                                         $text = $tag . $text;
2009                                         break;
2010                                 }
2011                                 
2012                                 $parsed .= $tag;
2013                         }
2014                         else {
2015                                 $parsed .= $tag;
2016                         }
2017                 } while ($depth >= 0);
2018                 
2019                 return array($parsed, $text);
2020         }
protected function _hashHTMLBlocks_inHTML($text, $hash_method, $md_attr) {
#
# Parse HTML starting at an opening tag, calling _hashHTMLBlocks_inMarkdown
# for the content of tags carrying a `markdown` attribute.
#
# *   $text         Text to parse; its first tag gives the base tag name.
# *   $hash_method  Name of the method on $this used to convert each block
#                   of accumulated text (called as $this->$hash_method()).
# *   $md_attr      Whether the `markdown="1"` attribute is honoured
#                   (callers pass false inside clean tags).
#
# *   Stops once the nesting depth of the base tag is back to zero.
#
# Returns an array of that form: ( processed text , remaining text )
# When the text ends with unbalanced tag(s), the first character of the
# original text is returned as "processed" and the rest as "remaining".
#
    if ($text === '') return array('', '');

    # Regex to match `markdown` attribute inside of a tag.
    $markdown_attr_re = '
        {
            \s*             # Eat whitespace before the `markdown` attribute
            markdown
            \s*=\s*
            (?>
                (["\'])     # $1: quote delimiter
                (.*?)       # $2: attribute value
                \1          # matching delimiter
            |
                ([^\s>]*)   # $3: unquoted attribute value
            )
            ()              # $4: make $3 always defined (avoid warnings)
        }xs';

    # Regex to match any tag.
    $tag_re = '{
            (                   # $1: Capture whole tag.
                </?                 # Any opening or closing tag.
                    [\w:$]+         # Tag name.
                    (?:
                        (?=[\s"\'/a-zA-Z0-9])   # Allowed characters after tag name.
                        (?>
                            ".*?"       |   # Double quotes (can contain `>`)
                            \'.*?\'     |   # Single quotes (can contain `>`)
                            .+?             # Anything but quotes and `>`.
                        )*?
                    )?
                >                   # End of tag.
            |
                <!--    .*?     -->     # HTML Comment
            |
                <\?.*?\?> | <%.*?%>     # Processing instruction
            |
                <!\[CDATA\[.*?\]\]>     # CData Block
            )
        }xs';

    $original_text = $text;     # Save original text in case of failure.

    $depth      = 0;    # Current depth inside the tag tree.
    $block_text = "";   # Temporary text holder for current text.
    $parsed     = "";   # Parsed text that will be returned.

    #
    # Get the name of the starting tag. The pattern only lets word
    # characters, `:` and `$` through, and the name is interpolated unquoted
    # into the depth-tracking pattern below. It defaults to an empty string
    # so that text starting with a comment or processing instruction never
    # reads an undefined variable.
    #
    $base_tag_name_re = '';
    if (preg_match('/^<([\w:$]*)\b/', $text, $matches))
        $base_tag_name_re = $matches[1];

    #
    # Loop through every tag until we find the corresponding closing tag.
    #
    do {
        #
        # Split the text on the first tag found: text before the tag comes
        # first, then the captured tag itself, then the text after it.
        #
        $parts = preg_split($tag_re, $text, 2, PREG_SPLIT_DELIM_CAPTURE);

        if (count($parts) < 3) {
            #
            # End of $text reached with unbalanced tag(s).
            # In that case, we return original text unchanged and pass the
            # first character as filtered to prevent an infinite loop in the
            # parent function.
            #
            return array($original_text[0], substr($original_text, 1));
        }

        $block_text .= $parts[0]; # Text before current tag.
        $tag         = $parts[1]; # Tag to handle.
        $text        = $parts[2]; # Remaining text after current tag.

        #
        # Check for: Auto-close tag (like <hr/>)
        #            Comments and Processing Instructions.
        #
        if (preg_match('{^</?(?:'.$this->auto_close_tags_re.')\b}', $tag) ||
            $tag[1] == '!' || $tag[1] == '?')
        {
            # Just add the tag to the block as if it was text.
            $block_text .= $tag;
        }
        else {
            #
            # Increase/decrease nested tag count. Only do so if
            # the tag's name match base tag's. A self-closed tag
            # (ending in `/>`) leaves the depth untouched.
            #
            if (preg_match('{^</?'.$base_tag_name_re.'\b}', $tag)) {
                if ($tag[1] == '/')                     $depth--;
                else if ($tag[strlen($tag)-2] != '/')   $depth++;
            }

            #
            # Check for `markdown="1"` attribute and handle it.
            # The alternation is grouped so that the anchors apply to all
            # three accepted values: exactly `1`, `block` or `span`.
            #
            if ($md_attr &&
                preg_match($markdown_attr_re, $tag, $attr_m) &&
                preg_match('/^(?:1|block|span)$/', $attr_m[2] . $attr_m[3]))
            {
                # Remove `markdown` attribute from opening tag.
                $tag = preg_replace($markdown_attr_re, '', $tag);

                # Check if text inside this tag must be parsed in span mode.
                # (The mode is kept on the instance, as before.)
                $this->mode = $attr_m[2] . $attr_m[3];
                $span_mode = $this->mode == 'span' || $this->mode != 'block' &&
                    preg_match('{^<(?:'.$this->contain_span_tags_re.')\b}', $tag);

                # Calculate indent before tag.
                if (preg_match('/(?:^|\n)( *?)(?! ).*?$/', $block_text, $matches)) {
                    $strlen = $this->utf8_strlen;
                    $indent = $strlen($matches[1], 'UTF-8');
                } else {
                    $indent = 0;
                }

                # End preceding block with this tag.
                $block_text .= $tag;
                $parsed .= $this->$hash_method($block_text);

                # Get enclosing tag name for the ParseMarkdown function.
                # Falls back to an empty name if the tag is not an opening tag.
                $tag_name_re = preg_match('/^<([\w:$]*)\b/', $tag, $matches)
                    ? $matches[1]
                    : '';

                # Parse the content using the HTML-in-Markdown parser.
                list ($block_text, $text)
                    = $this->_hashHTMLBlocks_inMarkdown($text, $indent,
                        $tag_name_re, $span_mode);

                # Outdent markdown text.
                if ($indent > 0) {
                    $block_text = preg_replace("/^[ ]{1,$indent}/m", "",
                                               $block_text);
                }

                # Append tag content to parsed text.
                if (!$span_mode)    $parsed .= "\n\n$block_text\n\n";
                else                $parsed .= "$block_text";

                # Start over with a new block.
                $block_text = "";
            }
            else $block_text .= $tag;
        }

    } while ($depth > 0);

    #
    # Hash last block text that wasn't processed inside the loop.
    #
    $parsed .= $this->$hash_method($block_text);

    return array($parsed, $text);
}
2194
2195
protected function hashClean($text) {
#
# Hash a "clean" tag: $text is handed to hashPart() with 'C' as its
# second argument and the value hashPart() returns is passed back as is.
#
    $clean_marker = 'C';
    return $this->hashPart($text, $clean_marker);
}
2204
2205
protected function doAnchors($text) {
#
# Turn Markdown link shortcuts into XHTML <a> tags.
#
# Three passes are applied in order: reference links, inline links, then
# bare reference shortcuts. The in_anchor flag is raised for the duration
# of the call; a call made while it is raised returns $text untouched.
#
    if ($this->in_anchor) return $text;
    $this->in_anchor = true;

    $reference_handler = array($this, '_doAnchors_reference_callback');
    $inline_handler    = array($this, '_doAnchors_inline_callback');

    #
    # Pass 1: reference-style links: [link text] [id]
    #
    $reference_re = '{
        (                   # wrap whole match in $1
          \[
            ('.$this->nested_brackets_re.') # link text = $2
          \]

          [ ]?              # one optional space
          (?:\n[ ]*)?       # one optional newline followed by spaces

          \[
            (.*?)       # id = $3
          \]
        )
        }xs';
    $text = preg_replace_callback($reference_re, $reference_handler, $text);

    #
    # Pass 2: inline-style links: [link text](url "optional title")
    #
    $inline_re = '{
        (               # wrap whole match in $1
          \[
            ('.$this->nested_brackets_re.') # link text = $2
          \]
          \(            # literal paren
            [ \n]*
            (?:
                <(.+?)> # href = $3
            |
                ('.$this->nested_url_parenthesis_re.')  # href = $4
            )
            [ \n]*
            (           # $5
              ([\'"])   # quote char = $6
              (.*?)     # Title = $7
              \6        # matching quote
              [ \n]*    # ignore any spaces/tabs between closing quote and )
            )?          # title is optional
          \)
          (?:[ ]? '.$this->id_class_attr_catch_re.' )?   # $8 = id/class attributes
        )
        }xs';
    $text = preg_replace_callback($inline_re, $inline_handler, $text);

    #
    # Pass 3: reference-style shortcuts: [link text]
    # This one has to run last, otherwise it would eat the first half of
    # [link text][1] or [link text](/foo).
    #
    $shortcut_re = '{
        (                   # wrap whole match in $1
          \[
            ([^\[\]]+)      # link text = $2; can\'t contain [ or ]
          \]
        )
        }xs';
    $text = preg_replace_callback($shortcut_re, $reference_handler, $text);

    $this->in_anchor = false;
    return $text;
}
protected function _doAnchors_reference_callback($matches) {
#
# Callback for reference-style links and shortcuts.
#
# $matches[1] is the whole match, $matches[2] the link text and
# $matches[3] the reference id (absent for the bare `[text]` form).
# Returns the hashed <a> element, or the whole match unchanged when the
# id is not a key of $this->urls.
#
    $original = $matches[1];
    $label    = $matches[2];
    $ref_key  = isset($matches[3]) ? $matches[3] : '';

    # Shortcut forms like [this][] or [this] use the link text as id.
    if ($ref_key === '') {
        $ref_key = $label;
    }

    # Lower-case the id and turn embedded newlines into spaces.
    $ref_key = preg_replace('{[ ]?\n}', ' ', strtolower($ref_key));

    # Unknown id: leave the source text as it was.
    if (!isset($this->urls[$ref_key])) {
        return $original;
    }

    $href = $this->encodeAttribute($this->urls[$ref_key]);
    $html = "<a href=\"$href\"";

    if (isset($this->titles[$ref_key])) {
        $html .= ' title="' . $this->encodeAttribute($this->titles[$ref_key]) . '"';
    }
    if (isset($this->ref_attr[$ref_key])) {
        $html .= $this->ref_attr[$ref_key];
    }

    $html .= '>' . $this->runSpanGamut($label) . '</a>';

    return $this->hashPart($html);
}
protected function _doAnchors_inline_callback($matches) {
#
# Callback for inline links: [link text](url "optional title"){attrs}
#
# $matches[2] is the link text, $matches[3] / $matches[4] the URL (angle
# bracketed or bare form), $matches[7] the optional title and $matches[8]
# the optional id/class attribute block.
# Returns the <a> element after passing it through hashPart().
#
    $url   = $matches[3] == '' ? $matches[4] : $matches[3];

    # Trailing groups that did not take part in the match are missing from
    # $matches, so test for them explicitly.
    $title = isset($matches[7]) ? $matches[7] : null;
    $extra = isset($matches[8]) ? $matches[8] : null;

    # The link text goes through the span gamut exactly once, like in the
    # reference-style callback.
    $link_text = $this->runSpanGamut($matches[2]);
    $attr      = $this->doExtraAttributes("a", $extra);

    $url = $this->encodeAttribute($url);

    $result = "<a href=\"$url\"";
    if ($title !== null) {
        $title = $this->encodeAttribute($title);
        $result .=  " title=\"$title\"";
    }
    $result .= $attr;
    $result .= ">$link_text</a>";

    return $this->hashPart($result);
}
2335
2336
protected function doImages($text) {
#
# Turn Markdown image shortcuts into <img> tags.
#
# Reference-style images are replaced first, inline images second.
#
    #
    # Pass 1: reference-style labeled images: ![alt text][id]
    #
    $reference_re = '{
        (               # wrap whole match in $1
          !\[
            ('.$this->nested_brackets_re.')     # alt text = $2
          \]

          [ ]?              # one optional space
          (?:\n[ ]*)?       # one optional newline followed by spaces

          \[
            (.*?)       # id = $3
          \]

        )
        }xs';
    $text = preg_replace_callback(
        $reference_re,
        array($this, '_doImages_reference_callback'),
        $text
    );

    #
    # Pass 2: inline images: ![alt text](url "optional title")
    # Don't forget: encode * and _
    #
    $inline_re = '{
        (               # wrap whole match in $1
          !\[
            ('.$this->nested_brackets_re.')     # alt text = $2
          \]
          \s?           # One optional whitespace character
          \(            # literal paren
            [ \n]*
            (?:
                <(\S*)> # src url = $3
            |
                ('.$this->nested_url_parenthesis_re.')  # src url = $4
            )
            [ \n]*
            (           # $5
              ([\'"])   # quote char = $6
              (.*?)     # title = $7
              \6        # matching quote
              [ \n]*
            )?          # title is optional
          \)
          (?:[ ]? '.$this->id_class_attr_catch_re.' )?   # $8 = id/class attributes
        )
        }xs';
    $text = preg_replace_callback(
        $inline_re,
        array($this, '_doImages_inline_callback'),
        $text
    );

    return $text;
}
protected function _doImages_reference_callback($matches) {
#
# Callback for reference-style images: ![alt text][id]
#
# $matches[1] is the whole match, $matches[2] the alt text and
# $matches[3] the reference id. Returns the hashed <img> element, or the
# whole match unchanged when the id is not a key of $this->urls.
#
    $original = $matches[1];

    # An empty id, as in ![this][], falls back to the alt text.
    $ref_key = strtolower($matches[3] !== '' ? $matches[3] : $matches[2]);

    $alt = $this->encodeAttribute($matches[2]);

    # If there's no such link ID, leave intact.
    if (!isset($this->urls[$ref_key])) {
        return $original;
    }

    $src  = $this->encodeAttribute($this->urls[$ref_key]);
    $html = "<img src=\"$src\" alt=\"$alt\"";

    if (isset($this->titles[$ref_key])) {
        $html .= ' title="' . $this->encodeAttribute($this->titles[$ref_key]) . '"';
    }
    if (isset($this->ref_attr[$ref_key])) {
        $html .= $this->ref_attr[$ref_key];
    }

    $html .= $this->empty_element_suffix;

    return $this->hashPart($html);
}
protected function _doImages_inline_callback($matches) {
#
# Callback for inline images: ![alt text](url "optional title"){attrs}
#
# $matches[2] is the alt text, $matches[3] / $matches[4] the source URL
# (angle bracketed or bare form), $matches[7] the optional title and
# $matches[8] the optional id/class attribute block.
# Returns the <img> element after passing it through hashPart().
#
    $src = $matches[3] == '' ? $matches[4] : $matches[3];

    # Optional trailing groups may be missing from $matches.
    $has_title = isset($matches[7]);
    $extra     = isset($matches[8]) ? $matches[8] : null;

    $attr = $this->doExtraAttributes("img", $extra);
    $alt  = $this->encodeAttribute($matches[2]);
    $src  = $this->encodeAttribute($src);

    $html = "<img src=\"$src\" alt=\"$alt\"";
    if ($has_title) {
        $html .= ' title="' . $this->encodeAttribute($matches[7]) . '"';
    }
    $html .= $attr . $this->empty_element_suffix;

    return $this->hashPart($html);
}
2442
2443
protected function doHeaders($text) {
    #
    # Redefined to add id and class attribute support.
    #
    # Runs two passes over $text (setext-style, then atx-style headers) and
    # hands every match to the corresponding callback. Returns the
    # transformed text.
    #
    # Setext-style headers:
    #     Header 1  {#header1}
    #     ========
    #
    #     Header 2  {#header2 .class1 .class2}
    #     --------
    #
    $text = preg_replace_callback(
        '{
            (^.+?)                                          # $1: Header text
            (?:[ ]+ '.$this->id_class_attr_catch_re.' )?    # $2: id/class attributes
            [ ]*\n(=+|-+)[ ]*\n+                            # $3: Header footer
        }mx',
        array($this, '_doHeaders_callback_setext'), $text);

    # atx-style headers:
    #   # Header 1        {#header1}
    #   ## Header 2       {#header2}
    #   ## Header 2 with closing hashes ##  {#header3.class1.class2}
    #   ...
    #   ###### Header 6   {.class2}
    #
    $text = preg_replace_callback('{
            ^(\#{1,6})      # $1 = string of #\'s
            [ ]*
            (.+?)           # $2 = Header text
            [ ]*
            \#*             # optional closing #\'s (not counted)
            (?:[ ]+ '.$this->id_class_attr_catch_re.' )?    # $3 = id/class attributes
            [ ]*
            \n+
        }xm',
        array($this, '_doHeaders_callback_atx'), $text);

    return $text;
}
protected function _doHeaders_callback_setext($matches) {
    #
    # Callback for setext-style headers.
    #
    # Reads from $matches: [1] header text, [2] optional id/class attribute
    # block, [3] the underline made of "=" or "-" characters.
    # Returns the header element passed through hashBlock() and surrounded
    # by newlines, or the untouched match when it must not become a header.
    #
    # A single "-" underline below a line starting with "- " is returned
    # unchanged.
    if ($matches[3] == '-' && preg_match('{^- }', $matches[1]))
        return $matches[0];

    # "=" underline gives level 1, anything else ("-") gives level 2.
    # Square-bracket offset is used because the curly-brace string offset
    # syntax is deprecated since PHP 7.4 and a parse error in PHP 8.
    $level = $matches[3][0] == '=' ? 1 : 2;
    $attr  = $this->doExtraAttributes("h$level",
        isset($matches[2]) ? $matches[2] : null);
    $block = "<h$level$attr>".$this->runSpanGamut($matches[1])."</h$level>";
    return "\n" . $this->hashBlock($block) . "\n\n";
}
protected function _doHeaders_callback_atx($matches) {
    #
    # Callback for atx-style headers.
    #
    # Reads from $matches: [1] the run of "#" characters (its length is the
    # header level), [2] header text, [3] optional id/class attribute block.
    #
    $tag   = 'h' . strlen($matches[1]);
    $extra = $this->doExtraAttributes($tag,
        isset($matches[3]) ? $matches[3] : null);
    $inner = $this->runSpanGamut($matches[2]);

    $html = '<' . $tag . $extra . '>' . $inner . '</' . $tag . '>';
    return "\n" . $this->hashBlock($html) . "\n\n";
}
2498
2499
protected function doTables($text) {
    #
    # Form HTML tables.
    #
    # Runs two passes over $text: first tables whose rows start with a
    # pipe, then tables without a leading pipe. Each match is handed to the
    # corresponding callback. Returns the transformed text.
    #
    $less_than_tab = $this->tab_width - 1;
    #
    # Find tables with leading pipe.
    #
    #   | Header 1 | Header 2
    #   | -------- | --------
    #   | Cell 1   | Cell 2
    #   | Cell 3   | Cell 4
    #
    $text = preg_replace_callback('
        {
            ^                               # Start of a line
            [ ]{0,'.$less_than_tab.'}       # Allowed whitespace.
            [|]                             # Optional leading pipe (present)
            (.+) \n                         # $1: Header row (at least one pipe)

            [ ]{0,'.$less_than_tab.'}       # Allowed whitespace.
            [|] ([ ]*[-:]+[-| :]*) \n       # $2: Header underline

            (                               # $3: Cells
                (?>
                    [ ]*                    # Allowed whitespace.
                    [|] .* \n               # Row content.
                )*
            )
            (?=\n|\Z)                       # Stop at final double newline.
        }xm',
        array($this, '_doTable_leadingPipe_callback'), $text);

    #
    # Find tables without leading pipe.
    #
    #   Header 1 | Header 2
    #   -------- | --------
    #   Cell 1   | Cell 2
    #   Cell 3   | Cell 4
    #
    # The callback name is spelled exactly like the method it refers to
    # (it used to be '_DoTable_callback', which only worked because PHP
    # method lookup is case-insensitive).
    $text = preg_replace_callback('
        {
            ^                               # Start of a line
            [ ]{0,'.$less_than_tab.'}       # Allowed whitespace.
            (\S.*[|].*) \n                  # $1: Header row (at least one pipe)

            [ ]{0,'.$less_than_tab.'}       # Allowed whitespace.
            ([-:]+[ ]*[|][-| :]*) \n        # $2: Header underline

            (                               # $3: Cells
                (?>
                    .* [|] .* \n            # Row content
                )*
            )
            (?=\n|\Z)                       # Stop at final double newline.
        }xm',
        array($this, '_doTable_callback'), $text);

    return $text;
}
protected function _doTable_leadingPipe_callback($matches) {
    #
    # Callback for tables written with a leading pipe: removes the leading
    # pipe (and any spaces before it) from every content row, then
    # delegates to _doTable_callback() with the same header and underline.
    #
    $rows = preg_replace('/^ *[|]/m', '', $matches[3]);

    $forwarded = array($matches[0], $matches[1], $matches[2], $rows);
    return $this->_doTable_callback($forwarded);
}
protected function _doTable_makeAlignAttr($alignname)
{
    #
    # Return the attribute string expressing a column alignment.
    #
    # When $this->table_align_class_tmpl is non-empty, a class attribute is
    # produced with every "%%" in the template replaced by $alignname;
    # otherwise a plain align attribute is produced.
    #
    if (!empty($this->table_align_class_tmpl)) {
        $classname = str_replace('%%', $alignname, $this->table_align_class_tmpl);
        return " class=\"$classname\"";
    }

    return " align=\"$alignname\"";
}
protected function _doTable_callback($matches) {
    #
    # Build the HTML for one table.
    #
    # Reads from $matches: [1] header row, [2] header underline, [3] the
    # content rows. Returns the <table> markup passed through hashBlock(),
    # followed by a newline.
    #
    $trailing_pipe_re = '/[|] *$/m';
    $cell_split_re    = '/ *[|] */';

    # Drop any trailing pipe at the end of each line.
    $head      = preg_replace($trailing_pipe_re, '', $matches[1]);
    $underline = preg_replace($trailing_pipe_re, '', $matches[2]);
    $content   = preg_replace($trailing_pipe_re, '', $matches[3]);

    # Read the alignment of each column from the colons in the underline.
    foreach (preg_split($cell_split_re, $underline) as $n => $s) {
        if (preg_match('/^ *-+: *$/', $s)) {
            $attr[$n] = $this->_doTable_makeAlignAttr('right');
        } elseif (preg_match('/^ *:-+: *$/', $s)) {
            $attr[$n] = $this->_doTable_makeAlignAttr('center');
        } elseif (preg_match('/^ *:-+ *$/', $s)) {
            $attr[$n] = $this->_doTable_makeAlignAttr('left');
        } else {
            $attr[$n] = '';
        }
    }

    # Span elements are parsed before splitting so that pipes inside code
    # spans, character escapes and inline HTML tags are ignored.
    $headers   = preg_split($cell_split_re, $this->parseSpan($head));
    $col_count = count($headers);
    $attr      = array_pad($attr, $col_count, '');

    # Header section.
    $html = "<table>\n" . "<thead>\n" . "<tr>\n";
    foreach ($headers as $n => $header) {
        $html .= '  <th' . $attr[$n] . '>'
            . $this->runSpanGamut(trim($header)) . "</th>\n";
    }
    $html .= "</tr>\n" . "</thead>\n";

    # Body section: one <tr> per content line.
    $html .= "<tbody>\n";
    foreach (explode("\n", trim($content, "\n")) as $row) {
        # Same span parsing as for the header, for the same reason.
        $cells = preg_split($cell_split_re, $this->parseSpan($row), $col_count);
        # Short rows are completed with empty cells.
        $cells = array_pad($cells, $col_count, '');

        $html .= "<tr>\n";
        foreach ($cells as $n => $cell) {
            $html .= '  <td' . $attr[$n] . '>'
                . $this->runSpanGamut(trim($cell)) . "</td>\n";
        }
        $html .= "</tr>\n";
    }
    $html .= "</tbody>\n" . "</table>";

    return $this->hashBlock($html) . "\n";
}
2641
2642         
protected function doDefLists($text) {
    #
    # Form HTML definition lists.
    #
    # Finds whole definition lists located at the start of $text or right
    # after a blank line and hands each one to _doDefLists_callback().
    # Returns the transformed text.
    #
    $max_indent = $this->tab_width - 1;

    # Pattern matching one entire dl list (used with the m and x flags).
    $dl_re = '(?>
        (                               # $1 = whole list
          (                             # $2
            [ ]{0,'.$max_indent.'}
            ((?>.*\S.*\n)+)             # $3 = defined term
            \n?
            [ ]{0,'.$max_indent.'}:[ ]+ # colon starting definition
          )
          (?s:.+?)
          (                             # $4
              \z
            |
              \n{2,}
              (?=\S)
              (?!                       # Negative lookahead for another term
                [ ]{0,'.$max_indent.'}
                (?: \S.*\n )+?          # defined term
                \n?
                [ ]{0,'.$max_indent.'}:[ ]+ # colon starting definition
              )
              (?!                       # Negative lookahead for another definition
                [ ]{0,'.$max_indent.'}:[ ]+ # colon starting definition
              )
          )
        )
    )'; // mx

    $pattern = '{
            (?>\A\n?|(?<=\n\n))
            '.$dl_re.'
        }mx';

    return preg_replace_callback($pattern,
        array($this, '_doDefLists_callback'), $text);
}
protected function _doDefLists_callback($matches) {
    #
    # Callback for one whole definition list ($matches[1]): converts its
    # items with processDefListItems(), wraps the trimmed result in <dl>
    # tags and returns it passed through hashBlock(), followed by a blank
    # line.
    #
    $items = trim($this->processDefListItems($matches[1]));
    $html  = "<dl>\n" . $items . "\n</dl>";

    return $this->hashBlock($html) . "\n\n";
}
2695
2696
protected function processDefListItems($list_str) {
    #
    # Process the contents of a single definition list, splitting it
    # into individual term and definition list items.
    #
    # Terms are converted first, so that the definition pass can use the
    # generated <dt> tags as one of its stop markers.
    #
    $max_indent = $this->tab_width - 1;

    # Collapse trailing blank lines into a single newline.
    $list_str = preg_replace("/\n{2,}\\z/", "\n", $list_str);

    # Pass 1: definition terms.
    $term_re = '{
        (?>\A\n?|\n\n+)                 # leading line
        (                               # definition terms = $1
            [ ]{0,'.$max_indent.'}      # leading whitespace
            (?!\:[ ]|[ ])               # negative lookahead for a definition
                                        #   mark (colon) or more whitespace.
            (?> \S.* \n)+?              # actual term (not whitespace).
        )
        (?=\n?[ ]{0,3}:[ ])             # lookahead for following line feed
                                        #   with a definition mark.
        }xm';
    $list_str = preg_replace_callback($term_re,
        array($this, '_processDefListItems_callback_dt'), $list_str);

    # Pass 2: the definitions themselves.
    $def_re = '{
        \n(\n+)?                        # leading line = $1
        (                               # marker space = $2
            [ ]{0,'.$max_indent.'}      # whitespace before colon
            \:[ ]+                      # definition mark (colon)
        )
        ((?s:.+?))                      # definition text = $3
        (?= \n+                         # stop at next definition mark,
            (?:                         # next term or end of text
                [ ]{0,'.$max_indent.'} \:[ ] |
                <dt> | \z
            )
        )
        }xm';
    $list_str = preg_replace_callback($def_re,
        array($this, '_processDefListItems_callback_dd'), $list_str);

    return $list_str;
}
protected function _processDefListItems_callback_dt($matches) {
    #
    # Callback for a group of definition terms: every line of $matches[1]
    # becomes its own <dt> element, with its trimmed text run through
    # runSpanGamut().
    #
    $html = '';
    foreach (explode("\n", trim($matches[1])) as $line) {
        $html .= "\n<dt>" . $this->runSpanGamut(trim($line)) . "</dt>";
    }

    return $html . "\n";
}
protected function _processDefListItems_callback_dd($matches) {
    #
    # Callback for a single definition.
    #
    # Reads from $matches: [1] blank line(s) preceding the definition, if
    # any, [2] the marker (indentation, colon and following spaces),
    # [3] the definition text. Returns the <dd> element.
    #
    $body = $matches[3];

    # Without a preceding blank line and without a blank line inside the
    # text, the definition is handled as span-level content.
    $is_block = $matches[1] || preg_match('/\n{2,}/', $body);
    if (!$is_block) {
        $inline = $this->runSpanGamut($this->outdent(rtrim($body)));
        return "\n<dd>" . $inline . "</dd>\n";
    }

    # Block-level content: the marker is replaced by the same number of
    # spaces before outdenting and running the block gamut.
    $padded = str_repeat(' ', strlen($matches[2])) . $body;
    $blocks = $this->runBlockGamut($this->outdent($padded . "\n\n"));

    return "\n<dd>" . "\n" . $blocks . "\n" . "</dd>\n";
}
2767
2768
protected function doFencedCodeBlocks($text) {
    #
    # Adding the fenced code block syntax to regular Markdown:
    #
    # ~~~
    # Code block
    # ~~~
    #
    # A fence is three or more tildes or backticks; the closing fence must
    # repeat the opening marker exactly. The opening fence may be followed
    # by a standalone class name or by an extra attribute block. Every
    # match is handed to _doFencedCodeBlocks_callback(). Returns the
    # transformed text.
    #
    # (The previously assigned but never used $less_than_tab local has been
    # removed.)
    $text = preg_replace_callback('{
            (?:\n|\A)
            # 1: Opening marker
            (
                (?:~{3,}|`{3,}) # 3 or more tildes/backticks.
            )
            [ ]*
            (?:
                \.?([-_:a-zA-Z0-9]+) # 2: standalone class name
            |
                '.$this->id_class_attr_catch_re.' # 3: Extra attributes
            )?
            [ ]* \n # Whitespace and newline following marker.

            # 4: Content
            (
                (?>
                    (?!\1 [ ]* \n)  # Not a closing marker.
                    .*\n+
                )+
            )

            # Closing marker.
            \1 [ ]* (?= \n )
        }xm',
        array($this, '_doFencedCodeBlocks_callback'), $text);

    return $text;
}
protected function _doFencedCodeBlocks_callback($matches) {
    #
    # Callback for one fenced code block.
    #
    # Reads from $matches: [2] optional standalone class name, [3] optional
    # extra attribute block, [4] the code itself. Returns the
    # <pre><code> markup passed through hashBlock(), surrounded by blank
    # lines.
    #
    $classname = isset($matches[2]) ? $matches[2] : '';
    $attrs     = isset($matches[3]) ? $matches[3] : null;

    $codeblock = htmlspecialchars($matches[4], ENT_NOQUOTES);
    # Leading newlines of the code are turned into <br> tags.
    $codeblock = preg_replace_callback('/^\n+/',
        array($this, '_doFencedCodeBlocks_newlines'), $codeblock);

    if ($classname != "") {
        # Square-bracket offset is used because the curly-brace string
        # offset syntax is deprecated since PHP 7.4 and a parse error in
        # PHP 8.
        if ($classname[0] == '.')
            $classname = substr($classname, 1);
        $attr_str = ' class="'.$this->code_class_prefix.$classname.'"';
    } else {
        $attr_str = $this->doExtraAttributes(
            $this->code_attr_on_pre ? "pre" : "code", $attrs);
    }

    # The attributes go on <pre> or on <code>, depending on the
    # code_attr_on_pre setting.
    $pre_attr_str  = $this->code_attr_on_pre ? $attr_str : '';
    $code_attr_str = $this->code_attr_on_pre ? '' : $attr_str;
    $codeblock  = "<pre$pre_attr_str><code$code_attr_str>$codeblock</code></pre>";

    return "\n\n".$this->hashBlock($codeblock)."\n\n";
}
protected function _doFencedCodeBlocks_newlines($matches) {
    #
    # Return one <br> tag (closed with the configured empty element
    # suffix) for every character of the matched text.
    #
    $br = '<br' . $this->empty_element_suffix;

    return str_repeat($br, strlen($matches[0]));
}
2833
2834
#
# Emphasis marker patterns, redefined so that an underscore directly
# next to a letter, digit or underscore on its word side is not taken
# as an emphasis marker (no underscore emphasis in the middle of a
# word). Each table is keyed by '' plus the literal marker strings.
#

# Single markers: * and _
protected $em_relist = array(
    ''  => '(?:(?<!\*)\*(?!\*)|(?<![a-zA-Z0-9_])_(?!_))(?=\S|$)(?![\.,:;]\s)',
    '*' => '(?<=\S|^)(?<!\*)\*(?!\*)',
    '_' => '(?<=\S|^)(?<!_)_(?![a-zA-Z0-9_])',
);

# Double markers: ** and __
protected $strong_relist = array(
    ''   => '(?:(?<!\*)\*\*(?!\*)|(?<![a-zA-Z0-9_])__(?!_))(?=\S|$)(?![\.,:;]\s)',
    '**' => '(?<=\S|^)(?<!\*)\*\*(?!\*)',
    '__' => '(?<=\S|^)(?<!_)__(?![a-zA-Z0-9_])',
);

# Triple markers: *** and ___
protected $em_strong_relist = array(
    ''    => '(?:(?<!\*)\*\*\*(?!\*)|(?<![a-zA-Z0-9_])___(?!_))(?=\S|$)(?![\.,:;]\s)',
    '***' => '(?<=\S|^)(?<!\*)\*\*\*(?!\*)',
    '___' => '(?<=\S|^)(?<!_)___(?![a-zA-Z0-9_])',
);
2854
2855
protected function formParagraphs($text) {
    #
    # Params:
    #     $text - string to process with html <p> tags
    #
    # Splits $text on blank lines, runs the span gamut on every chunk and
    # wraps it in <p> tags unless the chunk matches the block/clean tag
    # hash pattern, then joins the chunks and unhashes the result.
    #
    # Strip leading and trailing newlines.
    $text = preg_replace('/\A\n+|\n+\z/', '', $text);

    $blocks = array();
    foreach (preg_split('/\n{2,}/', $text, -1, PREG_SPLIT_NO_EMPTY) as $chunk) {
        $chunk = trim($this->runSpanGamut($chunk));

        # Clean tag hashes & block tag hashes are left alone.
        if (preg_match('/^B\x1A[0-9]+B|^C\x1A[0-9]+C$/', $chunk)) {
            $blocks[] = $chunk;
        } else {
            $blocks[] = "<p>$chunk</p>";
        }
    }

    # Join everything back, then remove any tag hashes still present.
    return $this->unhash(implode("\n\n", $blocks));
}
2890         
2891         
2892         ### Footnotes
2893         
protected function stripFootnotes($text) {
    #
    # Strips footnote definitions from text; each definition is handed to
    # _stripFootnotes_callback(), which stores it and removes it from the
    # text. Returns the text without the definitions.
    #
    $max_indent = $this->tab_width - 1;

    # Footnote definitions are in the form: [^id]: footnote text
    $footnote_def_re = '{
        ^[ ]{0,'.$max_indent.'}\[\^(.+?)\][ ]?:     # note_id = $1
          [ ]*
          \n?                   # maybe *one* newline
        (                       # text = $2 (no blank lines allowed)
            (?:
                .+              # actual text
            |
                \n              # newlines but
                (?!\[\^.+?\]:\s)# negative lookahead for footnote marker.
                (?!\n+[ ]{0,3}\S)# ensure line is not blank and followed
                                # by non-indented content
            )*
        )
        }xm';

    return preg_replace_callback($footnote_def_re,
        array($this, '_stripFootnotes_callback'), $text);
}
protected function _stripFootnotes_callback($matches) {
    #
    # Store one footnote definition: the outdented text ($matches[2]) is
    # saved in $this->footnotes under the prefixed note id ($matches[1]).
    # Returns an empty string, which replaces the definition in the text.
    #
    $key  = $this->fn_id_prefix . $matches[1];
    $body = $this->outdent($matches[2]);

    $this->footnotes[$key] = $body;

    return '';
}
2926
2927
protected function doFootnotes($text) {
    #
    # Replace footnote references in $text [^id] with a special text-token
    # which will be replaced by the actual footnote marker in
    # appendFootnotes. Nothing is replaced while $this->in_anchor is set.
    #
    if ($this->in_anchor) {
        return $text;
    }

    return preg_replace('{\[\^(.+?)\]}', "F\x1Afn:\\1\x1A:", $text);
}
2938
2939         
protected function appendFootnotes($text) {
    #
    # Append footnote list to text.
    #
    # First replaces the footnote tokens found in $text by their markers
    # (via _appendFootnotes_callback(), which also fills
    # $this->footnotes_ordered), then appends one <li> per ordered
    # footnote. Returns the text, with the footnote list when there is one.
    #
    $text = preg_replace_callback('{F\x1Afn:(.*?)\x1A:}',
        array($this, '_appendFootnotes_callback'), $text);

    if (!empty($this->footnotes_ordered)) {
        $text .= "\n\n";
        $text .= "<div class=\"footnotes\">\n";
        $text .= "<hr". $this->empty_element_suffix ."\n";
        $text .= "<ol>\n\n";

        # Backlink attribute template; "%%" stands for the footnote number
        # and is substituted separately for each footnote below.
        $attr_tmpl = "";
        if ($this->fn_backlink_class != "") {
            $class = $this->fn_backlink_class;
            $class = $this->encodeAttribute($class);
            $attr_tmpl .= " class=\"$class\"";
        }
        if ($this->fn_backlink_title != "") {
            $title = $this->fn_backlink_title;
            $title = $this->encodeAttribute($title);
            $attr_tmpl .= " title=\"$title\"";
        }
        $num = 0;

        # A while loop is used because processing a footnote may append
        # further entries to footnotes_ordered (the token callback is run
        # on the footnote content too).
        while (!empty($this->footnotes_ordered)) {
            $footnote = reset($this->footnotes_ordered);
            $note_id = key($this->footnotes_ordered);
            unset($this->footnotes_ordered[$note_id]);
            $ref_count = $this->footnotes_ref_count[$note_id];
            unset($this->footnotes_ref_count[$note_id]);
            unset($this->footnotes[$note_id]);

            $footnote .= "\n"; # Need to append newline before parsing.
            $footnote = $this->runBlockGamut("$footnote\n");
            $footnote = preg_replace_callback('{F\x1Afn:(.*?)\x1A:}',
                array($this, '_appendFootnotes_callback'), $footnote);

            # Substitute into a fresh copy of the template: overwriting the
            # template itself made every footnote after the first reuse
            # number 1.
            $attr = str_replace("%%", (string) ++$num, $attr_tmpl);
            $note_id = $this->encodeAttribute($note_id);

            # Prepare backlink, multiple backlinks if multiple references
            $backlink = "<a href=\"#fnref:$note_id\"$attr>&#8617;</a>";
            for ($ref_num = 2; $ref_num <= $ref_count; ++$ref_num) {
                $backlink .= " <a href=\"#fnref$ref_num:$note_id\"$attr>&#8617;</a>";
            }
            # Add backlink to last paragraph; create new paragraph if needed.
            if (preg_match('{</p>$}', $footnote)) {
                $footnote = substr($footnote, 0, -4) . "&#160;$backlink</p>";
            } else {
                $footnote .= "\n\n<p>$backlink</p>";
            }

            $text .= "<li id=\"fn:$note_id\">\n";
            $text .= $footnote . "\n";
            $text .= "</li>\n\n";
        }

        $text .= "</ol>\n";
        $text .= "</div>";
    }
    return $text;
}
protected function _appendFootnotes_callback($matches) {
    #
    # Callback turning one footnote token into its marker.
    #
    # $matches[1] is the note id as written in the text. When no footnote
    # is stored under the prefixed id, the original "[^id]" text is
    # returned. Otherwise a <sup> element linking to the footnote is
    # returned.
    #
    $node_id = $this->fn_id_prefix . $matches[1];

    if (!isset($this->footnotes[$node_id])) {
        return "[^".$matches[1]."]";
    }

    if (isset($this->footnotes_numbers[$node_id])) {
        # Already numbered: count one more reference; the new count is
        # also used in the id of this marker.
        $this->footnotes_ref_count[$node_id] += 1;
        $ref_count_mark = $this->footnotes_ref_count[$node_id];
    } else {
        # First reference: transfer the footnote content to the ordered
        # list and give it its number.
        $this->footnotes_ordered[$node_id] = $this->footnotes[$node_id];
        $this->footnotes_ref_count[$node_id] = 1;
        $this->footnotes_numbers[$node_id] = $this->footnote_counter++;
        $ref_count_mark = '';
    }
    $num = $this->footnotes_numbers[$node_id];

    # Optional class and title attributes of the link; "%%" in either one
    # is replaced by the footnote number.
    $attr = "";
    if ($this->fn_link_class != "") {
        $attr .= ' class="' . $this->encodeAttribute($this->fn_link_class) . '"';
    }
    if ($this->fn_link_title != "") {
        $attr .= ' title="' . $this->encodeAttribute($this->fn_link_title) . '"';
    }
    $attr = str_replace("%%", $num, $attr);

    $node_id = $this->encodeAttribute($node_id);

    return '<sup id="fnref' . $ref_count_mark . ':' . $node_id . '">'
        . '<a href="#fn:' . $node_id . '"' . $attr . '>' . $num . '</a>'
        . '</sup>';
}
3045                 
3046         
3047         ### Abbreviations ###
3048         
3049         protected function stripAbbreviations($text) {
3050         #
3051         # Strips abbreviations from text, stores titles in hash references.
3052         #
3053                 $less_than_tab = $this->tab_width - 1;
3054
3055                 # Link defs are in the form: [id]*: url "optional title"
3056                 $text = preg_replace_callback('{
3057                         ^[ ]{0,'.$less_than_tab.'}\*\[(.+?)\][ ]?:      # abbr_id = $1
3058                         (.*)                                    # text = $2 (no blank lines allowed)    
3059                         }xm',
3060                         array(&$this, '_stripAbbreviations_callback'),
3061                         $text);
3062                 return $text;
3063         }
3064         protected function _stripAbbreviations_callback($matches) {
3065                 $abbr_word = $matches[1];
3066                 $abbr_desc = $matches[2];
3067                 if ($this->abbr_word_re)
3068                         $this->abbr_word_re .= '|';
3069                 $this->abbr_word_re .= preg_quote($abbr_word);
3070                 $this->abbr_desciptions[$abbr_word] = trim($abbr_desc);
3071                 return ''; # String that will replace the block
3072         }
3073         
3074         
3075         protected function doAbbreviations($text) {
3076         #
3077         # Find defined abbreviations in text and wrap them in <abbr> elements.
3078         #
3079                 if ($this->abbr_word_re) {
3080                         // cannot use the /x modifier because abbr_word_re may 
3081                         // contain significant spaces:
3082                         $text = preg_replace_callback('{'.
3083                                 '(?<![\w\x1A])'.
3084                                 '(?:'.$this->abbr_word_re.')'.
3085                                 '(?![\w\x1A])'.
3086                                 '}', 
3087                                 array(&$this, '_doAbbreviations_callback'), $text);
3088                 }
3089                 return $text;
3090         }
3091         protected function _doAbbreviations_callback($matches) {
3092                 $abbr = $matches[0];
3093                 if (isset($this->abbr_desciptions[$abbr])) {
3094                         $desc = $this->abbr_desciptions[$abbr];
3095                         if (empty($desc)) {
3096                                 return $this->hashPart("<abbr>$abbr</abbr>");
3097                         } else {
3098                                 $desc = $this->encodeAttribute($desc);
3099                                 return $this->hashPart("<abbr title=\"$desc\">$abbr</abbr>");
3100                         }
3101                 } else {
3102                         return $matches[0];
3103                 }
3104         }
3105
3106 }