extlib/Michelf/Markdown.php

   1 <?php
   2 #
   3 # Markdown  -  A text-to-HTML conversion tool for web writers
   4 #
   5 # PHP Markdown
   6 # Copyright (c) 2004-2013 Michel Fortin
   7 # <http://michelf.com/projects/php-markdown/>
   8 #
   9 # Original Markdown
  10 # Copyright (c) 2004-2006 John Gruber
  11 # <http://daringfireball.net/projects/markdown/>
  12 #
  13 namespace Michelf;
  14
  15
  16 #
  17 # Markdown Parser Class
  18 #
  19
  20 class Markdown implements MarkdownInterface {
  21
  22         ### Version ###
  23
  24         const  MARKDOWNLIB_VERSION  =  "1.4.0";
  25
  26         ### Simple Function Interface ###
  27
  28         public static function defaultTransform($text) {
  29         #
  30         # Initialize the parser and return the result of its transform method.
  31         # This will work fine for derived classes too.
  32         #
  33                 # Take parser class on which this function was called.
  34                 $parser_class = \get_called_class();
  35
  36                 # try to take parser from the static parser list
  37                 static $parser_list;
  38                 $parser =& $parser_list[$parser_class];
  39
  40                 # create the parser it not already set
  41                 if (!$parser)
  42                         $parser = new $parser_class;
  43
  44                 # Transform text using parser.
  45                 return $parser->transform($text);
  46         }
  47
  48         ### Configuration Variables ###
  49
  50         # Change to ">" for HTML output.
  51         public $empty_element_suffix = " />";
  52         public $tab_width = 4;
  53
  54         # Change to `true` to disallow markup or entities.
  55         public $no_markup = false;
  56         public $no_entities = false;
  57
  58         # Predefined urls and titles for reference links and images.
  59         public $predef_urls = array();
  60         public $predef_titles = array();
  61
  62
  63         ### Parser Implementation ###
  64
  65         # Regex to match balanced [brackets].
        # Needed to insert a maximum bracket depth while converting to PHP.
  67         protected $nested_brackets_depth = 6;
  68         protected $nested_brackets_re;
  69
  70         protected $nested_url_parenthesis_depth = 4;
  71         protected $nested_url_parenthesis_re;
  72
  73         # Table of hash values for escaped characters:
  74         protected $escape_chars = '\`*_{}[]()>#+-.!';
  75         protected $escape_chars_re;
  76
  77
  78         public function __construct() {
  79         #
  80         # Constructor function. Initialize appropriate member variables.
  81         #
  82                 $this->_initDetab();
  83                 $this->prepareItalicsAndBold();
  84
  85                 $this->nested_brackets_re =
  86                         str_repeat('(?>[^\[\]]+|\[', $this->nested_brackets_depth).
  87                         str_repeat('\])*', $this->nested_brackets_depth);
  88
  89                 $this->nested_url_parenthesis_re =
  90                         str_repeat('(?>[^()\s]+|\(', $this->nested_url_parenthesis_depth).
  91                         str_repeat('(?>\)))*', $this->nested_url_parenthesis_depth);
  92
  93                 $this->escape_chars_re = '['.preg_quote($this->escape_chars).']';
  94
  95                 # Sort document, block, and span gamut in ascendent priority order.
  96                 asort($this->document_gamut);
  97                 asort($this->block_gamut);
  98                 asort($this->span_gamut);
  99         }
 100
 101
 102         # Internal hashes used during transformation.
 103         protected $urls = array();
 104         protected $titles = array();
 105         protected $html_hashes = array();
 106
 107         # Status flag to avoid invalid nesting.
 108         protected $in_anchor = false;
 109
 110
 111         protected function setup() {
 112         #
 113         # Called before the transformation process starts to setup parser
 114         # states.
 115         #
 116                 # Clear global hashes.
 117                 $this->urls = $this->predef_urls;
 118                 $this->titles = $this->predef_titles;
 119                 $this->html_hashes = array();
 120
 121                 $this->in_anchor = false;
 122         }
 123
 124         protected function teardown() {
 125         #
 126         # Called after the transformation process to clear any variable
 127         # which may be taking up memory unnecessarly.
 128         #
 129                 $this->urls = array();
 130                 $this->titles = array();
 131                 $this->html_hashes = array();
 132         }
 133
 134
 135         public function transform($text) {
 136         #
 137         # Main function. Performs some preprocessing on the input text
 138         # and pass it through the document gamut.
 139         #
 140                 $this->setup();
 141
 142                 # Remove UTF-8 BOM and marker character in input, if present.
 143                 $text = preg_replace('{^\xEF\xBB\xBF|\x1A}', '', $text);
 144
 145                 # Standardize line endings:
 146                 #   DOS to Unix and Mac to Unix
 147                 $text = preg_replace('{\r\n?}', "\n", $text);
 148
 149                 # Make sure $text ends with a couple of newlines:
 150                 $text .= "\n\n";
 151
 152                 # Convert all tabs to spaces.
 153                 $text = $this->detab($text);
 154
 155                 # Turn block-level HTML blocks into hash entries
 156                 $text = $this->hashHTMLBlocks($text);
 157
 158                 # Strip any lines consisting only of spaces and tabs.
 159                 # This makes subsequent regexen easier to write, because we can
 160                 # match consecutive blank lines with /\n+/ instead of something
 161                 # contorted like /[ ]*\n+/ .
 162                 $text = preg_replace('/^[ ]+$/m', '', $text);
 163
 164                 # Run document gamut methods.
 165                 foreach ($this->document_gamut as $method => $priority) {
 166                         $text = $this->$method($text);
 167                 }
 168
 169                 $this->teardown();
 170
 171                 return $text . "\n";
 172         }
 173
 174         protected $document_gamut = array(
 175                 # Strip link definitions, store in hashes.
 176                 "stripLinkDefinitions" => 20,
 177
 178                 "runBasicBlockGamut"   => 30,
 179                 );
 180
 181
 182         protected function stripLinkDefinitions($text) {
 183         #
 184         # Strips link definitions from text, stores the URLs and titles in
 185         # hash references.
 186         #
 187                 $less_than_tab = $this->tab_width - 1;
 188
 189                 # Link defs are in the form: ^[id]: url "optional title"
 190                 $text = preg_replace_callback('{
 191                                                         ^[ ]{0,'.$less_than_tab.'}\[(.+)\][ ]?: # id = $1
 192                                                           [ ]*
 193                                                           \n?                           # maybe *one* newline
 194                                                           [ ]*
 195                                                         (?:
 196                                                           <(.+?)>                       # url = $2
 197                                                         |
 198                                                           (\S+?)                        # url = $3
 199                                                         )
 200                                                           [ ]*
 201                                                           \n?                           # maybe one newline
 202                                                           [ ]*
 203                                                         (?:
 204                                                                 (?<=\s)                 # lookbehind for whitespace
 205                                                                 ["(]
 206                                                                 (.*?)                   # title = $4
 207                                                                 [")]
 208                                                                 [ ]*
 209                                                         )?      # title is optional
 210                                                         (?:\n+|\Z)
 211                         }xm',
 212                         array(&$this, '_stripLinkDefinitions_callback'),
 213                         $text);
 214                 return $text;
 215         }
 216         protected function _stripLinkDefinitions_callback($matches) {
 217                 $link_id = strtolower($matches[1]);
 218                 $url = $matches[2] == '' ? $matches[3] : $matches[2];
 219                 $this->urls[$link_id] = $url;
 220                 $this->titles[$link_id] =& $matches[4];
 221                 return ''; # String that will replace the block
 222         }
 223
 224
 225         protected function hashHTMLBlocks($text) {
 226                 if ($this->no_markup)  return $text;
 227
 228                 $less_than_tab = $this->tab_width - 1;
 229
 230                 # Hashify HTML blocks:
 231                 # We only want to do this for block-level HTML tags, such as headers,
 232                 # lists, and tables. That's because we still want to wrap <p>s around
 233                 # "paragraphs" that are wrapped in non-block-level tags, such as anchors,
 234                 # phrase emphasis, and spans. The list of tags we're looking for is
 235                 # hard-coded:
 236                 #
 237                 # *  List "a" is made of tags which can be both inline or block-level.
 238                 #    These will be treated block-level when the start tag is alone on
 239                 #    its line, otherwise they're not matched here and will be taken as
 240                 #    inline later.
 241                 # *  List "b" is made of tags which are always block-level;
 242                 #
 243                 $block_tags_a_re = 'ins|del';
 244                 $block_tags_b_re = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|'.
 245                                                    'script|noscript|form|fieldset|iframe|math|svg|'.
 246                                                    'article|section|nav|aside|hgroup|header|footer|'.
 247                                                    'figure';
 248
 249                 # Regular expression for the content of a block tag.
 250                 $nested_tags_level = 4;
 251                 $attr = '
 252                         (?>                             # optional tag attributes
 253                           \s                    # starts with whitespace
 254                           (?>
 255                                 [^>"/]+         # text outside quotes
 256                           |
 257                                 /+(?!>)         # slash not followed by ">"
 258                           |
 259                                 "[^"]*"         # text inside double quotes (tolerate ">")
 260                           |
 261                                 \'[^\']*\'      # text inside single quotes (tolerate ">")
 262                           )*
 263                         )?
 264                         ';
 265                 $content =
 266                         str_repeat('
 267                                 (?>
 268                                   [^<]+                 # content without tag
 269                                 |
 270                                   <\2                   # nested opening tag
 271                                         '.$attr.'       # attributes
 272                                         (?>
 273                                           />
 274                                         |
 275                                           >', $nested_tags_level).      # end of opening tag
 276                                           '.*?'.                                        # last level nested tag content
 277                         str_repeat('
 278                                           </\2\s*>      # closing nested tag
 279                                         )
 280                                   |
 281                                         <(?!/\2\s*>     # other tags with a different name
 282                                   )
 283                                 )*',
 284                                 $nested_tags_level);
 285                 $content2 = str_replace('\2', '\3', $content);
 286
 287                 # First, look for nested blocks, e.g.:
 288                 #       <div>
 289                 #               <div>
 290                 #               tags for inner block must be indented.
 291                 #               </div>
 292                 #       </div>
 293                 #
 294                 # The outermost tags must start at the left margin for this to match, and
 295                 # the inner nested divs must be indented.
 296                 # We need to do this before the next, more liberal match, because the next
 297                 # match will start at the first `<div>` and stop at the first `</div>`.
 298                 $text = preg_replace_callback('{(?>
 299                         (?>
 300                                 (?<=\n\n)               # Starting after a blank line
 301                                 |                               # or
 302                                 \A\n?                   # the beginning of the doc
 303                         )
 304                         (                                               # save in $1
 305
 306                           # Match from `\n<tag>` to `</tag>\n`, handling nested tags
 307                           # in between.
 308
 309                                                 [ ]{0,'.$less_than_tab.'}
 310                                                 <('.$block_tags_b_re.')# start tag = $2
 311                                                 '.$attr.'>                      # attributes followed by > and \n
 312                                                 '.$content.'            # content, support nesting
 313                                                 </\2>                           # the matching end tag
 314                                                 [ ]*                            # trailing spaces/tabs
 315                                                 (?=\n+|\Z)      # followed by a newline or end of document
 316
 317                         | # Special version for tags of group a.
 318
 319                                                 [ ]{0,'.$less_than_tab.'}
 320                                                 <('.$block_tags_a_re.')# start tag = $3
 321                                                 '.$attr.'>[ ]*\n        # attributes followed by >
 322                                                 '.$content2.'           # content, support nesting
 323                                                 </\3>                           # the matching end tag
 324                                                 [ ]*                            # trailing spaces/tabs
 325                                                 (?=\n+|\Z)      # followed by a newline or end of document
 326
 327                         | # Special case just for <hr />. It was easier to make a special
 328                           # case than to make the other regex more complicated.
 329
 330                                                 [ ]{0,'.$less_than_tab.'}
 331                                                 <(hr)                           # start tag = $2
 332                                                 '.$attr.'                       # attributes
 333                                                 /?>                                     # the matching end tag
 334                                                 [ ]*
 335                                                 (?=\n{2,}|\Z)           # followed by a blank line or end of document
 336
 337                         | # Special case for standalone HTML comments:
 338
 339                                         [ ]{0,'.$less_than_tab.'}
 340                                         (?s:
 341                                                 <!-- .*? -->
 342                                         )
 343                                         [ ]*
 344                                         (?=\n{2,}|\Z)           # followed by a blank line or end of document
 345
 346                         | # PHP and ASP-style processor instructions (<? and <%)
 347
 348                                         [ ]{0,'.$less_than_tab.'}
 349                                         (?s:
 350                                                 <([?%])                 # $2
 351                                                 .*?
 352                                                 \2>
 353                                         )
 354                                         [ ]*
 355                                         (?=\n{2,}|\Z)           # followed by a blank line or end of document
 356
 357                         )
 358                         )}Sxmi',
 359                         array(&$this, '_hashHTMLBlocks_callback'),
 360                         $text);
 361
 362                 return $text;
 363         }
 364         protected function _hashHTMLBlocks_callback($matches) {
 365                 $text = $matches[1];
 366                 $key  = $this->hashBlock($text);
 367                 return "\n\n$key\n\n";
 368         }
 369
 370
 371         protected function hashPart($text, $boundary = 'X') {
 372         #
 373         # Called whenever a tag must be hashed when a function insert an atomic
 374         # element in the text stream. Passing $text to through this function gives
 375         # a unique text-token which will be reverted back when calling unhash.
 376         #
 377         # The $boundary argument specify what character should be used to surround
 378         # the token. By convension, "B" is used for block elements that needs not
 379         # to be wrapped into paragraph tags at the end, ":" is used for elements
 380         # that are word separators and "X" is used in the general case.
 381         #
 382                 # Swap back any tag hash found in $text so we do not have to `unhash`
 383                 # multiple times at the end.
 384                 $text = $this->unhash($text);
 385
 386                 # Then hash the block.
 387                 static $i = 0;
 388                 $key = "$boundary\x1A" . ++$i . $boundary;
 389                 $this->html_hashes[$key] = $text;
 390                 return $key; # String that will replace the tag.
 391         }
 392
 393
 394         protected function hashBlock($text) {
 395         #
 396         # Shortcut function for hashPart with block-level boundaries.
 397         #
 398                 return $this->hashPart($text, 'B');
 399         }
 400
 401
 402         protected $block_gamut = array(
 403         #
 404         # These are all the transformations that form block-level
 405         # tags like paragraphs, headers, and list items.
 406         #
 407                 "doHeaders"         => 10,
 408                 "doHorizontalRules" => 20,
 409
 410                 "doLists"           => 40,
 411                 "doCodeBlocks"      => 50,
 412                 "doBlockQuotes"     => 60,
 413                 );
 414
 415         protected function runBlockGamut($text) {
 416         #
 417         # Run block gamut tranformations.
 418         #
 419                 # We need to escape raw HTML in Markdown source before doing anything
 420                 # else. This need to be done for each block, and not only at the
 421                 # begining in the Markdown function since hashed blocks can be part of
 422                 # list items and could have been indented. Indented blocks would have
 423                 # been seen as a code block in a previous pass of hashHTMLBlocks.
 424                 $text = $this->hashHTMLBlocks($text);
 425
 426                 return $this->runBasicBlockGamut($text);
 427         }
 428
 429         protected function runBasicBlockGamut($text) {
 430         #
 431         # Run block gamut tranformations, without hashing HTML blocks. This is
 432         # useful when HTML blocks are known to be already hashed, like in the first
 433         # whole-document pass.
 434         #
 435                 foreach ($this->block_gamut as $method => $priority) {
 436                         $text = $this->$method($text);
 437                 }
 438
 439                 # Finally form paragraph and restore hashed blocks.
 440                 $text = $this->formParagraphs($text);
 441
 442                 return $text;
 443         }
 444
 445
 446         protected function doHorizontalRules($text) {
 447                 # Do Horizontal Rules:
 448                 return preg_replace(
 449                         '{
 450                                 ^[ ]{0,3}       # Leading space
 451                                 ([-*_])         # $1: First marker
 452                                 (?>                     # Repeated marker group
 453                                         [ ]{0,2}        # Zero, one, or two spaces.
 454                                         \1                      # Marker character
 455                                 ){2,}           # Group repeated at least twice
 456                                 [ ]*            # Tailing spaces
 457                                 $                       # End of line.
 458                         }mx',
 459                         "\n".$this->hashBlock("<hr$this->empty_element_suffix")."\n",
 460                         $text);
 461         }
 462
 463
 464         protected $span_gamut = array(
 465         #
 466         # These are all the transformations that occur *within* block-level
 467         # tags like paragraphs, headers, and list items.
 468         #
 469                 # Process character escapes, code spans, and inline HTML
 470                 # in one shot.
 471                 "parseSpan"           => -30,
 472
 473                 # Process anchor and image tags. Images must come first,
 474                 # because ![foo][f] looks like an anchor.
 475                 "doImages"            =>  10,
 476                 "doAnchors"           =>  20,
 477
 478                 # Make links out of things like `<http://example.com/>`
 479                 # Must come after doAnchors, because you can use < and >
 480                 # delimiters in inline links like [this](<url>).
 481                 "doAutoLinks"         =>  30,
 482                 "encodeAmpsAndAngles" =>  40,
 483
 484                 "doItalicsAndBold"    =>  50,
 485                 "doHardBreaks"        =>  60,
 486                 );
 487
 488         protected function runSpanGamut($text) {
 489         #
 490         # Run span gamut tranformations.
 491         #
 492                 foreach ($this->span_gamut as $method => $priority) {
 493                         $text = $this->$method($text);
 494                 }
 495
 496                 return $text;
 497         }
 498
 499
 500         protected function doHardBreaks($text) {
 501                 # Do hard breaks:
 502                 return preg_replace_callback('/ {2,}\n/',
 503                         array(&$this, '_doHardBreaks_callback'), $text);
 504         }
 505         protected function _doHardBreaks_callback($matches) {
 506                 return $this->hashPart("<br$this->empty_element_suffix\n");
 507         }
 508
 509
 510         protected function doAnchors($text) {
 511         #
 512         # Turn Markdown link shortcuts into XHTML <a> tags.
 513         #
 514                 if ($this->in_anchor) return $text;
 515                 $this->in_anchor = true;
 516
 517                 #
 518                 # First, handle reference-style links: [link text] [id]
 519                 #
 520                 $text = preg_replace_callback('{
 521                         (                                       # wrap whole match in $1
 522                           \[
 523                                 ('.$this->nested_brackets_re.') # link text = $2
 524                           \]
 525
 526                           [ ]?                          # one optional space
 527                           (?:\n[ ]*)?           # one optional newline followed by spaces
 528
 529                           \[
 530                                 (.*?)           # id = $3
 531                           \]
 532                         )
 533                         }xs',
 534                         array(&$this, '_doAnchors_reference_callback'), $text);
 535
 536                 #
 537                 # Next, inline-style links: [link text](url "optional title")
 538                 #
 539                 $text = preg_replace_callback('{
 540                         (                               # wrap whole match in $1
 541                           \[
 542                                 ('.$this->nested_brackets_re.') # link text = $2
 543                           \]
 544                           \(                    # literal paren
 545                                 [ \n]*
 546                                 (?:
 547                                         <(.+?)> # href = $3
 548                                 |
 549                                         ('.$this->nested_url_parenthesis_re.')  # href = $4
 550                                 )
 551                                 [ \n]*
 552                                 (                       # $5
 553                                   ([\'"])       # quote char = $6
 554                                   (.*?)         # Title = $7
 555                                   \6            # matching quote
 556                                   [ \n]*        # ignore any spaces/tabs between closing quote and )
 557                                 )?                      # title is optional
 558                           \)
 559                         )
 560                         }xs',
 561                         array(&$this, '_doAnchors_inline_callback'), $text);
 562
 563                 #
 564                 # Last, handle reference-style shortcuts: [link text]
 565                 # These must come last in case you've also got [link text][1]
 566                 # or [link text](/foo)
 567                 #
 568                 $text = preg_replace_callback('{
 569                         (                                       # wrap whole match in $1
 570                           \[
 571                                 ([^\[\]]+)              # link text = $2; can\'t contain [ or ]
 572                           \]
 573                         )
 574                         }xs',
 575                         array(&$this, '_doAnchors_reference_callback'), $text);
 576
 577                 $this->in_anchor = false;
 578                 return $text;
 579         }
 580         protected function _doAnchors_reference_callback($matches) {
 581                 $whole_match =  $matches[1];
 582                 $link_text   =  $matches[2];
 583                 $link_id     =& $matches[3];
 584
 585                 if ($link_id == "") {
 586                         # for shortcut links like [this][] or [this].
 587                         $link_id = $link_text;
 588                 }
 589
 590                 # lower-case and turn embedded newlines into spaces
 591                 $link_id = strtolower($link_id);
 592                 $link_id = preg_replace('{[ ]?\n}', ' ', $link_id);
 593
 594                 if (isset($this->urls[$link_id])) {
 595                         $url = $this->urls[$link_id];
 596                         $url = $this->encodeAttribute($url);
 597
 598                         $result = "<a href=\"$url\"";
 599                         if ( isset( $this->titles[$link_id] ) ) {
 600                                 $title = $this->titles[$link_id];
 601                                 $title = $this->encodeAttribute($title);
 602                                 $result .=  " title=\"$title\"";
 603                         }
 604
 605                         $link_text = $this->runSpanGamut($link_text);
 606                         $result .= ">$link_text</a>";
 607                         $result = $this->hashPart($result);
 608                 }
 609                 else {
 610                         $result = $whole_match;
 611                 }
 612                 return $result;
 613         }
 614         protected function _doAnchors_inline_callback($matches) {
 615                 $whole_match    =  $matches[1];
 616                 $link_text              =  $this->runSpanGamut($matches[2]);
 617                 $url                    =  $matches[3] == '' ? $matches[4] : $matches[3];
 618                 $title                  =& $matches[7];
 619
 620                 $url = $this->encodeAttribute($url);
 621
 622                 $result = "<a href=\"$url\"";
 623                 if (isset($title)) {
 624                         $title = $this->encodeAttribute($title);
 625                         $result .=  " title=\"$title\"";
 626                 }
 627
 628                 $link_text = $this->runSpanGamut($link_text);
 629                 $result .= ">$link_text</a>";
 630
 631                 return $this->hashPart($result);
 632         }
 633
 634
 635         protected function doImages($text) {
 636         #
 637         # Turn Markdown image shortcuts into <img> tags.
 638         #
 639                 #
 640                 # First, handle reference-style labeled images: ![alt text][id]
 641                 #
 642                 $text = preg_replace_callback('{
 643                         (                               # wrap whole match in $1
 644                           !\[
 645                                 ('.$this->nested_brackets_re.')         # alt text = $2
 646                           \]
 647
 648                           [ ]?                          # one optional space
 649                           (?:\n[ ]*)?           # one optional newline followed by spaces
 650
 651                           \[
 652                                 (.*?)           # id = $3
 653                           \]
 654
 655                         )
 656                         }xs',
 657                         array(&$this, '_doImages_reference_callback'), $text);
 658
 659                 #
 660                 # Next, handle inline images:  ![alt text](url "optional title")
 661                 # Don't forget: encode * and _
 662                 #
 663                 $text = preg_replace_callback('{
 664                         (                               # wrap whole match in $1
 665                           !\[
 666                                 ('.$this->nested_brackets_re.')         # alt text = $2
 667                           \]
 668                           \s?                   # One optional whitespace character
 669                           \(                    # literal paren
 670                                 [ \n]*
 671                                 (?:
 672                                         <(\S*)> # src url = $3
 673                                 |
 674                                         ('.$this->nested_url_parenthesis_re.')  # src url = $4
 675                                 )
 676                                 [ \n]*
 677                                 (                       # $5
 678                                   ([\'"])       # quote char = $6
 679                                   (.*?)         # title = $7
 680                                   \6            # matching quote
 681                                   [ \n]*
 682                                 )?                      # title is optional
 683                           \)
 684                         )
 685                         }xs',
 686                         array(&$this, '_doImages_inline_callback'), $text);
 687
 688                 return $text;
 689         }
 690         protected function _doImages_reference_callback($matches) {
 691                 $whole_match = $matches[1];
 692                 $alt_text    = $matches[2];
 693                 $link_id     = strtolower($matches[3]);
 694
 695                 if ($link_id == "") {
 696                         $link_id = strtolower($alt_text); # for shortcut links like ![this][].
 697                 }
 698
 699                 $alt_text = $this->encodeAttribute($alt_text);
 700                 if (isset($this->urls[$link_id])) {
 701                         $url = $this->encodeAttribute($this->urls[$link_id]);
 702                         $result = "<img src=\"$url\" alt=\"$alt_text\"";
 703                         if (isset($this->titles[$link_id])) {
 704                                 $title = $this->titles[$link_id];
 705                                 $title = $this->encodeAttribute($title);
 706                                 $result .=  " title=\"$title\"";
 707                         }
 708                         $result .= $this->empty_element_suffix;
 709                         $result = $this->hashPart($result);
 710                 }
 711                 else {
 712                         # If there's no such link ID, leave intact:
 713                         $result = $whole_match;
 714                 }
 715
 716                 return $result;
 717         }
 718         protected function _doImages_inline_callback($matches) {
 719                 $whole_match    = $matches[1];
 720                 $alt_text               = $matches[2];
 721                 $url                    = $matches[3] == '' ? $matches[4] : $matches[3];
 722                 $title                  =& $matches[7];
 723
 724                 $alt_text = $this->encodeAttribute($alt_text);
 725                 $url = $this->encodeAttribute($url);
 726                 $result = "<img src=\"$url\" alt=\"$alt_text\"";
 727                 if (isset($title)) {
 728                         $title = $this->encodeAttribute($title);
 729                         $result .=  " title=\"$title\""; # $title already quoted
 730                 }
 731                 $result .= $this->empty_element_suffix;
 732
 733                 return $this->hashPart($result);
 734         }
 735
 736
 737         protected function doHeaders($text) {
 738                 # Setext-style headers:
 739                 #         Header 1
 740                 #         ========
 741                 #
 742                 #         Header 2
 743                 #         --------
 744                 #
 745                 $text = preg_replace_callback('{ ^(.+?)[ ]*\n(=+|-+)[ ]*\n+ }mx',
 746                         array(&$this, '_doHeaders_callback_setext'), $text);
 747
 748                 # atx-style headers:
 749                 #       # Header 1
 750                 #       ## Header 2
 751                 #       ## Header 2 with closing hashes ##
 752                 #       ...
 753                 #       ###### Header 6
 754                 #
 755                 $text = preg_replace_callback('{
 756                                 ^(\#{1,6})      # $1 = string of #\'s
 757                                 [ ]*
 758                                 (.+?)           # $2 = Header text
 759                                 [ ]*
 760                                 \#*                     # optional closing #\'s (not counted)
 761                                 \n+
 762                         }xm',
 763                         array(&$this, '_doHeaders_callback_atx'), $text);
 764
 765                 return $text;
 766         }
 767         protected function _doHeaders_callback_setext($matches) {
 768                 # Terrible hack to check we haven't found an empty list item.
 769                 if ($matches[2] == '-' && preg_match('{^-(?: |$)}', $matches[1]))
 770                         return $matches[0];
 771
 772                 $level = $matches[2]{0} == '=' ? 1 : 2;
 773                 $block = "<h$level>".$this->runSpanGamut($matches[1])."</h$level>";
 774                 return "\n" . $this->hashBlock($block) . "\n\n";
 775         }
 776         protected function _doHeaders_callback_atx($matches) {
 777                 $level = strlen($matches[1]);
 778                 $block = "<h$level>".$this->runSpanGamut($matches[2])."</h$level>";
 779                 return "\n" . $this->hashBlock($block) . "\n\n";
 780         }
 781
 782
 783         protected function doLists($text) {
 784         #
 785         # Form HTML ordered (numbered) and unordered (bulleted) lists.
 786         #
 787                 $less_than_tab = $this->tab_width - 1;
 788
 789                 # Re-usable patterns to match list item bullets and number markers:
 790                 $marker_ul_re  = '[*+-]';
 791                 $marker_ol_re  = '\d+[\.]';
 792                 $marker_any_re = "(?:$marker_ul_re|$marker_ol_re)";
 793
 794                 $markers_relist = array(
 795                         $marker_ul_re => $marker_ol_re,
 796                         $marker_ol_re => $marker_ul_re,
 797                         );
 798
 799                 foreach ($markers_relist as $marker_re => $other_marker_re) {
 800                         # Re-usable pattern to match any entirel ul or ol list:
 801                         $whole_list_re = '
 802                                 (                                                               # $1 = whole list
 803                                   (                                                             # $2
 804                                         ([ ]{0,'.$less_than_tab.'})     # $3 = number of spaces
 805                                         ('.$marker_re.')                        # $4 = first list item marker
 806                                         [ ]+
 807                                   )
 808                                   (?s:.+?)
 809                                   (                                                             # $5
 810                                           \z
 811                                         |
 812                                           \n{2,}
 813                                           (?=\S)
 814                                           (?!                                           # Negative lookahead for another list item marker
 815                                                 [ ]*
 816                                                 '.$marker_re.'[ ]+
 817                                           )
 818                                         |
 819                                           (?=                                           # Lookahead for another kind of list
 820                                             \n
 821                                                 \3                                              # Must have the same indentation
 822                                                 '.$other_marker_re.'[ ]+
 823                                           )
 824                                   )
 825                                 )
 826                         '; // mx
 827
 828                         # We use a different prefix before nested lists than top-level lists.
 829                         # See extended comment in _ProcessListItems().
 830
 831                         if ($this->list_level) {
 832                                 $text = preg_replace_callback('{
 833                                                 ^
 834                                                 '.$whole_list_re.'
 835                                         }mx',
 836                                         array(&$this, '_doLists_callback'), $text);
 837                         }
 838                         else {
 839                                 $text = preg_replace_callback('{
 840                                                 (?:(?<=\n)\n|\A\n?) # Must eat the newline
 841                                                 '.$whole_list_re.'
 842                                         }mx',
 843                                         array(&$this, '_doLists_callback'), $text);
 844                         }
 845                 }
 846
 847                 return $text;
 848         }
 849         protected function _doLists_callback($matches) {
 850                 # Re-usable patterns to match list item bullets and number markers:
 851                 $marker_ul_re  = '[*+-]';
 852                 $marker_ol_re  = '\d+[\.]';
 853                 $marker_any_re = "(?:$marker_ul_re|$marker_ol_re)";
 854
 855                 $list = $matches[1];
 856                 $list_type = preg_match("/$marker_ul_re/", $matches[4]) ? "ul" : "ol";
 857
 858                 $marker_any_re = ( $list_type == "ul" ? $marker_ul_re : $marker_ol_re );
 859
 860                 $list .= "\n";
 861                 $result = $this->processListItems($list, $marker_any_re);
 862
 863                 $result = $this->hashBlock("<$list_type>\n" . $result . "</$list_type>");
 864                 return "\n". $result ."\n\n";
 865         }
 866
        # List nesting counter: processListItems() increments it on entry and
        # decrements it before returning; doLists() tests it to choose between
        # the nested-list and top-level list patterns.
        protected $list_level = 0;
 868
 869         protected function processListItems($list_str, $marker_any_re) {
 870         #
 871         #       Process the contents of a single ordered or unordered list, splitting it
 872         #       into individual list items.
 873         #
 874                 # The $this->list_level global keeps track of when we're inside a list.
 875                 # Each time we enter a list, we increment it; when we leave a list,
 876                 # we decrement. If it's zero, we're not in a list anymore.
 877                 #
 878                 # We do this because when we're not inside a list, we want to treat
 879                 # something like this:
 880                 #
 881                 #               I recommend upgrading to version
 882                 #               8. Oops, now this line is treated
 883                 #               as a sub-list.
 884                 #
 885                 # As a single paragraph, despite the fact that the second line starts
 886                 # with a digit-period-space sequence.
 887                 #
 888                 # Whereas when we're inside a list (or sub-list), that line will be
 889                 # treated as the start of a sub-list. What a kludge, huh? This is
 890                 # an aspect of Markdown's syntax that's hard to parse perfectly
 891                 # without resorting to mind-reading. Perhaps the solution is to
 892                 # change the syntax rules such that sub-lists must start with a
 893                 # starting cardinal number; e.g. "1." or "a.".
 894
 895                 $this->list_level++;
 896
 897                 # trim trailing blank lines:
 898                 $list_str = preg_replace("/\n{2,}\\z/", "\n", $list_str);
 899
 900                 $list_str = preg_replace_callback('{
 901                         (\n)?                                                   # leading line = $1
 902                         (^[ ]*)                                                 # leading whitespace = $2
 903                         ('.$marker_any_re.'                             # list marker and space = $3
 904                                 (?:[ ]+|(?=\n)) # space only required if item is not empty
 905                         )
 906                         ((?s:.*?))                                              # list item text   = $4
 907                         (?:(\n+(?=\n))|\n)                              # tailing blank line = $5
 908                         (?= \n* (\z | \2 ('.$marker_any_re.') (?:[ ]+|(?=\n))))
 909                         }xm',
 910                         array(&$this, '_processListItems_callback'), $list_str);
 911
 912                 $this->list_level--;
 913                 return $list_str;
 914         }
 915         protected function _processListItems_callback($matches) {
 916                 $item = $matches[4];
 917                 $leading_line =& $matches[1];
 918                 $leading_space =& $matches[2];
 919                 $marker_space = $matches[3];
 920                 $tailing_blank_line =& $matches[5];
 921
 922                 if ($leading_line || $tailing_blank_line ||
 923                         preg_match('/\n{2,}/', $item))
 924                 {
 925                         # Replace marker with the appropriate whitespace indentation
 926                         $item = $leading_space . str_repeat(' ', strlen($marker_space)) . $item;
 927                         $item = $this->runBlockGamut($this->outdent($item)."\n");
 928                 }
 929                 else {
 930                         # Recursion for sub-lists:
 931                         $item = $this->doLists($this->outdent($item));
 932                         $item = preg_replace('/\n+$/', '', $item);
 933                         $item = $this->runSpanGamut($item);
 934                 }
 935
 936                 return "<li>" . $item . "</li>\n";
 937         }
 938
 939
 940         protected function doCodeBlocks($text) {
 941         #
 942         #       Process Markdown `<pre><code>` blocks.
 943         #
 944                 $text = preg_replace_callback('{
 945                                 (?:\n\n|\A\n?)
 946                                 (                   # $1 = the code block -- one or more lines, starting with a space/tab
 947                                   (?>
 948                                         [ ]{'.$this->tab_width.'}  # Lines must start with a tab or a tab-width of spaces
 949                                         .*\n+
 950                                   )+
 951                                 )
 952                                 ((?=^[ ]{0,'.$this->tab_width.'}\S)|\Z) # Lookahead for non-space at line-start, or end of doc
 953                         }xm',
 954                         array(&$this, '_doCodeBlocks_callback'), $text);
 955
 956                 return $text;
 957         }
 958         protected function _doCodeBlocks_callback($matches) {
 959                 $codeblock = $matches[1];
 960
 961                 $codeblock = $this->outdent($codeblock);
 962                 $codeblock = htmlspecialchars($codeblock, ENT_NOQUOTES);
 963
 964                 # trim leading newlines and trailing newlines
 965                 $codeblock = preg_replace('/\A\n+|\n+\z/', '', $codeblock);
 966
 967                 $codeblock = "<pre><code>$codeblock\n</code></pre>";
 968                 return "\n\n".$this->hashBlock($codeblock)."\n\n";
 969         }
 970
 971
 972         protected function makeCodeSpan($code) {
 973         #
 974         # Create a code span markup for $code. Called from handleSpanToken.
 975         #
 976                 $code = htmlspecialchars(trim($code), ENT_NOQUOTES);
 977                 return $this->hashPart("<code>$code</code>");
 978         }
 979
 980
        # Emphasis token patterns, keyed by the marker that is currently open
        # ('' when none is). The '' entry matches a marker that is followed by
        # a non-space character (or end of line) and not by punctuation plus
        # whitespace; the other entries match the same marker preceded by a
        # non-space character (or start of line).
        protected $em_relist = array(
                ''  => '(?:(?<!\*)\*(?!\*)|(?<!_)_(?!_))(?=\S|$)(?![\.,:;]\s)',
                '*' => '(?<=\S|^)(?<!\*)\*(?!\*)',
                '_' => '(?<=\S|^)(?<!_)_(?!_)',
                );
        # Same layout as $em_relist, for the two-character markers.
        protected $strong_relist = array(
                ''   => '(?:(?<!\*)\*\*(?!\*)|(?<!_)__(?!_))(?=\S|$)(?![\.,:;]\s)',
                '**' => '(?<=\S|^)(?<!\*)\*\*(?!\*)',
                '__' => '(?<=\S|^)(?<!_)__(?!_)',
                );
        # Same layout as $em_relist, for the three-character markers.
        protected $em_strong_relist = array(
                ''    => '(?:(?<!\*)\*\*\*(?!\*)|(?<!_)___(?!_))(?=\S|$)(?![\.,:;]\s)',
                '***' => '(?<=\S|^)(?<!\*)\*\*\*(?!\*)',
                '___' => '(?<=\S|^)(?<!_)___(?!_)',
                );
        # Filled by prepareItalicsAndBold(): maps each "$em$strong" key
        # combination to a complete, delimited token regular expression.
        protected $em_strong_prepared_relist;
 997
 998         protected function prepareItalicsAndBold() {
 999         #
1000         # Prepare regular expressions for searching emphasis tokens in any
1001         # context.
1002         #
1003                 foreach ($this->em_relist as $em => $em_re) {
1004                         foreach ($this->strong_relist as $strong => $strong_re) {
1005                                 # Construct list of allowed token expressions.
1006                                 $token_relist = array();
1007                                 if (isset($this->em_strong_relist["$em$strong"])) {
1008                                         $token_relist[] = $this->em_strong_relist["$em$strong"];
1009                                 }
1010                                 $token_relist[] = $em_re;
1011                                 $token_relist[] = $strong_re;
1012
1013                                 # Construct master expression from list.
1014                                 $token_re = '{('. implode('|', $token_relist) .')}';
1015                                 $this->em_strong_prepared_relist["$em$strong"] = $token_re;
1016                         }
1017                 }
1018         }
1019
1020         protected function doItalicsAndBold($text) {
1021                 $token_stack = array('');
1022                 $text_stack = array('');
1023                 $em = '';
1024                 $strong = '';
1025                 $tree_char_em = false;
1026
1027                 while (1) {
1028                         #
1029                         # Get prepared regular expression for seraching emphasis tokens
1030                         # in current context.
1031                         #
1032                         $token_re = $this->em_strong_prepared_relist["$em$strong"];
1033
1034                         #
1035                         # Each loop iteration search for the next emphasis token.
1036                         # Each token is then passed to handleSpanToken.
1037                         #
1038                         $parts = preg_split($token_re, $text, 2, PREG_SPLIT_DELIM_CAPTURE);
1039                         $text_stack[0] .= $parts[0];
1040                         $token =& $parts[1];
1041                         $text =& $parts[2];
1042
1043                         if (empty($token)) {
1044                                 # Reached end of text span: empty stack without emitting.
1045                                 # any more emphasis.
1046                                 while ($token_stack[0]) {
1047                                         $text_stack[1] .= array_shift($token_stack);
1048                                         $text_stack[0] .= array_shift($text_stack);
1049                                 }
1050                                 break;
1051                         }
1052
1053                         $token_len = strlen($token);
1054                         if ($tree_char_em) {
1055                                 # Reached closing marker while inside a three-char emphasis.
1056                                 if ($token_len == 3) {
1057                                         # Three-char closing marker, close em and strong.
1058                                         array_shift($token_stack);
1059                                         $span = array_shift($text_stack);
1060                                         $span = $this->runSpanGamut($span);
1061                                         $span = "<strong><em>$span</em></strong>";
1062                                         $text_stack[0] .= $this->hashPart($span);
1063                                         $em = '';
1064                                         $strong = '';
1065                                 } else {
1066                                         # Other closing marker: close one em or strong and
1067                                         # change current token state to match the other
1068                                         $token_stack[0] = str_repeat($token{0}, 3-$token_len);
1069                                         $tag = $token_len == 2 ? "strong" : "em";
1070                                         $span = $text_stack[0];
1071                                         $span = $this->runSpanGamut($span);
1072                                         $span = "<$tag>$span</$tag>";
1073                                         $text_stack[0] = $this->hashPart($span);
1074                                         $$tag = ''; # $$tag stands for $em or $strong
1075                                 }
1076                                 $tree_char_em = false;
1077                         } else if ($token_len == 3) {
1078                                 if ($em) {
1079                                         # Reached closing marker for both em and strong.
1080                                         # Closing strong marker:
1081                                         for ($i = 0; $i < 2; ++$i) {
1082                                                 $shifted_token = array_shift($token_stack);
1083                                                 $tag = strlen($shifted_token) == 2 ? "strong" : "em";
1084                                                 $span = array_shift($text_stack);
1085                                                 $span = $this->runSpanGamut($span);
1086                                                 $span = "<$tag>$span</$tag>";
1087                                                 $text_stack[0] .= $this->hashPart($span);
1088                                                 $$tag = ''; # $$tag stands for $em or $strong
1089                                         }
1090                                 } else {
1091                                         # Reached opening three-char emphasis marker. Push on token
1092                                         # stack; will be handled by the special condition above.
1093                                         $em = $token{0};
1094                                         $strong = "$em$em";
1095                                         array_unshift($token_stack, $token);
1096                                         array_unshift($text_stack, '');
1097                                         $tree_char_em = true;
1098                                 }
1099                         } else if ($token_len == 2) {
1100                                 if ($strong) {
1101                                         # Unwind any dangling emphasis marker:
1102                                         if (strlen($token_stack[0]) == 1) {
1103                                                 $text_stack[1] .= array_shift($token_stack);
1104                                                 $text_stack[0] .= array_shift($text_stack);
1105                                         }
1106                                         # Closing strong marker:
1107                                         array_shift($token_stack);
1108                                         $span = array_shift($text_stack);
1109                                         $span = $this->runSpanGamut($span);
1110                                         $span = "<strong>$span</strong>";
1111                                         $text_stack[0] .= $this->hashPart($span);
1112                                         $strong = '';
1113                                 } else {
1114                                         array_unshift($token_stack, $token);
1115                                         array_unshift($text_stack, '');
1116                                         $strong = $token;
1117                                 }
1118                         } else {
1119                                 # Here $token_len == 1
1120                                 if ($em) {
1121                                         if (strlen($token_stack[0]) == 1) {
1122                                                 # Closing emphasis marker:
1123                                                 array_shift($token_stack);
1124                                                 $span = array_shift($text_stack);
1125                                                 $span = $this->runSpanGamut($span);
1126                                                 $span = "<em>$span</em>";
1127                                                 $text_stack[0] .= $this->hashPart($span);
1128                                                 $em = '';
1129                                         } else {
1130                                                 $text_stack[0] .= $token;
1131                                         }
1132                                 } else {
1133                                         array_unshift($token_stack, $token);
1134                                         array_unshift($text_stack, '');
1135                                         $em = $token;
1136                                 }
1137                         }
1138                 }
1139                 return $text_stack[0];
1140         }
1141
1142
1143         protected function doBlockQuotes($text) {
1144                 $text = preg_replace_callback('/
1145                           (                                                             # Wrap whole match in $1
1146                                 (?>
1147                                   ^[ ]*>[ ]?                    # ">" at the start of a line
1148                                         .+\n                                    # rest of the first line
1149                                   (.+\n)*                                       # subsequent consecutive lines
1150                                   \n*                                           # blanks
1151                                 )+
1152                           )
1153                         /xm',
1154                         array(&$this, '_doBlockQuotes_callback'), $text);
1155
1156                 return $text;
1157         }
1158         protected function _doBlockQuotes_callback($matches) {
1159                 $bq = $matches[1];
1160                 # trim one level of quoting - trim whitespace-only lines
1161                 $bq = preg_replace('/^[ ]*>[ ]?|^[ ]+$/m', '', $bq);
1162                 $bq = $this->runBlockGamut($bq);                # recurse
1163
1164                 $bq = preg_replace('/^/m', "  ", $bq);
1165                 # These leading spaces cause problem with <pre> content,
1166                 # so we need to fix that:
1167                 $bq = preg_replace_callback('{(\s*<pre>.+?</pre>)}sx',
1168                         array(&$this, '_doBlockQuotes_callback2'), $bq);
1169
1170                 return "\n". $this->hashBlock("<blockquote>\n$bq\n</blockquote>")."\n\n";
1171         }
1172         protected function _doBlockQuotes_callback2($matches) {
1173                 $pre = $matches[1];
1174                 $pre = preg_replace('/^  /m', '', $pre);
1175                 return $pre;
1176         }
1177
1178
1179         protected function formParagraphs($text) {
1180         #
1181         #       Params:
1182         #               $text - string to process with html <p> tags
1183         #
1184                 # Strip leading and trailing lines:
1185                 $text = preg_replace('/\A\n+|\n+\z/', '', $text);
1186
1187                 $grafs = preg_split('/\n{2,}/', $text, -1, PREG_SPLIT_NO_EMPTY);
1188
1189                 #
1190                 # Wrap <p> tags and unhashify HTML blocks
1191                 #
1192                 foreach ($grafs as $key => $value) {
1193                         if (!preg_match('/^B\x1A[0-9]+B$/', $value)) {
1194                                 # Is a paragraph.
1195                                 $value = $this->runSpanGamut($value);
1196                                 $value = preg_replace('/^([ ]*)/', "<p>", $value);
1197                                 $value .= "</p>";
1198                                 $grafs[$key] = $this->unhash($value);
1199                         }
1200                         else {
1201                                 # Is a block.
1202                                 # Modify elements of @grafs in-place...
1203                                 $graf = $value;
1204                                 $block = $this->html_hashes[$graf];
1205                                 $graf = $block;
1206 //                              if (preg_match('{
1207 //                                      \A
1208 //                                      (                                                       # $1 = <div> tag
1209 //                                        <div  \s+
1210 //                                        [^>]*
1211 //                                        \b
1212 //                                        markdown\s*=\s*  ([\'"])      #       $2 = attr quote char
1213 //                                        1
1214 //                                        \2
1215 //                                        [^>]*
1216 //                                        >
1217 //                                      )
1218 //                                      (                                                       # $3 = contents
1219 //                                      .*
1220 //                                      )
1221 //                                      (</div>)                                        # $4 = closing tag
1222 //                                      \z
1223 //                                      }xs', $block, $matches))
1224 //                              {
1225 //                                      list(, $div_open, , $div_content, $div_close) = $matches;
1226 //
1227 //                                      # We can't call Markdown(), because that resets the hash;
1228 //                                      # that initialization code should be pulled into its own sub, though.
1229 //                                      $div_content = $this->hashHTMLBlocks($div_content);
1230 //
1231 //                                      # Run document gamut methods on the content.
1232 //                                      foreach ($this->document_gamut as $method => $priority) {
1233 //                                              $div_content = $this->$method($div_content);
1234 //                                      }
1235 //
1236 //                                      $div_open = preg_replace(
1237 //                                              '{\smarkdown\s*=\s*([\'"]).+?\1}', '', $div_open);
1238 //
1239 //                                      $graf = $div_open . "\n" . $div_content . "\n" . $div_close;
1240 //                              }
1241                                 $grafs[$key] = $graf;
1242                         }
1243                 }
1244
1245                 return implode("\n\n", $grafs);
1246         }
1247
1248
1249         protected function encodeAttribute($text) {
1250         #
1251         # Encode text for a double-quoted HTML attribute. This function
1252         # is *not* suitable for attributes enclosed in single quotes.
1253         #
1254                 $text = $this->encodeAmpsAndAngles($text);
1255                 $text = str_replace('"', '&quot;', $text);
1256                 return $text;
1257         }
1258
1259
1260         protected function encodeAmpsAndAngles($text) {
1261         #
1262         # Smart processing for ampersands and angle brackets that need to
1263         # be encoded. Valid character entities are left alone unless the
1264         # no-entities mode is set.
1265         #
1266                 if ($this->no_entities) {
1267                         $text = str_replace('&', '&amp;', $text);
1268                 } else {
1269                         # Ampersand-encoding based entirely on Nat Irons's Amputator
1270                         # MT plugin: <http://bumppo.net/projects/amputator/>
1271                         $text = preg_replace('/&(?!#?[xX]?(?:[0-9a-fA-F]+|\w+);)/',
1272                                                                 '&amp;', $text);
1273                 }
1274                 # Encode remaining <'s
1275                 $text = str_replace('<', '&lt;', $text);
1276
1277                 return $text;
1278         }
1279
1280
1281         protected function doAutoLinks($text) {
1282                 $text = preg_replace_callback('{<((https?|ftp|dict):[^\'">\s]+)>}i',
1283                         array(&$this, '_doAutoLinks_url_callback'), $text);
1284
1285                 # Email addresses: <address@domain.foo>
1286                 $text = preg_replace_callback('{
1287                         <
1288                         (?:mailto:)?
1289                         (
1290                                 (?:
1291                                         [-!#$%&\'*+/=?^_`.{|}~\w\x80-\xFF]+
1292                                 |
1293                                         ".*?"
1294                                 )
1295                                 \@
1296                                 (?:
1297                                         [-a-z0-9\x80-\xFF]+(\.[-a-z0-9\x80-\xFF]+)*\.[a-z]+
1298                                 |
1299                                         \[[\d.a-fA-F:]+\]       # IPv4 & IPv6
1300                                 )
1301                         )
1302                         >
1303                         }xi',
1304                         array(&$this, '_doAutoLinks_email_callback'), $text);
1305                 $text = preg_replace_callback('{<(tel:([^\'">\s]+))>}i',array(&$this, '_doAutoLinks_tel_callback'), $text);
1306
1307                 return $text;
1308         }
1309         protected function _doAutoLinks_tel_callback($matches) {
1310                 $url = $this->encodeAttribute($matches[1]);
1311                 $tel = $this->encodeAttribute($matches[2]);
1312                 $link = "<a href=\"$url\">$tel</a>";
1313                 return $this->hashPart($link);
1314         }
1315         protected function _doAutoLinks_url_callback($matches) {
1316                 $url = $this->encodeAttribute($matches[1]);
1317                 $link = "<a href=\"$url\">$url</a>";
1318                 return $this->hashPart($link);
1319         }
1320         protected function _doAutoLinks_email_callback($matches) {
1321                 $address = $matches[1];
1322                 $link = $this->encodeEmailAddress($address);
1323                 return $this->hashPart($link);
1324         }
1325
1326
1327         protected function encodeEmailAddress($addr) {
1328         #
1329         #       Input: an email address, e.g. "foo@example.com"
1330         #
1331         #       Output: the email address as a mailto link, with each character
1332         #               of the address encoded as either a decimal or hex entity, in
1333         #               the hopes of foiling most address harvesting spam bots. E.g.:
1334         #
1335         #         <p><a href="&#109;&#x61;&#105;&#x6c;&#116;&#x6f;&#58;&#x66;o&#111;
1336         #        &#x40;&#101;&#x78;&#97;&#x6d;&#112;&#x6c;&#101;&#46;&#x63;&#111;
1337         #        &#x6d;">&#x66;o&#111;&#x40;&#101;&#x78;&#97;&#x6d;&#112;&#x6c;
1338         #        &#101;&#46;&#x63;&#111;&#x6d;</a></p>
1339         #
1340         #       Based by a filter by Matthew Wickline, posted to BBEdit-Talk.
1341         #   With some optimizations by Milian Wolff.
1342         #
1343                 $addr = "mailto:" . $addr;
1344                 $chars = preg_split('/(?<!^)(?!$)/', $addr);
1345                 $seed = (int)abs(crc32($addr) / strlen($addr)); # Deterministic seed.
1346
1347                 foreach ($chars as $key => $char) {
1348                         $ord = ord($char);
1349                         # Ignore non-ascii chars.
1350                         if ($ord < 128) {
1351                                 $r = ($seed * (1 + $key)) % 100; # Pseudo-random function.
1352                                 # roughly 10% raw, 45% hex, 45% dec
1353                                 # '@' *must* be encoded. I insist.
1354                                 if ($r > 90 && $char != '@') /* do nothing */;
1355                                 else if ($r < 45) $chars[$key] = '&#x'.dechex($ord).';';
1356                                 else              $chars[$key] = '&#'.$ord.';';
1357                         }
1358                 }
1359
1360                 $addr = implode('', $chars);
1361                 $text = implode('', array_slice($chars, 7)); # text without `mailto:`
1362                 $addr = "<a href=\"$addr\">$text</a>";
1363
1364                 return $addr;
1365         }
1366
1367
1368         protected function parseSpan($str) {
1369         #
1370         # Take the string $str and parse it into tokens, hashing embeded HTML,
1371         # escaped characters and handling code spans.
1372         #
1373                 $output = '';
1374
1375                 $span_re = '{
1376                                 (
1377                                         \\\\'.$this->escape_chars_re.'
1378                                 |
1379                                         (?<![`\\\\])
1380                                         `+                                              # code span marker
1381                         '.( $this->no_markup ? '' : '
1382                                 |
1383                                         <!--    .*?     -->             # comment
1384                                 |
1385                                         <\?.*?\?> | <%.*?%>             # processing instruction
1386                                 |
1387                                         <[!$]?[-a-zA-Z0-9:_]+   # regular tags
1388                                         (?>
1389                                                 \s
1390                                                 (?>[^"\'>]+|"[^"]*"|\'[^\']*\')*
1391                                         )?
1392                                         >
1393                                 |
1394                                         <[-a-zA-Z0-9:_]+\s*/> # xml-style empty tag
1395                                 |
1396                                         </[-a-zA-Z0-9:_]+\s*> # closing tag
1397                         ').'
1398                                 )
1399                                 }xs';
1400
1401                 while (1) {
1402                         #
1403                         # Each loop iteration seach for either the next tag, the next
1404                         # openning code span marker, or the next escaped character.
1405                         # Each token is then passed to handleSpanToken.
1406                         #
1407                         $parts = preg_split($span_re, $str, 2, PREG_SPLIT_DELIM_CAPTURE);
1408
1409                         # Create token from text preceding tag.
1410                         if ($parts[0] != "") {
1411                                 $output .= $parts[0];
1412                         }
1413
1414                         # Check if we reach the end.
1415                         if (isset($parts[1])) {
1416                                 $output .= $this->handleSpanToken($parts[1], $parts[2]);
1417                                 $str = $parts[2];
1418                         }
1419                         else {
1420                                 break;
1421                         }
1422                 }
1423
1424                 return $output;
1425         }
1426
1427
1428         protected function handleSpanToken($token, &$str) {
1429         #
1430         # Handle $token provided by parseSpan by determining its nature and
1431         # returning the corresponding value that should replace it.
1432         #
1433                 switch ($token{0}) {
1434                         case "\\":
1435                                 return $this->hashPart("&#". ord($token{1}). ";");
1436                         case "`":
1437                                 # Search for end marker in remaining text.
1438                                 if (preg_match('/^(.*?[^`])'.preg_quote($token).'(?!`)(.*)$/sm',
1439                                         $str, $matches))
1440                                 {
1441                                         $str = $matches[2];
1442                                         $codespan = $this->makeCodeSpan($matches[1]);
1443                                         return $this->hashPart($codespan);
1444                                 }
1445                                 return $token; // return as text since no ending marker found.
1446                         default:
1447                                 return $this->hashPart($token);
1448                 }
1449         }
1450
1451
1452         protected function outdent($text) {
1453         #
1454         # Remove one level of line-leading tabs or spaces
1455         #
1456                 return preg_replace('/^(\t|[ ]{1,'.$this->tab_width.'})/m', '', $text);
1457         }
1458
1459
        # String length function for detab. `_initDetab` will create a function to
        # handle UTF-8 if the default function does not exist.
1462         protected $utf8_strlen = 'mb_strlen';
1463
1464         protected function detab($text) {
1465         #
1466         # Replace tabs with the appropriate amount of space.
1467         #
1468                 # For each line we separate the line in blocks delemited by
1469                 # tab characters. Then we reconstruct every line by adding the
1470                 # appropriate number of space between each blocks.
1471
1472                 $text = preg_replace_callback('/^.*\t.*$/m',
1473                         array(&$this, '_detab_callback'), $text);
1474
1475                 return $text;
1476         }
1477         protected function _detab_callback($matches) {
1478                 $line = $matches[0];
1479                 $strlen = $this->utf8_strlen; # strlen function for UTF-8.
1480
1481                 # Split in blocks.
1482                 $blocks = explode("\t", $line);
1483                 # Add each blocks to the line.
1484                 $line = $blocks[0];
1485                 unset($blocks[0]); # Do not add first block twice.
1486                 foreach ($blocks as $block) {
1487                         # Calculate amount of space, insert spaces, insert block.
1488                         $amount = $this->tab_width -
1489                                 $strlen($line, 'UTF-8') % $this->tab_width;
1490                         $line .= str_repeat(" ", $amount) . $block;
1491                 }
1492                 return $line;
1493         }
1494         protected function _initDetab() {
1495         #
1496         # Check for the availability of the function in the `utf8_strlen` property
1497         # (initially `mb_strlen`). If the function is not available, create a
1498         # function that will loosely count the number of UTF-8 characters with a
1499         # regular expression.
1500         #
1501                 if (function_exists($this->utf8_strlen)) return;
1502                 $this->utf8_strlen = create_function('$text', 'return preg_match_all(
1503                         "/[\\\\x00-\\\\xBF]|[\\\\xC0-\\\\xFF][\\\\x80-\\\\xBF]*/",
1504                         $text, $m);');
1505         }
1506
1507
1508         protected function unhash($text) {
1509         #
1510         # Swap back in all the tags hashed by _HashHTMLBlocks.
1511         #
1512                 return preg_replace_callback('/(.)\x1A[0-9]+\1/',
1513                         array(&$this, '_unhash_callback'), $text);
1514         }
1515         protected function _unhash_callback($matches) {
1516                 return $this->html_hashes[$matches[0]];
1517         }
1518
1519 }
1520
1521
1522 #
1523 # Temporary Markdown Extra Parser Implementation Class
1524 #
1525 # NOTE: DON'T USE THIS CLASS
# Currently the implementation of Extra resides here in this temporary class.
1527 # This makes it easier to propagate the changes between the three different
1528 # packaging styles of PHP Markdown. When this issue is resolved, this
1529 # MarkdownExtra_TmpImpl class here will disappear and \Michelf\MarkdownExtra
1530 # will contain the code. So please use \Michelf\MarkdownExtra and ignore this
1531 # one.
1532 #
1533
1534 abstract class _MarkdownExtra_TmpImpl extends \Michelf\Markdown {
1535
1536         ### Configuration Variables ###
1537
1538         # Prefix for footnote ids.
1539         public $fn_id_prefix = "";
1540
1541         # Optional title attribute for footnote links and backlinks.
1542         public $fn_link_title = "";
1543         public $fn_backlink_title = "";
1544
1545         # Optional class attribute for footnote links and backlinks.
1546         public $fn_link_class = "footnote-ref";
1547         public $fn_backlink_class = "footnote-backref";
1548
1549         # Class name for table cell alignment (%% replaced left/center/right)
1550         # For instance: 'go-%%' becomes 'go-left' or 'go-right' or 'go-center'
1551         # If empty, the align attribute is used instead of a class name.
1552         public $table_align_class_tmpl = '';
1553
1554         # Optional class prefix for fenced code block.
1555         public $code_class_prefix = "";
1556         # Class attribute for code blocks goes on the `code` tag;
1557         # setting this to true will put attributes on the `pre` tag instead.
1558         public $code_attr_on_pre = false;
1559
1560         # Predefined abbreviations.
1561         public $predef_abbr = array();
1562
1563
1564         ### Parser Implementation ###
1565
1566         public function __construct() {
1567         #
1568         # Constructor function. Initialize the parser object.
1569         #
1570                 # Add extra escapable characters before parent constructor
1571                 # initialize the table.
1572                 $this->escape_chars .= ':|';
1573
1574                 # Insert extra document, block, and span transformations.
1575                 # Parent constructor will do the sorting.
1576                 $this->document_gamut += array(
1577                         "doFencedCodeBlocks" => 5,
1578                         "stripFootnotes"     => 15,
1579                         "stripAbbreviations" => 25,
1580                         "appendFootnotes"    => 50,
1581                         );
1582                 $this->block_gamut += array(
1583                         "doFencedCodeBlocks" => 5,
1584                         "doTables"           => 15,
1585                         "doDefLists"         => 45,
1586                         );
1587                 $this->span_gamut += array(
1588                         "doFootnotes"        => 5,
1589                         "doAbbreviations"    => 70,
1590                         );
1591
1592                 parent::__construct();
1593         }
1594
1595
1596         # Extra variables used during extra transformations.
1597         protected $footnotes = array();
1598         protected $footnotes_ordered = array();
1599         protected $footnotes_ref_count = array();
1600         protected $footnotes_numbers = array();
1601         protected $abbr_desciptions = array();
1602         protected $abbr_word_re = '';
1603
1604         # Give the current footnote number.
1605         protected $footnote_counter = 1;
1606
1607
1608         protected function setup() {
1609         #
1610         # Setting up Extra-specific variables.
1611         #
1612                 parent::setup();
1613
1614                 $this->footnotes = array();
1615                 $this->footnotes_ordered = array();
1616                 $this->footnotes_ref_count = array();
1617                 $this->footnotes_numbers = array();
1618                 $this->abbr_desciptions = array();
1619                 $this->abbr_word_re = '';
1620                 $this->footnote_counter = 1;
1621
1622                 foreach ($this->predef_abbr as $abbr_word => $abbr_desc) {
1623                         if ($this->abbr_word_re)
1624                                 $this->abbr_word_re .= '|';
1625                         $this->abbr_word_re .= preg_quote($abbr_word);
1626                         $this->abbr_desciptions[$abbr_word] = trim($abbr_desc);
1627                 }
1628         }
1629
1630         protected function teardown() {
1631         #
1632         # Clearing Extra-specific variables.
1633         #
1634                 $this->footnotes = array();
1635                 $this->footnotes_ordered = array();
1636                 $this->footnotes_ref_count = array();
1637                 $this->footnotes_numbers = array();
1638                 $this->abbr_desciptions = array();
1639                 $this->abbr_word_re = '';
1640
1641                 parent::teardown();
1642         }
1643
1644
1645         ### Extra Attribute Parser ###
1646
1647         # Expression to use to catch attributes (includes the braces)
1648         protected $id_class_attr_catch_re = '\{((?:[ ]*[#.][-_:a-zA-Z0-9]+){1,})[ ]*\}';
1649         # Expression to use when parsing in a context when no capture is desired
1650         protected $id_class_attr_nocatch_re = '\{(?:[ ]*[#.][-_:a-zA-Z0-9]+){1,}[ ]*\}';
1651
1652         protected function doExtraAttributes($tag_name, $attr) {
1653         #
1654         # Parse attributes caught by the $this->id_class_attr_catch_re expression
1655         # and return the HTML-formatted list of attributes.
1656         #
1657         # Currently supported attributes are .class and #id.
1658         #
1659                 if (empty($attr)) return "";
1660
1661                 # Split on components
1662                 preg_match_all('/[#.][-_:a-zA-Z0-9]+/', $attr, $matches);
1663                 $elements = $matches[0];
1664
1665                 # handle classes and ids (only first id taken into account)
1666                 $classes = array();
1667                 $id = false;
1668                 foreach ($elements as $element) {
1669                         if ($element{0} == '.') {
1670                                 $classes[] = substr($element, 1);
1671                         } else if ($element{0} == '#') {
1672                                 if ($id === false) $id = substr($element, 1);
1673                         }
1674                 }
1675
1676                 # compose attributes as string
1677                 $attr_str = "";
1678                 if (!empty($id)) {
1679                         $attr_str .= ' id="'.$id.'"';
1680                 }
1681                 if (!empty($classes)) {
1682                         $attr_str .= ' class="'.implode(" ", $classes).'"';
1683                 }
1684                 return $attr_str;
1685         }
1686
1687
1688         protected function stripLinkDefinitions($text) {
1689         #
1690         # Strips link definitions from text, stores the URLs and titles in
1691         # hash references.
1692         #
1693                 $less_than_tab = $this->tab_width - 1;
1694
1695                 # Link defs are in the form: ^[id]: url "optional title"
1696                 $text = preg_replace_callback('{
1697                                                         ^[ ]{0,'.$less_than_tab.'}\[(.+)\][ ]?: # id = $1
1698                                                           [ ]*
1699                                                           \n?                           # maybe *one* newline
1700                                                           [ ]*
1701                                                         (?:
1702                                                           <(.+?)>                       # url = $2
1703                                                         |
1704                                                           (\S+?)                        # url = $3
1705                                                         )
1706                                                           [ ]*
1707                                                           \n?                           # maybe one newline
1708                                                           [ ]*
1709                                                         (?:
1710                                                                 (?<=\s)                 # lookbehind for whitespace
1711                                                                 ["(]
1712                                                                 (.*?)                   # title = $4
1713                                                                 [")]
1714                                                                 [ ]*
1715                                                         )?      # title is optional
1716                                         (?:[ ]* '.$this->id_class_attr_catch_re.' )?  # $5 = extra id & class attr
1717                                                         (?:\n+|\Z)
1718                         }xm',
1719                         array(&$this, '_stripLinkDefinitions_callback'),
1720                         $text);
1721                 return $text;
1722         }
1723         protected function _stripLinkDefinitions_callback($matches) {
1724                 $link_id = strtolower($matches[1]);
1725                 $url = $matches[2] == '' ? $matches[3] : $matches[2];
1726                 $this->urls[$link_id] = $url;
1727                 $this->titles[$link_id] =& $matches[4];
1728                 $this->ref_attr[$link_id] = $this->doExtraAttributes("", $dummy =& $matches[5]);
1729                 return ''; # String that will replace the block
1730         }
1731
1732
1733         ### HTML Block Parser ###
1734
1735         # Tags that are always treated as block tags:
1736         protected $block_tags_re = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|form|fieldset|iframe|hr|legend|article|section|nav|aside|hgroup|header|footer|figcaption';
1737
1738         # Tags treated as block tags only if the opening tag is alone on its line:
1739         protected $context_block_tags_re = 'script|noscript|ins|del|iframe|object|source|track|param|math|svg|canvas|audio|video';
1740
1741         # Tags where markdown="1" default to span mode:
1742         protected $contain_span_tags_re = 'p|h[1-6]|li|dd|dt|td|th|legend|address';
1743
1744         # Tags which must not have their contents modified, no matter where
1745         # they appear:
1746         protected $clean_tags_re = 'script|math|svg';
1747
1748         # Tags that do not need to be closed.
1749         protected $auto_close_tags_re = 'hr|img|param|source|track';
1750
1751
1752         protected function hashHTMLBlocks($text) {
1753         #
1754         # Hashify HTML Blocks and "clean tags".
1755         #
1756         # We only want to do this for block-level HTML tags, such as headers,
1757         # lists, and tables. That's because we still want to wrap <p>s around
1758         # "paragraphs" that are wrapped in non-block-level tags, such as anchors,
1759         # phrase emphasis, and spans. The list of tags we're looking for is
1760         # hard-coded.
1761         #
1762         # This works by calling _HashHTMLBlocks_InMarkdown, which then calls
1763         # _HashHTMLBlocks_InHTML when it encounter block tags. When the markdown="1"
1764         # attribute is found within a tag, _HashHTMLBlocks_InHTML calls back
1765         #  _HashHTMLBlocks_InMarkdown to handle the Markdown syntax within the tag.
1766         # These two functions are calling each other. It's recursive!
1767         #
1768                 if ($this->no_markup)  return $text;
1769
1770                 #
1771                 # Call the HTML-in-Markdown hasher.
1772                 #
1773                 list($text, ) = $this->_hashHTMLBlocks_inMarkdown($text);
1774
1775                 return $text;
1776         }
1777         protected function _hashHTMLBlocks_inMarkdown($text, $indent = 0,
1778                                                                                 $enclosing_tag_re = '', $span = false)
1779         {
1780         #
1781         # Parse markdown text, calling _HashHTMLBlocks_InHTML for block tags.
1782         #
1783         # *   $indent is the number of space to be ignored when checking for code
1784         #     blocks. This is important because if we don't take the indent into
1785         #     account, something like this (which looks right) won't work as expected:
1786         #
1787         #     <div>
1788         #         <div markdown="1">
1789         #         Hello World.  <-- Is this a Markdown code block or text?
1790         #         </div>  <-- Is this a Markdown code block or a real tag?
1791         #     <div>
1792         #
1793         #     If you don't like this, just don't indent the tag on which
1794         #     you apply the markdown="1" attribute.
1795         #
1796         # *   If $enclosing_tag_re is not empty, stops at the first unmatched closing
1797         #     tag with that name. Nested tags supported.
1798         #
1799         # *   If $span is true, text inside must treated as span. So any double
1800         #     newline will be replaced by a single newline so that it does not create
1801         #     paragraphs.
1802         #
1803         # Returns an array of that form: ( processed text , remaining text )
1804         #
1805                 if ($text === '') return array('', '');
1806
1807                 # Regex to check for the presense of newlines around a block tag.
1808                 $newline_before_re = '/(?:^\n?|\n\n)*$/';
1809                 $newline_after_re =
1810                         '{
1811                                 ^                                               # Start of text following the tag.
1812                                 (?>[ ]*<!--.*?-->)?             # Optional comment.
1813                                 [ ]*\n                                  # Must be followed by newline.
1814                         }xs';
1815
1816                 # Regex to match any tag.
1817                 $block_tag_re =
1818                         '{
1819                                 (                                       # $2: Capture whole tag.
1820                                         </?                                     # Any opening or closing tag.
1821                                                 (?>                             # Tag name.
1822                                                         '.$this->block_tags_re.'                        |
1823                                                         '.$this->context_block_tags_re.'        |
1824                                                         '.$this->clean_tags_re.'                |
1825                                                         (?!\s)'.$enclosing_tag_re.'
1826                                                 )
1827                                                 (?:
1828                                                         (?=[\s"\'/a-zA-Z0-9])   # Allowed characters after tag name.
1829                                                         (?>
1830                                                                 ".*?"           |       # Double quotes (can contain `>`)
1831                                                                 \'.*?\'         |       # Single quotes (can contain `>`)
1832                                                                 .+?                             # Anything but quotes and `>`.
1833                                                         )*?
1834                                                 )?
1835                                         >                                       # End of tag.
1836                                 |
1837                                         <!--    .*?     -->     # HTML Comment
1838                                 |
1839                                         <\?.*?\?> | <%.*?%>     # Processing instruction
1840                                 |
1841                                         <!\[CDATA\[.*?\]\]>     # CData Block
1842                                 '. ( !$span ? ' # If not in span.
1843                                 |
1844                                         # Indented code block
1845                                         (?: ^[ ]*\n | ^ | \n[ ]*\n )
1846                                         [ ]{'.($indent+4).'}[^\n]* \n
1847                                         (?>
1848                                                 (?: [ ]{'.($indent+4).'}[^\n]* | [ ]* ) \n
1849                                         )*
1850                                 |
1851                                         # Fenced code block marker
1852                                         (?<= ^ | \n )
1853                                         [ ]{0,'.($indent+3).'}(?:~{3,}|`{3,})
1854                                                                         [ ]*
1855                                         (?:
1856                                         \.?[-_:a-zA-Z0-9]+ # standalone class name
1857                                         |
1858                                                 '.$this->id_class_attr_nocatch_re.' # extra attributes
1859                                         )?
1860                                         [ ]*
1861                                         (?= \n )
1862                                 ' : '' ). ' # End (if not is span).
1863                                 |
1864                                         # Code span marker
1865                                         # Note, this regex needs to go after backtick fenced
1866                                         # code blocks but it should also be kept outside of the
1867                                         # "if not in span" condition adding backticks to the parser
1868                                         `+
1869                                 )
1870                         }xs';
1871
1872
1873                 $depth = 0;             # Current depth inside the tag tree.
1874                 $parsed = "";   # Parsed text that will be returned.
1875
1876                 #
1877                 # Loop through every tag until we find the closing tag of the parent
1878                 # or loop until reaching the end of text if no parent tag specified.
1879                 #
1880                 do {
1881                         #
1882                         # Split the text using the first $tag_match pattern found.
1883                         # Text before  pattern will be first in the array, text after
1884                         # pattern will be at the end, and between will be any catches made
1885                         # by the pattern.
1886                         #
1887                         $parts = preg_split($block_tag_re, $text, 2,
1888                                                                 PREG_SPLIT_DELIM_CAPTURE);
1889
1890                         # If in Markdown span mode, add a empty-string span-level hash
1891                         # after each newline to prevent triggering any block element.
1892                         if ($span) {
1893                                 $void = $this->hashPart("", ':');
1894                                 $newline = "$void\n";
1895                                 $parts[0] = $void . str_replace("\n", $newline, $parts[0]) . $void;
1896                         }
1897
1898                         $parsed .= $parts[0]; # Text before current tag.
1899
1900                         # If end of $text has been reached. Stop loop.
1901                         if (count($parts) < 3) {
1902                                 $text = "";
1903                                 break;
1904                         }
1905
1906                         $tag  = $parts[1]; # Tag to handle.
1907                         $text = $parts[2]; # Remaining text after current tag.
1908                         $tag_re = preg_quote($tag); # For use in a regular expression.
1909
1910                         #
1911                         # Check for: Fenced code block marker.
1912                         # Note: need to recheck the whole tag to disambiguate backtick
1913                         # fences from code spans
1914                         #
1915                         if (preg_match('{^\n?([ ]{0,'.($indent+3).'})(~{3,}|`{3,})[ ]*(?:\.?[-_:a-zA-Z0-9]+|'.$this->id_class_attr_nocatch_re.')?[ ]*\n?$}', $tag, $capture)) {
1916                                 # Fenced code block marker: find matching end marker.
1917                                 $fence_indent = strlen($capture[1]); # use captured indent in re
1918                                 $fence_re = $capture[2]; # use captured fence in re
1919                                 if (preg_match('{^(?>.*\n)*?[ ]{'.($fence_indent).'}'.$fence_re.'[ ]*(?:\n|$)}', $text,
1920                                         $matches))
1921                                 {
1922                                         # End marker found: pass text unchanged until marker.
1923                                         $parsed .= $tag . $matches[0];
1924                                         $text = substr($text, strlen($matches[0]));
1925                                 }
1926                                 else {
1927                                         # No end marker: just skip it.
1928                                         $parsed .= $tag;
1929                                 }
1930                         }
1931                         #
1932                         # Check for: Indented code block.
1933                         #
1934                         else if ($tag{0} == "\n" || $tag{0} == " ") {
1935                                 # Indented code block: pass it unchanged, will be handled
1936                                 # later.
1937                                 $parsed .= $tag;
1938                         }
1939                         #
1940                         # Check for: Code span marker
1941                         # Note: need to check this after backtick fenced code blocks
1942                         #
1943                         else if ($tag{0} == "`") {
1944                                 # Find corresponding end marker.
1945                                 $tag_re = preg_quote($tag);
1946                                 if (preg_match('{^(?>.+?|\n(?!\n))*?(?<!`)'.$tag_re.'(?!`)}',
1947                                         $text, $matches))
1948                                 {
1949                                         # End marker found: pass text unchanged until marker.
1950                                         $parsed .= $tag . $matches[0];
1951                                         $text = substr($text, strlen($matches[0]));
1952                                 }
1953                                 else {
1954                                         # Unmatched marker: just skip it.
1955                                         $parsed .= $tag;
1956                                 }
1957                         }
1958                         #
1959                         # Check for: Opening Block level tag or
1960                         #            Opening Context Block tag (like ins and del)
1961                         #               used as a block tag (tag is alone on it's line).
1962                         #
1963                         else if (preg_match('{^<(?:'.$this->block_tags_re.')\b}', $tag) ||
1964                                 (       preg_match('{^<(?:'.$this->context_block_tags_re.')\b}', $tag) &&
1965                                         preg_match($newline_before_re, $parsed) &&
1966                                         preg_match($newline_after_re, $text)    )
1967                                 )
1968                         {
1969                                 # Need to parse tag and following text using the HTML parser.
1970                                 list($block_text, $text) =
1971                                         $this->_hashHTMLBlocks_inHTML($tag . $text, "hashBlock", true);
1972
1973                                 # Make sure it stays outside of any paragraph by adding newlines.
1974                                 $parsed .= "\n\n$block_text\n\n";
1975                         }
1976                         #
1977                         # Check for: Clean tag (like script, math)
1978                         #            HTML Comments, processing instructions.
1979                         #
1980                         else if (preg_match('{^<(?:'.$this->clean_tags_re.')\b}', $tag) ||
1981                                 $tag{1} == '!' || $tag{1} == '?')
1982                         {
1983                                 # Need to parse tag and following text using the HTML parser.
1984                                 # (don't check for markdown attribute)
1985                                 list($block_text, $text) =
1986                                         $this->_hashHTMLBlocks_inHTML($tag . $text, "hashClean", false);
1987
1988                                 $parsed .= $block_text;
1989                         }
1990                         #
1991                         # Check for: Tag with same name as enclosing tag.
1992                         #
1993                         else if ($enclosing_tag_re !== '' &&
1994                                 # Same name as enclosing tag.
1995                                 preg_match('{^</?(?:'.$enclosing_tag_re.')\b}', $tag))
1996                         {
1997                                 #
1998                                 # Increase/decrease nested tag count.
1999                                 #
2000                                 if ($tag{1} == '/')                                             $depth--;
2001                                 else if ($tag{strlen($tag)-2} != '/')   $depth++;
2002
2003                                 if ($depth < 0) {
2004                                         #
2005                                         # Going out of parent element. Clean up and break so we
2006                                         # return to the calling function.
2007                                         #
2008                                         $text = $tag . $text;
2009                                         break;
2010                                 }
2011
2012                                 $parsed .= $tag;
2013                         }
2014                         else {
2015                                 $parsed .= $tag;
2016                         }
2017                 } while ($depth >= 0);
2018
2019                 return array($parsed, $text);
2020         }
protected function _hashHTMLBlocks_inHTML($text, $hash_method, $md_attr) {
#
# Parse HTML, calling _hashHTMLBlocks_inMarkdown for the content of any tag
# that opts in through the `markdown` attribute.
#
# *   $text is expected to start with the tag to parse; parsing stops when
#     that first opening tag closes.
# *   $hash_method is the name of the method of $this called to convert
#     each finished chunk of HTML.
# *   $md_attr indicates if the use of the `markdown="1"` attribute is
#     allowed (it is not inside clean tags).
#
# Returns an array of that form: ( processed text , remaining text )
# When the end of $text is reached with unbalanced tags, the processed text
# is only the first character of the input and everything after it is
# returned as remaining text.
#
    if ($text === '') return array('', '');

    # Regex to match `markdown` attribute inside of a tag.
    $markdown_attr_re = '
        {
            \s*             # Eat whitespace before the `markdown` attribute
            markdown
            \s*=\s*
            (?>
                (["\'])     # $1: quote delimiter
                (.*?)       # $2: attribute value
                \1          # matching delimiter
            |
                ([^\s>]*)   # $3: unquoted attribute value
            )
            ()              # $4: make $3 always defined (avoid warnings)
        }xs';

    # Regex to match any tag.
    $tag_re = '{
            (                       # $2: Capture whole tag.
                </?                 # Any opening or closing tag.
                    [\w:$]+         # Tag name.
                    (?:
                        (?=[\s"\'/a-zA-Z0-9])   # Allowed characters after tag name.
                        (?>
                            ".*?"       |   # Double quotes (can contain `>`)
                            \'.*?\'     |   # Single quotes (can contain `>`)
                            .+?             # Anything but quotes and `>`.
                        )*?
                    )?
                >                   # End of tag.
            |
                <!--    .*?     --> # HTML Comment
            |
                <\?.*?\?> | <%.*?%> # Processing instruction
            |
                <!\[CDATA\[.*?\]\]> # CData Block
            )
        }xs';

    $original_text = $text;     # Save original text in case of failure.

    $depth      = 0;    # Current depth inside the tag tree.
    $block_text = "";   # Temporary text holder for current text.
    $parsed     = "";   # Parsed text that will be returned.

    #
    # Get the name of the starting tag.
    # (This pattern makes $base_tag_name_re safe without quoting.)
    # The name defaults to an empty string so the variable is always
    # defined, even when $text starts with a comment or a processing
    # instruction, which this pattern does not match.
    #
    $base_tag_name_re = '';
    if (preg_match('/^<([\w:$]*)\b/', $text, $matches))
        $base_tag_name_re = $matches[1];

    #
    # Loop through every tag until we find the corresponding closing tag.
    #
    do {
        #
        # Split the text using the first $tag_re pattern found.
        # Text before pattern will be first in the array, text after
        # pattern will be at the end, and between will be any catches made
        # by the pattern.
        #
        $parts = preg_split($tag_re, $text, 2, PREG_SPLIT_DELIM_CAPTURE);

        if (count($parts) < 3) {
            #
            # End of $text reached with unbalanced tag(s).
            # In that case, we return original text unchanged and pass the
            # first character as filtered to prevent an infinite loop in the
            # parent function.
            #
            return array($original_text[0], substr($original_text, 1));
        }

        $block_text .= $parts[0]; # Text before current tag.
        $tag         = $parts[1]; # Tag to handle.
        $text        = $parts[2]; # Remaining text after current tag.

        #
        # Check for: Auto-close tag (like <hr/>)
        #            Comments and Processing Instructions.
        # String offsets use square brackets: the curly-brace form is
        # deprecated in PHP 7.4 and a parse error in PHP 8.
        #
        if (preg_match('{^</?(?:'.$this->auto_close_tags_re.')\b}', $tag) ||
            $tag[1] == '!' || $tag[1] == '?')
        {
            # Just add the tag to the block as if it was text.
            $block_text .= $tag;
        }
        else {
            #
            # Increase/decrease nested tag count. Only do so if
            # the tag's name match base tag's.
            #
            if (preg_match('{^</?'.$base_tag_name_re.'\b}', $tag)) {
                if ($tag[1] == '/')                     $depth--;
                else if ($tag[strlen($tag)-2] != '/')   $depth++;
            }

            #
            # Check for `markdown="1"` attribute and handle it.
            # The alternatives are grouped so that both anchors apply to
            # each of them; ungrouped, `^1|block|span$` also accepted values
            # such as "10", "blockquote" or "myspan".
            #
            if ($md_attr &&
                preg_match($markdown_attr_re, $tag, $attr_m) &&
                preg_match('/^(?:1|block|span)$/', $attr_m[2] . $attr_m[3]))
            {
                # Remove `markdown` attribute from opening tag.
                $tag = preg_replace($markdown_attr_re, '', $tag);

                # Check if text inside this tag must be parsed in span mode.
                $this->mode = $attr_m[2] . $attr_m[3];
                $span_mode = $this->mode == 'span' ||
                    ($this->mode != 'block' &&
                        preg_match('{^<(?:'.$this->contain_span_tags_re.')\b}', $tag));

                # Calculate indent before tag.
                if (preg_match('/(?:^|\n)( *?)(?! ).*?$/', $block_text, $matches)) {
                    $strlen = $this->utf8_strlen;
                    $indent = $strlen($matches[1], 'UTF-8');
                } else {
                    $indent = 0;
                }

                # End preceding block with this tag.
                $block_text .= $tag;
                $parsed .= $this->$hash_method($block_text);

                # Get enclosing tag name for the ParseMarkdown function.
                # (This pattern makes $tag_name_re safe without quoting.)
                preg_match('/^<([\w:$]*)\b/', $tag, $matches);
                $tag_name_re = $matches[1];

                # Parse the content using the HTML-in-Markdown parser.
                list ($block_text, $text)
                    = $this->_hashHTMLBlocks_inMarkdown($text, $indent,
                        $tag_name_re, $span_mode);

                # Outdent markdown text.
                if ($indent > 0) {
                    $block_text = preg_replace("/^[ ]{1,$indent}/m", "",
                                                $block_text);
                }

                # Append tag content to parsed text.
                if (!$span_mode)    $parsed .= "\n\n$block_text\n\n";
                else                $parsed .= "$block_text";

                # Start over with a new block.
                $block_text = "";
            }
            else $block_text .= $tag;
        }

    } while ($depth > 0);

    #
    # Hash last block text that wasn't processed inside the loop.
    #
    $parsed .= $this->$hash_method($block_text);

    return array($parsed, $text);
}
2194
2195
protected function hashClean($text) {
#
# Hash a "clean" tag chunk. Functions that insert a "clean" tag in the text
# pass it through here so that it is escaped, which blocks invalid nested
# overlap.
#
# Returns whatever hashPart() returns for $text with 'C' as second argument.
#
    $hashed = $this->hashPart($text, 'C');
    return $hashed;
}
2204
2205
protected function doAnchors($text) {
#
# Turn Markdown link shortcuts into XHTML <a> tags.
#
# Three passes are applied in order: reference-style links, inline-style
# links, then reference-style shortcuts. The `in_anchor` flag is raised for
# the duration of the call and the text is returned untouched when the flag
# is already set on entry.
#
    if ($this->in_anchor) {
        return $text;
    }
    $this->in_anchor = true;

    #
    # First, handle reference-style links: [link text] [id]
    #
    $reference_re = '{
        (                   # wrap whole match in $1
          \[
            ('.$this->nested_brackets_re.') # link text = $2
          \]

          [ ]?              # one optional space
          (?:\n[ ]*)?       # one optional newline followed by spaces

          \[
            (.*?)       # id = $3
          \]
        )
        }xs';
    $text = preg_replace_callback($reference_re,
        array($this, '_doAnchors_reference_callback'), $text);

    #
    # Next, inline-style links: [link text](url "optional title")
    #
    $inline_re = '{
        (               # wrap whole match in $1
          \[
            ('.$this->nested_brackets_re.') # link text = $2
          \]
          \(            # literal paren
            [ \n]*
            (?:
                <(.+?)> # href = $3
            |
                ('.$this->nested_url_parenthesis_re.')  # href = $4
            )
            [ \n]*
            (           # $5
              ([\'"])   # quote char = $6
              (.*?)     # Title = $7
              \6        # matching quote
              [ \n]*    # ignore any spaces/tabs between closing quote and )
            )?          # title is optional
          \)
          (?:[ ]? '.$this->id_class_attr_catch_re.' )?   # $8 = id/class attributes
        )
        }xs';
    $text = preg_replace_callback($inline_re,
        array($this, '_doAnchors_inline_callback'), $text);

    #
    # Last, handle reference-style shortcuts: [link text]
    # These must come last in case you've also got [link text][1]
    # or [link text](/foo)
    #
    $shortcut_re = '{
        (                   # wrap whole match in $1
          \[
            ([^\[\]]+)      # link text = $2; can\'t contain [ or ]
          \]
        )
        }xs';
    $text = preg_replace_callback($shortcut_re,
        array($this, '_doAnchors_reference_callback'), $text);

    $this->in_anchor = false;
    return $text;
}
protected function _doAnchors_reference_callback($matches) {
#
# Callback for reference-style links and shortcuts.
#
# $matches[1] is the whole match, $matches[2] the link text and $matches[3]
# the link id (missing for the [link text] shortcut form).
#
# Returns the hashed <a> element when the id is found in $this->urls, or
# the whole match unchanged otherwise.
#
    $original = $matches[1];
    $label    = $matches[2];
    $ref_id   = isset($matches[3]) ? $matches[3] : '';

    # Shortcut links like [this][] or [this] use the link text as id.
    if ($ref_id == "") {
        $ref_id = $label;
    }

    # lower-case and turn embedded newlines into spaces
    $ref_id = preg_replace('{[ ]?\n}', ' ', strtolower($ref_id));

    # Unknown id: leave the source text as it was.
    if (!isset($this->urls[$ref_id])) {
        return $original;
    }

    $href   = $this->encodeAttribute($this->urls[$ref_id]);
    $anchor = "<a href=\"$href\"";

    if (isset($this->titles[$ref_id])) {
        $title_attr = $this->encodeAttribute($this->titles[$ref_id]);
        $anchor .= " title=\"$title_attr\"";
    }
    if (isset($this->ref_attr[$ref_id])) {
        $anchor .= $this->ref_attr[$ref_id];
    }

    $inner   = $this->runSpanGamut($label);
    $anchor .= ">$inner</a>";

    return $this->hashPart($anchor);
}
protected function _doAnchors_inline_callback($matches) {
#
# Callback for inline-style links: [link text](url "optional title")
#
# $matches[2] is the link text, $matches[3] the href written between angle
# brackets, $matches[4] the bare href, $matches[7] the optional title and
# $matches[8] the optional id/class attribute block.
#
# Returns the hashed <a> element.
#
    # The angle-bracket form wins when it is present.
    $url   =  $matches[3] == '' ? $matches[4] : $matches[3];
    # Taken by reference so that a missing optional group reads as unset
    # instead of raising an undefined-offset notice.
    $title =& $matches[7];
    $attr  = $this->doExtraAttributes("a", $dummy =& $matches[8]);

    $url = $this->encodeAttribute($url);

    $result = "<a href=\"$url\"";
    if (isset($title)) {
        $title = $this->encodeAttribute($title);
        $result .=  " title=\"$title\"";
    }
    $result .= $attr;

    # The span gamut is applied once, to the raw link text. It used to be
    # applied a second time to its own output, which was redundant work.
    $link_text = $this->runSpanGamut($matches[2]);
    $result .= ">$link_text</a>";

    return $this->hashPart($result);
}
2335
2336
protected function doImages($text) {
#
# Turn Markdown image shortcuts into <img> tags.
#
# Reference-style images are handled first, then inline images; each match
# is handed to the corresponding callback.
#

    #
    # First, handle reference-style labeled images: ![alt text][id]
    #
    $reference_re = '{
        (               # wrap whole match in $1
          !\[
            ('.$this->nested_brackets_re.')     # alt text = $2
          \]

          [ ]?              # one optional space
          (?:\n[ ]*)?       # one optional newline followed by spaces

          \[
            (.*?)       # id = $3
          \]

        )
        }xs';
    $text = preg_replace_callback($reference_re,
        array($this, '_doImages_reference_callback'), $text);

    #
    # Next, handle inline images:  ![alt text](url "optional title")
    # Don't forget: encode * and _
    #
    $inline_re = '{
        (               # wrap whole match in $1
          !\[
            ('.$this->nested_brackets_re.')     # alt text = $2
          \]
          \s?           # One optional whitespace character
          \(            # literal paren
            [ \n]*
            (?:
                <(\S*)> # src url = $3
            |
                ('.$this->nested_url_parenthesis_re.')  # src url = $4
            )
            [ \n]*
            (           # $5
              ([\'"])   # quote char = $6
              (.*?)     # title = $7
              \6        # matching quote
              [ \n]*
            )?          # title is optional
          \)
          (?:[ ]? '.$this->id_class_attr_catch_re.' )?   # $8 = id/class attributes
        )
        }xs';
    $text = preg_replace_callback($inline_re,
        array($this, '_doImages_inline_callback'), $text);

    return $text;
}
protected function _doImages_reference_callback($matches) {
#
# Callback for reference-style images: ![alt text][id]
#
# $matches[1] is the whole match, $matches[2] the alt text and $matches[3]
# the link id (empty for the ![this][] shortcut form).
#
# Returns the hashed <img> element when the id is found in $this->urls, or
# the whole match unchanged otherwise.
#
    $original = $matches[1];
    $alt      = $matches[2];
    $ref_id   = strtolower($matches[3]);

    # Shortcut links like ![this][] use the alt text as id.
    if ($ref_id == "") {
        $ref_id = strtolower($alt);
    }

    $alt = $this->encodeAttribute($alt);

    # If there's no such link ID, leave intact.
    if (!isset($this->urls[$ref_id])) {
        return $original;
    }

    $src = $this->encodeAttribute($this->urls[$ref_id]);
    $img = "<img src=\"$src\" alt=\"$alt\"";

    if (isset($this->titles[$ref_id])) {
        $title_attr = $this->encodeAttribute($this->titles[$ref_id]);
        $img .= " title=\"$title_attr\"";
    }
    if (isset($this->ref_attr[$ref_id])) {
        $img .= $this->ref_attr[$ref_id];
    }
    $img .= $this->empty_element_suffix;

    return $this->hashPart($img);
}
protected function _doImages_inline_callback($matches) {
#
# Callback for inline images: ![alt text](url "optional title")
#
# $matches[2] is the alt text, $matches[3] the src written between angle
# brackets, $matches[4] the bare src, $matches[7] the optional title and
# $matches[8] the optional id/class attribute block.
#
# Returns the hashed <img> element.
#
    # The angle-bracket form wins when it is present.
    if ($matches[3] == '') {
        $src = $matches[4];
    } else {
        $src = $matches[3];
    }

    # Bound by reference so that a missing optional group reads as null
    # instead of raising an undefined-offset notice.
    $extra_source =& $matches[8];
    $extra = $this->doExtraAttributes("img", $extra_source);

    $alt = $this->encodeAttribute($matches[2]);
    $src = $this->encodeAttribute($src);

    $img = "<img src=\"$src\" alt=\"$alt\"";
    if (isset($matches[7])) {
        $title_attr = $this->encodeAttribute($matches[7]);
        $img .= " title=\"$title_attr\"";
    }
    $img .= $extra . $this->empty_element_suffix;

    return $this->hashPart($img);
}
2442
2443
protected function doHeaders($text) {
#
# Redefined to add id and class attribute support.
#
# Setext-style headers are converted first, then atx-style headers; each
# match is handed to the corresponding callback. Returns the converted text.
#
    # Setext-style headers:
    #     Header 1  {#header1}
    #     ========
    #
    #     Header 2  {#header2 .class1 .class2}
    #     --------
    #
    # The group labels below follow what _doHeaders_callback_setext reads:
    # attributes from $2 and the footer from $3.
    #
    $text = preg_replace_callback(
        '{
            (^.+?)                              # $1: Header text
            (?:[ ]+ '.$this->id_class_attr_catch_re.' )?     # $2 = id/class attributes
            [ ]*\n(=+|-+)[ ]*\n+                # $3: Header footer
        }mx',
        array($this, '_doHeaders_callback_setext'), $text);

    # atx-style headers:
    #   # Header 1        {#header1}
    #   ## Header 2       {#header2}
    #   ## Header 2 with closing hashes ##  {#header3.class1.class2}
    #   ...
    #   ###### Header 6   {.class2}
    #
    $text = preg_replace_callback('{
            ^(\#{1,6})  # $1 = string of #\'s
            [ ]*
            (.+?)       # $2 = Header text
            [ ]*
            \#*         # optional closing #\'s (not counted)
            (?:[ ]+ '.$this->id_class_attr_catch_re.' )?     # $3 = id/class attributes
            [ ]*
            \n+
        }xm',
        array($this, '_doHeaders_callback_atx'), $text);

    return $text;
}
protected function _doHeaders_callback_setext($matches) {
#
# Callback for the setext-style header pattern.
#
#   $matches[1] - header text
#   $matches[2] - raw id/class attribute block, if any
#   $matches[3] - underline: a run of "=" (level 1) or "-" (level 2)
#
# Returns the header markup passed through hashBlock and padded with
# newlines, or the untouched match when the guard below applies.
#
    # A one-dash underline below a line starting with "- " is left as-is
    # (presumably so that list items are not mistaken for headers).
    if ($matches[3] == '-' && preg_match('{^- }', $matches[1]))
        return $matches[0];

    # Square-bracket offset: the curly-brace form ($str{0}) was deprecated
    # in PHP 7.4 and is a parse error from PHP 8.0 on.
    $level = $matches[3][0] == '=' ? 1 : 2;
    $attr  = $this->doExtraAttributes("h$level", $dummy =& $matches[2]);
    $block = "<h$level$attr>".$this->runSpanGamut($matches[1])."</h$level>";
    return "\n" . $this->hashBlock($block) . "\n\n";
}
protected function _doHeaders_callback_atx($matches) {
#
# Callback for the atx-style header pattern.
#
#   $matches[1] - leading run of "#" characters; its length is the level
#   $matches[2] - header text
#   $matches[3] - raw id/class attribute block, if any
#
    $tag       = "h" . strlen($matches[1]);
    # Binding by reference avoids a notice when group 3 is absent.
    $tag_attrs = $this->doExtraAttributes($tag, $dummy =& $matches[3]);
    $inner     = $this->runSpanGamut($matches[2]);
    $html      = "<$tag$tag_attrs>" . $inner . "</$tag>";

    return "\n" . $this->hashBlock($html) . "\n\n";
}
2498
2499
protected function doTables($text) {
#
# Form HTML tables.
#
# Two passes are made over $text: one for tables whose rows start with a
# pipe, one for tables without a leading pipe. Each match is handed to a
# callback that builds the table markup. Returns the rewritten text.
#
    $less_than_tab = $this->tab_width - 1;
    #
    # Find tables with leading pipe.
    #
    #   | Header 1 | Header 2
    #   | -------- | --------
    #   | Cell 1   | Cell 2
    #   | Cell 3   | Cell 4
    #
    $text = preg_replace_callback('
        {
            ^                               # Start of a line
            [ ]{0,'.$less_than_tab.'}       # Allowed whitespace.
            [|]                             # Optional leading pipe (present)
            (.+) \n                         # $1: Header row (at least one pipe)

            [ ]{0,'.$less_than_tab.'}       # Allowed whitespace.
            [|] ([ ]*[-:]+[-| :]*) \n       # $2: Header underline

            (                               # $3: Cells
                (?>
                    [ ]*                    # Allowed whitespace.
                    [|] .* \n               # Row content.
                )*
            )
            (?=\n|\Z)                       # Stop at final double newline.
        }xm',
        array($this, '_doTable_leadingPipe_callback'), $text);

    #
    # Find tables without leading pipe.
    #
    #   Header 1 | Header 2
    #   -------- | --------
    #   Cell 1   | Cell 2
    #   Cell 3   | Cell 4
    #
    # The callback name is spelled exactly like the method declaration
    # (_doTable_callback) rather than relying on case-insensitive lookup.
    $text = preg_replace_callback('
        {
            ^                               # Start of a line
            [ ]{0,'.$less_than_tab.'}       # Allowed whitespace.
            (\S.*[|].*) \n                  # $1: Header row (at least one pipe)

            [ ]{0,'.$less_than_tab.'}       # Allowed whitespace.
            ([-:]+[ ]*[|][-| :]*) \n        # $2: Header underline

            (                               # $3: Cells
                (?>
                    .* [|] .* \n            # Row content
                )*
            )
            (?=\n|\Z)                       # Stop at final double newline.
        }xm',
        array($this, '_doTable_callback'), $text);

    return $text;
}
protected function _doTable_leadingPipe_callback($matches) {
#
# Callback for tables written with a leading pipe on every row.
# Strips the leading pipe from each body row, then delegates to
# _doTable_callback with the same match layout
# (whole match, header row, underline, body rows).
#
    list($whole, $header_row, $rule_row, $body_rows) = $matches;

    # Drop the optional indentation and the first pipe of every body line.
    $body_rows = preg_replace('/^ *[|]/m', '', $body_rows);

    return $this->_doTable_callback(
        array($whole, $header_row, $rule_row, $body_rows));
}
protected function _doTable_makeAlignAttr($alignname)
{
#
# Build the attribute string that aligns a table cell.
#
#   $alignname - alignment keyword (the caller passes 'left', 'center'
#                or 'right')
#
# When table_align_class_tmpl is non-empty, "%%" in that template is
# replaced by $alignname and a class attribute is returned; otherwise a
# plain align attribute is returned. The result starts with a space.
#
    if (!empty($this->table_align_class_tmpl)) {
        $css_class = str_replace('%%', $alignname, $this->table_align_class_tmpl);
        return " class=\"$css_class\"";
    }

    return " align=\"$alignname\"";
}
protected function _doTable_callback($matches) {
#
# Build the markup for one table.
#
#   $matches[1] - header row
#   $matches[2] - header underline (carries the column alignment)
#   $matches[3] - body rows, one per line
#
# Returns the table markup passed through hashBlock, followed by a newline.
#
    # Trailing pipes are optional: strip them from every line.
    $trailing_pipe_re = '/[|] *$/m';
    $header_line = preg_replace($trailing_pipe_re, '', $matches[1]);
    $rule_line   = preg_replace($trailing_pipe_re, '', $matches[2]);
    $body        = preg_replace($trailing_pipe_re, '', $matches[3]);

    # Derive each column's alignment from where the colons sit in the
    # underline: "---:" right, ":---:" center, ":---" left, else none.
    $col_attr = array();
    foreach (preg_split('/ *[|] */', $rule_line) as $col => $rule) {
        if (preg_match('/^ *-+: *$/', $rule)) {
            $col_attr[$col] = $this->_doTable_makeAlignAttr('right');
        } elseif (preg_match('/^ *:-+: *$/', $rule)) {
            $col_attr[$col] = $this->_doTable_makeAlignAttr('center');
        } elseif (preg_match('/^ *:-+ *$/', $rule)) {
            $col_attr[$col] = $this->_doTable_makeAlignAttr('left');
        } else {
            $col_attr[$col] = '';
        }
    }

    # Span elements (code spans, escapes, inline HTML) are parsed before
    # splitting so that pipes inside them are not taken as separators.
    $header_cells = preg_split('/ *[|] */', $this->parseSpan($header_line));
    $col_count    = count($header_cells);
    $col_attr     = array_pad($col_attr, $col_count, '');

    # Header section.
    $html  = "<table>\n";
    $html .= "<thead>\n";
    $html .= "<tr>\n";
    foreach ($header_cells as $col => $cell) {
        $html .= "  <th" . $col_attr[$col] . ">"
            . $this->runSpanGamut(trim($cell)) . "</th>\n";
    }
    $html .= "</tr>\n";
    $html .= "</thead>\n";

    # Body section: one <tr> per input line.
    $html .= "<tbody>\n";
    foreach (explode("\n", trim($body, "\n")) as $line) {
        # Same span pre-parsing as for the header row.
        $line = $this->parseSpan($line);

        # Never produce more cells than there are header columns, and
        # pad short rows with empty cells.
        $cells = preg_split('/ *[|] */', $line, $col_count);
        $cells = array_pad($cells, $col_count, '');

        $html .= "<tr>\n";
        foreach ($cells as $col => $cell) {
            $html .= "  <td" . $col_attr[$col] . ">"
                . $this->runSpanGamut(trim($cell)) . "</td>\n";
        }
        $html .= "</tr>\n";
    }
    $html .= "</tbody>\n";
    $html .= "</table>";

    return $this->hashBlock($html) . "\n";
}
2641
2642
protected function doDefLists($text) {
#
# Form HTML definition lists.
#
# Locates every complete definition list in $text (at the start of the
# text or right after a blank line) and replaces it with the result of
# _doDefLists_callback. Returns the rewritten text.
#
    $max_indent = $this->tab_width - 1;

    # Pattern matching one entire definition list; embedded below in a
    # pattern compiled with the "m" and "x" modifiers.
    $dl_re = '(?>
        (                               # $1 = whole list
          (                             # $2
            [ ]{0,'.$max_indent.'}
            ((?>.*\S.*\n)+)             # $3 = defined term
            \n?
            [ ]{0,'.$max_indent.'}:[ ]+ # colon starting definition
          )
          (?s:.+?)
          (                             # $4
              \z
            |
              \n{2,}
              (?=\S)
              (?!                       # Negative lookahead for another term
                [ ]{0,'.$max_indent.'}
                (?: \S.*\n )+?          # defined term
                \n?
                [ ]{0,'.$max_indent.'}:[ ]+ # colon starting definition
              )
              (?!                       # Negative lookahead for another definition
                [ ]{0,'.$max_indent.'}:[ ]+ # colon starting definition
              )
          )
        )
    )'; // mx

    return preg_replace_callback('{
            (?>\A\n?|(?<=\n\n))
            '.$dl_re.'
        }mx',
        array($this, '_doDefLists_callback'), $text);
}
protected function _doDefLists_callback($matches) {
#
# Callback for one whole definition list ($matches[1]).
# Converts its terms and definitions through processDefListItems, wraps
# the trimmed result in <dl>...</dl>, and returns it passed through
# hashBlock followed by a blank line.
#
    $items = trim($this->processDefListItems($matches[1]));
    $html  = "<dl>\n" . $items . "\n</dl>";

    return $this->hashBlock($html) . "\n\n";
}
2695
2696
protected function processDefListItems($list_str) {
#
#   Process the contents of a single definition list, splitting it
#   into individual term and definition list items.
#
#   $list_str - raw text of one definition list
#
#   Returns the text with terms replaced by the output of
#   _processDefListItems_callback_dt and definitions replaced by the
#   output of _processDefListItems_callback_dd.
#
    $less_than_tab = $this->tab_width - 1;

    # trim trailing blank lines:
    $list_str = preg_replace("/\n{2,}\\z/", "\n", $list_str);

    # Process definition terms.
    # The closing lookahead bounds the colon's indentation with
    # $less_than_tab, like every other pattern here, instead of a
    # hard-coded 3 (identical when tab_width is 4).
    $list_str = preg_replace_callback('{
        (?>\A\n?|\n\n+)                 # leading line
        (                               # definition terms = $1
            [ ]{0,'.$less_than_tab.'}   # leading whitespace
            (?!\:[ ]|[ ])               # negative lookahead for a definition
                                        #   mark (colon) or more whitespace.
            (?> \S.* \n)+?              # actual term (not whitespace).
        )
        (?=\n?[ ]{0,'.$less_than_tab.'}:[ ])    # lookahead for following line feed
                                        #   with a definition mark.
        }xm',
        array($this, '_processDefListItems_callback_dt'), $list_str);

    # Process actual definitions.
    # This runs after the term pass, so a "<dt>" produced above is one of
    # the markers that ends a definition.
    $list_str = preg_replace_callback('{
        \n(\n+)?                        # leading line = $1
        (                               # marker space = $2
            [ ]{0,'.$less_than_tab.'}   # whitespace before colon
            \:[ ]+                      # definition mark (colon)
        )
        ((?s:.+?))                      # definition text = $3
        (?= \n+                         # stop at next definition mark,
            (?:                         # next term or end of text
                [ ]{0,'.$less_than_tab.'} \:[ ] |
                <dt> | \z
            )
        )
        }xm',
        array($this, '_processDefListItems_callback_dd'), $list_str);

    return $list_str;
}
protected function _processDefListItems_callback_dt($matches) {
#
# Callback for a group of definition terms ($matches[1], one term per
# line). Each trimmed line goes through runSpanGamut and is emitted as
# its own <dt> element, preceded by a newline; the result ends with a
# newline.
#
    $html = '';
    foreach (explode("\n", trim($matches[1])) as $line) {
        $html .= "\n<dt>" . $this->runSpanGamut(trim($line)) . "</dt>";
    }

    return $html . "\n";
}
protected function _processDefListItems_callback_dd($matches) {
#
# Callback for one definition.
#
#   $matches[1] - extra blank line(s) before the definition, if any
#   $matches[2] - indentation plus colon marker
#   $matches[3] - definition text
#
# A definition preceded by a blank line, or containing one, is processed
# with runBlockGamut; any other is processed with runSpanGamut.
#
    $blank_before = $matches[1];
    $marker       = $matches[2];
    $body         = $matches[3];

    $is_block = $blank_before || preg_match('/\n{2,}/', $body);

    if (!$is_block) {
        $inline = $this->runSpanGamut($this->outdent(rtrim($body)));
        return "\n<dd>" . $inline . "</dd>\n";
    }

    # Swap the marker for spaces of the same width so that the first
    # line is indented like the marker was before outdenting.
    $padded = str_repeat(' ', strlen($marker)) . $body;
    $blocks = $this->runBlockGamut($this->outdent($padded . "\n\n"));

    return "\n<dd>" . "\n" . $blocks . "\n" . "</dd>\n";
}
2767
2768
protected function doFencedCodeBlocks($text) {
#
# Adding the fenced code block syntax to regular Markdown:
#
# ~~~
# Code block
# ~~~
#
# A fence is three or more tildes or backticks at the start of a line,
# optionally followed by a class name or an attribute block; the block
# ends at a line holding the same marker. Each match is replaced by the
# output of _doFencedCodeBlocks_callback. Returns the rewritten text.
#
    $text = preg_replace_callback('{
            (?:\n|\A)
            # 1: Opening marker
            (
                (?:~{3,}|`{3,}) # 3 or more tildes/backticks.
            )
            [ ]*
            (?:
                \.?([-_:a-zA-Z0-9]+) # 2: standalone class name
            |
                '.$this->id_class_attr_catch_re.' # 3: Extra attributes
            )?
            [ ]* \n # Whitespace and newline following marker.

            # 4: Content
            (
                (?>
                    (?!\1 [ ]* \n)  # Not a closing marker.
                    .*\n+
                )+
            )

            # Closing marker.
            \1 [ ]* (?= \n )
        }xm',
        array($this, '_doFencedCodeBlocks_callback'), $text);

    return $text;
}
protected function _doFencedCodeBlocks_callback($matches) {
#
# Callback for one fenced code block.
#
#   $matches[2] - standalone class name, if given
#   $matches[3] - raw extra-attribute block, if given
#   $matches[4] - code content
#
# Returns the <pre><code> markup passed through hashBlock and surrounded
# by blank lines. Attributes go on <pre> when code_attr_on_pre is set,
# on <code> otherwise.
#
    $classname =& $matches[2];
    $attrs     =& $matches[3];
    $codeblock = $matches[4];
    $codeblock = htmlspecialchars($codeblock, ENT_NOQUOTES);
    # Leading blank lines are turned into explicit <br> elements.
    $codeblock = preg_replace_callback('/^\n+/',
        array($this, '_doFencedCodeBlocks_newlines'), $codeblock);

    if ($classname != "") {
        # Square-bracket offset: the curly-brace form ($str{0}) was
        # deprecated in PHP 7.4 and is a parse error from PHP 8.0 on.
        if ($classname[0] == '.')
            $classname = substr($classname, 1);
        $attr_str = ' class="'.$this->code_class_prefix.$classname.'"';
    } else {
        $attr_str = $this->doExtraAttributes($this->code_attr_on_pre ? "pre" : "code", $attrs);
    }
    $pre_attr_str  = $this->code_attr_on_pre ? $attr_str : '';
    $code_attr_str = $this->code_attr_on_pre ? '' : $attr_str;
    $codeblock  = "<pre$pre_attr_str><code$code_attr_str>$codeblock</code></pre>";

    return "\n\n".$this->hashBlock($codeblock)."\n\n";
}
protected function _doFencedCodeBlocks_newlines($matches) {
#
# Callback for the run of newlines at the start of a fenced code block:
# returns one <br> element (closed with empty_element_suffix) for each
# character of the matched run.
#
    $line_break = "<br" . $this->empty_element_suffix;
    $count      = strlen($matches[0]);

    return str_repeat($line_break, $count);
}
2833
2834
#
# Emphasis marker patterns, redefined so that emphasis by underscore
# does not work in the middle of a word.
#
# Each table is keyed by a marker string. The '' entry carries a
# lookahead for non-space (apparently the opening marker); the other
# entries carry a lookbehind for non-space (apparently the closing
# marker for that token) -- confirm against the code consuming them.
#

# Single markers: * and _
protected $em_relist = array(
    ''  => '(?:(?<!\*)\*(?!\*)|(?<![a-zA-Z0-9_])_(?!_))(?=\S|$)(?![\.,:;]\s)',
    '*' => '(?<=\S|^)(?<!\*)\*(?!\*)',
    '_' => '(?<=\S|^)(?<!_)_(?![a-zA-Z0-9_])',
);

# Double markers: ** and __
protected $strong_relist = array(
    ''   => '(?:(?<!\*)\*\*(?!\*)|(?<![a-zA-Z0-9_])__(?!_))(?=\S|$)(?![\.,:;]\s)',
    '**' => '(?<=\S|^)(?<!\*)\*\*(?!\*)',
    '__' => '(?<=\S|^)(?<!_)__(?![a-zA-Z0-9_])',
);

# Triple markers: *** and ___
protected $em_strong_relist = array(
    ''    => '(?:(?<!\*)\*\*\*(?!\*)|(?<![a-zA-Z0-9_])___(?!_))(?=\S|$)(?![\.,:;]\s)',
    '***' => '(?<=\S|^)(?<!\*)\*\*\*(?!\*)',
    '___' => '(?<=\S|^)(?<!_)___(?![a-zA-Z0-9_])',
);
2854
2855
protected function formParagraphs($text) {
#
#   Params:
#       $text - string to process with html <p> tags
#
#   Splits $text on blank lines, runs each chunk through runSpanGamut,
#   wraps chunks in <p> unless they match the hash-token test below,
#   joins everything with blank lines and returns the result of unhash().
#
    # Drop leading and trailing newlines, then split on blank lines.
    $text   = preg_replace('/\A\n+|\n+\z/', '', $text);
    $chunks = preg_split('/\n{2,}/', $text, -1, PREG_SPLIT_NO_EMPTY);

    $rendered = array();
    foreach ($chunks as $idx => $chunk) {
        $chunk = trim($this->runSpanGamut($chunk));

        # Clean tag hashes & block tag hashes are left alone; anything
        # else becomes a paragraph.
        if (preg_match('/^B\x1A[0-9]+B|^C\x1A[0-9]+C$/', $chunk)) {
            $rendered[$idx] = $chunk;
        } else {
            $rendered[$idx] = "<p>$chunk</p>";
        }
    }

    # Join the chunks, then remove any tag hashes still present.
    return $this->unhash(implode("\n\n", $rendered));
}
2890
2891
2892         ### Footnotes
2893
protected function stripFootnotes($text) {
#
# Strips footnote definitions from $text. Each definition is handed to
# _stripFootnotes_callback, which records it and replaces it with an
# empty string. Returns the remaining text.
#
    $max_indent = $this->tab_width - 1;

    # Footnote definitions are in the form: [^id]: text
    # The text may start on the following line and may span several
    # lines, as constrained by the two lookaheads below.
    $definition_re = '{
        ^[ ]{0,'.$max_indent.'}\[\^(.+?)\][ ]?:     # note_id = $1
          [ ]*
          \n?                   # maybe *one* newline
        (                       # text = $2 (no blank lines allowed)
            (?:
                .+              # actual text
            |
                \n              # newlines but
                (?!\[\^.+?\]:\s)# negative lookahead for footnote marker.
                (?!\n+[ ]{0,3}\S)# ensure line is not blank and followed
                                # by non-indented content
            )*
        )
        }xm';

    return preg_replace_callback(
        $definition_re,
        array($this, '_stripFootnotes_callback'),
        $text);
}
protected function _stripFootnotes_callback($matches) {
#
# Callback for one footnote definition: stores the outdented text
# ($matches[2]) in $this->footnotes under fn_id_prefix . $matches[1],
# and returns an empty string so the definition vanishes from the text.
#
    $key  = $this->fn_id_prefix . $matches[1];
    $body = $this->outdent($matches[2]);
    $this->footnotes[$key] = $body;

    return '';
}
2926
2927
protected function doFootnotes($text) {
#
# Replace footnote references in $text [^id] with a special text-token
# which will be replaced by the actual footnote marker in appendFootnotes.
# Nothing is replaced while $this->in_anchor is set.
#
    if ($this->in_anchor) {
        return $text;
    }

    return preg_replace('{\[\^(.+?)\]}', "F\x1Afn:\\1\x1A:", $text);
}
2938
2939
protected function appendFootnotes($text) {
#
# Append footnote list to text.
#
# First turns every footnote token in $text into its marker (through
# _appendFootnotes_callback, which also queues the referenced notes in
# footnotes_ordered). If any note was queued, a <div class="footnotes">
# holding an ordered list of the notes is appended. Notes referenced
# from inside other notes are queued while the list is being built and
# are emitted by the same loop.
#
# In fn_backlink_class and fn_backlink_title, "%%" is replaced by the
# note's position in the list, starting at 1.
#
# Returns $text with markers resolved and the list appended.
#
    $text = preg_replace_callback('{F\x1Afn:(.*?)\x1A:}',
        array($this, '_appendFootnotes_callback'), $text);

    if (!empty($this->footnotes_ordered)) {
        $text .= "\n\n";
        $text .= "<div class=\"footnotes\">\n";
        $text .= "<hr". $this->empty_element_suffix ."\n";
        $text .= "<ol>\n\n";

        # Attribute template shared by all backlinks; it may still
        # contain the "%%" placeholder.
        $attr = "";
        if ($this->fn_backlink_class != "") {
            $class = $this->fn_backlink_class;
            $class = $this->encodeAttribute($class);
            $attr .= " class=\"$class\"";
        }
        if ($this->fn_backlink_title != "") {
            $title = $this->fn_backlink_title;
            $title = $this->encodeAttribute($title);
            $attr .= " title=\"$title\"";
        }
        $num = 0;

        while (!empty($this->footnotes_ordered)) {
            # Take the first queued note and remove every record of it.
            $footnote = reset($this->footnotes_ordered);
            $note_id = key($this->footnotes_ordered);
            unset($this->footnotes_ordered[$note_id]);
            $ref_count = $this->footnotes_ref_count[$note_id];
            unset($this->footnotes_ref_count[$note_id]);
            unset($this->footnotes[$note_id]);

            $footnote .= "\n"; # Need to append newline before parsing.
            $footnote = $this->runBlockGamut("$footnote\n");
            $footnote = preg_replace_callback('{F\x1Afn:(.*?)\x1A:}',
                array($this, '_appendFootnotes_callback'), $footnote);

            # Fill the placeholder into a per-note copy. Overwriting
            # $attr itself would consume "%%" on the first note and
            # leave every later backlink carrying number 1.
            $backlink_attr = str_replace("%%", ++$num, $attr);
            $note_id = $this->encodeAttribute($note_id);

            # Prepare backlink, multiple backlinks if multiple references
            $backlink = "<a href=\"#fnref:$note_id\"$backlink_attr>&#8617;</a>";
            for ($ref_num = 2; $ref_num <= $ref_count; ++$ref_num) {
                $backlink .= " <a href=\"#fnref$ref_num:$note_id\"$backlink_attr>&#8617;</a>";
            }
            # Add backlink to last paragraph; create new paragraph if needed.
            if (preg_match('{</p>$}', $footnote)) {
                $footnote = substr($footnote, 0, -4) . "&#160;$backlink</p>";
            } else {
                $footnote .= "\n\n<p>$backlink</p>";
            }

            $text .= "<li id=\"fn:$note_id\">\n";
            $text .= $footnote . "\n";
            $text .= "</li>\n\n";
        }

        $text .= "</ol>\n";
        $text .= "</div>";
    }
    return $text;
}
protected function _appendFootnotes_callback($matches) {
#
# Callback for one footnote token; $matches[1] is the id written in the
# source text.
#
# If a footnote is stored under that id (after prefixing it with
# fn_id_prefix), returns a <sup> marker linking to it. Otherwise returns
# the literal "[^id]" text.
#
# In fn_link_class and fn_link_title, "%%" is replaced by the footnote
# number.
#
    $node_id = $this->fn_id_prefix . $matches[1];

    # No such footnote: give back the reference as it was written.
    if (!isset($this->footnotes[$node_id])) {
        return "[^".$matches[1]."]";
    }

    $num =& $this->footnotes_numbers[$node_id];
    if (isset($num)) {
        # Seen before: bump the reference count; the new count is also
        # used as the suffix of this marker's id.
        $ref_suffix = $this->footnotes_ref_count[$node_id] += 1;
    } else {
        # First reference: queue the note for output and give it the
        # next number.
        $this->footnotes_ordered[$node_id] = $this->footnotes[$node_id];
        $this->footnotes_ref_count[$node_id] = 1;
        $num = $this->footnote_counter++;
        $ref_suffix = '';
    }

    $link_attr = "";
    if ($this->fn_link_class != "") {
        $link_attr .= " class=\""
            . $this->encodeAttribute($this->fn_link_class) . "\"";
    }
    if ($this->fn_link_title != "") {
        $link_attr .= " title=\""
            . $this->encodeAttribute($this->fn_link_title) . "\"";
    }
    $link_attr = str_replace("%%", $num, $link_attr);

    $anchor_id = $this->encodeAttribute($node_id);

    return "<sup id=\"fnref$ref_suffix:$anchor_id\">"
        . "<a href=\"#fn:$anchor_id\"$link_attr>$num</a>"
        . "</sup>";
}
3045
3046
3047         ### Abbreviations ###
3048
protected function stripAbbreviations($text) {
#
# Strips abbreviation definitions from $text. Each definition is handed
# to _stripAbbreviations_callback, which records it and replaces it with
# an empty string. Returns the remaining text.
#
    $max_indent = $this->tab_width - 1;

    # Abbreviation definitions are in the form: *[word]: description
    $definition_re = '{
        ^[ ]{0,'.$max_indent.'}\*\[(.+?)\][ ]?:     # abbr_id = $1
        (.*)                    # text = $2 (no blank lines allowed)
        }xm';

    return preg_replace_callback(
        $definition_re,
        array($this, '_stripAbbreviations_callback'),
        $text);
}
protected function _stripAbbreviations_callback($matches) {
#
# Callback for one abbreviation definition.
#
#   $matches[1] - the abbreviated word
#   $matches[2] - its description
#
# Adds the quoted word as an alternative to abbr_word_re, stores the
# trimmed description, and returns an empty string so the definition
# vanishes from the text.
#
    $word = $matches[1];

    # Alternatives are joined with "|"; no separator before the first.
    $separator = $this->abbr_word_re ? '|' : '';
    $this->abbr_word_re .= $separator . preg_quote($word);

    $this->abbr_desciptions[$word] = trim($matches[2]);

    return '';
}
3073
3074
protected function doAbbreviations($text) {
#
# Find defined abbreviations in text and wrap them in <abbr> elements.
# Returns $text unchanged when abbr_word_re is empty.
#
    if (!$this->abbr_word_re) {
        return $text;
    }

    // The /x modifier cannot be used because abbr_word_re may contain
    // significant spaces. The lookarounds reject matches adjacent to a
    // word character or to \x1A.
    $pattern = '{(?<![\w\x1A])(?:' . $this->abbr_word_re . ')(?![\w\x1A])}';

    return preg_replace_callback(
        $pattern,
        array($this, '_doAbbreviations_callback'),
        $text);
}
protected function _doAbbreviations_callback($matches) {
#
# Callback for one abbreviation occurrence ($matches[0]).
# Returns the word wrapped in <abbr> (with a title attribute when a
# non-empty description is stored), passed through hashPart; returns the
# word unchanged when no description entry exists for it.
#
    $word = $matches[0];

    if (!isset($this->abbr_desciptions[$word])) {
        return $word;
    }

    $title = $this->abbr_desciptions[$word];
    if (empty($title)) {
        return $this->hashPart("<abbr>$word</abbr>");
    }

    $title = $this->encodeAttribute($title);
    return $this->hashPart("<abbr title=\"$title\">$word</abbr>");
}
3105
3106 }