library/php-markdown/Michelf/Markdown.php

   1 <?php
   2 #
   3 # Markdown  -  A text-to-HTML conversion tool for web writers
   4 #
   5 # PHP Markdown
   6 # Copyright (c) 2004-2014 Michel Fortin
   7 # <http://michelf.com/projects/php-markdown/>
   8 #
   9 # Original Markdown
  10 # Copyright (c) 2004-2006 John Gruber
  11 # <http://daringfireball.net/projects/markdown/>
  12 #
  13 namespace Michelf;
  14
  15
  16 #
  17 # Markdown Parser Class
  18 #
  19
  20 class Markdown implements MarkdownInterface {
  21
  22         ### Version ###
  23
  24         const  MARKDOWNLIB_VERSION  =  "1.4.1";
  25
  26         ### Simple Function Interface ###
  27
  28         public static function defaultTransform($text) {
  29         #
  30         # Initialize the parser and return the result of its transform method.
  31         # This will work fine for derived classes too.
  32         #
  33                 # Take parser class on which this function was called.
  34                 $parser_class = \get_called_class();
  35
  36                 # try to take parser from the static parser list
  37                 static $parser_list;
  38                 $parser =& $parser_list[$parser_class];
  39
  40                 # create the parser it not already set
  41                 if (!$parser)
  42                         $parser = new $parser_class;
  43
  44                 # Transform text using parser.
  45                 return $parser->transform($text);
  46         }
  47
  48         ### Configuration Variables ###
  49
  50         # Change to ">" for HTML output.
  51         public $empty_element_suffix = " />";
  52         public $tab_width = 4;
  53
  54         # Change to `true` to disallow markup or entities.
  55         public $no_markup = false;
  56         public $no_entities = false;
  57
  58         # Predefined urls and titles for reference links and images.
  59         public $predef_urls = array();
  60         public $predef_titles = array();
  61
  62         # Optional filter function for URLs
  63         public $url_filter_func = null;
  64
  65
  66         ### Parser Implementation ###
  67
  68         # Regex to match balanced [brackets].
  69         # Needed to insert a maximum bracked depth while converting to PHP.
  70         protected $nested_brackets_depth = 6;
  71         protected $nested_brackets_re;
  72
  73         protected $nested_url_parenthesis_depth = 4;
  74         protected $nested_url_parenthesis_re;
  75
  76         # Table of hash values for escaped characters:
  77         protected $escape_chars = '\`*_{}[]()>#+-.!';
  78         protected $escape_chars_re;
  79
  80
  81         public function __construct() {
  82         #
  83         # Constructor function. Initialize appropriate member variables.
  84         #
  85                 $this->_initDetab();
  86                 $this->prepareItalicsAndBold();
  87
  88                 $this->nested_brackets_re =
  89                         str_repeat('(?>[^\[\]]+|\[', $this->nested_brackets_depth).
  90                         str_repeat('\])*', $this->nested_brackets_depth);
  91
  92                 $this->nested_url_parenthesis_re =
  93                         str_repeat('(?>[^()\s]+|\(', $this->nested_url_parenthesis_depth).
  94                         str_repeat('(?>\)))*', $this->nested_url_parenthesis_depth);
  95
  96                 $this->escape_chars_re = '['.preg_quote($this->escape_chars).']';
  97
  98                 # Sort document, block, and span gamut in ascendent priority order.
  99                 asort($this->document_gamut);
 100                 asort($this->block_gamut);
 101                 asort($this->span_gamut);
 102         }
 103
 104
 105         # Internal hashes used during transformation.
 106         protected $urls = array();
 107         protected $titles = array();
 108         protected $html_hashes = array();
 109
 110         # Status flag to avoid invalid nesting.
 111         protected $in_anchor = false;
 112
 113
 114         protected function setup() {
 115         #
 116         # Called before the transformation process starts to setup parser
 117         # states.
 118         #
 119                 # Clear global hashes.
 120                 $this->urls = $this->predef_urls;
 121                 $this->titles = $this->predef_titles;
 122                 $this->html_hashes = array();
 123
 124                 $this->in_anchor = false;
 125         }
 126
 127         protected function teardown() {
 128         #
 129         # Called after the transformation process to clear any variable
 130         # which may be taking up memory unnecessarly.
 131         #
 132                 $this->urls = array();
 133                 $this->titles = array();
 134                 $this->html_hashes = array();
 135         }
 136
 137
 138         public function transform($text) {
 139         #
 140         # Main function. Performs some preprocessing on the input text
 141         # and pass it through the document gamut.
 142         #
 143                 $this->setup();
 144
 145                 # Remove UTF-8 BOM and marker character in input, if present.
 146                 $text = preg_replace('{^\xEF\xBB\xBF|\x1A}', '', $text);
 147
 148                 # Standardize line endings:
 149                 #   DOS to Unix and Mac to Unix
 150                 $text = preg_replace('{\r\n?}', "\n", $text);
 151
 152                 # Make sure $text ends with a couple of newlines:
 153                 $text .= "\n\n";
 154
 155                 # Convert all tabs to spaces.
 156                 $text = $this->detab($text);
 157
 158                 # Turn block-level HTML blocks into hash entries
 159                 $text = $this->hashHTMLBlocks($text);
 160
 161                 # Strip any lines consisting only of spaces and tabs.
 162                 # This makes subsequent regexen easier to write, because we can
 163                 # match consecutive blank lines with /\n+/ instead of something
 164                 # contorted like /[ ]*\n+/ .
 165                 $text = preg_replace('/^[ ]+$/m', '', $text);
 166
 167                 # Run document gamut methods.
 168                 foreach ($this->document_gamut as $method => $priority) {
 169                         $text = $this->$method($text);
 170                 }
 171
 172                 $this->teardown();
 173
 174                 return $text . "\n";
 175         }
 176
 177         protected $document_gamut = array(
 178                 # Strip link definitions, store in hashes.
 179                 "stripLinkDefinitions" => 20,
 180
 181                 "runBasicBlockGamut"   => 30,
 182                 );
 183
 184
 185         protected function stripLinkDefinitions($text) {
 186         #
 187         # Strips link definitions from text, stores the URLs and titles in
 188         # hash references.
 189         #
 190                 $less_than_tab = $this->tab_width - 1;
 191
 192                 # Link defs are in the form: ^[id]: url "optional title"
 193                 $text = preg_replace_callback('{
 194                                                         ^[ ]{0,'.$less_than_tab.'}\[(.+)\][ ]?: # id = $1
 195                                                           [ ]*
 196                                                           \n?                           # maybe *one* newline
 197                                                           [ ]*
 198                                                         (?:
 199                                                           <(.+?)>                       # url = $2
 200                                                         |
 201                                                           (\S+?)                        # url = $3
 202                                                         )
 203                                                           [ ]*
 204                                                           \n?                           # maybe one newline
 205                                                           [ ]*
 206                                                         (?:
 207                                                                 (?<=\s)                 # lookbehind for whitespace
 208                                                                 ["(]
 209                                                                 (.*?)                   # title = $4
 210                                                                 [")]
 211                                                                 [ ]*
 212                                                         )?      # title is optional
 213                                                         (?:\n+|\Z)
 214                         }xm',
 215                         array($this, '_stripLinkDefinitions_callback'),
 216                         $text);
 217                 return $text;
 218         }
 219         protected function _stripLinkDefinitions_callback($matches) {
 220                 $link_id = strtolower($matches[1]);
 221                 $url = $matches[2] == '' ? $matches[3] : $matches[2];
 222                 $this->urls[$link_id] = $url;
 223                 $this->titles[$link_id] =& $matches[4];
 224                 return ''; # String that will replace the block
 225         }
 226
 227
 228         protected function hashHTMLBlocks($text) {
 229                 if ($this->no_markup)  return $text;
 230
 231                 $less_than_tab = $this->tab_width - 1;
 232
 233                 # Hashify HTML blocks:
 234                 # We only want to do this for block-level HTML tags, such as headers,
 235                 # lists, and tables. That's because we still want to wrap <p>s around
 236                 # "paragraphs" that are wrapped in non-block-level tags, such as anchors,
 237                 # phrase emphasis, and spans. The list of tags we're looking for is
 238                 # hard-coded:
 239                 #
 240                 # *  List "a" is made of tags which can be both inline or block-level.
 241                 #    These will be treated block-level when the start tag is alone on
 242                 #    its line, otherwise they're not matched here and will be taken as
 243                 #    inline later.
 244                 # *  List "b" is made of tags which are always block-level;
 245                 #
 246                 $block_tags_a_re = 'ins|del';
 247                 $block_tags_b_re = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|'.
 248                                                    'script|noscript|style|form|fieldset|iframe|math|svg|'.
 249                                                    'article|section|nav|aside|hgroup|header|footer|'.
 250                                                    'figure';
 251
 252                 # Regular expression for the content of a block tag.
 253                 $nested_tags_level = 4;
 254                 $attr = '
 255                         (?>                             # optional tag attributes
 256                           \s                    # starts with whitespace
 257                           (?>
 258                                 [^>"/]+         # text outside quotes
 259                           |
 260                                 /+(?!>)         # slash not followed by ">"
 261                           |
 262                                 "[^"]*"         # text inside double quotes (tolerate ">")
 263                           |
 264                                 \'[^\']*\'      # text inside single quotes (tolerate ">")
 265                           )*
 266                         )?
 267                         ';
 268                 $content =
 269                         str_repeat('
 270                                 (?>
 271                                   [^<]+                 # content without tag
 272                                 |
 273                                   <\2                   # nested opening tag
 274                                         '.$attr.'       # attributes
 275                                         (?>
 276                                           />
 277                                         |
 278                                           >', $nested_tags_level).      # end of opening tag
 279                                           '.*?'.                                        # last level nested tag content
 280                         str_repeat('
 281                                           </\2\s*>      # closing nested tag
 282                                         )
 283                                   |
 284                                         <(?!/\2\s*>     # other tags with a different name
 285                                   )
 286                                 )*',
 287                                 $nested_tags_level);
 288                 $content2 = str_replace('\2', '\3', $content);
 289
 290                 # First, look for nested blocks, e.g.:
 291                 #       <div>
 292                 #               <div>
 293                 #               tags for inner block must be indented.
 294                 #               </div>
 295                 #       </div>
 296                 #
 297                 # The outermost tags must start at the left margin for this to match, and
 298                 # the inner nested divs must be indented.
 299                 # We need to do this before the next, more liberal match, because the next
 300                 # match will start at the first `<div>` and stop at the first `</div>`.
 301                 $text = preg_replace_callback('{(?>
 302                         (?>
 303                                 (?<=\n)                 # Starting on its own line
 304                                 |                               # or
 305                                 \A\n?                   # the at beginning of the doc
 306                         )
 307                         (                                               # save in $1
 308
 309                           # Match from `\n<tag>` to `</tag>\n`, handling nested tags
 310                           # in between.
 311
 312                                                 [ ]{0,'.$less_than_tab.'}
 313                                                 <('.$block_tags_b_re.')# start tag = $2
 314                                                 '.$attr.'>                      # attributes followed by > and \n
 315                                                 '.$content.'            # content, support nesting
 316                                                 </\2>                           # the matching end tag
 317                                                 [ ]*                            # trailing spaces/tabs
 318                                                 (?=\n+|\Z)      # followed by a newline or end of document
 319
 320                         | # Special version for tags of group a.
 321
 322                                                 [ ]{0,'.$less_than_tab.'}
 323                                                 <('.$block_tags_a_re.')# start tag = $3
 324                                                 '.$attr.'>[ ]*\n        # attributes followed by >
 325                                                 '.$content2.'           # content, support nesting
 326                                                 </\3>                           # the matching end tag
 327                                                 [ ]*                            # trailing spaces/tabs
 328                                                 (?=\n+|\Z)      # followed by a newline or end of document
 329
 330                         | # Special case just for <hr />. It was easier to make a special
 331                           # case than to make the other regex more complicated.
 332
 333                                                 [ ]{0,'.$less_than_tab.'}
 334                                                 <(hr)                           # start tag = $2
 335                                                 '.$attr.'                       # attributes
 336                                                 /?>                                     # the matching end tag
 337                                                 [ ]*
 338                                                 (?=\n{2,}|\Z)           # followed by a blank line or end of document
 339
 340                         | # Special case for standalone HTML comments:
 341
 342                                         [ ]{0,'.$less_than_tab.'}
 343                                         (?s:
 344                                                 <!-- .*? -->
 345                                         )
 346                                         [ ]*
 347                                         (?=\n{2,}|\Z)           # followed by a blank line or end of document
 348
 349                         | # PHP and ASP-style processor instructions (<? and <%)
 350
 351                                         [ ]{0,'.$less_than_tab.'}
 352                                         (?s:
 353                                                 <([?%])                 # $2
 354                                                 .*?
 355                                                 \2>
 356                                         )
 357                                         [ ]*
 358                                         (?=\n{2,}|\Z)           # followed by a blank line or end of document
 359
 360                         )
 361                         )}Sxmi',
 362                         array($this, '_hashHTMLBlocks_callback'),
 363                         $text);
 364
 365                 return $text;
 366         }
 367         protected function _hashHTMLBlocks_callback($matches) {
 368                 $text = $matches[1];
 369                 $key  = $this->hashBlock($text);
 370                 return "\n\n$key\n\n";
 371         }
 372
 373
 374         protected function hashPart($text, $boundary = 'X') {
 375         #
 376         # Called whenever a tag must be hashed when a function insert an atomic
 377         # element in the text stream. Passing $text to through this function gives
 378         # a unique text-token which will be reverted back when calling unhash.
 379         #
 380         # The $boundary argument specify what character should be used to surround
 381         # the token. By convension, "B" is used for block elements that needs not
 382         # to be wrapped into paragraph tags at the end, ":" is used for elements
 383         # that are word separators and "X" is used in the general case.
 384         #
 385                 # Swap back any tag hash found in $text so we do not have to `unhash`
 386                 # multiple times at the end.
 387                 $text = $this->unhash($text);
 388
 389                 # Then hash the block.
 390                 static $i = 0;
 391                 $key = "$boundary\x1A" . ++$i . $boundary;
 392                 $this->html_hashes[$key] = $text;
 393                 return $key; # String that will replace the tag.
 394         }
 395
 396
 397         protected function hashBlock($text) {
 398         #
 399         # Shortcut function for hashPart with block-level boundaries.
 400         #
 401                 return $this->hashPart($text, 'B');
 402         }
 403
 404
 405         protected $block_gamut = array(
 406         #
 407         # These are all the transformations that form block-level
 408         # tags like paragraphs, headers, and list items.
 409         #
 410                 "doHeaders"         => 10,
 411                 "doHorizontalRules" => 20,
 412
 413                 "doLists"           => 40,
 414                 "doCodeBlocks"      => 50,
 415                 "doBlockQuotes"     => 60,
 416                 );
 417
 418         protected function runBlockGamut($text) {
 419         #
 420         # Run block gamut tranformations.
 421         #
 422                 # We need to escape raw HTML in Markdown source before doing anything
 423                 # else. This need to be done for each block, and not only at the
 424                 # begining in the Markdown function since hashed blocks can be part of
 425                 # list items and could have been indented. Indented blocks would have
 426                 # been seen as a code block in a previous pass of hashHTMLBlocks.
 427                 $text = $this->hashHTMLBlocks($text);
 428
 429                 return $this->runBasicBlockGamut($text);
 430         }
 431
 432         protected function runBasicBlockGamut($text) {
 433         #
 434         # Run block gamut tranformations, without hashing HTML blocks. This is
 435         # useful when HTML blocks are known to be already hashed, like in the first
 436         # whole-document pass.
 437         #
 438                 foreach ($this->block_gamut as $method => $priority) {
 439                         $text = $this->$method($text);
 440                 }
 441
 442                 # Finally form paragraph and restore hashed blocks.
 443                 $text = $this->formParagraphs($text);
 444
 445                 return $text;
 446         }
 447
 448
 449         protected function doHorizontalRules($text) {
 450                 # Do Horizontal Rules:
 451                 return preg_replace(
 452                         '{
 453                                 ^[ ]{0,3}       # Leading space
 454                                 ([-*_])         # $1: First marker
 455                                 (?>                     # Repeated marker group
 456                                         [ ]{0,2}        # Zero, one, or two spaces.
 457                                         \1                      # Marker character
 458                                 ){2,}           # Group repeated at least twice
 459                                 [ ]*            # Tailing spaces
 460                                 $                       # End of line.
 461                         }mx',
 462                         "\n".$this->hashBlock("<hr$this->empty_element_suffix")."\n",
 463                         $text);
 464         }
 465
 466
 467         protected $span_gamut = array(
 468         #
 469         # These are all the transformations that occur *within* block-level
 470         # tags like paragraphs, headers, and list items.
 471         #
 472                 # Process character escapes, code spans, and inline HTML
 473                 # in one shot.
 474                 "parseSpan"           => -30,
 475
 476                 # Process anchor and image tags. Images must come first,
 477                 # because ![foo][f] looks like an anchor.
 478                 "doImages"            =>  10,
 479                 "doAnchors"           =>  20,
 480
 481                 # Make links out of things like `<http://example.com/>`
 482                 # Must come after doAnchors, because you can use < and >
 483                 # delimiters in inline links like [this](<url>).
 484                 "doAutoLinks"         =>  30,
 485                 "encodeAmpsAndAngles" =>  40,
 486
 487                 "doItalicsAndBold"    =>  50,
 488                 "doHardBreaks"        =>  60,
 489                 );
 490
 491         protected function runSpanGamut($text) {
 492         #
 493         # Run span gamut tranformations.
 494         #
 495                 foreach ($this->span_gamut as $method => $priority) {
 496                         $text = $this->$method($text);
 497                 }
 498
 499                 return $text;
 500         }
 501
 502
 503         protected function doHardBreaks($text) {
 504                 # Do hard breaks:
 505                 return preg_replace_callback('/ {2,}\n/',
 506                         array($this, '_doHardBreaks_callback'), $text);
 507         }
 508         protected function _doHardBreaks_callback($matches) {
 509                 return $this->hashPart("<br$this->empty_element_suffix\n");
 510         }
 511
 512
 513         protected function doAnchors($text) {
 514         #
 515         # Turn Markdown link shortcuts into XHTML <a> tags.
 516         #
 517                 if ($this->in_anchor) return $text;
 518                 $this->in_anchor = true;
 519
 520                 #
 521                 # First, handle reference-style links: [link text] [id]
 522                 #
 523                 $text = preg_replace_callback('{
 524                         (                                       # wrap whole match in $1
 525                           \[
 526                                 ('.$this->nested_brackets_re.') # link text = $2
 527                           \]
 528
 529                           [ ]?                          # one optional space
 530                           (?:\n[ ]*)?           # one optional newline followed by spaces
 531
 532                           \[
 533                                 (.*?)           # id = $3
 534                           \]
 535                         )
 536                         }xs',
 537                         array($this, '_doAnchors_reference_callback'), $text);
 538
 539                 #
 540                 # Next, inline-style links: [link text](url "optional title")
 541                 #
 542                 $text = preg_replace_callback('{
 543                         (                               # wrap whole match in $1
 544                           \[
 545                                 ('.$this->nested_brackets_re.') # link text = $2
 546                           \]
 547                           \(                    # literal paren
 548                                 [ \n]*
 549                                 (?:
 550                                         <(.+?)> # href = $3
 551                                 |
 552                                         ('.$this->nested_url_parenthesis_re.')  # href = $4
 553                                 )
 554                                 [ \n]*
 555                                 (                       # $5
 556                                   ([\'"])       # quote char = $6
 557                                   (.*?)         # Title = $7
 558                                   \6            # matching quote
 559                                   [ \n]*        # ignore any spaces/tabs between closing quote and )
 560                                 )?                      # title is optional
 561                           \)
 562                         )
 563                         }xs',
 564                         array($this, '_doAnchors_inline_callback'), $text);
 565
 566                 #
 567                 # Last, handle reference-style shortcuts: [link text]
 568                 # These must come last in case you've also got [link text][1]
 569                 # or [link text](/foo)
 570                 #
 571                 $text = preg_replace_callback('{
 572                         (                                       # wrap whole match in $1
 573                           \[
 574                                 ([^\[\]]+)              # link text = $2; can\'t contain [ or ]
 575                           \]
 576                         )
 577                         }xs',
 578                         array($this, '_doAnchors_reference_callback'), $text);
 579
 580                 $this->in_anchor = false;
 581                 return $text;
 582         }
 583         protected function _doAnchors_reference_callback($matches) {
 584                 $whole_match =  $matches[1];
 585                 $link_text   =  $matches[2];
 586                 $link_id     =& $matches[3];
 587
 588                 if ($link_id == "") {
 589                         # for shortcut links like [this][] or [this].
 590                         $link_id = $link_text;
 591                 }
 592
 593                 # lower-case and turn embedded newlines into spaces
 594                 $link_id = strtolower($link_id);
 595                 $link_id = preg_replace('{[ ]?\n}', ' ', $link_id);
 596
 597                 if (isset($this->urls[$link_id])) {
 598                         $url = $this->urls[$link_id];
 599                         $url = $this->encodeURLAttribute($url);
 600
 601                         $result = "<a href=\"$url\"";
 602                         if ( isset( $this->titles[$link_id] ) ) {
 603                                 $title = $this->titles[$link_id];
 604                                 $title = $this->encodeAttribute($title);
 605                                 $result .=  " title=\"$title\"";
 606                         }
 607
 608                         $link_text = $this->runSpanGamut($link_text);
 609                         $result .= ">$link_text</a>";
 610                         $result = $this->hashPart($result);
 611                 }
 612                 else {
 613                         $result = $whole_match;
 614                 }
 615                 return $result;
 616         }
 617         protected function _doAnchors_inline_callback($matches) {
 618                 $whole_match    =  $matches[1];
 619                 $link_text              =  $this->runSpanGamut($matches[2]);
 620                 $url                    =  $matches[3] == '' ? $matches[4] : $matches[3];
 621                 $title                  =& $matches[7];
 622
 623                 // if the URL was of the form <s p a c e s> it got caught by the HTML
 624                 // tag parser and hashed. Need to reverse the process before using the URL.
 625                 $unhashed = $this->unhash($url);
 626                 if ($unhashed != $url)
 627                         $url = preg_replace('/^<(.*)>$/', '\1', $unhashed);
 628
 629                 $url = $this->encodeURLAttribute($url);
 630
 631                 $result = "<a href=\"$url\"";
 632                 if (isset($title)) {
 633                         $title = $this->encodeAttribute($title);
 634                         $result .=  " title=\"$title\"";
 635                 }
 636
 637                 $link_text = $this->runSpanGamut($link_text);
 638                 $result .= ">$link_text</a>";
 639
 640                 return $this->hashPart($result);
 641         }
 642
 643
 644         protected function doImages($text) {
 645         #
 646         # Turn Markdown image shortcuts into <img> tags.
 647         #
 648                 #
 649                 # First, handle reference-style labeled images: ![alt text][id]
 650                 #
 651                 $text = preg_replace_callback('{
 652                         (                               # wrap whole match in $1
 653                           !\[
 654                                 ('.$this->nested_brackets_re.')         # alt text = $2
 655                           \]
 656
 657                           [ ]?                          # one optional space
 658                           (?:\n[ ]*)?           # one optional newline followed by spaces
 659
 660                           \[
 661                                 (.*?)           # id = $3
 662                           \]
 663
 664                         )
 665                         }xs',
 666                         array($this, '_doImages_reference_callback'), $text);
 667
 668                 #
 669                 # Next, handle inline images:  ![alt text](url "optional title")
 670                 # Don't forget: encode * and _
 671                 #
 672                 $text = preg_replace_callback('{
 673                         (                               # wrap whole match in $1
 674                           !\[
 675                                 ('.$this->nested_brackets_re.')         # alt text = $2
 676                           \]
 677                           \s?                   # One optional whitespace character
 678                           \(                    # literal paren
 679                                 [ \n]*
 680                                 (?:
 681                                         <(\S*)> # src url = $3
 682                                 |
 683                                         ('.$this->nested_url_parenthesis_re.')  # src url = $4
 684                                 )
 685                                 [ \n]*
 686                                 (                       # $5
 687                                   ([\'"])       # quote char = $6
 688                                   (.*?)         # title = $7
 689                                   \6            # matching quote
 690                                   [ \n]*
 691                                 )?                      # title is optional
 692                           \)
 693                         )
 694                         }xs',
 695                         array($this, '_doImages_inline_callback'), $text);
 696
 697                 return $text;
 698         }
 699         protected function _doImages_reference_callback($matches) {
 700                 $whole_match = $matches[1];
 701                 $alt_text    = $matches[2];
 702                 $link_id     = strtolower($matches[3]);
 703
 704                 if ($link_id == "") {
 705                         $link_id = strtolower($alt_text); # for shortcut links like ![this][].
 706                 }
 707
 708                 $alt_text = $this->encodeAttribute($alt_text);
 709                 if (isset($this->urls[$link_id])) {
 710                         $url = $this->encodeURLAttribute($this->urls[$link_id]);
 711                         $result = "<img src=\"$url\" alt=\"$alt_text\"";
 712                         if (isset($this->titles[$link_id])) {
 713                                 $title = $this->titles[$link_id];
 714                                 $title = $this->encodeAttribute($title);
 715                                 $result .=  " title=\"$title\"";
 716                         }
 717                         $result .= $this->empty_element_suffix;
 718                         $result = $this->hashPart($result);
 719                 }
 720                 else {
 721                         # If there's no such link ID, leave intact:
 722                         $result = $whole_match;
 723                 }
 724
 725                 return $result;
 726         }
 727         protected function _doImages_inline_callback($matches) {
 728                 $whole_match    = $matches[1];
 729                 $alt_text               = $matches[2];
 730                 $url                    = $matches[3] == '' ? $matches[4] : $matches[3];
 731                 $title                  =& $matches[7];
 732
 733                 $alt_text = $this->encodeAttribute($alt_text);
 734                 $url = $this->encodeURLAttribute($url);
 735                 $result = "<img src=\"$url\" alt=\"$alt_text\"";
 736                 if (isset($title)) {
 737                         $title = $this->encodeAttribute($title);
 738                         $result .=  " title=\"$title\""; # $title already quoted
 739                 }
 740                 $result .= $this->empty_element_suffix;
 741
 742                 return $this->hashPart($result);
 743         }
 744
 745
 746         protected function doHeaders($text) {
 747                 # Setext-style headers:
 748                 #         Header 1
 749                 #         ========
 750                 #
 751                 #         Header 2
 752                 #         --------
 753                 #
 754                 $text = preg_replace_callback('{ ^(.+?)[ ]*\n(=+|-+)[ ]*\n+ }mx',
 755                         array($this, '_doHeaders_callback_setext'), $text);
 756
 757                 # atx-style headers:
 758                 #       # Header 1
 759                 #       ## Header 2
 760                 #       ## Header 2 with closing hashes ##
 761                 #       ...
 762                 #       ###### Header 6
 763                 #
 764                 $text = preg_replace_callback('{
 765                                 ^(\#{1,6})      # $1 = string of #\'s
 766                                 [ ]*
 767                                 (.+?)           # $2 = Header text
 768                                 [ ]*
 769                                 \#*                     # optional closing #\'s (not counted)
 770                                 \n+
 771                         }xm',
 772                         array($this, '_doHeaders_callback_atx'), $text);
 773
 774                 return $text;
 775         }
 776         protected function _doHeaders_callback_setext($matches) {
 777                 # Terrible hack to check we haven't found an empty list item.
 778                 if ($matches[2] == '-' && preg_match('{^-(?: |$)}', $matches[1]))
 779                         return $matches[0];
 780
 781                 $level = $matches[2]{0} == '=' ? 1 : 2;
 782                 $block = "<h$level>".$this->runSpanGamut($matches[1])."</h$level>";
 783                 return "\n" . $this->hashBlock($block) . "\n\n";
 784         }
 785         protected function _doHeaders_callback_atx($matches) {
 786                 $level = strlen($matches[1]);
 787                 $block = "<h$level>".$this->runSpanGamut($matches[2])."</h$level>";
 788                 return "\n" . $this->hashBlock($block) . "\n\n";
 789         }
 790
 791
 792         protected function doLists($text) {
 793         #
 794         # Form HTML ordered (numbered) and unordered (bulleted) lists.
 795         #
 796                 $less_than_tab = $this->tab_width - 1;
 797
 798                 # Re-usable patterns to match list item bullets and number markers:
 799                 $marker_ul_re  = '[*+-]';
 800                 $marker_ol_re  = '\d+[\.]';
 801
 802                 $markers_relist = array(
 803                         $marker_ul_re => $marker_ol_re,
 804                         $marker_ol_re => $marker_ul_re,
 805                         );
 806
 807                 foreach ($markers_relist as $marker_re => $other_marker_re) {
 808                         # Re-usable pattern to match any entirel ul or ol list:
 809                         $whole_list_re = '
 810                                 (                                                               # $1 = whole list
 811                                   (                                                             # $2
 812                                         ([ ]{0,'.$less_than_tab.'})     # $3 = number of spaces
 813                                         ('.$marker_re.')                        # $4 = first list item marker
 814                                         [ ]+
 815                                   )
 816                                   (?s:.+?)
 817                                   (                                                             # $5
 818                                           \z
 819                                         |
 820                                           \n{2,}
 821                                           (?=\S)
 822                                           (?!                                           # Negative lookahead for another list item marker
 823                                                 [ ]*
 824                                                 '.$marker_re.'[ ]+
 825                                           )
 826                                         |
 827                                           (?=                                           # Lookahead for another kind of list
 828                                             \n
 829                                                 \3                                              # Must have the same indentation
 830                                                 '.$other_marker_re.'[ ]+
 831                                           )
 832                                   )
 833                                 )
 834                         '; // mx
 835
 836                         # We use a different prefix before nested lists than top-level lists.
 837                         # See extended comment in _ProcessListItems().
 838
 839                         if ($this->list_level) {
 840                                 $text = preg_replace_callback('{
 841                                                 ^
 842                                                 '.$whole_list_re.'
 843                                         }mx',
 844                                         array($this, '_doLists_callback'), $text);
 845                         }
 846                         else {
 847                                 $text = preg_replace_callback('{
 848                                                 (?:(?<=\n)\n|\A\n?) # Must eat the newline
 849                                                 '.$whole_list_re.'
 850                                         }mx',
 851                                         array($this, '_doLists_callback'), $text);
 852                         }
 853                 }
 854
 855                 return $text;
 856         }
 857         protected function _doLists_callback($matches) {
 858                 # Re-usable patterns to match list item bullets and number markers:
 859                 $marker_ul_re  = '[*+-]';
 860                 $marker_ol_re  = '\d+[\.]';
 861                 $marker_any_re = "(?:$marker_ul_re|$marker_ol_re)";
 862
 863                 $list = $matches[1];
 864                 $list_type = preg_match("/$marker_ul_re/", $matches[4]) ? "ul" : "ol";
 865
 866                 $marker_any_re = ( $list_type == "ul" ? $marker_ul_re : $marker_ol_re );
 867
 868                 $list .= "\n";
 869                 $result = $this->processListItems($list, $marker_any_re);
 870
 871                 $result = $this->hashBlock("<$list_type>\n" . $result . "</$list_type>");
 872                 return "\n". $result ."\n\n";
 873         }
 874
 875         protected $list_level = 0;
 876
 877         protected function processListItems($list_str, $marker_any_re) {
 878         #
 879         #       Process the contents of a single ordered or unordered list, splitting it
 880         #       into individual list items.
 881         #
 882                 # The $this->list_level global keeps track of when we're inside a list.
 883                 # Each time we enter a list, we increment it; when we leave a list,
 884                 # we decrement. If it's zero, we're not in a list anymore.
 885                 #
 886                 # We do this because when we're not inside a list, we want to treat
 887                 # something like this:
 888                 #
 889                 #               I recommend upgrading to version
 890                 #               8. Oops, now this line is treated
 891                 #               as a sub-list.
 892                 #
 893                 # As a single paragraph, despite the fact that the second line starts
 894                 # with a digit-period-space sequence.
 895                 #
 896                 # Whereas when we're inside a list (or sub-list), that line will be
 897                 # treated as the start of a sub-list. What a kludge, huh? This is
 898                 # an aspect of Markdown's syntax that's hard to parse perfectly
 899                 # without resorting to mind-reading. Perhaps the solution is to
 900                 # change the syntax rules such that sub-lists must start with a
 901                 # starting cardinal number; e.g. "1." or "a.".
 902
 903                 $this->list_level++;
 904
 905                 # trim trailing blank lines:
 906                 $list_str = preg_replace("/\n{2,}\\z/", "\n", $list_str);
 907
 908                 $list_str = preg_replace_callback('{
 909                         (\n)?                                                   # leading line = $1
 910                         (^[ ]*)                                                 # leading whitespace = $2
 911                         ('.$marker_any_re.'                             # list marker and space = $3
 912                                 (?:[ ]+|(?=\n)) # space only required if item is not empty
 913                         )
 914                         ((?s:.*?))                                              # list item text   = $4
 915                         (?:(\n+(?=\n))|\n)                              # tailing blank line = $5
 916                         (?= \n* (\z | \2 ('.$marker_any_re.') (?:[ ]+|(?=\n))))
 917                         }xm',
 918                         array($this, '_processListItems_callback'), $list_str);
 919
 920                 $this->list_level--;
 921                 return $list_str;
 922         }
 923         protected function _processListItems_callback($matches) {
 924                 $item = $matches[4];
 925                 $leading_line =& $matches[1];
 926                 $leading_space =& $matches[2];
 927                 $marker_space = $matches[3];
 928                 $tailing_blank_line =& $matches[5];
 929
 930                 if ($leading_line || $tailing_blank_line ||
 931                         preg_match('/\n{2,}/', $item))
 932                 {
 933                         # Replace marker with the appropriate whitespace indentation
 934                         $item = $leading_space . str_repeat(' ', strlen($marker_space)) . $item;
 935                         $item = $this->runBlockGamut($this->outdent($item)."\n");
 936                 }
 937                 else {
 938                         # Recursion for sub-lists:
 939                         $item = $this->doLists($this->outdent($item));
 940                         $item = preg_replace('/\n+$/', '', $item);
 941                         $item = $this->runSpanGamut($item);
 942                 }
 943
 944                 return "<li>" . $item . "</li>\n";
 945         }
 946
 947
 948         protected function doCodeBlocks($text) {
 949         #
 950         #       Process Markdown `<pre><code>` blocks.
 951         #
 952                 $text = preg_replace_callback('{
 953                                 (?:\n\n|\A\n?)
 954                                 (                   # $1 = the code block -- one or more lines, starting with a space/tab
 955                                   (?>
 956                                         [ ]{'.$this->tab_width.'}  # Lines must start with a tab or a tab-width of spaces
 957                                         .*\n+
 958                                   )+
 959                                 )
 960                                 ((?=^[ ]{0,'.$this->tab_width.'}\S)|\Z) # Lookahead for non-space at line-start, or end of doc
 961                         }xm',
 962                         array($this, '_doCodeBlocks_callback'), $text);
 963
 964                 return $text;
 965         }
 966         protected function _doCodeBlocks_callback($matches) {
 967                 $codeblock = $matches[1];
 968
 969                 $codeblock = $this->outdent($codeblock);
 970                 $codeblock = htmlspecialchars($codeblock, ENT_NOQUOTES);
 971
 972                 # trim leading newlines and trailing newlines
 973                 $codeblock = preg_replace('/\A\n+|\n+\z/', '', $codeblock);
 974
 975                 $codeblock = "<pre><code>$codeblock\n</code></pre>";
 976                 return "\n\n".$this->hashBlock($codeblock)."\n\n";
 977         }
 978
 979
 980         protected function makeCodeSpan($code) {
 981         #
 982         # Create a code span markup for $code. Called from handleSpanToken.
 983         #
 984                 $code = htmlspecialchars(trim($code), ENT_NOQUOTES);
 985                 return $this->hashPart("<code>$code</code>");
 986         }
 987
 988
        # Regex fragments matching emphasis markers. In each table the key is
        # the marker that is currently open ('' when none is): the '' entry
        # matches an opening marker (either "*" or "_" flavour) that is not
        # followed by optional punctuation and whitespace, while the other
        # entries match that same marker when it is not preceded by whitespace
        # or by another marker character.

        # Single-character markers (* or _), used for <em>.
        protected $em_relist = array(
                ''  => '(?:(?<!\*)\*(?!\*)|(?<!_)_(?!_))(?![\.,:;]?\s)',
                '*' => '(?<![\s*])\*(?!\*)',
                '_' => '(?<![\s_])_(?!_)',
                );
        # Two-character markers (** or __), used for <strong>.
        protected $strong_relist = array(
                ''   => '(?:(?<!\*)\*\*(?!\*)|(?<!_)__(?!_))(?![\.,:;]?\s)',
                '**' => '(?<![\s*])\*\*(?!\*)',
                '__' => '(?<![\s_])__(?!_)',
                );
        # Three-character markers (*** or ___), used for <strong><em>.
        protected $em_strong_relist = array(
                ''    => '(?:(?<!\*)\*\*\*(?!\*)|(?<!_)___(?!_))(?![\.,:;]?\s)',
                '***' => '(?<![\s*])\*\*\*(?!\*)',
                '___' => '(?<![\s_])___(?!_)',
                );
        # Complete token regexes built from the tables above by
        # prepareItalicsAndBold(), keyed by the concatenated open em and
        # strong markers.
        protected $em_strong_prepared_relist;
1005
1006         protected function prepareItalicsAndBold() {
1007         #
1008         # Prepare regular expressions for searching emphasis tokens in any
1009         # context.
1010         #
1011                 foreach ($this->em_relist as $em => $em_re) {
1012                         foreach ($this->strong_relist as $strong => $strong_re) {
1013                                 # Construct list of allowed token expressions.
1014                                 $token_relist = array();
1015                                 if (isset($this->em_strong_relist["$em$strong"])) {
1016                                         $token_relist[] = $this->em_strong_relist["$em$strong"];
1017                                 }
1018                                 $token_relist[] = $em_re;
1019                                 $token_relist[] = $strong_re;
1020
1021                                 # Construct master expression from list.
1022                                 $token_re = '{('. implode('|', $token_relist) .')}';
1023                                 $this->em_strong_prepared_relist["$em$strong"] = $token_re;
1024                         }
1025                 }
1026         }
1027
1028         protected function doItalicsAndBold($text) {
1029                 $token_stack = array('');
1030                 $text_stack = array('');
1031                 $em = '';
1032                 $strong = '';
1033                 $tree_char_em = false;
1034
1035                 while (1) {
1036                         #
1037                         # Get prepared regular expression for seraching emphasis tokens
1038                         # in current context.
1039                         #
1040                         $token_re = $this->em_strong_prepared_relist["$em$strong"];
1041
1042                         #
1043                         # Each loop iteration search for the next emphasis token.
1044                         # Each token is then passed to handleSpanToken.
1045                         #
1046                         $parts = preg_split($token_re, $text, 2, PREG_SPLIT_DELIM_CAPTURE);
1047                         $text_stack[0] .= $parts[0];
1048                         $token =& $parts[1];
1049                         $text =& $parts[2];
1050
1051                         if (empty($token)) {
1052                                 # Reached end of text span: empty stack without emitting.
1053                                 # any more emphasis.
1054                                 while ($token_stack[0]) {
1055                                         $text_stack[1] .= array_shift($token_stack);
1056                                         $text_stack[0] .= array_shift($text_stack);
1057                                 }
1058                                 break;
1059                         }
1060
1061                         $token_len = strlen($token);
1062                         if ($tree_char_em) {
1063                                 # Reached closing marker while inside a three-char emphasis.
1064                                 if ($token_len == 3) {
1065                                         # Three-char closing marker, close em and strong.
1066                                         array_shift($token_stack);
1067                                         $span = array_shift($text_stack);
1068                                         $span = $this->runSpanGamut($span);
1069                                         $span = "<strong><em>$span</em></strong>";
1070                                         $text_stack[0] .= $this->hashPart($span);
1071                                         $em = '';
1072                                         $strong = '';
1073                                 } else {
1074                                         # Other closing marker: close one em or strong and
1075                                         # change current token state to match the other
1076                                         $token_stack[0] = str_repeat($token{0}, 3-$token_len);
1077                                         $tag = $token_len == 2 ? "strong" : "em";
1078                                         $span = $text_stack[0];
1079                                         $span = $this->runSpanGamut($span);
1080                                         $span = "<$tag>$span</$tag>";
1081                                         $text_stack[0] = $this->hashPart($span);
1082                                         $$tag = ''; # $$tag stands for $em or $strong
1083                                 }
1084                                 $tree_char_em = false;
1085                         } else if ($token_len == 3) {
1086                                 if ($em) {
1087                                         # Reached closing marker for both em and strong.
1088                                         # Closing strong marker:
1089                                         for ($i = 0; $i < 2; ++$i) {
1090                                                 $shifted_token = array_shift($token_stack);
1091                                                 $tag = strlen($shifted_token) == 2 ? "strong" : "em";
1092                                                 $span = array_shift($text_stack);
1093                                                 $span = $this->runSpanGamut($span);
1094                                                 $span = "<$tag>$span</$tag>";
1095                                                 $text_stack[0] .= $this->hashPart($span);
1096                                                 $$tag = ''; # $$tag stands for $em or $strong
1097                                         }
1098                                 } else {
1099                                         # Reached opening three-char emphasis marker. Push on token
1100                                         # stack; will be handled by the special condition above.
1101                                         $em = $token{0};
1102                                         $strong = "$em$em";
1103                                         array_unshift($token_stack, $token);
1104                                         array_unshift($text_stack, '');
1105                                         $tree_char_em = true;
1106                                 }
1107                         } else if ($token_len == 2) {
1108                                 if ($strong) {
1109                                         # Unwind any dangling emphasis marker:
1110                                         if (strlen($token_stack[0]) == 1) {
1111                                                 $text_stack[1] .= array_shift($token_stack);
1112                                                 $text_stack[0] .= array_shift($text_stack);
1113                                         }
1114                                         # Closing strong marker:
1115                                         array_shift($token_stack);
1116                                         $span = array_shift($text_stack);
1117                                         $span = $this->runSpanGamut($span);
1118                                         $span = "<strong>$span</strong>";
1119                                         $text_stack[0] .= $this->hashPart($span);
1120                                         $strong = '';
1121                                 } else {
1122                                         array_unshift($token_stack, $token);
1123                                         array_unshift($text_stack, '');
1124                                         $strong = $token;
1125                                 }
1126                         } else {
1127                                 # Here $token_len == 1
1128                                 if ($em) {
1129                                         if (strlen($token_stack[0]) == 1) {
1130                                                 # Closing emphasis marker:
1131                                                 array_shift($token_stack);
1132                                                 $span = array_shift($text_stack);
1133                                                 $span = $this->runSpanGamut($span);
1134                                                 $span = "<em>$span</em>";
1135                                                 $text_stack[0] .= $this->hashPart($span);
1136                                                 $em = '';
1137                                         } else {
1138                                                 $text_stack[0] .= $token;
1139                                         }
1140                                 } else {
1141                                         array_unshift($token_stack, $token);
1142                                         array_unshift($text_stack, '');
1143                                         $em = $token;
1144                                 }
1145                         }
1146                 }
1147                 return $text_stack[0];
1148         }
1149
1150
1151         protected function doBlockQuotes($text) {
1152                 $text = preg_replace_callback('/
1153                           (                                                             # Wrap whole match in $1
1154                                 (?>
1155                                   ^[ ]*>[ ]?                    # ">" at the start of a line
1156                                         .+\n                                    # rest of the first line
1157                                   (.+\n)*                                       # subsequent consecutive lines
1158                                   \n*                                           # blanks
1159                                 )+
1160                           )
1161                         /xm',
1162                         array($this, '_doBlockQuotes_callback'), $text);
1163
1164                 return $text;
1165         }
1166         protected function _doBlockQuotes_callback($matches) {
1167                 $bq = $matches[1];
1168                 # trim one level of quoting - trim whitespace-only lines
1169                 $bq = preg_replace('/^[ ]*>[ ]?|^[ ]+$/m', '', $bq);
1170                 $bq = $this->runBlockGamut($bq);                # recurse
1171
1172                 $bq = preg_replace('/^/m', "  ", $bq);
1173                 # These leading spaces cause problem with <pre> content,
1174                 # so we need to fix that:
1175                 $bq = preg_replace_callback('{(\s*<pre>.+?</pre>)}sx',
1176                         array($this, '_doBlockQuotes_callback2'), $bq);
1177
1178                 return "\n". $this->hashBlock("<blockquote>\n$bq\n</blockquote>")."\n\n";
1179         }
1180         protected function _doBlockQuotes_callback2($matches) {
1181                 $pre = $matches[1];
1182                 $pre = preg_replace('/^  /m', '', $pre);
1183                 return $pre;
1184         }
1185
1186
1187         protected function formParagraphs($text) {
1188         #
1189         #       Params:
1190         #               $text - string to process with html <p> tags
1191         #
1192                 # Strip leading and trailing lines:
1193                 $text = preg_replace('/\A\n+|\n+\z/', '', $text);
1194
1195                 $grafs = preg_split('/\n{2,}/', $text, -1, PREG_SPLIT_NO_EMPTY);
1196
1197                 #
1198                 # Wrap <p> tags and unhashify HTML blocks
1199                 #
1200                 foreach ($grafs as $key => $value) {
1201                         if (!preg_match('/^B\x1A[0-9]+B$/', $value)) {
1202                                 # Is a paragraph.
1203                                 $value = $this->runSpanGamut($value);
1204                                 $value = preg_replace('/^([ ]*)/', "<p>", $value);
1205                                 $value .= "</p>";
1206                                 $grafs[$key] = $this->unhash($value);
1207                         }
1208                         else {
1209                                 # Is a block.
1210                                 # Modify elements of @grafs in-place...
1211                                 $graf = $value;
1212                                 $block = $this->html_hashes[$graf];
1213                                 $graf = $block;
1214 //                              if (preg_match('{
1215 //                                      \A
1216 //                                      (                                                       # $1 = <div> tag
1217 //                                        <div  \s+
1218 //                                        [^>]*
1219 //                                        \b
1220 //                                        markdown\s*=\s*  ([\'"])      #       $2 = attr quote char
1221 //                                        1
1222 //                                        \2
1223 //                                        [^>]*
1224 //                                        >
1225 //                                      )
1226 //                                      (                                                       # $3 = contents
1227 //                                      .*
1228 //                                      )
1229 //                                      (</div>)                                        # $4 = closing tag
1230 //                                      \z
1231 //                                      }xs', $block, $matches))
1232 //                              {
1233 //                                      list(, $div_open, , $div_content, $div_close) = $matches;
1234 //
1235 //                                      # We can't call Markdown(), because that resets the hash;
1236 //                                      # that initialization code should be pulled into its own sub, though.
1237 //                                      $div_content = $this->hashHTMLBlocks($div_content);
1238 //
1239 //                                      # Run document gamut methods on the content.
1240 //                                      foreach ($this->document_gamut as $method => $priority) {
1241 //                                              $div_content = $this->$method($div_content);
1242 //                                      }
1243 //
1244 //                                      $div_open = preg_replace(
1245 //                                              '{\smarkdown\s*=\s*([\'"]).+?\1}', '', $div_open);
1246 //
1247 //                                      $graf = $div_open . "\n" . $div_content . "\n" . $div_close;
1248 //                              }
1249                                 $grafs[$key] = $graf;
1250                         }
1251                 }
1252
1253                 return implode("\n\n", $grafs);
1254         }
1255
1256
1257         protected function encodeAttribute($text) {
1258         #
1259         # Encode text for a double-quoted HTML attribute. This function
1260         # is *not* suitable for attributes enclosed in single quotes.
1261         #
1262                 $text = $this->encodeAmpsAndAngles($text);
1263                 $text = str_replace('"', '&quot;', $text);
1264                 return $text;
1265         }
1266
1267
1268         protected function encodeURLAttribute($url, &$text = null) {
1269         #
1270         # Encode text for a double-quoted HTML attribute containing a URL,
1271         # applying the URL filter if set. Also generates the textual
1272         # representation for the URL (removing mailto: or tel:) storing it in $text.
1273         # This function is *not* suitable for attributes enclosed in single quotes.
1274         #
1275                 if ($this->url_filter_func)
1276                         $url = call_user_func($this->url_filter_func, $url);
1277
1278                 if (preg_match('{^mailto:}i', $url))
1279                         $url = $this->encodeEntityObfuscatedAttribute($url, $text, 7);
1280                 else if (preg_match('{^tel:}i', $url))
1281                 {
1282                         $url = $this->encodeAttribute($url);
1283                         $text = substr($url, 4);
1284                 }
1285                 else
1286                 {
1287                         $url = $this->encodeAttribute($url);
1288                         $text = $url;
1289                 }
1290
1291                 return $url;
1292         }
1293
1294
1295         protected function encodeAmpsAndAngles($text) {
1296         #
1297         # Smart processing for ampersands and angle brackets that need to
1298         # be encoded. Valid character entities are left alone unless the
1299         # no-entities mode is set.
1300         #
1301                 if ($this->no_entities) {
1302                         $text = str_replace('&', '&amp;', $text);
1303                 } else {
1304                         # Ampersand-encoding based entirely on Nat Irons's Amputator
1305                         # MT plugin: <http://bumppo.net/projects/amputator/>
1306                         $text = preg_replace('/&(?!#?[xX]?(?:[0-9a-fA-F]+|\w+);)/',
1307                                                                 '&amp;', $text);
1308                 }
1309                 # Encode remaining <'s
1310                 $text = str_replace('<', '&lt;', $text);
1311
1312                 return $text;
1313         }
1314
1315
1316         protected function doAutoLinks($text) {
1317                 $text = preg_replace_callback('{<((https?|ftp|dict|tel):[^\'">\s]+)>}i',
1318                         array($this, '_doAutoLinks_url_callback'), $text);
1319
1320                 # Email addresses: <address@domain.foo>
1321                 $text = preg_replace_callback('{
1322                         <
1323                         (?:mailto:)?
1324                         (
1325                                 (?:
1326                                         [-!#$%&\'*+/=?^_`.{|}~\w\x80-\xFF]+
1327                                 |
1328                                         ".*?"
1329                                 )
1330                                 \@
1331                                 (?:
1332                                         [-a-z0-9\x80-\xFF]+(\.[-a-z0-9\x80-\xFF]+)*\.[a-z]+
1333                                 |
1334                                         \[[\d.a-fA-F:]+\]       # IPv4 & IPv6
1335                                 )
1336                         )
1337                         >
1338                         }xi',
1339                         array($this, '_doAutoLinks_email_callback'), $text);
1340
1341                 return $text;
1342         }
1343         protected function _doAutoLinks_url_callback($matches) {
1344                 $url = $this->encodeURLAttribute($matches[1], $text);
1345                 $link = "<a href=\"$url\">$text</a>";
1346                 return $this->hashPart($link);
1347         }
1348         protected function _doAutoLinks_email_callback($matches) {
1349                 $addr = $matches[1];
1350                 $url = $this->encodeURLAttribute("mailto:$addr", $text);
1351                 $link = "<a href=\"$url\">$text</a>";
1352                 return $this->hashPart($link);
1353         }
1354
1355
1356         protected function encodeEntityObfuscatedAttribute($text, &$tail = null, $head_length = 0) {
1357         #
1358         #       Input: some text to obfuscate, e.g. "mailto:foo@example.com"
1359         #
1360         #       Output: the same text but with most characters encoded as either a
1361         #               decimal or hex entity, in the hopes of foiling most address
1362         #               harvesting spam bots. E.g.:
1363         #
1364         #        &#109;&#x61;&#105;&#x6c;&#116;&#x6f;&#58;&#x66;o&#111;
1365         #        &#x40;&#101;&#x78;&#97;&#x6d;&#112;&#x6c;&#101;&#46;&#x63;&#111;
1366         #        &#x6d;
1367         #
1368         #       Note: the additional output $tail is assigned the same value as the
1369         #       ouput, minus the number of characters specified by $head_length.
1370         #
1371         #       Based by a filter by Matthew Wickline, posted to BBEdit-Talk.
1372         #   With some optimizations by Milian Wolff. Forced encoding of HTML
1373         #       attribute special characters by Allan Odgaard.
1374         #
1375                 if ($text == "") return $tail = "";
1376
1377                 $chars = preg_split('/(?<!^)(?!$)/', $text);
1378                 $seed = (int)abs(crc32($text) / strlen($text)); # Deterministic seed.
1379
1380                 foreach ($chars as $key => $char) {
1381                         $ord = ord($char);
1382                         # Ignore non-ascii chars.
1383                         if ($ord < 128) {
1384                                 $r = ($seed * (1 + $key)) % 100; # Pseudo-random function.
1385                                 # roughly 10% raw, 45% hex, 45% dec
1386                                 # '@' *must* be encoded. I insist.
1387                                 # '"' and '>' have to be encoded inside the attribute
1388                                 if ($r > 90 && strpos('@"&>', $char) === false) /* do nothing */;
1389                                 else if ($r < 45) $chars[$key] = '&#x'.dechex($ord).';';
1390                                 else              $chars[$key] = '&#'.$ord.';';
1391                         }
1392                 }
1393
1394                 $text = implode('', $chars);
1395                 $tail = $head_length ? implode('', array_slice($chars, $head_length)) : $text;
1396
1397                 return $text;
1398         }
1399
1400
1401         protected function parseSpan($str) {
1402         #
1403         # Take the string $str and parse it into tokens, hashing embeded HTML,
1404         # escaped characters and handling code spans.
1405         #
1406                 $output = '';
1407
1408                 $span_re = '{
1409                                 (
1410                                         \\\\'.$this->escape_chars_re.'
1411                                 |
1412                                         (?<![`\\\\])
1413                                         `+                                              # code span marker
1414                         '.( $this->no_markup ? '' : '
1415                                 |
1416                                         <!--    .*?     -->             # comment
1417                                 |
1418                                         <\?.*?\?> | <%.*?%>             # processing instruction
1419                                 |
1420                                         <[!$]?[-a-zA-Z0-9:_]+   # regular tags
1421                                         (?>
1422                                                 \s
1423                                                 (?>[^"\'>]+|"[^"]*"|\'[^\']*\')*
1424                                         )?
1425                                         >
1426                                 |
1427                                         <[-a-zA-Z0-9:_]+\s*/> # xml-style empty tag
1428                                 |
1429                                         </[-a-zA-Z0-9:_]+\s*> # closing tag
1430                         ').'
1431                                 )
1432                                 }xs';
1433
1434                 while (1) {
1435                         #
1436                         # Each loop iteration seach for either the next tag, the next
1437                         # openning code span marker, or the next escaped character.
1438                         # Each token is then passed to handleSpanToken.
1439                         #
1440                         $parts = preg_split($span_re, $str, 2, PREG_SPLIT_DELIM_CAPTURE);
1441
1442                         # Create token from text preceding tag.
1443                         if ($parts[0] != "") {
1444                                 $output .= $parts[0];
1445                         }
1446
1447                         # Check if we reach the end.
1448                         if (isset($parts[1])) {
1449                                 $output .= $this->handleSpanToken($parts[1], $parts[2]);
1450                                 $str = $parts[2];
1451                         }
1452                         else {
1453                                 break;
1454                         }
1455                 }
1456
1457                 return $output;
1458         }
1459
1460
1461         protected function handleSpanToken($token, &$str) {
1462         #
1463         # Handle $token provided by parseSpan by determining its nature and
1464         # returning the corresponding value that should replace it.
1465         #
1466                 switch ($token{0}) {
1467                         case "\\":
1468                                 return $this->hashPart("&#". ord($token{1}). ";");
1469                         case "`":
1470                                 # Search for end marker in remaining text.
1471                                 if (preg_match('/^(.*?[^`])'.preg_quote($token).'(?!`)(.*)$/sm',
1472                                         $str, $matches))
1473                                 {
1474                                         $str = $matches[2];
1475                                         $codespan = $this->makeCodeSpan($matches[1]);
1476                                         return $this->hashPart($codespan);
1477                                 }
1478                                 return $token; // return as text since no ending marker found.
1479                         default:
1480                                 return $this->hashPart($token);
1481                 }
1482         }
1483
1484
1485         protected function outdent($text) {
1486         #
1487         # Remove one level of line-leading tabs or spaces
1488         #
1489                 return preg_replace('/^(\t|[ ]{1,'.$this->tab_width.'})/m', '', $text);
1490         }
1491
1492
        # String length function used by `_detab_callback`, which calls it with
        # the line and 'UTF-8'. `_initDetab` replaces it with a function that
        # handles UTF-8 if the function named here does not exist.
        protected $utf8_strlen = 'mb_strlen';
1496
1497         protected function detab($text) {
1498         #
1499         # Replace tabs with the appropriate amount of space.
1500         #
1501                 # For each line we separate the line in blocks delemited by
1502                 # tab characters. Then we reconstruct every line by adding the
1503                 # appropriate number of space between each blocks.
1504
1505                 $text = preg_replace_callback('/^.*\t.*$/m',
1506                         array($this, '_detab_callback'), $text);
1507
1508                 return $text;
1509         }
1510         protected function _detab_callback($matches) {
1511                 $line = $matches[0];
1512                 $strlen = $this->utf8_strlen; # strlen function for UTF-8.
1513
1514                 # Split in blocks.
1515                 $blocks = explode("\t", $line);
1516                 # Add each blocks to the line.
1517                 $line = $blocks[0];
1518                 unset($blocks[0]); # Do not add first block twice.
1519                 foreach ($blocks as $block) {
1520                         # Calculate amount of space, insert spaces, insert block.
1521                         $amount = $this->tab_width -
1522                                 $strlen($line, 'UTF-8') % $this->tab_width;
1523                         $line .= str_repeat(" ", $amount) . $block;
1524                 }
1525                 return $line;
1526         }
1527         protected function _initDetab() {
1528         #
1529         # Check for the availability of the function in the `utf8_strlen` property
1530         # (initially `mb_strlen`). If the function is not available, create a
1531         # function that will loosely count the number of UTF-8 characters with a
1532         # regular expression.
1533         #
1534                 if (function_exists($this->utf8_strlen)) return;
1535                 $this->utf8_strlen = create_function('$text', 'return preg_match_all(
1536                         "/[\\\\x00-\\\\xBF]|[\\\\xC0-\\\\xFF][\\\\x80-\\\\xBF]*/",
1537                         $text, $m);');
1538         }
1539
1540
1541         protected function unhash($text) {
1542         #
1543         # Swap back in all the tags hashed by _HashHTMLBlocks.
1544         #
1545                 return preg_replace_callback('/(.)\x1A[0-9]+\1/',
1546                         array($this, '_unhash_callback'), $text);
1547         }
1548         protected function _unhash_callback($matches) {
1549                 return $this->html_hashes[$matches[0]];
1550         }
1551
1552 }
1553
1554
1555 #
1556 # Temporary Markdown Extra Parser Implementation Class
1557 #
# NOTE: DON'T USE THIS CLASS
# Currently the implementation of Extra resides here in this temporary class.
# This makes it easier to propagate the changes between the three different
# packaging styles of PHP Markdown. When this issue is resolved, this
# _MarkdownExtra_TmpImpl class will disappear and \Michelf\MarkdownExtra
# will contain the code. So please use \Michelf\MarkdownExtra and ignore this
# one.
1565 #
1566
1567 abstract class _MarkdownExtra_TmpImpl extends \Michelf\Markdown {
1568
1569         ### Configuration Variables ###
1570
        # Prefix for footnote ids.
        public $fn_id_prefix = "";

        # Optional title attribute for footnote links and backlinks.
        public $fn_link_title = "";
        public $fn_backlink_title = "";

        # Optional class attribute for footnote links and backlinks.
        public $fn_link_class = "footnote-ref";
        public $fn_backlink_class = "footnote-backref";

        # Class name template for table cell alignment; `%%` is replaced by
        # left, center or right. For instance 'go-%%' becomes 'go-left',
        # 'go-center' or 'go-right'.
        # If empty, the align attribute is used instead of a class name.
        public $table_align_class_tmpl = '';

        # Optional class prefix for fenced code blocks.
        public $code_class_prefix = "";
        # Class attribute for code blocks goes on the `code` tag;
        # setting this to true will put attributes on the `pre` tag instead.
        public $code_attr_on_pre = false;

        # Predefined abbreviations, as an array of word => description.
        # setup() copies them into the abbreviation table.
        public $predef_abbr = array();
1595
1596
1597         ### Parser Implementation ###
1598
1599         public function __construct() {
1600         #
1601         # Constructor function. Initialize the parser object.
1602         #
1603                 # Add extra escapable characters before parent constructor
1604                 # initialize the table.
1605                 $this->escape_chars .= ':|';
1606
1607                 # Insert extra document, block, and span transformations.
1608                 # Parent constructor will do the sorting.
1609                 $this->document_gamut += array(
1610                         "doFencedCodeBlocks" => 5,
1611                         "stripFootnotes"     => 15,
1612                         "stripAbbreviations" => 25,
1613                         "appendFootnotes"    => 50,
1614                         );
1615                 $this->block_gamut += array(
1616                         "doFencedCodeBlocks" => 5,
1617                         "doTables"           => 15,
1618                         "doDefLists"         => 45,
1619                         );
1620                 $this->span_gamut += array(
1621                         "doFootnotes"        => 5,
1622                         "doAbbreviations"    => 70,
1623                         );
1624
1625                 parent::__construct();
1626         }
1627
1628
        # Extra variables used during extra transformations. setup() resets
        # all of them; teardown() clears all of them except the footnote
        # counter.
        protected $footnotes = array();
        protected $footnotes_ordered = array();
        protected $footnotes_ref_count = array();
        protected $footnotes_numbers = array();
        # Abbreviation word => description, filled by setup() from
        # `predef_abbr`. NOTE(review): the property name is misspelled
        # ("desciptions"); check every user before renaming it.
        protected $abbr_desciptions = array();
        # Abbreviation words, each passed through preg_quote and joined
        # with `|`.
        protected $abbr_word_re = '';

        # Give the current footnote number.
        protected $footnote_counter = 1;
1639
1640
1641         protected function setup() {
1642         #
1643         # Setting up Extra-specific variables.
1644         #
1645                 parent::setup();
1646
1647                 $this->footnotes = array();
1648                 $this->footnotes_ordered = array();
1649                 $this->footnotes_ref_count = array();
1650                 $this->footnotes_numbers = array();
1651                 $this->abbr_desciptions = array();
1652                 $this->abbr_word_re = '';
1653                 $this->footnote_counter = 1;
1654
1655                 foreach ($this->predef_abbr as $abbr_word => $abbr_desc) {
1656                         if ($this->abbr_word_re)
1657                                 $this->abbr_word_re .= '|';
1658                         $this->abbr_word_re .= preg_quote($abbr_word);
1659                         $this->abbr_desciptions[$abbr_word] = trim($abbr_desc);
1660                 }
1661         }
1662
1663         protected function teardown() {
1664         #
1665         # Clearing Extra-specific variables.
1666         #
1667                 $this->footnotes = array();
1668                 $this->footnotes_ordered = array();
1669                 $this->footnotes_ref_count = array();
1670                 $this->footnotes_numbers = array();
1671                 $this->abbr_desciptions = array();
1672                 $this->abbr_word_re = '';
1673
1674                 parent::teardown();
1675         }
1676
1677
1678         ### Extra Attribute Parser ###
1679
        # Expression to use to catch attributes (includes the braces): one or
        # more `#id`, `.class` or `key=value` components, each optionally
        # preceded by spaces, between `{` and `}`. The components are captured
        # together as one group.
        protected $id_class_attr_catch_re = '\{((?:[ ]*[#.a-z][-_:a-zA-Z0-9=]+){1,})[ ]*\}';
        # Same expression without the capturing group, to use when parsing in
        # a context where no capture is desired.
        protected $id_class_attr_nocatch_re = '\{(?:[ ]*[#.a-z][-_:a-zA-Z0-9=]+){1,}[ ]*\}';
1684
1685         protected function doExtraAttributes($tag_name, $attr) {
1686         #
1687         # Parse attributes caught by the $this->id_class_attr_catch_re expression
1688         # and return the HTML-formatted list of attributes.
1689         #
1690         # Currently supported attributes are .class and #id.
1691         #
1692                 if (empty($attr)) return "";
1693
1694                 # Split on components
1695                 preg_match_all('/[#.a-z][-_:a-zA-Z0-9=]+/', $attr, $matches);
1696                 $elements = $matches[0];
1697
1698                 # handle classes and ids (only first id taken into account)
1699                 $classes = array();
1700                 $attributes = array();
1701                 $id = false;
1702                 foreach ($elements as $element) {
1703                         if ($element{0} == '.') {
1704                                 $classes[] = substr($element, 1);
1705                         } else if ($element{0} == '#') {
1706                                 if ($id === false) $id = substr($element, 1);
1707                         } else if (strpos($element, '=') > 0) {
1708                                 $parts = explode('=', $element, 2);
1709                                 $attributes[] = $parts[0] . '="' . $parts[1] . '"';
1710                         }
1711                 }
1712
1713                 # compose attributes as string
1714                 $attr_str = "";
1715                 if (!empty($id)) {
1716                         $attr_str .= ' id="'.$id.'"';
1717                 }
1718                 if (!empty($classes)) {
1719                         $attr_str .= ' class="'.implode(" ", $classes).'"';
1720                 }
1721                 if (!$this->no_markup && !empty($attributes)) {
1722                         $attr_str .= ' '.implode(" ", $attributes);
1723                 }
1724                 return $attr_str;
1725         }
1726
1727
1728         protected function stripLinkDefinitions($text) {
1729         #
1730         # Strips link definitions from text, stores the URLs and titles in
1731         # hash references.
1732         #
1733                 $less_than_tab = $this->tab_width - 1;
1734
1735                 # Link defs are in the form: ^[id]: url "optional title"
1736                 $text = preg_replace_callback('{
1737                                                         ^[ ]{0,'.$less_than_tab.'}\[(.+)\][ ]?: # id = $1
1738                                                           [ ]*
1739                                                           \n?                           # maybe *one* newline
1740                                                           [ ]*
1741                                                         (?:
1742                                                           <(.+?)>                       # url = $2
1743                                                         |
1744                                                           (\S+?)                        # url = $3
1745                                                         )
1746                                                           [ ]*
1747                                                           \n?                           # maybe one newline
1748                                                           [ ]*
1749                                                         (?:
1750                                                                 (?<=\s)                 # lookbehind for whitespace
1751                                                                 ["(]
1752                                                                 (.*?)                   # title = $4
1753                                                                 [")]
1754                                                                 [ ]*
1755                                                         )?      # title is optional
1756                                         (?:[ ]* '.$this->id_class_attr_catch_re.' )?  # $5 = extra id & class attr
1757                                                         (?:\n+|\Z)
1758                         }xm',
1759                         array($this, '_stripLinkDefinitions_callback'),
1760                         $text);
1761                 return $text;
1762         }
1763         protected function _stripLinkDefinitions_callback($matches) {
1764                 $link_id = strtolower($matches[1]);
1765                 $url = $matches[2] == '' ? $matches[3] : $matches[2];
1766                 $this->urls[$link_id] = $url;
1767                 $this->titles[$link_id] =& $matches[4];
1768                 $this->ref_attr[$link_id] = $this->doExtraAttributes("", $dummy =& $matches[5]);
1769                 return ''; # String that will replace the block
1770         }
1771
1772
1773         ### HTML Block Parser ###
1774
        # Tags that are always treated as block tags. Like the lists below,
        # this is a regular-expression alternation of tag names; it is
        # inserted as is into the tag-matching expression built by
        # _hashHTMLBlocks_inMarkdown.
        protected $block_tags_re = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|form|fieldset|iframe|hr|legend|article|section|nav|aside|hgroup|header|footer|figcaption|figure';

        # Tags treated as block tags only if the opening tag is alone on its line:
        # NOTE(review): `iframe` is also in the always-block list above.
        protected $context_block_tags_re = 'script|noscript|style|ins|del|iframe|object|source|track|param|math|svg|canvas|audio|video';

        # Tags where markdown="1" default to span mode:
        protected $contain_span_tags_re = 'p|h[1-6]|li|dd|dt|td|th|legend|address';

        # Tags which must not have their contents modified, no matter where
        # they appear:
        protected $clean_tags_re = 'script|style|math|svg';

        # Tags that do not need to be closed.
        protected $auto_close_tags_re = 'hr|img|param|source|track';
1790
1791
1792         protected function hashHTMLBlocks($text) {
1793         #
1794         # Hashify HTML Blocks and "clean tags".
1795         #
1796         # We only want to do this for block-level HTML tags, such as headers,
1797         # lists, and tables. That's because we still want to wrap <p>s around
1798         # "paragraphs" that are wrapped in non-block-level tags, such as anchors,
1799         # phrase emphasis, and spans. The list of tags we're looking for is
1800         # hard-coded.
1801         #
1802         # This works by calling _HashHTMLBlocks_InMarkdown, which then calls
1803         # _HashHTMLBlocks_InHTML when it encounter block tags. When the markdown="1"
1804         # attribute is found within a tag, _HashHTMLBlocks_InHTML calls back
1805         #  _HashHTMLBlocks_InMarkdown to handle the Markdown syntax within the tag.
1806         # These two functions are calling each other. It's recursive!
1807         #
1808                 if ($this->no_markup)  return $text;
1809
1810                 #
1811                 # Call the HTML-in-Markdown hasher.
1812                 #
1813                 list($text, ) = $this->_hashHTMLBlocks_inMarkdown($text);
1814
1815                 return $text;
1816         }
1817         protected function _hashHTMLBlocks_inMarkdown($text, $indent = 0,
1818                                                                                 $enclosing_tag_re = '', $span = false)
1819         {
1820         #
1821         # Parse markdown text, calling _HashHTMLBlocks_InHTML for block tags.
1822         #
1823         # *   $indent is the number of space to be ignored when checking for code
1824         #     blocks. This is important because if we don't take the indent into
1825         #     account, something like this (which looks right) won't work as expected:
1826         #
1827         #     <div>
1828         #         <div markdown="1">
1829         #         Hello World.  <-- Is this a Markdown code block or text?
1830         #         </div>  <-- Is this a Markdown code block or a real tag?
1831         #     <div>
1832         #
1833         #     If you don't like this, just don't indent the tag on which
1834         #     you apply the markdown="1" attribute.
1835         #
1836         # *   If $enclosing_tag_re is not empty, stops at the first unmatched closing
1837         #     tag with that name. Nested tags supported.
1838         #
1839         # *   If $span is true, text inside must treated as span. So any double
1840         #     newline will be replaced by a single newline so that it does not create
1841         #     paragraphs.
1842         #
1843         # Returns an array of that form: ( processed text , remaining text )
1844         #
1845                 if ($text === '') return array('', '');
1846
1847                 # Regex to check for the presense of newlines around a block tag.
1848                 $newline_before_re = '/(?:^\n?|\n\n)*$/';
1849                 $newline_after_re =
1850                         '{
1851                                 ^                                               # Start of text following the tag.
1852                                 (?>[ ]*<!--.*?-->)?             # Optional comment.
1853                                 [ ]*\n                                  # Must be followed by newline.
1854                         }xs';
1855
1856                 # Regex to match any tag.
1857                 $block_tag_re =
1858                         '{
1859                                 (                                       # $2: Capture whole tag.
1860                                         </?                                     # Any opening or closing tag.
1861                                                 (?>                             # Tag name.
1862                                                         '.$this->block_tags_re.'                        |
1863                                                         '.$this->context_block_tags_re.'        |
1864                                                         '.$this->clean_tags_re.'                |
1865                                                         (?!\s)'.$enclosing_tag_re.'
1866                                                 )
1867                                                 (?:
1868                                                         (?=[\s"\'/a-zA-Z0-9])   # Allowed characters after tag name.
1869                                                         (?>
1870                                                                 ".*?"           |       # Double quotes (can contain `>`)
1871                                                                 \'.*?\'         |       # Single quotes (can contain `>`)
1872                                                                 .+?                             # Anything but quotes and `>`.
1873                                                         )*?
1874                                                 )?
1875                                         >                                       # End of tag.
1876                                 |
1877                                         <!--    .*?     -->     # HTML Comment
1878                                 |
1879                                         <\?.*?\?> | <%.*?%>     # Processing instruction
1880                                 |
1881                                         <!\[CDATA\[.*?\]\]>     # CData Block
1882                                 '. ( !$span ? ' # If not in span.
1883                                 |
1884                                         # Indented code block
1885                                         (?: ^[ ]*\n | ^ | \n[ ]*\n )
1886                                         [ ]{'.($indent+4).'}[^\n]* \n
1887                                         (?>
1888                                                 (?: [ ]{'.($indent+4).'}[^\n]* | [ ]* ) \n
1889                                         )*
1890                                 |
1891                                         # Fenced code block marker
1892                                         (?<= ^ | \n )
1893                                         [ ]{0,'.($indent+3).'}(?:~{3,}|`{3,})
1894                                                                         [ ]*
1895                                         (?:
1896                                         \.?[-_:a-zA-Z0-9]+ # standalone class name
1897                                         |
1898                                                 '.$this->id_class_attr_nocatch_re.' # extra attributes
1899                                         )?
1900                                         [ ]*
1901                                         (?= \n )
1902                                 ' : '' ). ' # End (if not is span).
1903                                 |
1904                                         # Code span marker
1905                                         # Note, this regex needs to go after backtick fenced
1906                                         # code blocks but it should also be kept outside of the
1907                                         # "if not in span" condition adding backticks to the parser
1908                                         `+
1909                                 )
1910                         }xs';
1911
1912
1913                 $depth = 0;             # Current depth inside the tag tree.
1914                 $parsed = "";   # Parsed text that will be returned.
1915
1916                 #
1917                 # Loop through every tag until we find the closing tag of the parent
1918                 # or loop until reaching the end of text if no parent tag specified.
1919                 #
1920                 do {
1921                         #
1922                         # Split the text using the first $tag_match pattern found.
1923                         # Text before  pattern will be first in the array, text after
1924                         # pattern will be at the end, and between will be any catches made
1925                         # by the pattern.
1926                         #
1927                         $parts = preg_split($block_tag_re, $text, 2,
1928                                                                 PREG_SPLIT_DELIM_CAPTURE);
1929
1930                         # If in Markdown span mode, add a empty-string span-level hash
1931                         # after each newline to prevent triggering any block element.
1932                         if ($span) {
1933                                 $void = $this->hashPart("", ':');
1934                                 $newline = "$void\n";
1935                                 $parts[0] = $void . str_replace("\n", $newline, $parts[0]) . $void;
1936                         }
1937
1938                         $parsed .= $parts[0]; # Text before current tag.
1939
1940                         # If end of $text has been reached. Stop loop.
1941                         if (count($parts) < 3) {
1942                                 $text = "";
1943                                 break;
1944                         }
1945
1946                         $tag  = $parts[1]; # Tag to handle.
1947                         $text = $parts[2]; # Remaining text after current tag.
1948                         $tag_re = preg_quote($tag); # For use in a regular expression.
1949
1950                         #
1951                         # Check for: Fenced code block marker.
1952                         # Note: need to recheck the whole tag to disambiguate backtick
1953                         # fences from code spans
1954                         #
1955                         if (preg_match('{^\n?([ ]{0,'.($indent+3).'})(~{3,}|`{3,})[ ]*(?:\.?[-_:a-zA-Z0-9]+|'.$this->id_class_attr_nocatch_re.')?[ ]*\n?$}', $tag, $capture)) {
1956                                 # Fenced code block marker: find matching end marker.
1957                                 $fence_indent = strlen($capture[1]); # use captured indent in re
1958                                 $fence_re = $capture[2]; # use captured fence in re
1959                                 if (preg_match('{^(?>.*\n)*?[ ]{'.($fence_indent).'}'.$fence_re.'[ ]*(?:\n|$)}', $text,
1960                                         $matches))
1961                                 {
1962                                         # End marker found: pass text unchanged until marker.
1963                                         $parsed .= $tag . $matches[0];
1964                                         $text = substr($text, strlen($matches[0]));
1965                                 }
1966                                 else {
1967                                         # No end marker: just skip it.
1968                                         $parsed .= $tag;
1969                                 }
1970                         }
1971                         #
1972                         # Check for: Indented code block.
1973                         #
1974                         else if ($tag{0} == "\n" || $tag{0} == " ") {
1975                                 # Indented code block: pass it unchanged, will be handled
1976                                 # later.
1977                                 $parsed .= $tag;
1978                         }
1979                         #
1980                         # Check for: Code span marker
1981                         # Note: need to check this after backtick fenced code blocks
1982                         #
1983                         else if ($tag{0} == "`") {
1984                                 # Find corresponding end marker.
1985                                 $tag_re = preg_quote($tag);
1986                                 if (preg_match('{^(?>.+?|\n(?!\n))*?(?<!`)'.$tag_re.'(?!`)}',
1987                                         $text, $matches))
1988                                 {
1989                                         # End marker found: pass text unchanged until marker.
1990                                         $parsed .= $tag . $matches[0];
1991                                         $text = substr($text, strlen($matches[0]));
1992                                 }
1993                                 else {
1994                                         # Unmatched marker: just skip it.
1995                                         $parsed .= $tag;
1996                                 }
1997                         }
1998                         #
1999                         # Check for: Opening Block level tag or
2000                         #            Opening Context Block tag (like ins and del)
2001                         #               used as a block tag (tag is alone on it's line).
2002                         #
2003                         else if (preg_match('{^<(?:'.$this->block_tags_re.')\b}', $tag) ||
2004                                 (       preg_match('{^<(?:'.$this->context_block_tags_re.')\b}', $tag) &&
2005                                         preg_match($newline_before_re, $parsed) &&
2006                                         preg_match($newline_after_re, $text)    )
2007                                 )
2008                         {
2009                                 # Need to parse tag and following text using the HTML parser.
2010                                 list($block_text, $text) =
2011                                         $this->_hashHTMLBlocks_inHTML($tag . $text, "hashBlock", true);
2012
2013                                 # Make sure it stays outside of any paragraph by adding newlines.
2014                                 $parsed .= "\n\n$block_text\n\n";
2015                         }
2016                         #
2017                         # Check for: Clean tag (like script, math)
2018                         #            HTML Comments, processing instructions.
2019                         #
2020                         else if (preg_match('{^<(?:'.$this->clean_tags_re.')\b}', $tag) ||
2021                                 $tag{1} == '!' || $tag{1} == '?')
2022                         {
2023                                 # Need to parse tag and following text using the HTML parser.
2024                                 # (don't check for markdown attribute)
2025                                 list($block_text, $text) =
2026                                         $this->_hashHTMLBlocks_inHTML($tag . $text, "hashClean", false);
2027
2028                                 $parsed .= $block_text;
2029                         }
2030                         #
2031                         # Check for: Tag with same name as enclosing tag.
2032                         #
2033                         else if ($enclosing_tag_re !== '' &&
2034                                 # Same name as enclosing tag.
2035                                 preg_match('{^</?(?:'.$enclosing_tag_re.')\b}', $tag))
2036                         {
2037                                 #
2038                                 # Increase/decrease nested tag count.
2039                                 #
2040                                 if ($tag{1} == '/')                                             $depth--;
2041                                 else if ($tag{strlen($tag)-2} != '/')   $depth++;
2042
2043                                 if ($depth < 0) {
2044                                         #
2045                                         # Going out of parent element. Clean up and break so we
2046                                         # return to the calling function.
2047                                         #
2048                                         $text = $tag . $text;
2049                                         break;
2050                                 }
2051
2052                                 $parsed .= $tag;
2053                         }
2054                         else {
2055                                 $parsed .= $tag;
2056                         }
2057                 } while ($depth >= 0);
2058
2059                 return array($parsed, $text);
2060         }
2061         protected function _hashHTMLBlocks_inHTML($text, $hash_method, $md_attr) {
2062         #
2063         # Parse HTML, calling _HashHTMLBlocks_InMarkdown for block tags.
2064         #
2065         # *   Calls $hash_method to convert any blocks.
2066         # *   Stops when the first opening tag closes.
2067         # *   $md_attr indicate if the use of the `markdown="1"` attribute is allowed.
2068         #     (it is not inside clean tags)
2069         #
2070         # Returns an array of that form: ( processed text , remaining text )
2071         #
2072                 if ($text === '') return array('', '');
2073
2074                 # Regex to match `markdown` attribute inside of a tag.
2075                 $markdown_attr_re = '
2076                         {
2077                                 \s*                     # Eat whitespace before the `markdown` attribute
2078                                 markdown
2079                                 \s*=\s*
2080                                 (?>
2081                                         (["\'])         # $1: quote delimiter
2082                                         (.*?)           # $2: attribute value
2083                                         \1                      # matching delimiter
2084                                 |
2085                                         ([^\s>]*)       # $3: unquoted attribute value
2086                                 )
2087                                 ()                              # $4: make $3 always defined (avoid warnings)
2088                         }xs';
2089
2090                 # Regex to match any tag.
2091                 $tag_re = '{
2092                                 (                                       # $2: Capture whole tag.
2093                                         </?                                     # Any opening or closing tag.
2094                                                 [\w:$]+                 # Tag name.
2095                                                 (?:
2096                                                         (?=[\s"\'/a-zA-Z0-9])   # Allowed characters after tag name.
2097                                                         (?>
2098                                                                 ".*?"           |       # Double quotes (can contain `>`)
2099                                                                 \'.*?\'         |       # Single quotes (can contain `>`)
2100                                                                 .+?                             # Anything but quotes and `>`.
2101                                                         )*?
2102                                                 )?
2103                                         >                                       # End of tag.
2104                                 |
2105                                         <!--    .*?     -->     # HTML Comment
2106                                 |
2107                                         <\?.*?\?> | <%.*?%>     # Processing instruction
2108                                 |
2109                                         <!\[CDATA\[.*?\]\]>     # CData Block
2110                                 )
2111                         }xs';
2112
2113                 $original_text = $text;         # Save original text in case of faliure.
2114
2115                 $depth          = 0;    # Current depth inside the tag tree.
2116                 $block_text     = "";   # Temporary text holder for current text.
2117                 $parsed         = "";   # Parsed text that will be returned.
2118
2119                 #
2120                 # Get the name of the starting tag.
2121                 # (This pattern makes $base_tag_name_re safe without quoting.)
2122                 #
2123                 if (preg_match('/^<([\w:$]*)\b/', $text, $matches))
2124                         $base_tag_name_re = $matches[1];
2125
2126                 #
2127                 # Loop through every tag until we find the corresponding closing tag.
2128                 #
2129                 do {
2130                         #
2131                         # Split the text using the first $tag_match pattern found.
2132                         # Text before  pattern will be first in the array, text after
2133                         # pattern will be at the end, and between will be any catches made
2134                         # by the pattern.
2135                         #
2136                         $parts = preg_split($tag_re, $text, 2, PREG_SPLIT_DELIM_CAPTURE);
2137
2138                         if (count($parts) < 3) {
2139                                 #
2140                                 # End of $text reached with unbalenced tag(s).
2141                                 # In that case, we return original text unchanged and pass the
2142                                 # first character as filtered to prevent an infinite loop in the
2143                                 # parent function.
2144                                 #
2145                                 return array($original_text{0}, substr($original_text, 1));
2146                         }
2147
2148                         $block_text .= $parts[0]; # Text before current tag.
2149                         $tag         = $parts[1]; # Tag to handle.
2150                         $text        = $parts[2]; # Remaining text after current tag.
2151
2152                         #
2153                         # Check for: Auto-close tag (like <hr/>)
2154                         #                        Comments and Processing Instructions.
2155                         #
2156                         if (preg_match('{^</?(?:'.$this->auto_close_tags_re.')\b}', $tag) ||
2157                                 $tag{1} == '!' || $tag{1} == '?')
2158                         {
2159                                 # Just add the tag to the block as if it was text.
2160                                 $block_text .= $tag;
2161                         }
2162                         else {
2163                                 #
2164                                 # Increase/decrease nested tag count. Only do so if
2165                                 # the tag's name match base tag's.
2166                                 #
2167                                 if (preg_match('{^</?'.$base_tag_name_re.'\b}', $tag)) {
2168                                         if ($tag{1} == '/')                                             $depth--;
2169                                         else if ($tag{strlen($tag)-2} != '/')   $depth++;
2170                                 }
2171
2172                                 #
2173                                 # Check for `markdown="1"` attribute and handle it.
2174                                 #
2175                                 if ($md_attr &&
2176                                         preg_match($markdown_attr_re, $tag, $attr_m) &&
2177                                         preg_match('/^1|block|span$/', $attr_m[2] . $attr_m[3]))
2178                                 {
2179                                         # Remove `markdown` attribute from opening tag.
2180                                         $tag = preg_replace($markdown_attr_re, '', $tag);
2181
2182                                         # Check if text inside this tag must be parsed in span mode.
2183                                         $this->mode = $attr_m[2] . $attr_m[3];
2184                                         $span_mode = $this->mode == 'span' || $this->mode != 'block' &&
2185                                                 preg_match('{^<(?:'.$this->contain_span_tags_re.')\b}', $tag);
2186
2187                                         # Calculate indent before tag.
2188                                         if (preg_match('/(?:^|\n)( *?)(?! ).*?$/', $block_text, $matches)) {
2189                                                 $strlen = $this->utf8_strlen;
2190                                                 $indent = $strlen($matches[1], 'UTF-8');
2191                                         } else {
2192                                                 $indent = 0;
2193                                         }
2194
2195                                         # End preceding block with this tag.
2196                                         $block_text .= $tag;
2197                                         $parsed .= $this->$hash_method($block_text);
2198
2199                                         # Get enclosing tag name for the ParseMarkdown function.
2200                                         # (This pattern makes $tag_name_re safe without quoting.)
2201                                         preg_match('/^<([\w:$]*)\b/', $tag, $matches);
2202                                         $tag_name_re = $matches[1];
2203
2204                                         # Parse the content using the HTML-in-Markdown parser.
2205                                         list ($block_text, $text)
2206                                                 = $this->_hashHTMLBlocks_inMarkdown($text, $indent,
2207                                                         $tag_name_re, $span_mode);
2208
2209                                         # Outdent markdown text.
2210                                         if ($indent > 0) {
2211                                                 $block_text = preg_replace("/^[ ]{1,$indent}/m", "",
2212                                                                                                         $block_text);
2213                                         }
2214
2215                                         # Append tag content to parsed text.
2216                                         if (!$span_mode)        $parsed .= "\n\n$block_text\n\n";
2217                                         else                            $parsed .= "$block_text";
2218
2219                                         # Start over with a new block.
2220                                         $block_text = "";
2221                                 }
2222                                 else $block_text .= $tag;
2223                         }
2224
2225                 } while ($depth > 0);
2226
2227                 #
2228                 # Hash last block text that wasn't processed inside the loop.
2229                 #
2230                 $parsed .= $this->$hash_method($block_text);
2231
2232                 return array($parsed, $text);
2233         }
2234
2235
2236         protected function hashClean($text) {
2237         #
2238         # Called whenever a tag must be hashed when a function inserts a "clean" tag
2239         # in $text, it passes through this function and is automaticaly escaped,
2240         # blocking invalid nested overlap.
2241         #
2242                 return $this->hashPart($text, 'C');
2243         }
2244
2245
2246         protected function doAnchors($text) {
2247         #
2248         # Turn Markdown link shortcuts into XHTML <a> tags.
2249         #
2250                 if ($this->in_anchor) return $text;
2251                 $this->in_anchor = true;
2252
2253                 #
2254                 # First, handle reference-style links: [link text] [id]
2255                 #
2256                 $text = preg_replace_callback('{
2257                         (                                       # wrap whole match in $1
2258                           \[
2259                                 ('.$this->nested_brackets_re.') # link text = $2
2260                           \]
2261
2262                           [ ]?                          # one optional space
2263                           (?:\n[ ]*)?           # one optional newline followed by spaces
2264
2265                           \[
2266                                 (.*?)           # id = $3
2267                           \]
2268                         )
2269                         }xs',
2270                         array($this, '_doAnchors_reference_callback'), $text);
2271
2272                 #
2273                 # Next, inline-style links: [link text](url "optional title")
2274                 #
2275                 $text = preg_replace_callback('{
2276                         (                               # wrap whole match in $1
2277                           \[
2278                                 ('.$this->nested_brackets_re.') # link text = $2
2279                           \]
2280                           \(                    # literal paren
2281                                 [ \n]*
2282                                 (?:
2283                                         <(.+?)> # href = $3
2284                                 |
2285                                         ('.$this->nested_url_parenthesis_re.')  # href = $4
2286                                 )
2287                                 [ \n]*
2288                                 (                       # $5
2289                                   ([\'"])       # quote char = $6
2290                                   (.*?)         # Title = $7
2291                                   \6            # matching quote
2292                                   [ \n]*        # ignore any spaces/tabs between closing quote and )
2293                                 )?                      # title is optional
2294                           \)
2295                           (?:[ ]? '.$this->id_class_attr_catch_re.' )?   # $8 = id/class attributes
2296                         )
2297                         }xs',
2298                         array($this, '_doAnchors_inline_callback'), $text);
2299
2300                 #
2301                 # Last, handle reference-style shortcuts: [link text]
2302                 # These must come last in case you've also got [link text][1]
2303                 # or [link text](/foo)
2304                 #
2305                 $text = preg_replace_callback('{
2306                         (                                       # wrap whole match in $1
2307                           \[
2308                                 ([^\[\]]+)              # link text = $2; can\'t contain [ or ]
2309                           \]
2310                         )
2311                         }xs',
2312                         array($this, '_doAnchors_reference_callback'), $text);
2313
2314                 $this->in_anchor = false;
2315                 return $text;
2316         }
2317         protected function _doAnchors_reference_callback($matches) {
2318                 $whole_match =  $matches[1];
2319                 $link_text   =  $matches[2];
2320                 $link_id     =& $matches[3];
2321
2322                 if ($link_id == "") {
2323                         # for shortcut links like [this][] or [this].
2324                         $link_id = $link_text;
2325                 }
2326
2327                 # lower-case and turn embedded newlines into spaces
2328                 $link_id = strtolower($link_id);
2329                 $link_id = preg_replace('{[ ]?\n}', ' ', $link_id);
2330
2331                 if (isset($this->urls[$link_id])) {
2332                         $url = $this->urls[$link_id];
2333                         $url = $this->encodeURLAttribute($url);
2334
2335                         $result = "<a href=\"$url\"";
2336                         if ( isset( $this->titles[$link_id] ) ) {
2337                                 $title = $this->titles[$link_id];
2338                                 $title = $this->encodeAttribute($title);
2339                                 $result .=  " title=\"$title\"";
2340                         }
2341                         if (isset($this->ref_attr[$link_id]))
2342                                 $result .= $this->ref_attr[$link_id];
2343
2344                         $link_text = $this->runSpanGamut($link_text);
2345                         $result .= ">$link_text</a>";
2346                         $result = $this->hashPart($result);
2347                 }
2348                 else {
2349                         $result = $whole_match;
2350                 }
2351                 return $result;
2352         }
2353         protected function _doAnchors_inline_callback($matches) {
2354                 $whole_match    =  $matches[1];
2355                 $link_text              =  $this->runSpanGamut($matches[2]);
2356                 $url                    =  $matches[3] == '' ? $matches[4] : $matches[3];
2357                 $title                  =& $matches[7];
2358                 $attr  = $this->doExtraAttributes("a", $dummy =& $matches[8]);
2359
2360                 // if the URL was of the form <s p a c e s> it got caught by the HTML
2361                 // tag parser and hashed. Need to reverse the process before using the URL.
2362                 $unhashed = $this->unhash($url);
2363                 if ($unhashed != $url)
2364                         $url = preg_replace('/^<(.*)>$/', '\1', $unhashed);
2365
2366                 $url = $this->encodeURLAttribute($url);
2367
2368                 $result = "<a href=\"$url\"";
2369                 if (isset($title)) {
2370                         $title = $this->encodeAttribute($title);
2371                         $result .=  " title=\"$title\"";
2372                 }
2373                 $result .= $attr;
2374
2375                 $link_text = $this->runSpanGamut($link_text);
2376                 $result .= ">$link_text</a>";
2377
2378                 return $this->hashPart($result);
2379         }
2380
2381
2382         protected function doImages($text) {
2383         #
2384         # Turn Markdown image shortcuts into <img> tags.
2385         #
2386                 #
2387                 # First, handle reference-style labeled images: ![alt text][id]
2388                 #
2389                 $text = preg_replace_callback('{
2390                         (                               # wrap whole match in $1
2391                           !\[
2392                                 ('.$this->nested_brackets_re.')         # alt text = $2
2393                           \]
2394
2395                           [ ]?                          # one optional space
2396                           (?:\n[ ]*)?           # one optional newline followed by spaces
2397
2398                           \[
2399                                 (.*?)           # id = $3
2400                           \]
2401
2402                         )
2403                         }xs',
2404                         array($this, '_doImages_reference_callback'), $text);
2405
2406                 #
2407                 # Next, handle inline images:  ![alt text](url "optional title")
2408                 # Don't forget: encode * and _
2409                 #
2410                 $text = preg_replace_callback('{
2411                         (                               # wrap whole match in $1
2412                           !\[
2413                                 ('.$this->nested_brackets_re.')         # alt text = $2
2414                           \]
2415                           \s?                   # One optional whitespace character
2416                           \(                    # literal paren
2417                                 [ \n]*
2418                                 (?:
2419                                         <(\S*)> # src url = $3
2420                                 |
2421                                         ('.$this->nested_url_parenthesis_re.')  # src url = $4
2422                                 )
2423                                 [ \n]*
2424                                 (                       # $5
2425                                   ([\'"])       # quote char = $6
2426                                   (.*?)         # title = $7
2427                                   \6            # matching quote
2428                                   [ \n]*
2429                                 )?                      # title is optional
2430                           \)
2431                           (?:[ ]? '.$this->id_class_attr_catch_re.' )?   # $8 = id/class attributes
2432                         )
2433                         }xs',
2434                         array($this, '_doImages_inline_callback'), $text);
2435
2436                 return $text;
2437         }
2438         protected function _doImages_reference_callback($matches) {
2439                 $whole_match = $matches[1];
2440                 $alt_text    = $matches[2];
2441                 $link_id     = strtolower($matches[3]);
2442
2443                 if ($link_id == "") {
2444                         $link_id = strtolower($alt_text); # for shortcut links like ![this][].
2445                 }
2446
2447                 $alt_text = $this->encodeAttribute($alt_text);
2448                 if (isset($this->urls[$link_id])) {
2449                         $url = $this->encodeURLAttribute($this->urls[$link_id]);
2450                         $result = "<img src=\"$url\" alt=\"$alt_text\"";
2451                         if (isset($this->titles[$link_id])) {
2452                                 $title = $this->titles[$link_id];
2453                                 $title = $this->encodeAttribute($title);
2454                                 $result .=  " title=\"$title\"";
2455                         }
2456                         if (isset($this->ref_attr[$link_id]))
2457                                 $result .= $this->ref_attr[$link_id];
2458                         $result .= $this->empty_element_suffix;
2459                         $result = $this->hashPart($result);
2460                 }
2461                 else {
2462                         # If there's no such link ID, leave intact:
2463                         $result = $whole_match;
2464                 }
2465
2466                 return $result;
2467         }
2468         protected function _doImages_inline_callback($matches) {
2469                 $whole_match    = $matches[1];
2470                 $alt_text               = $matches[2];
2471                 $url                    = $matches[3] == '' ? $matches[4] : $matches[3];
2472                 $title                  =& $matches[7];
2473                 $attr  = $this->doExtraAttributes("img", $dummy =& $matches[8]);
2474
2475                 $alt_text = $this->encodeAttribute($alt_text);
2476                 $url = $this->encodeURLAttribute($url);
2477                 $result = "<img src=\"$url\" alt=\"$alt_text\"";
2478                 if (isset($title)) {
2479                         $title = $this->encodeAttribute($title);
2480                         $result .=  " title=\"$title\""; # $title already quoted
2481                 }
2482                 $result .= $attr;
2483                 $result .= $this->empty_element_suffix;
2484
2485                 return $this->hashPart($result);
2486         }
2487
2488
        protected function doHeaders($text) {
        #
        # Redefined to add id and class attribute support.
        #
        # Runs two replacement passes over $text: first setext-style headers
        # (handled by _doHeaders_callback_setext), then atx-style headers
        # (handled by _doHeaders_callback_atx). Returns the transformed text.
        #
                # Setext-style headers:
                #         Header 1  {#header1}
                #         ========
                #
                #         Header 2  {#header2 .class1 .class2}
                #         --------
                #
                # Capture groups as consumed by the callback: $1 is the header
                # text, $2 the optional id/class attribute block and $3 the
                # underline. (The in-pattern note on the attribute line says $3,
                # but the callback reads the attributes from $matches[2].)
                $text = preg_replace_callback(
                        '{
                                (^.+?)                                                          # $1: Header text
                                (?:[ ]+ '.$this->id_class_attr_catch_re.' )?     # $3 = id/class attributes
                                [ ]*\n(=+|-+)[ ]*\n+                            # $3: Header footer
                        }mx',
                        array($this, '_doHeaders_callback_setext'), $text);

                # atx-style headers:
                #       # Header 1        {#header1}
                #       ## Header 2       {#header2}
                #       ## Header 2 with closing hashes ##  {#header3.class1.class2}
                #       ...
                #       ###### Header 6   {.class2}
                #
                # Capture groups: $1 is the run of 1 to 6 "#" characters, $2 the
                # header text and $3 the optional id/class attribute block.
                $text = preg_replace_callback('{
                                ^(\#{1,6})      # $1 = string of #\'s
                                [ ]*
                                (.+?)           # $2 = Header text
                                [ ]*
                                \#*                     # optional closing #\'s (not counted)
                                (?:[ ]+ '.$this->id_class_attr_catch_re.' )?     # $3 = id/class attributes
                                [ ]*
                                \n+
                        }xm',
                        array($this, '_doHeaders_callback_atx'), $text);

                return $text;
        }
2529         protected function _doHeaders_callback_setext($matches) {
2530                 if ($matches[3] == '-' && preg_match('{^- }', $matches[1]))
2531                         return $matches[0];
2532                 $level = $matches[3]{0} == '=' ? 1 : 2;
2533                 $attr  = $this->doExtraAttributes("h$level", $dummy =& $matches[2]);
2534                 $block = "<h$level$attr>".$this->runSpanGamut($matches[1])."</h$level>";
2535                 return "\n" . $this->hashBlock($block) . "\n\n";
2536         }
2537         protected function _doHeaders_callback_atx($matches) {
2538                 $level = strlen($matches[1]);
2539                 $attr  = $this->doExtraAttributes("h$level", $dummy =& $matches[3]);
2540                 $block = "<h$level$attr>".$this->runSpanGamut($matches[2])."</h$level>";
2541                 return "\n" . $this->hashBlock($block) . "\n\n";
2542         }
2543
2544
2545         protected function doTables($text) {
2546         #
2547         # Form HTML tables.
2548         #
2549                 $less_than_tab = $this->tab_width - 1;
2550                 #
2551                 # Find tables with leading pipe.
2552                 #
2553                 #       | Header 1 | Header 2
2554                 #       | -------- | --------
2555                 #       | Cell 1   | Cell 2
2556                 #       | Cell 3   | Cell 4
2557                 #
2558                 $text = preg_replace_callback('
2559                         {
2560                                 ^                                                       # Start of a line
2561                                 [ ]{0,'.$less_than_tab.'}       # Allowed whitespace.
2562                                 [|]                                                     # Optional leading pipe (present)
2563                                 (.+) \n                                         # $1: Header row (at least one pipe)
2564
2565                                 [ ]{0,'.$less_than_tab.'}       # Allowed whitespace.
2566                                 [|] ([ ]*[-:]+[-| :]*) \n       # $2: Header underline
2567
2568                                 (                                                       # $3: Cells
2569                                         (?>
2570                                                 [ ]*                            # Allowed whitespace.
2571                                                 [|] .* \n                       # Row content.
2572                                         )*
2573                                 )
2574                                 (?=\n|\Z)                                       # Stop at final double newline.
2575                         }xm',
2576                         array($this, '_doTable_leadingPipe_callback'), $text);
2577
2578                 #
2579                 # Find tables without leading pipe.
2580                 #
2581                 #       Header 1 | Header 2
2582                 #       -------- | --------
2583                 #       Cell 1   | Cell 2
2584                 #       Cell 3   | Cell 4
2585                 #
2586                 $text = preg_replace_callback('
2587                         {
2588                                 ^                                                       # Start of a line
2589                                 [ ]{0,'.$less_than_tab.'}       # Allowed whitespace.
2590                                 (\S.*[|].*) \n                          # $1: Header row (at least one pipe)
2591
2592                                 [ ]{0,'.$less_than_tab.'}       # Allowed whitespace.
2593                                 ([-:]+[ ]*[|][-| :]*) \n        # $2: Header underline
2594
2595                                 (                                                       # $3: Cells
2596                                         (?>
2597                                                 .* [|] .* \n            # Row content
2598                                         )*
2599                                 )
2600                                 (?=\n|\Z)                                       # Stop at final double newline.
2601                         }xm',
2602                         array($this, '_DoTable_callback'), $text);
2603
2604                 return $text;
2605         }
2606         protected function _doTable_leadingPipe_callback($matches) {
2607                 $head           = $matches[1];
2608                 $underline      = $matches[2];
2609                 $content        = $matches[3];
2610
2611                 # Remove leading pipe for each row.
2612                 $content        = preg_replace('/^ *[|]/m', '', $content);
2613
2614                 return $this->_doTable_callback(array($matches[0], $head, $underline, $content));
2615         }
2616         protected function _doTable_makeAlignAttr($alignname)
2617         {
2618                 if (empty($this->table_align_class_tmpl))
2619                         return " align=\"$alignname\"";
2620
2621                 $classname = str_replace('%%', $alignname, $this->table_align_class_tmpl);
2622                 return " class=\"$classname\"";
2623         }
2624         protected function _doTable_callback($matches) {
2625                 $head           = $matches[1];
2626                 $underline      = $matches[2];
2627                 $content        = $matches[3];
2628
2629                 # Remove any tailing pipes for each line.
2630                 $head           = preg_replace('/[|] *$/m', '', $head);
2631                 $underline      = preg_replace('/[|] *$/m', '', $underline);
2632                 $content        = preg_replace('/[|] *$/m', '', $content);
2633
2634                 # Reading alignement from header underline.
2635                 $separators     = preg_split('/ *[|] */', $underline);
2636                 foreach ($separators as $n => $s) {
2637                         if (preg_match('/^ *-+: *$/', $s))
2638                                 $attr[$n] = $this->_doTable_makeAlignAttr('right');
2639                         else if (preg_match('/^ *:-+: *$/', $s))
2640                                 $attr[$n] = $this->_doTable_makeAlignAttr('center');
2641                         else if (preg_match('/^ *:-+ *$/', $s))
2642                                 $attr[$n] = $this->_doTable_makeAlignAttr('left');
2643                         else
2644                                 $attr[$n] = '';
2645                 }
2646
2647                 # Parsing span elements, including code spans, character escapes,
2648                 # and inline HTML tags, so that pipes inside those gets ignored.
2649                 $head           = $this->parseSpan($head);
2650                 $headers        = preg_split('/ *[|] */', $head);
2651                 $col_count      = count($headers);
2652                 $attr       = array_pad($attr, $col_count, '');
2653
2654                 # Write column headers.
2655                 $text = "<table>\n";
2656                 $text .= "<thead>\n";
2657                 $text .= "<tr>\n";
2658                 foreach ($headers as $n => $header)
2659                         $text .= "  <th$attr[$n]>".$this->runSpanGamut(trim($header))."</th>\n";
2660                 $text .= "</tr>\n";
2661                 $text .= "</thead>\n";
2662
2663                 # Split content by row.
2664                 $rows = explode("\n", trim($content, "\n"));
2665
2666                 $text .= "<tbody>\n";
2667                 foreach ($rows as $row) {
2668                         # Parsing span elements, including code spans, character escapes,
2669                         # and inline HTML tags, so that pipes inside those gets ignored.
2670                         $row = $this->parseSpan($row);
2671
2672                         # Split row by cell.
2673                         $row_cells = preg_split('/ *[|] */', $row, $col_count);
2674                         $row_cells = array_pad($row_cells, $col_count, '');
2675
2676                         $text .= "<tr>\n";
2677                         foreach ($row_cells as $n => $cell)
2678                                 $text .= "  <td$attr[$n]>".$this->runSpanGamut(trim($cell))."</td>\n";
2679                         $text .= "</tr>\n";
2680                 }
2681                 $text .= "</tbody>\n";
2682                 $text .= "</table>";
2683
2684                 return $this->hashBlock($text) . "\n";
2685         }
2686
2687
        protected function doDefLists($text) {
        #
        # Form HTML definition lists.
        #
        # Finds every whole definition list in $text and replaces it with the
        # result of _doDefLists_callback(). Returns the transformed text.
        #
                $less_than_tab = $this->tab_width - 1;

                # Re-usable pattern to match any entire dl list:
                # a list opens with one or more non-blank term lines followed
                # (optionally after one blank line) by a ":" definition marker,
                # and runs lazily until either the end of the text or a blank
                # line that is followed by something that is neither another
                # term-plus-definition nor another definition marker.
                $whole_list_re = '(?>
                        (                                                               # $1 = whole list
                          (                                                             # $2
                                [ ]{0,'.$less_than_tab.'}
                                ((?>.*\S.*\n)+)                         # $3 = defined term
                                \n?
                                [ ]{0,'.$less_than_tab.'}:[ ]+ # colon starting definition
                          )
                          (?s:.+?)
                          (                                                             # $4
                                  \z
                                |
                                  \n{2,}
                                  (?=\S)
                                  (?!                                           # Negative lookahead for another term
                                        [ ]{0,'.$less_than_tab.'}
                                        (?: \S.*\n )+?                  # defined term
                                        \n?
                                        [ ]{0,'.$less_than_tab.'}:[ ]+ # colon starting definition
                                  )
                                  (?!                                           # Negative lookahead for another definition
                                        [ ]{0,'.$less_than_tab.'}:[ ]+ # colon starting definition
                                  )
                          )
                        )
                )'; // mx

                # A list may only start at the very beginning of the text
                # (optionally after one newline) or right after a blank line.
                $text = preg_replace_callback('{
                                (?>\A\n?|(?<=\n\n))
                                '.$whole_list_re.'
                        }mx',
                        array($this, '_doDefLists_callback'), $text);

                return $text;
        }
2730         protected function _doDefLists_callback($matches) {
2731                 # Re-usable patterns to match list item bullets and number markers:
2732                 $list = $matches[1];
2733
2734                 # Turn double returns into triple returns, so that we can make a
2735                 # paragraph for the last item in a list, if necessary:
2736                 $result = trim($this->processDefListItems($list));
2737                 $result = "<dl>\n" . $result . "\n</dl>";
2738                 return $this->hashBlock($result) . "\n\n";
2739         }
2740
2741
        protected function processDefListItems($list_str) {
        #
        #       Process the contents of a single definition list, splitting it
        #       into individual term and definition list items.
        #
        #       Works in two passes over $list_str: the first turns term lines
        #       into <dt> elements (_processDefListItems_callback_dt), the
        #       second turns ":"-marked definitions into <dd> elements
        #       (_processDefListItems_callback_dd). Returns the resulting text.
        #
                $less_than_tab = $this->tab_width - 1;

                # trim trailing blank lines:
                $list_str = preg_replace("/\n{2,}\\z/", "\n", $list_str);

                # Process definition terms.
                # NOTE(review): the closing lookahead hard-codes an indent of
                # up to 3 spaces while the rest of the pattern uses
                # $less_than_tab; the two only agree when tab_width is 4.
                # Confirm whether that is intentional.
                $list_str = preg_replace_callback('{
                        (?>\A\n?|\n\n+)                                 # leading line
                        (                                                               # definition terms = $1
                                [ ]{0,'.$less_than_tab.'}       # leading whitespace
                                (?!\:[ ]|[ ])                           # negative lookahead for a definition
                                                                                        #   mark (colon) or more whitespace.
                                (?> \S.* \n)+?                          # actual term (not whitespace).
                        )
                        (?=\n?[ ]{0,3}:[ ])                             # lookahead for following line feed
                                                                                        #   with a definition mark.
                        }xm',
                        array($this, '_processDefListItems_callback_dt'), $list_str);

                # Process actual definitions.
                # This pass runs after the term pass, which is why the
                # stop condition can look for a literal "<dt>" as the start of
                # the next term.
                $list_str = preg_replace_callback('{
                        \n(\n+)?                                                # leading line = $1
                        (                                                               # marker space = $2
                                [ ]{0,'.$less_than_tab.'}       # whitespace before colon
                                \:[ ]+                                          # definition mark (colon)
                        )
                        ((?s:.+?))                                              # definition text = $3
                        (?= \n+                                                 # stop at next definition mark,
                                (?:                                                     # next term or end of text
                                        [ ]{0,'.$less_than_tab.'} \:[ ] |
                                        <dt> | \z
                                )
                        )
                        }xm',
                        array($this, '_processDefListItems_callback_dd'), $list_str);

                return $list_str;
        }
2785         protected function _processDefListItems_callback_dt($matches) {
2786                 $terms = explode("\n", trim($matches[1]));
2787                 $text = '';
2788                 foreach ($terms as $term) {
2789                         $term = $this->runSpanGamut(trim($term));
2790                         $text .= "\n<dt>" . $term . "</dt>";
2791                 }
2792                 return $text . "\n";
2793         }
2794         protected function _processDefListItems_callback_dd($matches) {
2795                 $leading_line   = $matches[1];
2796                 $marker_space   = $matches[2];
2797                 $def                    = $matches[3];
2798
2799                 if ($leading_line || preg_match('/\n{2,}/', $def)) {
2800                         # Replace marker with the appropriate whitespace indentation
2801                         $def = str_repeat(' ', strlen($marker_space)) . $def;
2802                         $def = $this->runBlockGamut($this->outdent($def . "\n\n"));
2803                         $def = "\n". $def ."\n";
2804                 }
2805                 else {
2806                         $def = rtrim($def);
2807                         $def = $this->runSpanGamut($this->outdent($def));
2808                 }
2809
2810                 return "\n<dd>" . $def . "</dd>\n";
2811         }
2812
2813
2814         protected function doFencedCodeBlocks($text) {
2815         #
2816         # Adding the fenced code block syntax to regular Markdown:
2817         #
2818         # ~~~
2819         # Code block
2820         # ~~~
2821         #
2822                 $less_than_tab = $this->tab_width;
2823
2824                 $text = preg_replace_callback('{
2825                                 (?:\n|\A)
2826                                 # 1: Opening marker
2827                                 (
2828                                         (?:~{3,}|`{3,}) # 3 or more tildes/backticks.
2829                                 )
2830                                 [ ]*
2831                                 (?:
2832                                         \.?([-_:a-zA-Z0-9]+) # 2: standalone class name
2833                                 |
2834                                         '.$this->id_class_attr_catch_re.' # 3: Extra attributes
2835                                 )?
2836                                 [ ]* \n # Whitespace and newline following marker.
2837
2838                                 # 4: Content
2839                                 (
2840                                         (?>
2841                                                 (?!\1 [ ]* \n)  # Not a closing marker.
2842                                                 .*\n+
2843                                         )+
2844                                 )
2845
2846                                 # Closing marker.
2847                                 \1 [ ]* (?= \n )
2848                         }xm',
2849                         array($this, '_doFencedCodeBlocks_callback'), $text);
2850
2851                 return $text;
2852         }
2853         protected function _doFencedCodeBlocks_callback($matches) {
2854                 $classname =& $matches[2];
2855                 $attrs     =& $matches[3];
2856                 $codeblock = $matches[4];
2857                 $codeblock = htmlspecialchars($codeblock, ENT_NOQUOTES);
2858                 $codeblock = preg_replace_callback('/^\n+/',
2859                         array($this, '_doFencedCodeBlocks_newlines'), $codeblock);
2860
2861                 if ($classname != "") {
2862                         if ($classname{0} == '.')
2863                                 $classname = substr($classname, 1);
2864                         $attr_str = ' class="'.$this->code_class_prefix.$classname.'"';
2865                 } else {
2866                         $attr_str = $this->doExtraAttributes($this->code_attr_on_pre ? "pre" : "code", $attrs);
2867                 }
2868                 $pre_attr_str  = $this->code_attr_on_pre ? $attr_str : '';
2869                 $code_attr_str = $this->code_attr_on_pre ? '' : $attr_str;
2870                 $codeblock  = "<pre$pre_attr_str><code$code_attr_str>$codeblock</code></pre>";
2871
2872                 return "\n\n".$this->hashBlock($codeblock)."\n\n";
2873         }
2874         protected function _doFencedCodeBlocks_newlines($matches) {
2875                 return str_repeat("<br$this->empty_element_suffix",
2876                         strlen($matches[0]));
2877         }
2878
2879
        #
        # Redefining emphasis markers so that emphasis by underscore does not
        # work in the middle of a word.
        #
        # Each list maps a marker string to a regular expression fragment.
        # NOTE(review): presumably the '' entry matches an opening marker and
        # the other entries match the closing marker for the token used as
        # key -- confirm against the span parser that consumes these lists.
        #

        # Single markers: * and _ (emphasis).
        protected $em_relist = array(
                ''  => '(?:(?<!\*)\*(?!\*)|(?<![a-zA-Z0-9_])_(?!_))(?![\.,:;]?\s)',
                '*' => '(?<![\s*])\*(?!\*)',
                '_' => '(?<![\s_])_(?![a-zA-Z0-9_])',
                );
        # Double markers: ** and __ (strong emphasis).
        protected $strong_relist = array(
                ''   => '(?:(?<!\*)\*\*(?!\*)|(?<![a-zA-Z0-9_])__(?!_))(?![\.,:;]?\s)',
                '**' => '(?<![\s*])\*\*(?!\*)',
                '__' => '(?<![\s_])__(?![a-zA-Z0-9_])',
                );
        # Triple markers: *** and ___ (emphasis and strong emphasis combined).
        protected $em_strong_relist = array(
                ''    => '(?:(?<!\*)\*\*\*(?!\*)|(?<![a-zA-Z0-9_])___(?!_))(?![\.,:;]?\s)',
                '***' => '(?<![\s*])\*\*\*(?!\*)',
                '___' => '(?<![\s_])___(?![a-zA-Z0-9_])',
                );
2899
2900
2901         protected function formParagraphs($text) {
2902         #
2903         #       Params:
2904         #               $text - string to process with html <p> tags
2905         #
2906                 # Strip leading and trailing lines:
2907                 $text = preg_replace('/\A\n+|\n+\z/', '', $text);
2908
2909                 $grafs = preg_split('/\n{2,}/', $text, -1, PREG_SPLIT_NO_EMPTY);
2910
2911                 #
2912                 # Wrap <p> tags and unhashify HTML blocks
2913                 #
2914                 foreach ($grafs as $key => $value) {
2915                         $value = trim($this->runSpanGamut($value));
2916
2917                         # Check if this should be enclosed in a paragraph.
2918                         # Clean tag hashes & block tag hashes are left alone.
2919                         $is_p = !preg_match('/^B\x1A[0-9]+B|^C\x1A[0-9]+C$/', $value);
2920
2921                         if ($is_p) {
2922                                 $value = "<p>$value</p>";
2923                         }
2924                         $grafs[$key] = $value;
2925                 }
2926
2927                 # Join grafs in one text, then unhash HTML tags.
2928                 $text = implode("\n\n", $grafs);
2929
2930                 # Finish by removing any tag hashes still present in $text.
2931                 $text = $this->unhash($text);
2932
2933                 return $text;
2934         }
2935
2936
2937         ### Footnotes
2938
2939         protected function stripFootnotes($text) {
2940         #
2941         # Strips link definitions from text, stores the URLs and titles in
2942         # hash references.
2943         #
2944                 $less_than_tab = $this->tab_width - 1;
2945
2946                 # Link defs are in the form: [^id]: url "optional title"
2947                 $text = preg_replace_callback('{
2948                         ^[ ]{0,'.$less_than_tab.'}\[\^(.+?)\][ ]?:      # note_id = $1
2949                           [ ]*
2950                           \n?                                   # maybe *one* newline
2951                         (                                               # text = $2 (no blank lines allowed)
2952                                 (?:
2953                                         .+                              # actual text
2954                                 |
2955                                         \n                              # newlines but
2956                                         (?!\[.+?\][ ]?:\s)# negative lookahead for footnote or link definition marker.
2957                                         (?!\n+[ ]{0,3}\S)# ensure line is not blank and followed
2958                                                                         # by non-indented content
2959                                 )*
2960                         )
2961                         }xm',
2962                         array($this, '_stripFootnotes_callback'),
2963                         $text);
2964                 return $text;
2965         }
2966         protected function _stripFootnotes_callback($matches) {
2967                 $note_id = $this->fn_id_prefix . $matches[1];
2968                 $this->footnotes[$note_id] = $this->outdent($matches[2]);
2969                 return ''; # String that will replace the block
2970         }
2971
2972
2973         protected function doFootnotes($text) {
2974         #
2975         # Replace footnote references in $text [^id] with a special text-token
2976         # which will be replaced by the actual footnote marker in appendFootnotes.
2977         #
2978                 if (!$this->in_anchor) {
2979                         $text = preg_replace('{\[\^(.+?)\]}', "F\x1Afn:\\1\x1A:", $text);
2980                 }
2981                 return $text;
2982         }
2983
2984
2985         protected function appendFootnotes($text) {
2986         #
2987         # Append footnote list to text.
2988         #
2989                 $text = preg_replace_callback('{F\x1Afn:(.*?)\x1A:}',
2990                         array($this, '_appendFootnotes_callback'), $text);
2991
2992                 if (!empty($this->footnotes_ordered)) {
2993                         $text .= "\n\n";
2994                         $text .= "<div class=\"footnotes\">\n";
2995                         $text .= "<hr". $this->empty_element_suffix ."\n";
2996                         $text .= "<ol>\n\n";
2997
2998                         $attr = "";
2999                         if ($this->fn_backlink_class != "") {
3000                                 $class = $this->fn_backlink_class;
3001                                 $class = $this->encodeAttribute($class);
3002                                 $attr .= " class=\"$class\"";
3003                         }
3004                         if ($this->fn_backlink_title != "") {
3005                                 $title = $this->fn_backlink_title;
3006                                 $title = $this->encodeAttribute($title);
3007                                 $attr .= " title=\"$title\"";
3008                         }
3009                         $num = 0;
3010
3011                         while (!empty($this->footnotes_ordered)) {
3012                                 $footnote = reset($this->footnotes_ordered);
3013                                 $note_id = key($this->footnotes_ordered);
3014                                 unset($this->footnotes_ordered[$note_id]);
3015                                 $ref_count = $this->footnotes_ref_count[$note_id];
3016                                 unset($this->footnotes_ref_count[$note_id]);
3017                                 unset($this->footnotes[$note_id]);
3018
3019                                 $footnote .= "\n"; # Need to append newline before parsing.
3020                                 $footnote = $this->runBlockGamut("$footnote\n");
3021                                 $footnote = preg_replace_callback('{F\x1Afn:(.*?)\x1A:}',
3022                                         array($this, '_appendFootnotes_callback'), $footnote);
3023
3024                                 $attr = str_replace("%%", ++$num, $attr);
3025                                 $note_id = $this->encodeAttribute($note_id);
3026
3027                                 # Prepare backlink, multiple backlinks if multiple references
3028                                 $backlink = "<a href=\"#fnref:$note_id\"$attr>&#8617;</a>";
3029                                 for ($ref_num = 2; $ref_num <= $ref_count; ++$ref_num) {
3030                                         $backlink .= " <a href=\"#fnref$ref_num:$note_id\"$attr>&#8617;</a>";
3031                                 }
3032                                 # Add backlink to last paragraph; create new paragraph if needed.
3033                                 if (preg_match('{</p>$}', $footnote)) {
3034                                         $footnote = substr($footnote, 0, -4) . "&#160;$backlink</p>";
3035                                 } else {
3036                                         $footnote .= "\n\n<p>$backlink</p>";
3037                                 }
3038
3039                                 $text .= "<li id=\"fn:$note_id\">\n";
3040                                 $text .= $footnote . "\n";
3041                                 $text .= "</li>\n\n";
3042                         }
3043
3044                         $text .= "</ol>\n";
3045                         $text .= "</div>";
3046                 }
3047                 return $text;
3048         }
3049         protected function _appendFootnotes_callback($matches) {
3050                 $node_id = $this->fn_id_prefix . $matches[1];
3051
3052                 # Create footnote marker only if it has a corresponding footnote *and*
3053                 # the footnote hasn't been used by another marker.
3054                 if (isset($this->footnotes[$node_id])) {
3055                         $num =& $this->footnotes_numbers[$node_id];
3056                         if (!isset($num)) {
3057                                 # Transfer footnote content to the ordered list and give it its
3058                                 # number
3059                                 $this->footnotes_ordered[$node_id] = $this->footnotes[$node_id];
3060                                 $this->footnotes_ref_count[$node_id] = 1;
3061                                 $num = $this->footnote_counter++;
3062                                 $ref_count_mark = '';
3063                         } else {
3064                                 $ref_count_mark = $this->footnotes_ref_count[$node_id] += 1;
3065                         }
3066
3067                         $attr = "";
3068                         if ($this->fn_link_class != "") {
3069                                 $class = $this->fn_link_class;
3070                                 $class = $this->encodeAttribute($class);
3071                                 $attr .= " class=\"$class\"";
3072                         }
3073                         if ($this->fn_link_title != "") {
3074                                 $title = $this->fn_link_title;
3075                                 $title = $this->encodeAttribute($title);
3076                                 $attr .= " title=\"$title\"";
3077                         }
3078
3079                         $attr = str_replace("%%", $num, $attr);
3080                         $node_id = $this->encodeAttribute($node_id);
3081
3082                         return
3083                                 "<sup id=\"fnref$ref_count_mark:$node_id\">".
3084                                 "<a href=\"#fn:$node_id\"$attr>$num</a>".
3085                                 "</sup>";
3086                 }
3087
3088                 return "[^".$matches[1]."]";
3089         }
3090
3091
3092         ### Abbreviations ###
3093
3094         protected function stripAbbreviations($text) {
3095         #
3096         # Strips abbreviations from text, stores titles in hash references.
3097         #
3098                 $less_than_tab = $this->tab_width - 1;
3099
3100                 # Link defs are in the form: [id]*: url "optional title"
3101                 $text = preg_replace_callback('{
3102                         ^[ ]{0,'.$less_than_tab.'}\*\[(.+?)\][ ]?:      # abbr_id = $1
3103                         (.*)                                    # text = $2 (no blank lines allowed)
3104                         }xm',
3105                         array($this, '_stripAbbreviations_callback'),
3106                         $text);
3107                 return $text;
3108         }
3109         protected function _stripAbbreviations_callback($matches) {
3110                 $abbr_word = $matches[1];
3111                 $abbr_desc = $matches[2];
3112                 if ($this->abbr_word_re)
3113                         $this->abbr_word_re .= '|';
3114                 $this->abbr_word_re .= preg_quote($abbr_word);
3115                 $this->abbr_desciptions[$abbr_word] = trim($abbr_desc);
3116                 return ''; # String that will replace the block
3117         }
3118
3119
3120         protected function doAbbreviations($text) {
3121         #
3122         # Find defined abbreviations in text and wrap them in <abbr> elements.
3123         #
3124                 if ($this->abbr_word_re) {
3125                         // cannot use the /x modifier because abbr_word_re may
3126                         // contain significant spaces:
3127                         $text = preg_replace_callback('{'.
3128                                 '(?<![\w\x1A])'.
3129                                 '(?:'.$this->abbr_word_re.')'.
3130                                 '(?![\w\x1A])'.
3131                                 '}',
3132                                 array($this, '_doAbbreviations_callback'), $text);
3133                 }
3134                 return $text;
3135         }
3136         protected function _doAbbreviations_callback($matches) {
3137                 $abbr = $matches[0];
3138                 if (isset($this->abbr_desciptions[$abbr])) {
3139                         $desc = $this->abbr_desciptions[$abbr];
3140                         if (empty($desc)) {
3141                                 return $this->hashPart("<abbr>$abbr</abbr>");
3142                         } else {
3143                                 $desc = $this->encodeAttribute($desc);
3144                                 return $this->hashPart("<abbr title=\"$desc\">$abbr</abbr>");
3145                         }
3146                 } else {
3147                         return $matches[0];
3148                 }
3149         }
3150
3151 }