extlib/Michelf/MarkdownExtra.php

   1 <?php
   2 #
   3 # Markdown Extra  -  A text-to-HTML conversion tool for web writers
   4 #
   5 # PHP Markdown Extra
   6 # Copyright (c) 2004-2015 Michel Fortin
   7 # <https://michelf.ca/projects/php-markdown/>
   8 #
   9 # Original Markdown
  10 # Copyright (c) 2004-2006 John Gruber
  11 # <https://daringfireball.net/projects/markdown/>
  12 #
  13 namespace Michelf;
  14
  15
  16 #
  17 # Markdown Extra Parser Class
  18 #
  19
  20 class MarkdownExtra extends \Michelf\Markdown {
  21
  ### Configuration Variables ###

  # String used as a prefix for footnote ids.
  public $fn_id_prefix = '';

  # Title attribute for footnote links (optional).
  public $fn_link_title = '';
  # Title attribute for footnote backlinks (optional).
  public $fn_backlink_title = '';

  # Class attribute for footnote links (optional).
  public $fn_link_class = 'footnote-ref';
  # Class attribute for footnote backlinks (optional).
  public $fn_backlink_class = 'footnote-backref';

  # What a footnote backlink displays. The default is the '↩' arrow followed
  # by U+FE0E, a Unicode variant selector that keeps iOS from rendering the
  # arrow as an emoji.
  public $fn_backlink_html = '&#8617;&#xFE0E;';

  # Template for the class name carrying table cell alignment; %% stands for
  # left/center/right, so 'go-%%' yields 'go-left', 'go-center' or 'go-right'.
  # When left empty, the align attribute is used instead of a class name.
  public $table_align_class_tmpl = '';

  # Prefix for the class of a fenced code block (optional).
  public $code_class_prefix = '';
  # By default the class attribute of a code block is placed on the `code`
  # tag; set this to true to have attributes placed on the `pre` tag instead.
  public $code_attr_on_pre = false;

  # Predefined abbreviations, as a map of word => description.
  public $predef_abbr = array();
  53
  54         ### Parser Implementation ###
  55
  public function __construct() {
      #
      # Build the parser object: register the Extra-specific escapable
      # characters and transformations, then hand over to the parent
      # constructor.
      #
      # ':' and '|' have to be appended before the parent constructor runs,
      # because that is where the escape table gets initialized.
      $this->escape_chars .= ':|';

      # Transformations contributed by Extra, as method name => number.
      # They are left unsorted here; the parent constructor does the sorting.
      $extra_document_gamut = array(
          "doFencedCodeBlocks" => 5,
          "stripFootnotes"     => 15,
          "stripAbbreviations" => 25,
          "appendFootnotes"    => 50,
      );
      $extra_block_gamut = array(
          "doFencedCodeBlocks" => 5,
          "doTables"           => 15,
          "doDefLists"         => 45,
      );
      $extra_span_gamut = array(
          "doFootnotes"        => 5,
          "doAbbreviations"    => 70,
      );

      # Array union: an entry already present in a gamut keeps its value.
      $this->document_gamut = $this->document_gamut + $extra_document_gamut;
      $this->block_gamut    = $this->block_gamut + $extra_block_gamut;
      $this->span_gamut     = $this->span_gamut + $extra_span_gamut;

      $this->enhanced_ordered_list = true;
      parent::__construct();
  }
  85
  86
  # Working state of the Extra transformations. setup() puts every one of
  # these back to the initial value shown here.
  protected $footnotes = array();
  protected $footnotes_ordered = array();
  protected $footnotes_ref_count = array();
  protected $footnotes_numbers = array();
  protected $abbr_desciptions = array();
  protected $abbr_word_re = '';

  # Number of the current footnote.
  protected $footnote_counter = 1;
  97
  98
  protected function setup() {
      #
      # Setting up Extra-specific variables.
      #
      # Runs the parent setup, resets the footnote and abbreviation state,
      # then loads the predefined abbreviations from $this->predef_abbr
      # (word => description): each word is added to the alternation in
      # $this->abbr_word_re and its trimmed description is stored in
      # $this->abbr_desciptions.
      #
      parent::setup();

      $this->footnotes = array();
      $this->footnotes_ordered = array();
      $this->footnotes_ref_count = array();
      $this->footnotes_numbers = array();
      $this->abbr_desciptions = array();
      $this->abbr_word_re = '';
      $this->footnote_counter = 1;

      foreach ($this->predef_abbr as $abbr_word => $abbr_desc) {
          # Compare with the empty string explicitly. A plain truthiness
          # test considers the pattern "0" empty, so an abbreviation named
          # "0" would be glued to the next one without the '|' separator.
          if ($this->abbr_word_re !== '')
              $this->abbr_word_re .= '|';
          # Numeric-looking array keys come back as integers; quote them
          # as strings.
          $this->abbr_word_re .= preg_quote((string) $abbr_word);
          $this->abbr_desciptions[$abbr_word] = trim($abbr_desc);
      }
  }
 120
 protected function teardown() {
     #
     # Drop the Extra-specific state, then let the parent class do its own
     # teardown.
     #
     # Arrays are assigned by value, so the chained assignment leaves each
     # property with its own empty array.
     $this->footnotes =
     $this->footnotes_ordered =
     $this->footnotes_ref_count =
     $this->footnotes_numbers =
     $this->abbr_desciptions = array();

     $this->abbr_word_re = '';

     parent::teardown();
 }
 134
 135
 ### Extra Attribute Parser ###

 # Matches an attribute block, braces included, and captures what lies
 # between the braces: one or more tokens, each optionally preceded by
 # spaces and beginning with '#', '.' or a lowercase letter.
 protected $id_class_attr_catch_re = '\{((?>[ ]*[#.a-z][-_:a-zA-Z0-9=]+){1,})[ ]*\}';
 # The same expression without the capturing group, for contexts where
 # no capture is wanted.
 protected $id_class_attr_nocatch_re = '\{(?>[ ]*[#.a-z][-_:a-zA-Z0-9=]+){1,}[ ]*\}';
 142
 protected function doExtraAttributes($tag_name, $attr, $defaultIdValue = null, $classes = array()) {
     #
     # Parse attributes caught by the $this->id_class_attr_catch_re expression
     # and return the HTML-formatted list of attributes.
     #
     # Supported components of $attr are `.class`, `#id` (only the first id
     # is used) and `key=value`; the latter are emitted only when
     # $this->no_markup is off.
     #
     # In addition, this method also supports supplying a default Id value,
     # which will be used to populate the id attribute in case it was not
     # overridden, and a list of classes to which the parsed ones are added.
     #
     # $tag_name is not used by this implementation.
     #
     # Returns the attribute string, starting with a space when non-empty,
     # or "" when there is nothing to output.
     #
     if (empty($attr) && !$defaultIdValue && empty($classes)) return "";

     # Split on components. $attr may be null when only a default id or
     # classes were supplied, so cast it before handing it to PCRE.
     preg_match_all('/[#.a-z][-_:a-zA-Z0-9=]+/', (string) $attr, $matches);
     $elements = $matches[0];

     # handle classes and ids (only first id taken into account)
     $attributes = array();
     $id = false;
     foreach ($elements as $element) {
         # Square-bracket offsets are used here: the `$str{0}` form is
         # deprecated since PHP 7.4 and no longer parses on PHP 8.
         if ($element[0] === '.') {
             $classes[] = substr($element, 1);
         } else if ($element[0] === '#') {
             if ($id === false) $id = substr($element, 1);
         } else if (strpos($element, '=') > 0) {
             $parts = explode('=', $element, 2);
             $attributes[] = $parts[0] . '="' . $parts[1] . '"';
         }
     }

     if (!$id) $id = $defaultIdValue;

     # compose attributes as string
     $attr_str = "";
     if (!empty($id)) {
         $attr_str .= ' id="'.$this->encodeAttribute($id) .'"';
     }
     if (!empty($classes)) {
         $attr_str .= ' class="'. implode(" ", $classes) . '"';
     }
     if (!$this->no_markup && !empty($attributes)) {
         $attr_str .= ' '.implode(" ", $attributes);
     }
     return $attr_str;
 }
 188
 189
 protected function stripLinkDefinitions($text) {
     #
     # Remove link definitions from $text. Each definition found is handed
     # to _stripLinkDefinitions_callback, which stores its URL, title and
     # extra attributes. Returns the text without the definitions.
     #
     $max_indent = $this->tab_width - 1;

     # A link definition looks like: ^[id]: url "optional title"
     $link_def_re = '{
                         ^[ ]{0,'.$max_indent.'}\[(.+)\][ ]?: # id = $1
                           [ ]*
                           \n?                           # maybe *one* newline
                           [ ]*
                         (?:
                           <(.+?)>                       # url = $2
                         |
                           (\S+?)                        # url = $3
                         )
                           [ ]*
                           \n?                           # maybe one newline
                           [ ]*
                         (?:
                                 (?<=\s)                 # lookbehind for whitespace
                                 ["(]
                                 (.*?)                   # title = $4
                                 [")]
                                 [ ]*
                         )?      # title is optional
         (?:[ ]* '.$this->id_class_attr_catch_re.' )?  # $5 = extra id & class attr
                         (?:\n+|\Z)
         }xm';

     return preg_replace_callback(
         $link_def_re,
         array($this, '_stripLinkDefinitions_callback'),
         $text
     );
 }
 protected function _stripLinkDefinitions_callback($matches) {
     #
     # Callback for stripLinkDefinitions(): stores the URL, title and extra
     # attributes of one link definition under its lowercased id, and
     # returns the empty string that replaces the definition in the text.
     #
     $link_id = strtolower($matches[1]);

     # The URL sits in $2 (angle-bracket form) or else in $3 (bare form).
     if ($matches[2] == '') {
         $url = $matches[3];
     } else {
         $url = $matches[2];
     }

     # Trailing optional groups that did not take part in the match are
     # absent from $matches; null stands in for them.
     $title      = isset($matches[4]) ? $matches[4] : null;
     $extra_attr = isset($matches[5]) ? $matches[5] : null;

     $this->urls[$link_id]     = $url;
     $this->titles[$link_id]   = $title;
     $this->ref_attr[$link_id] = $this->doExtraAttributes("", $extra_attr);

     return ''; # String that will replace the block
 }
 233
 234
 ### HTML Block Parser ###

 # Tag names that count as block tags wherever they appear:
 protected $block_tags_re = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|form|fieldset|iframe|hr|legend|article|section|nav|aside|hgroup|header|footer|figcaption|figure';

 # Tag names that count as block tags only when the opening tag stands
 # alone on its line:
 protected $context_block_tags_re = 'script|noscript|style|ins|del|iframe|object|source|track|param|math|svg|canvas|audio|video';

 # Tag names for which markdown="1" defaults to span mode:
 protected $contain_span_tags_re = 'p|h[1-6]|li|dd|dt|td|th|legend|address';

 # Tag names whose contents are never modified, wherever they appear:
 protected $clean_tags_re = 'script|style|math|svg';

 # Tag names that need no closing tag.
 protected $auto_close_tags_re = 'hr|img|param|source|track';
 252
 253
 protected function hashHTMLBlocks($text) {
     #
     # Hashify HTML blocks and "clean tags".
     #
     # Only block-level HTML tags (headers, lists, tables, ...) are meant
     # to be hashed: "paragraphs" wrapped in non-block-level tags such as
     # anchors, phrase emphasis and spans must still get their <p>s. The
     # list of tags looked for is hard-coded.
     #
     # The work is done by _hashHTMLBlocks_inMarkdown, which calls
     # _hashHTMLBlocks_inHTML when it encounters block tags. When the
     # markdown="1" attribute is found within a tag, _hashHTMLBlocks_inHTML
     # calls _hashHTMLBlocks_inMarkdown back to handle the Markdown syntax
     # within the tag. The two functions call each other recursively.
     #
     # With $this->no_markup on, the text is returned untouched.
     #
     if (!$this->no_markup) {
         # Hand the text to the HTML-in-Markdown hasher; only the processed
         # text (first element of the returned pair) is of interest here.
         $result = $this->_hashHTMLBlocks_inMarkdown($text);
         $text = $result[0];
     }

     return $text;
 }
 protected function _hashHTMLBlocks_inMarkdown($text, $indent = 0,
                                     $enclosing_tag_re = '', $span = false)
 {
     #
     # Parse markdown text, calling _hashHTMLBlocks_inHTML for block tags.
     #
     # *   $indent is the number of space to be ignored when checking for code
     #     blocks. This is important because if we don't take the indent into
     #     account, something like this (which looks right) won't work as expected:
     #
     #     <div>
     #         <div markdown="1">
     #         Hello World.  <-- Is this a Markdown code block or text?
     #         </div>  <-- Is this a Markdown code block or a real tag?
     #     </div>
     #
     #     If you don't like this, just don't indent the tag on which
     #     you apply the markdown="1" attribute.
     #
     # *   If $enclosing_tag_re is not empty, stops at the first unmatched closing
     #     tag with that name. Nested tags supported.
     #
     # *   If $span is true, text inside must be treated as span. So any double
     #     newline will be replaced by a single newline so that it does not create
     #     paragraphs.
     #
     # Returns an array of that form: ( processed text , remaining text )
     #
     if ($text === '') return array('', '');

     # Regex to check for the presence of newlines around a block tag.
     $newline_before_re = '/(?:^\n?|\n\n)*$/';
     $newline_after_re =
         '{
             ^                                               # Start of text following the tag.
             (?>[ ]*<!--.*?-->)?             # Optional comment.
             [ ]*\n                                  # Must be followed by newline.
         }xs';

     # Regex to match any tag.
     $block_tag_re =
         '{
             (                                       # $2: Capture whole tag.
                 </?                                     # Any opening or closing tag.
                     (?>                             # Tag name.
                         '.$this->block_tags_re.'                        |
                         '.$this->context_block_tags_re.'        |
                         '.$this->clean_tags_re.'                |
                         (?!\s)'.$enclosing_tag_re.'
                     )
                     (?:
                         (?=[\s"\'/a-zA-Z0-9])   # Allowed characters after tag name.
                         (?>
                             ".*?"           |       # Double quotes (can contain `>`)
                             \'.*?\'         |       # Single quotes (can contain `>`)
                             .+?                             # Anything but quotes and `>`.
                         )*?
                     )?
                 >                                       # End of tag.
             |
                 <!--    .*?     -->     # HTML Comment
             |
                 <\?.*?\?> | <%.*?%>     # Processing instruction
             |
                 <!\[CDATA\[.*?\]\]>     # CData Block
             '. ( !$span ? ' # If not in span.
             |
                 # Indented code block
                 (?: ^[ ]*\n | ^ | \n[ ]*\n )
                 [ ]{'.($indent+4).'}[^\n]* \n
                 (?>
                     (?: [ ]{'.($indent+4).'}[^\n]* | [ ]* ) \n
                 )*
             |
                 # Fenced code block marker
                 (?<= ^ | \n )
                 [ ]{0,'.($indent+3).'}(?:~{3,}|`{3,})
                 [ ]*
                 (?: \.?[-_:a-zA-Z0-9]+ )? # standalone class name
                 [ ]*
                 (?: '.$this->id_class_attr_nocatch_re.' )? # extra attributes
                 [ ]*
                 (?= \n )
             ' : '' ). ' # End (if not is span).
             |
                 # Code span marker
                 # Note, this regex needs to go after backtick fenced
                 # code blocks but it should also be kept outside of the
                 # "if not in span" condition adding backticks to the parser
                 `+
             )
         }xs';


     $depth = 0;             # Current depth inside the tag tree.
     $parsed = "";   # Parsed text that will be returned.

     #
     # Loop through every tag until we find the closing tag of the parent
     # or loop until reaching the end of text if no parent tag specified.
     #
     do {
         #
         # Split the text using the first $tag_match pattern found.
         # Text before  pattern will be first in the array, text after
         # pattern will be at the end, and between will be any catches made
         # by the pattern.
         #
         $parts = preg_split($block_tag_re, $text, 2,
                             PREG_SPLIT_DELIM_CAPTURE);

         # If in Markdown span mode, add a empty-string span-level hash
         # after each newline to prevent triggering any block element.
         if ($span) {
             $void = $this->hashPart("", ':');
             $newline = "$void\n";
             $parts[0] = $void . str_replace("\n", $newline, $parts[0]) . $void;
         }

         $parsed .= $parts[0]; # Text before current tag.

         # If end of $text has been reached. Stop loop.
         if (count($parts) < 3) {
             $text = "";
             break;
         }

         $tag  = $parts[1]; # Tag to handle.
         $text = $parts[2]; # Remaining text after current tag.

         # Note: characters of $tag are read below with square-bracket
         # offsets; the `$tag{0}` form is deprecated since PHP 7.4 and no
         # longer parses on PHP 8.

         #
         # Check for: Fenced code block marker.
         # Note: need to recheck the whole tag to disambiguate backtick
         # fences from code spans
         #
         if (preg_match('{^\n?([ ]{0,'.($indent+3).'})(~{3,}|`{3,})[ ]*(?:\.?[-_:a-zA-Z0-9]+)?[ ]*(?:'.$this->id_class_attr_nocatch_re.')?[ ]*\n?$}', $tag, $capture)) {
             # Fenced code block marker: find matching end marker.
             $fence_indent = strlen($capture[1]); # use captured indent in re
             $fence_re = $capture[2]; # use captured fence in re
             if (preg_match('{^(?>.*\n)*?[ ]{'.($fence_indent).'}'.$fence_re.'[ ]*(?:\n|$)}', $text,
                 $matches))
             {
                 # End marker found: pass text unchanged until marker.
                 $parsed .= $tag . $matches[0];
                 $text = substr($text, strlen($matches[0]));
             }
             else {
                 # No end marker: just skip it.
                 $parsed .= $tag;
             }
         }
         #
         # Check for: Indented code block.
         #
         else if ($tag[0] == "\n" || $tag[0] == " ") {
             # Indented code block: pass it unchanged, will be handled
             # later.
             $parsed .= $tag;
         }
         #
         # Check for: Code span marker
         # Note: need to check this after backtick fenced code blocks
         #
         else if ($tag[0] == "`") {
             # Find corresponding end marker.
             $tag_re = preg_quote($tag); # For use in a regular expression.
             if (preg_match('{^(?>.+?|\n(?!\n))*?(?<!`)'.$tag_re.'(?!`)}',
                 $text, $matches))
             {
                 # End marker found: pass text unchanged until marker.
                 $parsed .= $tag . $matches[0];
                 $text = substr($text, strlen($matches[0]));
             }
             else {
                 # Unmatched marker: just skip it.
                 $parsed .= $tag;
             }
         }
         #
         # Check for: Opening Block level tag or
         #            Opening Context Block tag (like ins and del)
         #               used as a block tag (tag is alone on its line).
         #
         else if (preg_match('{^<(?:'.$this->block_tags_re.')\b}', $tag) ||
             (       preg_match('{^<(?:'.$this->context_block_tags_re.')\b}', $tag) &&
                 preg_match($newline_before_re, $parsed) &&
                 preg_match($newline_after_re, $text)    )
             )
         {
             # Need to parse tag and following text using the HTML parser.
             list($block_text, $text) =
                 $this->_hashHTMLBlocks_inHTML($tag . $text, "hashBlock", true);

             # Make sure it stays outside of any paragraph by adding newlines.
             $parsed .= "\n\n$block_text\n\n";
         }
         #
         # Check for: Clean tag (like script, math)
         #            HTML Comments, processing instructions.
         #
         else if (preg_match('{^<(?:'.$this->clean_tags_re.')\b}', $tag) ||
             $tag[1] == '!' || $tag[1] == '?')
         {
             # Need to parse tag and following text using the HTML parser.
             # (don't check for markdown attribute)
             list($block_text, $text) =
                 $this->_hashHTMLBlocks_inHTML($tag . $text, "hashClean", false);

             $parsed .= $block_text;
         }
         #
         # Check for: Tag with same name as enclosing tag.
         #
         else if ($enclosing_tag_re !== '' &&
             # Same name as enclosing tag.
             preg_match('{^</?(?:'.$enclosing_tag_re.')\b}', $tag))
         {
             #
             # Increase/decrease nested tag count. A self-closing tag
             # (second to last character is '/') leaves the depth alone.
             #
             if ($tag[1] == '/')                         $depth--;
             else if ($tag[strlen($tag)-2] != '/')       $depth++;

             if ($depth < 0) {
                 #
                 # Going out of parent element. Clean up and break so we
                 # return to the calling function.
                 #
                 $text = $tag . $text;
                 break;
             }

             $parsed .= $tag;
         }
         else {
             $parsed .= $tag;
         }
     } while ($depth >= 0);

     return array($parsed, $text);
 }
 521         protected function _hashHTMLBlocks_inHTML($text, $hash_method, $md_attr) {
 522         #
 523         # Parse HTML, calling _HashHTMLBlocks_InMarkdown for block tags.
 524         #
 525         # *   Calls $hash_method to convert any blocks.
 526         # *   Stops when the first opening tag closes.
 527         # *   $md_attr indicate if the use of the `markdown="1"` attribute is allowed.
 528         #     (it is not inside clean tags)
 529         #
 530         # Returns an array of that form: ( processed text , remaining text )
 531         #
 532                 if ($text === '') return array('', '');
 533
 534                 # Regex to match `markdown` attribute inside of a tag.
 535                 $markdown_attr_re = '
 536                         {
 537                                 \s*                     # Eat whitespace before the `markdown` attribute
 538                                 markdown
 539                                 \s*=\s*
 540                                 (?>
 541                                         (["\'])         # $1: quote delimiter
 542                                         (.*?)           # $2: attribute value
 543                                         \1                      # matching delimiter
 544                                 |
 545                                         ([^\s>]*)       # $3: unquoted attribute value
 546                                 )
 547                                 ()                              # $4: make $3 always defined (avoid warnings)
 548                         }xs';
 549
 550                 # Regex to match any tag.
 551                 $tag_re = '{
 552                                 (                                       # $2: Capture whole tag.
 553                                         </?                                     # Any opening or closing tag.
 554                                                 [\w:$]+                 # Tag name.
 555                                                 (?:
 556                                                         (?=[\s"\'/a-zA-Z0-9])   # Allowed characters after tag name.
 557                                                         (?>
 558                                                                 ".*?"           |       # Double quotes (can contain `>`)
 559                                                                 \'.*?\'         |       # Single quotes (can contain `>`)
 560                                                                 .+?                             # Anything but quotes and `>`.
 561                                                         )*?
 562                                                 )?
 563                                         >                                       # End of tag.
 564                                 |
 565                                         <!--    .*?     -->     # HTML Comment
 566                                 |
 567                                         <\?.*?\?> | <%.*?%>     # Processing instruction
 568                                 |
 569                                         <!\[CDATA\[.*?\]\]>     # CData Block
 570                                 )
 571                         }xs';
 572
 573                 $original_text = $text;         # Save original text in case of faliure.
 574
 575                 $depth          = 0;    # Current depth inside the tag tree.
 576                 $block_text     = "";   # Temporary text holder for current text.
 577                 $parsed         = "";   # Parsed text that will be returned.
 578
 579                 #
 580                 # Get the name of the starting tag.
 581                 # (This pattern makes $base_tag_name_re safe without quoting.)
 582                 #
 583                 if (preg_match('/^<([\w:$]*)\b/', $text, $matches))
 584                         $base_tag_name_re = $matches[1];
 585
 586                 #
 587                 # Loop through every tag until we find the corresponding closing tag.
 588                 #
 589                 do {
 590                         #
 591                         # Split the text using the first $tag_match pattern found.
 592                         # Text before  pattern will be first in the array, text after
 593                         # pattern will be at the end, and between will be any catches made
 594                         # by the pattern.
 595                         #
 596                         $parts = preg_split($tag_re, $text, 2, PREG_SPLIT_DELIM_CAPTURE);
 597
 598                         if (count($parts) < 3) {
 599                                 #
 600                                 # End of $text reached with unbalenced tag(s).
 601                                 # In that case, we return original text unchanged and pass the
 602                                 # first character as filtered to prevent an infinite loop in the
 603                                 # parent function.
 604                                 #
 605                                 return array($original_text{0}, substr($original_text, 1));
 606                         }
 607
 608                         $block_text .= $parts[0]; # Text before current tag.
 609                         $tag         = $parts[1]; # Tag to handle.
 610                         $text        = $parts[2]; # Remaining text after current tag.
 611
 612                         #
 613                         # Check for: Auto-close tag (like <hr/>)
 614                         #                        Comments and Processing Instructions.
 615                         #
 616                         if (preg_match('{^</?(?:'.$this->auto_close_tags_re.')\b}', $tag) ||
 617                                 $tag{1} == '!' || $tag{1} == '?')
 618                         {
 619                                 # Just add the tag to the block as if it was text.
 620                                 $block_text .= $tag;
 621                         }
 622                         else {
 623                                 #
 624                                 # Increase/decrease nested tag count. Only do so if
 625                                 # the tag's name match base tag's.
 626                                 #
 627                                 if (preg_match('{^</?'.$base_tag_name_re.'\b}', $tag)) {
 628                                         if ($tag{1} == '/')                                             $depth--;
 629                                         else if ($tag{strlen($tag)-2} != '/')   $depth++;
 630                                 }
 631
 632                                 #
 633                                 # Check for `markdown="1"` attribute and handle it.
 634                                 #
 635                                 if ($md_attr &&
 636                                         preg_match($markdown_attr_re, $tag, $attr_m) &&
 637                                         preg_match('/^1|block|span$/', $attr_m[2] . $attr_m[3]))
 638                                 {
 639                                         # Remove `markdown` attribute from opening tag.
 640                                         $tag = preg_replace($markdown_attr_re, '', $tag);
 641
 642                                         # Check if text inside this tag must be parsed in span mode.
 643                                         $this->mode = $attr_m[2] . $attr_m[3];
 644                                         $span_mode = $this->mode == 'span' || $this->mode != 'block' &&
 645                                                 preg_match('{^<(?:'.$this->contain_span_tags_re.')\b}', $tag);
 646
 647                                         # Calculate indent before tag.
 648                                         if (preg_match('/(?:^|\n)( *?)(?! ).*?$/', $block_text, $matches)) {
 649                                                 $strlen = $this->utf8_strlen;
 650                                                 $indent = $strlen($matches[1], 'UTF-8');
 651                                         } else {
 652                                                 $indent = 0;
 653                                         }
 654
 655                                         # End preceding block with this tag.
 656                                         $block_text .= $tag;
 657                                         $parsed .= $this->$hash_method($block_text);
 658
 659                                         # Get enclosing tag name for the ParseMarkdown function.
 660                                         # (This pattern makes $tag_name_re safe without quoting.)
 661                                         preg_match('/^<([\w:$]*)\b/', $tag, $matches);
 662                                         $tag_name_re = $matches[1];
 663
 664                                         # Parse the content using the HTML-in-Markdown parser.
 665                                         list ($block_text, $text)
 666                                                 = $this->_hashHTMLBlocks_inMarkdown($text, $indent,
 667                                                         $tag_name_re, $span_mode);
 668
 669                                         # Outdent markdown text.
 670                                         if ($indent > 0) {
 671                                                 $block_text = preg_replace("/^[ ]{1,$indent}/m", "",
 672                                                                                                         $block_text);
 673                                         }
 674
 675                                         # Append tag content to parsed text.
 676                                         if (!$span_mode)        $parsed .= "\n\n$block_text\n\n";
 677                                         else                            $parsed .= "$block_text";
 678
 679                                         # Start over with a new block.
 680                                         $block_text = "";
 681                                 }
 682                                 else $block_text .= $tag;
 683                         }
 684
 685                 } while ($depth > 0);
 686
 687                 #
 688                 # Hash last block text that wasn't processed inside the loop.
 689                 #
 690                 $parsed .= $this->$hash_method($block_text);
 691
 692                 return array($parsed, $text);
 693         }
 694
 695
 696         protected function hashClean($text) {
 697         #
 698         # Called whenever a tag must be hashed when a function inserts a "clean" tag
 699         # in $text, it passes through this function and is automaticaly escaped,
 700         # blocking invalid nested overlap.
 701         #
 702                 return $this->hashPart($text, 'C');
 703         }
 704
 705
 706         protected function doAnchors($text) {
 707         #
 708         # Turn Markdown link shortcuts into XHTML <a> tags.
 709         #
 710                 if ($this->in_anchor) return $text;
 711                 $this->in_anchor = true;
 712
 713                 #
 714                 # First, handle reference-style links: [link text] [id]
 715                 #
 716                 $text = preg_replace_callback('{
 717                         (                                       # wrap whole match in $1
 718                           \[
 719                                 ('.$this->nested_brackets_re.') # link text = $2
 720                           \]
 721
 722                           [ ]?                          # one optional space
 723                           (?:\n[ ]*)?           # one optional newline followed by spaces
 724
 725                           \[
 726                                 (.*?)           # id = $3
 727                           \]
 728                         )
 729                         }xs',
 730                         array($this, '_doAnchors_reference_callback'), $text);
 731
 732                 #
 733                 # Next, inline-style links: [link text](url "optional title")
 734                 #
 735                 $text = preg_replace_callback('{
 736                         (                               # wrap whole match in $1
 737                           \[
 738                                 ('.$this->nested_brackets_re.') # link text = $2
 739                           \]
 740                           \(                    # literal paren
 741                                 [ \n]*
 742                                 (?:
 743                                         <(.+?)> # href = $3
 744                                 |
 745                                         ('.$this->nested_url_parenthesis_re.')  # href = $4
 746                                 )
 747                                 [ \n]*
 748                                 (                       # $5
 749                                   ([\'"])       # quote char = $6
 750                                   (.*?)         # Title = $7
 751                                   \6            # matching quote
 752                                   [ \n]*        # ignore any spaces/tabs between closing quote and )
 753                                 )?                      # title is optional
 754                           \)
 755                           (?:[ ]? '.$this->id_class_attr_catch_re.' )?   # $8 = id/class attributes
 756                         )
 757                         }xs',
 758                         array($this, '_doAnchors_inline_callback'), $text);
 759
 760                 #
 761                 # Last, handle reference-style shortcuts: [link text]
 762                 # These must come last in case you've also got [link text][1]
 763                 # or [link text](/foo)
 764                 #
 765                 $text = preg_replace_callback('{
 766                         (                                       # wrap whole match in $1
 767                           \[
 768                                 ([^\[\]]+)              # link text = $2; can\'t contain [ or ]
 769                           \]
 770                         )
 771                         }xs',
 772                         array($this, '_doAnchors_reference_callback'), $text);
 773
 774                 $this->in_anchor = false;
 775                 return $text;
 776         }
 777         protected function _doAnchors_reference_callback($matches) {
 778                 $whole_match =  $matches[1];
 779                 $link_text   =  $matches[2];
 780                 $link_id     =& $matches[3];
 781
 782                 if ($link_id == "") {
 783                         # for shortcut links like [this][] or [this].
 784                         $link_id = $link_text;
 785                 }
 786
 787                 # lower-case and turn embedded newlines into spaces
 788                 $link_id = strtolower($link_id);
 789                 $link_id = preg_replace('{[ ]?\n}', ' ', $link_id);
 790
 791                 if (isset($this->urls[$link_id])) {
 792                         $url = $this->urls[$link_id];
 793                         $url = $this->encodeURLAttribute($url);
 794
 795                         $result = "<a href=\"$url\"";
 796                         if ( isset( $this->titles[$link_id] ) ) {
 797                                 $title = $this->titles[$link_id];
 798                                 $title = $this->encodeAttribute($title);
 799                                 $result .=  " title=\"$title\"";
 800                         }
 801                         if (isset($this->ref_attr[$link_id]))
 802                                 $result .= $this->ref_attr[$link_id];
 803
 804                         $link_text = $this->runSpanGamut($link_text);
 805                         $result .= ">$link_text</a>";
 806                         $result = $this->hashPart($result);
 807                 }
 808                 else {
 809                         $result = $whole_match;
 810                 }
 811                 return $result;
 812         }
 813         protected function _doAnchors_inline_callback($matches) {
 814                 $whole_match    =  $matches[1];
 815                 $link_text              =  $this->runSpanGamut($matches[2]);
 816                 $url                    =  $matches[3] == '' ? $matches[4] : $matches[3];
 817                 $title                  =& $matches[7];
 818                 $attr  = $this->doExtraAttributes("a", $dummy =& $matches[8]);
 819
 820                 // if the URL was of the form <s p a c e s> it got caught by the HTML
 821                 // tag parser and hashed. Need to reverse the process before using the URL.
 822                 $unhashed = $this->unhash($url);
 823                 if ($unhashed != $url)
 824                         $url = preg_replace('/^<(.*)>$/', '\1', $unhashed);
 825
 826                 $url = $this->encodeURLAttribute($url);
 827
 828                 $result = "<a href=\"$url\"";
 829                 if (isset($title)) {
 830                         $title = $this->encodeAttribute($title);
 831                         $result .=  " title=\"$title\"";
 832                 }
 833                 $result .= $attr;
 834
 835                 $link_text = $this->runSpanGamut($link_text);
 836                 $result .= ">$link_text</a>";
 837
 838                 return $this->hashPart($result);
 839         }
 840
 841
 842         protected function doImages($text) {
 843         #
 844         # Turn Markdown image shortcuts into <img> tags.
 845         #
 846                 #
 847                 # First, handle reference-style labeled images: ![alt text][id]
 848                 #
 849                 $text = preg_replace_callback('{
 850                         (                               # wrap whole match in $1
 851                           !\[
 852                                 ('.$this->nested_brackets_re.')         # alt text = $2
 853                           \]
 854
 855                           [ ]?                          # one optional space
 856                           (?:\n[ ]*)?           # one optional newline followed by spaces
 857
 858                           \[
 859                                 (.*?)           # id = $3
 860                           \]
 861
 862                         )
 863                         }xs',
 864                         array($this, '_doImages_reference_callback'), $text);
 865
 866                 #
 867                 # Next, handle inline images:  ![alt text](url "optional title")
 868                 # Don't forget: encode * and _
 869                 #
 870                 $text = preg_replace_callback('{
 871                         (                               # wrap whole match in $1
 872                           !\[
 873                                 ('.$this->nested_brackets_re.')         # alt text = $2
 874                           \]
 875                           \s?                   # One optional whitespace character
 876                           \(                    # literal paren
 877                                 [ \n]*
 878                                 (?:
 879                                         <(\S*)> # src url = $3
 880                                 |
 881                                         ('.$this->nested_url_parenthesis_re.')  # src url = $4
 882                                 )
 883                                 [ \n]*
 884                                 (                       # $5
 885                                   ([\'"])       # quote char = $6
 886                                   (.*?)         # title = $7
 887                                   \6            # matching quote
 888                                   [ \n]*
 889                                 )?                      # title is optional
 890                           \)
 891                           (?:[ ]? '.$this->id_class_attr_catch_re.' )?   # $8 = id/class attributes
 892                         )
 893                         }xs',
 894                         array($this, '_doImages_inline_callback'), $text);
 895
 896                 return $text;
 897         }
 898         protected function _doImages_reference_callback($matches) {
 899                 $whole_match = $matches[1];
 900                 $alt_text    = $matches[2];
 901                 $link_id     = strtolower($matches[3]);
 902
 903                 if ($link_id == "") {
 904                         $link_id = strtolower($alt_text); # for shortcut links like ![this][].
 905                 }
 906
 907                 $alt_text = $this->encodeAttribute($alt_text);
 908                 if (isset($this->urls[$link_id])) {
 909                         $url = $this->encodeURLAttribute($this->urls[$link_id]);
 910                         $result = "<img src=\"$url\" alt=\"$alt_text\"";
 911                         if (isset($this->titles[$link_id])) {
 912                                 $title = $this->titles[$link_id];
 913                                 $title = $this->encodeAttribute($title);
 914                                 $result .=  " title=\"$title\"";
 915                         }
 916                         if (isset($this->ref_attr[$link_id]))
 917                                 $result .= $this->ref_attr[$link_id];
 918                         $result .= $this->empty_element_suffix;
 919                         $result = $this->hashPart($result);
 920                 }
 921                 else {
 922                         # If there's no such link ID, leave intact:
 923                         $result = $whole_match;
 924                 }
 925
 926                 return $result;
 927         }
 928         protected function _doImages_inline_callback($matches) {
 929                 $whole_match    = $matches[1];
 930                 $alt_text               = $matches[2];
 931                 $url                    = $matches[3] == '' ? $matches[4] : $matches[3];
 932                 $title                  =& $matches[7];
 933                 $attr  = $this->doExtraAttributes("img", $dummy =& $matches[8]);
 934
 935                 $alt_text = $this->encodeAttribute($alt_text);
 936                 $url = $this->encodeURLAttribute($url);
 937                 $result = "<img src=\"$url\" alt=\"$alt_text\"";
 938                 if (isset($title)) {
 939                         $title = $this->encodeAttribute($title);
 940                         $result .=  " title=\"$title\""; # $title already quoted
 941                 }
 942                 $result .= $attr;
 943                 $result .= $this->empty_element_suffix;
 944
 945                 return $this->hashPart($result);
 946         }
 947
 948
 949         protected function doHeaders($text) {
 950         #
 951         # Redefined to add id and class attribute support.
 952         #
 953                 # Setext-style headers:
 954                 #         Header 1  {#header1}
 955                 #         ========
 956                 #
 957                 #         Header 2  {#header2 .class1 .class2}
 958                 #         --------
 959                 #
 960                 $text = preg_replace_callback(
 961                         '{
 962                                 (^.+?)                                                          # $1: Header text
 963                                 (?:[ ]+ '.$this->id_class_attr_catch_re.' )?     # $3 = id/class attributes
 964                                 [ ]*\n(=+|-+)[ ]*\n+                            # $3: Header footer
 965                         }mx',
 966                         array($this, '_doHeaders_callback_setext'), $text);
 967
 968                 # atx-style headers:
 969                 #       # Header 1        {#header1}
 970                 #       ## Header 2       {#header2}
 971                 #       ## Header 2 with closing hashes ##  {#header3.class1.class2}
 972                 #       ...
 973                 #       ###### Header 6   {.class2}
 974                 #
 975                 $text = preg_replace_callback('{
 976                                 ^(\#{1,6})      # $1 = string of #\'s
 977                                 [ ]*
 978                                 (.+?)           # $2 = Header text
 979                                 [ ]*
 980                                 \#*                     # optional closing #\'s (not counted)
 981                                 (?:[ ]+ '.$this->id_class_attr_catch_re.' )?     # $3 = id/class attributes
 982                                 [ ]*
 983                                 \n+
 984                         }xm',
 985                         array($this, '_doHeaders_callback_atx'), $text);
 986
 987                 return $text;
 988         }
 989         protected function _doHeaders_callback_setext($matches) {
 990                 if ($matches[3] == '-' && preg_match('{^- }', $matches[1]))
 991                         return $matches[0];
 992
 993                 $level = $matches[3]{0} == '=' ? 1 : 2;
 994
 995                 $defaultId = is_callable($this->header_id_func) ? call_user_func($this->header_id_func, $matches[1]) : null;
 996
 997                 $attr  = $this->doExtraAttributes("h$level", $dummy =& $matches[2], $defaultId);
 998                 $block = "<h$level$attr>".$this->runSpanGamut($matches[1])."</h$level>";
 999                 return "\n" . $this->hashBlock($block) . "\n\n";
1000         }
1001         protected function _doHeaders_callback_atx($matches) {
1002                 $level = strlen($matches[1]);
1003
1004                 $defaultId = is_callable($this->header_id_func) ? call_user_func($this->header_id_func, $matches[2]) : null;
1005                 $attr  = $this->doExtraAttributes("h$level", $dummy =& $matches[3], $defaultId);
1006                 $block = "<h$level$attr>".$this->runSpanGamut($matches[2])."</h$level>";
1007                 return "\n" . $this->hashBlock($block) . "\n\n";
1008         }
1009
1010
1011         protected function doTables($text) {
1012         #
1013         # Form HTML tables.
1014         #
1015                 $less_than_tab = $this->tab_width - 1;
1016                 #
1017                 # Find tables with leading pipe.
1018                 #
1019                 #       | Header 1 | Header 2
1020                 #       | -------- | --------
1021                 #       | Cell 1   | Cell 2
1022                 #       | Cell 3   | Cell 4
1023                 #
1024                 $text = preg_replace_callback('
1025                         {
1026                                 ^                                                       # Start of a line
1027                                 [ ]{0,'.$less_than_tab.'}       # Allowed whitespace.
1028                                 [|]                                                     # Optional leading pipe (present)
1029                                 (.+) \n                                         # $1: Header row (at least one pipe)
1030
1031                                 [ ]{0,'.$less_than_tab.'}       # Allowed whitespace.
1032                                 [|] ([ ]*[-:]+[-| :]*) \n       # $2: Header underline
1033
1034                                 (                                                       # $3: Cells
1035                                         (?>
1036                                                 [ ]*                            # Allowed whitespace.
1037                                                 [|] .* \n                       # Row content.
1038                                         )*
1039                                 )
1040                                 (?=\n|\Z)                                       # Stop at final double newline.
1041                         }xm',
1042                         array($this, '_doTable_leadingPipe_callback'), $text);
1043
1044                 #
1045                 # Find tables without leading pipe.
1046                 #
1047                 #       Header 1 | Header 2
1048                 #       -------- | --------
1049                 #       Cell 1   | Cell 2
1050                 #       Cell 3   | Cell 4
1051                 #
1052                 $text = preg_replace_callback('
1053                         {
1054                                 ^                                                       # Start of a line
1055                                 [ ]{0,'.$less_than_tab.'}       # Allowed whitespace.
1056                                 (\S.*[|].*) \n                          # $1: Header row (at least one pipe)
1057
1058                                 [ ]{0,'.$less_than_tab.'}       # Allowed whitespace.
1059                                 ([-:]+[ ]*[|][-| :]*) \n        # $2: Header underline
1060
1061                                 (                                                       # $3: Cells
1062                                         (?>
1063                                                 .* [|] .* \n            # Row content
1064                                         )*
1065                                 )
1066                                 (?=\n|\Z)                                       # Stop at final double newline.
1067                         }xm',
1068                         array($this, '_DoTable_callback'), $text);
1069
1070                 return $text;
1071         }
1072         protected function _doTable_leadingPipe_callback($matches) {
1073                 $head           = $matches[1];
1074                 $underline      = $matches[2];
1075                 $content        = $matches[3];
1076
1077                 # Remove leading pipe for each row.
1078                 $content        = preg_replace('/^ *[|]/m', '', $content);
1079
1080                 return $this->_doTable_callback(array($matches[0], $head, $underline, $content));
1081         }
1082         protected function _doTable_makeAlignAttr($alignname)
1083         {
1084                 if (empty($this->table_align_class_tmpl))
1085                         return " align=\"$alignname\"";
1086
1087                 $classname = str_replace('%%', $alignname, $this->table_align_class_tmpl);
1088                 return " class=\"$classname\"";
1089         }
1090         protected function _doTable_callback($matches) {
1091                 $head           = $matches[1];
1092                 $underline      = $matches[2];
1093                 $content        = $matches[3];
1094
1095                 # Remove any tailing pipes for each line.
1096                 $head           = preg_replace('/[|] *$/m', '', $head);
1097                 $underline      = preg_replace('/[|] *$/m', '', $underline);
1098                 $content        = preg_replace('/[|] *$/m', '', $content);
1099
1100                 # Reading alignement from header underline.
1101                 $separators     = preg_split('/ *[|] */', $underline);
1102                 foreach ($separators as $n => $s) {
1103                         if (preg_match('/^ *-+: *$/', $s))
1104                                 $attr[$n] = $this->_doTable_makeAlignAttr('right');
1105                         else if (preg_match('/^ *:-+: *$/', $s))
1106                                 $attr[$n] = $this->_doTable_makeAlignAttr('center');
1107                         else if (preg_match('/^ *:-+ *$/', $s))
1108                                 $attr[$n] = $this->_doTable_makeAlignAttr('left');
1109                         else
1110                                 $attr[$n] = '';
1111                 }
1112
1113                 # Parsing span elements, including code spans, character escapes,
1114                 # and inline HTML tags, so that pipes inside those gets ignored.
1115                 $head           = $this->parseSpan($head);
1116                 $headers        = preg_split('/ *[|] */', $head);
1117                 $col_count      = count($headers);
1118                 $attr       = array_pad($attr, $col_count, '');
1119
1120                 # Write column headers.
1121                 $text = "<table>\n";
1122                 $text .= "<thead>\n";
1123                 $text .= "<tr>\n";
1124                 foreach ($headers as $n => $header)
1125                         $text .= "  <th$attr[$n]>".$this->runSpanGamut(trim($header))."</th>\n";
1126                 $text .= "</tr>\n";
1127                 $text .= "</thead>\n";
1128
1129                 # Split content by row.
1130                 $rows = explode("\n", trim($content, "\n"));
1131
1132                 $text .= "<tbody>\n";
1133                 foreach ($rows as $row) {
1134                         # Parsing span elements, including code spans, character escapes,
1135                         # and inline HTML tags, so that pipes inside those gets ignored.
1136                         $row = $this->parseSpan($row);
1137
1138                         # Split row by cell.
1139                         $row_cells = preg_split('/ *[|] */', $row, $col_count);
1140                         $row_cells = array_pad($row_cells, $col_count, '');
1141
1142                         $text .= "<tr>\n";
1143                         foreach ($row_cells as $n => $cell)
1144                                 $text .= "  <td$attr[$n]>".$this->runSpanGamut(trim($cell))."</td>\n";
1145                         $text .= "</tr>\n";
1146                 }
1147                 $text .= "</tbody>\n";
1148                 $text .= "</table>";
1149
1150                 return $this->hashBlock($text) . "\n";
1151         }
1152
1153
protected function doDefLists($text) {
#
# Form HTML definition lists.
#
# A list starts at the beginning of the text or right after a blank line.
# It opens with one or more term lines followed (optionally after a single
# blank line) by a line beginning with a colon marker, and it runs until
# the end of the text or until a blank-line break that is not followed by
# another term or another definition. Every list found is handed to
# _doDefLists_callback().
#
    $max_indent = $this->tab_width - 1;

    # Sub-pattern matching one entire dl list:
    $dl_re = '(?>
        (                               # $1 = whole list
          (                             # $2
            [ ]{0,'.$max_indent.'}
            ((?>.*\S.*\n)+)             # $3 = defined term
            \n?
            [ ]{0,'.$max_indent.'}:[ ]+ # colon starting definition
          )
          (?s:.+?)
          (                             # $4
              \z
            |
              \n{2,}
              (?=\S)
              (?!                       # Negative lookahead for another term
                [ ]{0,'.$max_indent.'}
                (?: \S.*\n )+?          # defined term
                \n?
                [ ]{0,'.$max_indent.'}:[ ]+ # colon starting definition
              )
              (?!                       # Negative lookahead for another definition
                [ ]{0,'.$max_indent.'}:[ ]+ # colon starting definition
              )
          )
        )
    )'; // mx

    $pattern = '{
            (?>\A\n?|(?<=\n\n))
            '.$dl_re.'
        }mx';

    return preg_replace_callback(
        $pattern, array($this, '_doDefLists_callback'), $text);
}
protected function _doDefLists_callback($matches) {
#
# preg callback for doDefLists(). $matches[1] holds the raw text of one
# whole definition list. Its items are converted by processDefListItems(),
# the trimmed result is wrapped in <dl> tags, and the markup is passed
# through hashBlock() before being returned followed by a blank line.
#
    $items_html = trim($this->processDefListItems($matches[1]));
    $dl_html = "<dl>\n" . $items_html . "\n</dl>";

    return $this->hashBlock($dl_html) . "\n\n";
}
1206
1207
protected function processDefListItems($list_str) {
#
#   Process the contents of a single definition list, splitting it
#   into individual term and definition list items.
#
#   Terms are rewritten first (_processDefListItems_callback_dt), then
#   definitions (_processDefListItems_callback_dd). The second pass uses
#   the "<dt>" emitted by the first as one of its stop markers, so the
#   order of the two passes matters.
#
#   Params:
#       $list_str - raw text of one definition list
#   Returns the list text with terms and definitions converted.
#
    $less_than_tab = $this->tab_width - 1;

    # trim trailing blank lines:
    $list_str = preg_replace("/\n{2,}\\z/", "\n", $list_str);

    # Process definition terms.
    # The lookahead for the definition mark uses the same indent bound
    # (tab_width - 1) as every other marker pattern, instead of a literal 3,
    # so terms are still recognised when tab_width is not 4.
    $list_str = preg_replace_callback('{
        (?>\A\n?|\n\n+)                 # leading line
        (                               # definition terms = $1
            [ ]{0,'.$less_than_tab.'}   # leading whitespace
            (?!\:[ ]|[ ])               # negative lookahead for a definition
                                        #   mark (colon) or more whitespace.
            (?> \S.* \n)+?              # actual term (not whitespace).
        )
        (?=\n?[ ]{0,'.$less_than_tab.'}:[ ])    # lookahead for following line feed
                                        #   with a definition mark.
        }xm',
        array($this, '_processDefListItems_callback_dt'), $list_str);

    # Process actual definitions.
    $list_str = preg_replace_callback('{
        \n(\n+)?                        # leading line = $1
        (                               # marker space = $2
            [ ]{0,'.$less_than_tab.'}   # whitespace before colon
            \:[ ]+                      # definition mark (colon)
        )
        ((?s:.+?))                      # definition text = $3
        (?= \n+                         # stop at next definition mark,
            (?:                         # next term or end of text
                [ ]{0,'.$less_than_tab.'} \:[ ] |
                <dt> | \z
            )
        )
        }xm',
        array($this, '_processDefListItems_callback_dd'), $list_str);

    return $list_str;
}
protected function _processDefListItems_callback_dt($matches) {
#
# preg callback for the term pass of processDefListItems().
# $matches[1] is a run of one or more term lines; each line is trimmed,
# run through the span gamut and emitted as its own <dt> element.
#
    $html = '';
    foreach (explode("\n", trim($matches[1])) as $line) {
        $html .= "\n<dt>" . $this->runSpanGamut(trim($line)) . "</dt>";
    }

    return $html . "\n";
}
protected function _processDefListItems_callback_dd($matches) {
#
# preg callback for the definition pass of processDefListItems().
#
#   $matches[1] - extra blank line(s) before the marker, if any
#   $matches[2] - the marker itself (indent, colon, following spaces)
#   $matches[3] - the definition text
#
# A definition preceded by a blank line, or containing one, is processed
# as block-level content; otherwise it is processed as a single span.
#
    list(, $blank_before, $marker, $body) = $matches;

    $is_block = $blank_before || preg_match('/\n{2,}/', $body);

    if (!$is_block) {
        $inline = $this->runSpanGamut($this->outdent(rtrim($body)));
        return "\n<dd>" . $inline . "</dd>\n";
    }

    # Pad with as many spaces as the marker occupied so the first line
    # lines up with the rest of the definition before outdenting.
    $padded = str_repeat(' ', strlen($marker)) . $body . "\n\n";
    $block = $this->runBlockGamut($this->outdent($padded));

    return "\n<dd>\n" . $block . "\n</dd>\n";
}
1278
1279
protected function doFencedCodeBlocks($text) {
#
# Adding the fenced code block syntax to regular Markdown:
#
# ~~~
# Code block
# ~~~
#
# The opening marker is a run of three or more tildes or backticks at the
# start of a line. It may be followed by a class name (optionally prefixed
# with a dot) and by an attribute block matched by
# $this->id_class_attr_catch_re. The block ends at the first line that
# consists of the same marker. Each block found is handed to
# _doFencedCodeBlocks_callback().
#
    $text = preg_replace_callback('{
            (?:\n|\A)
            # 1: Opening marker
            (
                (?:~{3,}|`{3,}) # 3 or more tildes/backticks.
            )
            [ ]*
            (?:
                \.?([-_:a-zA-Z0-9]+) # 2: standalone class name
            )?
            [ ]*
            (?:
                '.$this->id_class_attr_catch_re.' # 3: Extra attributes
            )?
            [ ]* \n # Whitespace and newline following marker.

            # 4: Content
            (
                (?>
                    (?!\1 [ ]* \n)  # Not a closing marker.
                    .*\n+
                )+
            )

            # Closing marker.
            \1 [ ]* (?= \n )
        }xm',
        array($this, '_doFencedCodeBlocks_callback'), $text);

    return $text;
}
protected function _doFencedCodeBlocks_callback($matches) {
#
# preg callback for doFencedCodeBlocks(): builds the <pre><code> markup
# for one fenced block.
#
#   $matches[2] - standalone class name (may be empty)
#   $matches[3] - extra attribute string (may be empty)
#   $matches[4] - raw code content
#
# Returns the result of hashBlock() surrounded by blank lines.
#
    $classname =& $matches[2];
    $attrs     =& $matches[3];
    $codeblock = $matches[4];

    # Let a user-supplied function render the content, or escape it.
    if ($this->code_block_content_func) {
        $codeblock = call_user_func($this->code_block_content_func, $codeblock, $classname);
    } else {
        $codeblock = htmlspecialchars($codeblock, ENT_NOQUOTES);
    }

    # Newlines at the very start of the content become <br> tags.
    $codeblock = preg_replace_callback('/^\n+/',
        array($this, '_doFencedCodeBlocks_newlines'), $codeblock);

    $classes = array();
    if ($classname != "") {
        # Square-bracket offset access: the curly-brace form ($str{0}) is
        # deprecated in PHP 7.4 and a parse error in PHP 8.
        if ($classname[0] == '.')
            $classname = substr($classname, 1);
        $classes[] = $this->code_class_prefix.$classname;
    }

    # The attributes go on either <pre> or <code>, depending on
    # $this->code_attr_on_pre.
    $attr_str = $this->doExtraAttributes($this->code_attr_on_pre ? "pre" : "code", $attrs, null, $classes);
    $pre_attr_str  = $this->code_attr_on_pre ? $attr_str : '';
    $code_attr_str = $this->code_attr_on_pre ? '' : $attr_str;
    $codeblock  = "<pre$pre_attr_str><code$code_attr_str>$codeblock</code></pre>";

    return "\n\n".$this->hashBlock($codeblock)."\n\n";
}
protected function _doFencedCodeBlocks_newlines($matches) {
#
# preg callback: replaces the matched run of newlines with the same
# number of <br> tags, closed with $this->empty_element_suffix.
#
    $br_tag = "<br" . $this->empty_element_suffix;

    return str_repeat($br_tag, strlen($matches[0]));
}
1352
1353
#
# Emphasis marker patterns, redefined so that emphasis by underscore
# does not work in the middle of a word.
#
# In each list the '' pattern rejects an underscore marker that is
# preceded by a letter, digit or underscore, and the underscore-keyed
# pattern rejects one that is followed by a letter, digit or underscore.
# The asterisk patterns carry no such word-boundary restriction.
# NOTE(review): the array keys look like "marker currently open" ('' for
# none) as consumed by the parent class -- confirm there.
#
protected $em_relist = array(
    ''  => '(?:(?<!\*)\*(?!\*)|(?<![a-zA-Z0-9_])_(?!_))(?![\.,:;]?\s)',
    '*' => '(?<![\s*])\*(?!\*)',
    '_' => '(?<![\s_])_(?![a-zA-Z0-9_])',
);

# Same scheme for the two-character (strong) markers.
protected $strong_relist = array(
    ''   => '(?:(?<!\*)\*\*(?!\*)|(?<![a-zA-Z0-9_])__(?!_))(?![\.,:;]?\s)',
    '**' => '(?<![\s*])\*\*(?!\*)',
    '__' => '(?<![\s_])__(?![a-zA-Z0-9_])',
);

# Same scheme for the three-character (strong + emphasis) markers.
protected $em_strong_relist = array(
    ''    => '(?:(?<!\*)\*\*\*(?!\*)|(?<![a-zA-Z0-9_])___(?!_))(?![\.,:;]?\s)',
    '***' => '(?<![\s*])\*\*\*(?!\*)',
    '___' => '(?<![\s_])___(?![a-zA-Z0-9_])',
);
1373
1374
protected function formParagraphs($text) {
#
#   Params:
#       $text - string to process with html <p> tags
#
#   Splits $text on blank lines, runs each chunk through the span gamut
#   and wraps it in <p> tags unless it is recognised as a block hash or a
#   clean tag hash. The chunks are then rejoined and passed to unhash().
#
    # Drop leading and trailing newlines, then split on blank lines.
    $text = preg_replace('/\A\n+|\n+\z/', '', $text);
    $chunks = preg_split('/\n{2,}/', $text, -1, PREG_SPLIT_NO_EMPTY);

    $rendered = array();
    foreach ($chunks as $idx => $chunk) {
        $chunk = trim($this->runSpanGamut($chunk));

        # Clean tag hashes & block tag hashes are left alone;
        # everything else becomes a paragraph.
        $is_hash = preg_match('/^B\x1A[0-9]+B|^C\x1A[0-9]+C$/', $chunk);
        $rendered[$idx] = $is_hash ? $chunk : "<p>$chunk</p>";
    }

    # Join the chunks back together and replace remaining tag hashes.
    return $this->unhash(implode("\n\n", $rendered));
}
1409
1410
1411         ### Footnotes
1412
protected function stripFootnotes($text) {
#
# Strips footnote definitions from text; each definition found is passed
# to _stripFootnotes_callback(), which records it and removes it from
# the text.
#
    $max_indent = $this->tab_width - 1;

    # Footnote definitions are in the form: [^id]: footnote text
    # The text may continue on following lines.
    $definition_re = '{
        ^[ ]{0,'.$max_indent.'}\[\^(.+?)\][ ]?:  # note_id = $1
          [ ]*
          \n?                   # maybe *one* newline
        (                       # text = $2 (no blank lines allowed)
            (?:
                .+              # actual text
            |
                \n              # newlines but
                (?!\[.+?\][ ]?:\s)# negative lookahead for footnote or link definition marker.
                (?!\n+[ ]{0,3}\S)# ensure line is not blank and followed
                                # by non-indented content
            )*
        )
        }xm';

    return preg_replace_callback(
        $definition_re, array($this, '_stripFootnotes_callback'), $text);
}
protected function _stripFootnotes_callback($matches) {
#
# preg callback for stripFootnotes(): stores the outdented footnote text
# ($matches[2]) in $this->footnotes under the prefixed note id
# ($this->fn_id_prefix . $matches[1]).
#
    list(, $raw_id, $raw_text) = $matches;

    $this->footnotes[$this->fn_id_prefix . $raw_id] = $this->outdent($raw_text);

    # The whole definition is replaced by nothing.
    return '';
}
1445
1446
protected function doFootnotes($text) {
#
# Replace footnote references in $text [^id] with a special text-token
# which will be replaced by the actual footnote marker in appendFootnotes.
# Text is returned untouched while $this->in_anchor is set.
#
    if ($this->in_anchor) {
        return $text;
    }

    return preg_replace('{\[\^(.+?)\]}', "F\x1Afn:\\1\x1A:", $text);
}
1457
1458
protected function appendFootnotes($text) {
#
# Append footnote list to text.
#
# Every footnote token in $text is first turned into its marker by
# _appendFootnotes_callback(), which also queues each referenced note in
# $this->footnotes_ordered. If any note was queued, a
# <div class="footnotes"> containing an ordered list is appended, with
# one <li> per note, each ending with backlink(s) to its reference(s).
#
# Returns $text with markers substituted and the list appended.
#
    $text = preg_replace_callback('{F\x1Afn:(.*?)\x1A:}',
        array($this, '_appendFootnotes_callback'), $text);

    if (!empty($this->footnotes_ordered)) {
        $text .= "\n\n";
        $text .= "<div class=\"footnotes\">\n";
        $text .= "<hr". $this->empty_element_suffix ."\n";
        $text .= "<ol>\n\n";

        # Attribute template shared by all backlinks. It may contain the
        # "%%" placeholder, which is replaced with the footnote number
        # separately for each note below.
        $attr_tmpl = "";
        if ($this->fn_backlink_class != "") {
            $class = $this->fn_backlink_class;
            $class = $this->encodeAttribute($class);
            $attr_tmpl .= " class=\"$class\"";
        }
        if ($this->fn_backlink_title != "") {
            $title = $this->fn_backlink_title;
            $title = $this->encodeAttribute($title);
            $attr_tmpl .= " title=\"$title\"";
        }
        $backlink_text = $this->fn_backlink_html;
        $num = 0;

        # Processing a note can queue further notes (references made from
        # inside a footnote), so consume the queue until it is empty.
        while (!empty($this->footnotes_ordered)) {
            $footnote = reset($this->footnotes_ordered);
            $note_id = key($this->footnotes_ordered);
            unset($this->footnotes_ordered[$note_id]);
            $ref_count = $this->footnotes_ref_count[$note_id];
            unset($this->footnotes_ref_count[$note_id]);
            unset($this->footnotes[$note_id]);

            $footnote .= "\n"; # Need to append newline before parsing.
            $footnote = $this->runBlockGamut("$footnote\n");
            $footnote = preg_replace_callback('{F\x1Afn:(.*?)\x1A:}',
                array($this, '_appendFootnotes_callback'), $footnote);

            # Substitute into a per-note copy: writing the result back to
            # the template would consume the placeholder on the first note
            # and give every later backlink the first note's number.
            ++$num;
            $attr = str_replace("%%", (string) $num, $attr_tmpl);
            $note_id = $this->encodeAttribute($note_id);

            # Prepare backlink, multiple backlinks if multiple references
            $backlink = "<a href=\"#fnref:$note_id\"$attr>$backlink_text</a>";
            for ($ref_num = 2; $ref_num <= $ref_count; ++$ref_num) {
                $backlink .= " <a href=\"#fnref$ref_num:$note_id\"$attr>$backlink_text</a>";
            }
            # Add backlink to last paragraph; create new paragraph if needed.
            if (preg_match('{</p>$}', $footnote)) {
                $footnote = substr($footnote, 0, -4) . "&#160;$backlink</p>";
            } else {
                $footnote .= "\n\n<p>$backlink</p>";
            }

            $text .= "<li id=\"fn:$note_id\">\n";
            $text .= $footnote . "\n";
            $text .= "</li>\n\n";
        }

        $text .= "</ol>\n";
        $text .= "</div>";
    }
    return $text;
}
protected function _appendFootnotes_callback($matches) {
#
# preg callback for appendFootnotes(): turns one footnote token into its
# superscript marker. $matches[1] is the note id as written in the text.
#
# If no footnote text is stored for the id, the original "[^id]" text is
# returned instead of a marker.
#
    $node_id = $this->fn_id_prefix . $matches[1];

    if (!isset($this->footnotes[$node_id])) {
        return "[^".$matches[1]."]";
    }

    $num =& $this->footnotes_numbers[$node_id];
    if (isset($num)) {
        # Repeat reference: bump the count and use it in the marker id.
        $ref_count_mark = $this->footnotes_ref_count[$node_id] += 1;
    } else {
        # First reference: transfer the footnote content to the ordered
        # list and give it its number.
        $this->footnotes_ordered[$node_id] = $this->footnotes[$node_id];
        $this->footnotes_ref_count[$node_id] = 1;
        $num = $this->footnote_counter++;
        $ref_count_mark = '';
    }

    # Optional class and title attributes for the link, in that order.
    $attr = "";
    $optional = array(
        'class' => $this->fn_link_class,
        'title' => $this->fn_link_title,
    );
    foreach ($optional as $attr_name => $attr_value) {
        if ($attr_value != "") {
            $attr .= " $attr_name=\"" . $this->encodeAttribute($attr_value) . "\"";
        }
    }

    # "%%" in the attributes stands for the footnote number.
    $attr = str_replace("%%", $num, $attr);
    $node_id = $this->encodeAttribute($node_id);

    $anchor = "<a href=\"#fn:$node_id\"$attr>$num</a>";

    return "<sup id=\"fnref$ref_count_mark:$node_id\">" . $anchor . "</sup>";
}
1565
1566
1567         ### Abbreviations ###
1568
protected function stripAbbreviations($text) {
#
# Strips abbreviation definitions from text; each definition found is
# passed to _stripAbbreviations_callback(), which records it and removes
# it from the text.
#
    $max_indent = $this->tab_width - 1;

    # Abbreviation definitions are in the form: *[abbr]: description
    $definition_re = '{
        ^[ ]{0,'.$max_indent.'}\*\[(.+?)\][ ]?: # abbr_id = $1
        (.*)                    # text = $2 (no blank lines allowed)
        }xm';

    return preg_replace_callback(
        $definition_re, array($this, '_stripAbbreviations_callback'), $text);
}
protected function _stripAbbreviations_callback($matches) {
#
# preg callback for stripAbbreviations(): registers one abbreviation.
#
#   $matches[1] - the abbreviated word
#   $matches[2] - its description (stored trimmed)
#
# The quoted word is appended to the $this->abbr_word_re alternation and
# the description is stored in $this->abbr_desciptions.
#
    $abbr_word = $matches[1];
    $abbr_desc = $matches[2];

    # Compare with the empty string instead of testing truthiness: an
    # alternation that so far consists only of "0" is falsy in PHP, which
    # would drop the "|" separator and fuse two words together.
    if ((string) $this->abbr_word_re !== '') {
        $this->abbr_word_re .= '|';
    }
    $this->abbr_word_re .= preg_quote($abbr_word);
    $this->abbr_desciptions[$abbr_word] = trim($abbr_desc);

    return ''; # String that will replace the block
}
1593
1594
protected function doAbbreviations($text) {
#
# Find defined abbreviations in text and wrap them in <abbr> elements.
#
# Nothing is done when no abbreviation has been registered. A match is
# only accepted when it is not adjacent to a word character or to the
# \x1A byte on either side.
#
    # Compare with the empty string instead of testing truthiness, so that
    # a lone abbreviation "0" (a falsy string in PHP) is still processed.
    if ((string) $this->abbr_word_re !== '') {
        // cannot use the /x modifier because abbr_word_re may
        // contain significant spaces:
        $text = preg_replace_callback('{'.
            '(?<![\w\x1A])'.
            '(?:'.$this->abbr_word_re.')'.
            '(?![\w\x1A])'.
            '}',
            array($this, '_doAbbreviations_callback'), $text);
    }
    return $text;
}
protected function _doAbbreviations_callback($matches) {
#
# preg callback for doAbbreviations(): wraps one matched abbreviation
# ($matches[0]) in an <abbr> element and passes it through hashPart().
# The title attribute is added when a non-empty description is stored
# for the word. Text with no stored entry is returned unchanged.
#
    $abbr = $matches[0];

    if (!isset($this->abbr_desciptions[$abbr])) {
        return $matches[0];
    }

    $desc = $this->abbr_desciptions[$abbr];

    # Test for the empty string explicitly: empty() would also discard a
    # legitimate description of "0".
    if ((string) $desc === '') {
        return $this->hashPart("<abbr>$abbr</abbr>");
    }

    $desc = $this->encodeAttribute($desc);
    return $this->hashPart("<abbr title=\"$desc\">$abbr</abbr>");
}
1625 }