3 # Markdown Extra - A text-to-HTML conversion tool for web writers
6 # Copyright (c) 2004-2015 Michel Fortin
7 # <https://michelf.ca/projects/php-markdown/>
10 # Copyright (c) 2004-2006 John Gruber
11 # <https://daringfireball.net/projects/markdown/>
17 # Markdown Extra Parser Class
20 class MarkdownExtra extends \Michelf\Markdown {
22 ### Configuration Variables ###
24 # Prefix for footnote ids.
25 public $fn_id_prefix = "";
27 # Optional title attribute for footnote links and backlinks.
28 public $fn_link_title = "";
29 public $fn_backlink_title = "";
31 # Optional class attribute for footnote links and backlinks.
32 public $fn_link_class = "footnote-ref";
33 public $fn_backlink_class = "footnote-backref";
35 # Content to be displayed within footnote backlinks. The default is '↩';
36 # the U+FE0E on the end is a Unicode variation selector used to prevent iOS
37 # from displaying the arrow character as an emoji.
38 public $fn_backlink_html = '↩︎';
40 # Class name for table cell alignment (%% is replaced by left/center/right).
41 # For instance: 'go-%%' becomes 'go-left' or 'go-right' or 'go-center'.
42 # If empty, the align attribute is used instead of a class name.
43 public $table_align_class_tmpl = '';
45 # Optional class prefix for fenced code blocks.
46 public $code_class_prefix = "";
47 # Class attribute for code blocks goes on the `code` tag;
48 # setting this to true will put attributes on the `pre` tag instead.
49 public $code_attr_on_pre = false;
51 # Predefined abbreviations.
52 public $predef_abbr = array();
54 ### Parser Implementation ###
56 public function __construct() {
58 # Constructor function. Initialize the parser object.
# Adds the Extra-specific escapable characters and transformations to the
# inherited tables, enables enhanced ordered lists, then calls the parent
# constructor.
60 # Add extra escapable characters before the parent constructor
61 # initializes the table.
62 $this->escape_chars .= ':|';
64 # Insert extra document, block, and span transformations.
65 # Parent constructor will do the sorting.
# The += array union only adds keys that are not already present, so an
# entry already registered under the same method name keeps its value.
66 $this->document_gamut += array(
67 "doFencedCodeBlocks" => 5,
68 "stripFootnotes" => 15,
69 "stripAbbreviations" => 25,
70 "appendFootnotes" => 50,
72 $this->block_gamut += array(
73 "doFencedCodeBlocks" => 5,
77 $this->span_gamut += array(
79 "doAbbreviations" => 70,
82 $this->enhanced_ordered_list = true;
83 parent::__construct();
87 # Extra variables used during extra transformations.
88 protected $footnotes = array();
89 protected $footnotes_ordered = array();
90 protected $footnotes_ref_count = array();
91 protected $footnotes_numbers = array();
92 protected $abbr_desciptions = array();
93 protected $abbr_word_re = '';
95 # Give the current footnote number.
96 protected $footnote_counter = 1;
99 protected function setup() {
101 # Setting up Extra-specific variables.
# Resets the footnote and abbreviation state, restarts the footnote counter
# at 1, then seeds the abbreviation table and its matching pattern from
# $this->predef_abbr.
# NOTE(review): the property name abbr_desciptions is misspelled
# (descriptions); renaming it would have to be done everywhere it is used.
105 $this->footnotes = array();
106 $this->footnotes_ordered = array();
107 $this->footnotes_ref_count = array();
108 $this->footnotes_numbers = array();
109 $this->abbr_desciptions = array();
110 $this->abbr_word_re = '';
111 $this->footnote_counter = 1;
# Build a word1|word2|... alternation of the regex-quoted predefined
# abbreviations and store each trimmed description under its word.
113 foreach ($this->predef_abbr as $abbr_word => $abbr_desc) {
114 if ($this->abbr_word_re)
115 $this->abbr_word_re .= '|';
116 $this->abbr_word_re .= preg_quote($abbr_word);
117 $this->abbr_desciptions[$abbr_word] = trim($abbr_desc);
121 protected function teardown() {
123 # Clearing Extra-specific variables.
# Empties the footnote and abbreviation state. In the lines visible here
# footnote_counter is not reset; setup() sets it back to 1 instead.
125 $this->footnotes = array();
126 $this->footnotes_ordered = array();
127 $this->footnotes_ref_count = array();
128 $this->footnotes_numbers = array();
129 $this->abbr_desciptions = array();
130 $this->abbr_word_re = '';
136 ### Extra Attribute Parser ###
138 # Expression to use to catch attributes (includes the braces)
139 protected $id_class_attr_catch_re = '\{((?>[ ]*[#.a-z][-_:a-zA-Z0-9=]+){1,})[ ]*\}';
140 # Expression to use when parsing in a context when no capture is desired
141 protected $id_class_attr_nocatch_re = '\{(?>[ ]*[#.a-z][-_:a-zA-Z0-9=]+){1,}[ ]*\}';
143 protected function doExtraAttributes($tag_name, $attr, $defaultIdValue = null, $classes = array()) {
145 # Parse attributes caught by the $this->id_class_attr_catch_re expression
146 # and return the HTML-formatted list of attributes.
148 # Supported attributes are .class, #id and key=value pairs; key=value
# pairs are only emitted when $this->no_markup is false.
150 # In addition, this method also supports supplying a default Id value,
151 # which will be used to populate the id attribute in case it was not
# given in $attr. $classes is a list of class names to start from; classes
# found in $attr are appended to it. $tag_name is not used in the visible
# code.
# An empty string is returned when $attr, $defaultIdValue and $classes are
# all empty.
# NOTE(review): the $element{0} curly-brace string offset used below was
# deprecated in PHP 7.4 and removed in PHP 8.0; $element[0] is the
# supported spelling.
# NOTE(review): the initialisation of $id and $attr_str and the final
# return are not visible in this extract - confirm against the full file.
153 if (empty($attr) && !$defaultIdValue && empty($classes)) return "";
155 # Split on components
156 preg_match_all('/[#.a-z][-_:a-zA-Z0-9=]+/', $attr, $matches);
157 $elements = $matches[0];
159 # handle classes and ids (only first id taken into account)
160 $attributes = array();
162 foreach ($elements as $element) {
163 if ($element{0} == '.') {
164 $classes[] = substr($element, 1);
165 } else if ($element{0} == '#') {
166 if ($id === false) $id = substr($element, 1);
167 } else if (strpos($element, '=') > 0) {
# key=value component: split on the first = only and quote the value.
168 $parts = explode('=', $element, 2);
169 $attributes[] = $parts[0] . '="' . $parts[1] . '"';
# Fall back to the default id when no (non-empty) #id was found.
173 if (!$id) $id = $defaultIdValue;
175 # compose attributes as string
178 $attr_str .= ' id="'.$this->encodeAttribute($id) .'"';
180 if (!empty($classes)) {
181 $attr_str .= ' class="'. implode(" ", $classes) . '"';
183 if (!$this->no_markup && !empty($attributes)) {
184 $attr_str .= ' '.implode(" ", $attributes);
190 protected function stripLinkDefinitions($text) {
192 # Strips link definitions from text; each match is handed to
# _stripLinkDefinitions_callback, which records the URL, title and extra
# attributes and removes the definition from the text.
195 $less_than_tab = $this->tab_width - 1;
197 # Link defs are in the form: ^[id]: url "optional title"
198 $text = preg_replace_callback('{
199 ^[ ]{0,'.$less_than_tab.'}\[(.+)\][ ]?: # id = $1
201 \n? # maybe *one* newline
209 \n? # maybe one newline
212 (?<=\s) # lookbehind for whitespace
217 )? # title is optional
218 (?:[ ]* '.$this->id_class_attr_catch_re.' )? # $5 = extra id & class attr
221 array($this, '_stripLinkDefinitions_callback'),
225 protected function _stripLinkDefinitions_callback($matches) {
# Records one link definition under its lower-cased id in $this->urls,
# $this->titles and $this->ref_attr, and removes it from the text.
226 $link_id = strtolower($matches[1]);
# The URL is in $2 or, when $2 is empty, in $3.
227 $url = $matches[2] == '' ? $matches[3] : $matches[2];
228 $this->urls[$link_id] = $url;
# Binding by reference creates the array element (as null) when the
# optional group did not participate, instead of reading an undefined index.
229 $this->titles[$link_id] =& $matches[4];
230 $this->ref_attr[$link_id] = $this->doExtraAttributes("", $dummy =& $matches[5]);
231 return ''; # String that will replace the block
235 ### HTML Block Parser ###
237 # Tags that are always treated as block tags:
238 protected $block_tags_re = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|form|fieldset|iframe|hr|legend|article|section|nav|aside|hgroup|header|footer|figcaption|figure';
240 # Tags treated as block tags only if the opening tag is alone on its line:
241 protected $context_block_tags_re = 'script|noscript|style|ins|del|iframe|object|source|track|param|math|svg|canvas|audio|video';
243 # Tags where markdown="1" default to span mode:
244 protected $contain_span_tags_re = 'p|h[1-6]|li|dd|dt|td|th|legend|address';
246 # Tags which must not have their contents modified, no matter where they appear:
248 protected $clean_tags_re = 'script|style|math|svg';
250 # Tags that do not need to be closed.
251 protected $auto_close_tags_re = 'hr|img|param|source|track';
254 protected function hashHTMLBlocks($text) {
256 # Hashify HTML Blocks and "clean tags".
258 # We only want to do this for block-level HTML tags, such as headers,
259 # lists, and tables. That's because we still want to wrap <p>s around
260 # "paragraphs" that are wrapped in non-block-level tags, such as anchors,
261 # phrase emphasis, and spans. The list of tags we're looking for is
# taken from the *_tags_re properties of this class.
264 # This works by calling _HashHTMLBlocks_InMarkdown, which then calls
265 # _HashHTMLBlocks_InHTML when it encounters block tags. When the markdown="1"
266 # attribute is found within a tag, _HashHTMLBlocks_InHTML calls back
267 # _HashHTMLBlocks_InMarkdown to handle the Markdown syntax within the tag.
268 # These two functions are calling each other. It's recursive!
# When $this->no_markup is set the text is returned unchanged.
270 if ($this->no_markup) return $text;
273 # Call the HTML-in-Markdown hasher.
275 list($text, ) = $this->_hashHTMLBlocks_inMarkdown($text);
279 protected function _hashHTMLBlocks_inMarkdown($text, $indent = 0,
280 $enclosing_tag_re = '', $span = false)
283 # Parse markdown text, calling _HashHTMLBlocks_InHTML for block tags.
285 # * $indent is the number of spaces to be ignored when checking for code
286 # blocks. This is important because if we don't take the indent into
287 # account, something like this (which looks right) won't work as expected:
291 # Hello World. <-- Is this a Markdown code block or text?
292 # </div> <-- Is this a Markdown code block or a real tag?
295 # If you don't like this, just don't indent the tag on which
296 # you apply the markdown="1" attribute.
298 # * If $enclosing_tag_re is not empty, stops at the first unmatched closing
299 # tag with that name. Nested tags supported.
301 # * If $span is true, text inside must be treated as span. So any double
302 # newline will be replaced by a single newline so that it does not create
305 # Returns an array of the form: ( processed text , remaining text )
# NOTE(review): the $tag{0} / $tag{1} curly-brace string offsets used below
# were deprecated in PHP 7.4 and removed in PHP 8.0; square brackets are
# the supported spelling.
307 if ($text === '') return array('', '');
309 # Regex to check for the presence of newlines around a block tag.
310 $newline_before_re = '/(?:^\n?|\n\n)*$/';
313 ^ # Start of text following the tag.
314 (?>[ ]*<!--.*?-->)? # Optional comment.
315 [ ]*\n # Must be followed by newline.
318 # Regex to match any tag.
321 ( # $2: Capture whole tag.
322 </? # Any opening or closing tag.
324 '.$this->block_tags_re.' |
325 '.$this->context_block_tags_re.' |
326 '.$this->clean_tags_re.' |
327 (?!\s)'.$enclosing_tag_re.'
330 (?=[\s"\'/a-zA-Z0-9]) # Allowed characters after tag name.
332 ".*?" | # Double quotes (can contain `>`)
333 \'.*?\' | # Single quotes (can contain `>`)
334 .+? # Anything but quotes and `>`.
339 <!-- .*? --> # HTML Comment
341 <\?.*?\?> | <%.*?%> # Processing instruction
343 <!\[CDATA\[.*?\]\]> # CData Block
344 '. ( !$span ? ' # If not in span.
346 # Indented code block
347 (?: ^[ ]*\n | ^ | \n[ ]*\n )
348 [ ]{'.($indent+4).'}[^\n]* \n
350 (?: [ ]{'.($indent+4).'}[^\n]* | [ ]* ) \n
353 # Fenced code block marker
355 [ ]{0,'.($indent+3).'}(?:~{3,}|`{3,})
357 (?: \.?[-_:a-zA-Z0-9]+ )? # standalone class name
359 (?: '.$this->id_class_attr_nocatch_re.' )? # extra attributes
362 ' : '' ). ' # End (if not is span).
365 # Note, this regex needs to go after backtick fenced
366 # code blocks but it should also be kept outside of the
367 # "if not in span" condition adding backticks to the parser
373 $depth = 0; # Current depth inside the tag tree.
374 $parsed = ""; # Parsed text that will be returned.
377 # Loop through every tag until we find the closing tag of the parent
378 # or loop until reaching the end of text if no parent tag specified.
382 # Split the text using the first $tag_match pattern found.
383 # Text before pattern will be first in the array, text after
384 # pattern will be at the end, and between will be any catches made
387 $parts = preg_split($block_tag_re, $text, 2,
388 PREG_SPLIT_DELIM_CAPTURE);
390 # If in Markdown span mode, add an empty-string span-level hash
391 # after each newline to prevent triggering any block element.
393 $void = $this->hashPart("", ':');
394 $newline = "$void\n";
395 $parts[0] = $void . str_replace("\n", $newline, $parts[0]) . $void;
398 $parsed .= $parts[0]; # Text before current tag.
400 # If end of $text has been reached. Stop loop.
401 if (count($parts) < 3) {
406 $tag = $parts[1]; # Tag to handle.
407 $text = $parts[2]; # Remaining text after current tag.
408 $tag_re = preg_quote($tag); # For use in a regular expression.
411 # Check for: Fenced code block marker.
412 # Note: need to recheck the whole tag to disambiguate backtick
413 # fences from code spans
415 if (preg_match('{^\n?([ ]{0,'.($indent+3).'})(~{3,}|`{3,})[ ]*(?:\.?[-_:a-zA-Z0-9]+)?[ ]*(?:'.$this->id_class_attr_nocatch_re.')?[ ]*\n?$}', $tag, $capture)) {
416 # Fenced code block marker: find matching end marker.
417 $fence_indent = strlen($capture[1]); # use captured indent in re
418 $fence_re = $capture[2]; # use captured fence in re
419 if (preg_match('{^(?>.*\n)*?[ ]{'.($fence_indent).'}'.$fence_re.'[ ]*(?:\n|$)}', $text,
422 # End marker found: pass text unchanged until marker.
423 $parsed .= $tag . $matches[0];
424 $text = substr($text, strlen($matches[0]));
427 # No end marker: just skip it.
432 # Check for: Indented code block.
434 else if ($tag{0} == "\n" || $tag{0} == " ") {
435 # Indented code block: pass it unchanged, will be handled
440 # Check for: Code span marker
441 # Note: need to check this after backtick fenced code blocks
443 else if ($tag{0} == "`") {
444 # Find corresponding end marker.
445 $tag_re = preg_quote($tag);
446 if (preg_match('{^(?>.+?|\n(?!\n))*?(?<!`)'.$tag_re.'(?!`)}',
449 # End marker found: pass text unchanged until marker.
450 $parsed .= $tag . $matches[0];
451 $text = substr($text, strlen($matches[0]));
454 # Unmatched marker: just skip it.
459 # Check for: Opening Block level tag or
460 # Opening Context Block tag (like ins and del)
461 # used as a block tag (tag is alone on its line).
463 else if (preg_match('{^<(?:'.$this->block_tags_re.')\b}', $tag) ||
464 ( preg_match('{^<(?:'.$this->context_block_tags_re.')\b}', $tag) &&
465 preg_match($newline_before_re, $parsed) &&
466 preg_match($newline_after_re, $text) )
469 # Need to parse tag and following text using the HTML parser.
470 list($block_text, $text) =
471 $this->_hashHTMLBlocks_inHTML($tag . $text, "hashBlock", true);
473 # Make sure it stays outside of any paragraph by adding newlines.
474 $parsed .= "\n\n$block_text\n\n";
477 # Check for: Clean tag (like script, math)
478 # HTML Comments, processing instructions.
480 else if (preg_match('{^<(?:'.$this->clean_tags_re.')\b}', $tag) ||
481 $tag{1} == '!' || $tag{1} == '?')
483 # Need to parse tag and following text using the HTML parser.
484 # (don't check for markdown attribute)
485 list($block_text, $text) =
486 $this->_hashHTMLBlocks_inHTML($tag . $text, "hashClean", false);
488 $parsed .= $block_text;
491 # Check for: Tag with same name as enclosing tag.
493 else if ($enclosing_tag_re !== '' &&
494 # Same name as enclosing tag.
495 preg_match('{^</?(?:'.$enclosing_tag_re.')\b}', $tag))
498 # Increase/decrease nested tag count.
# A closing tag lowers the depth; an opening tag raises it unless it is
# self-closing (a slash just before the final character).
500 if ($tag{1} == '/') $depth--;
501 else if ($tag{strlen($tag)-2} != '/') $depth++;
505 # Going out of parent element. Clean up and break so we
506 # return to the calling function.
508 $text = $tag . $text;
517 } while ($depth >= 0);
519 return array($parsed, $text);
521 protected function _hashHTMLBlocks_inHTML($text, $hash_method, $md_attr) {
523 # Parse HTML, calling _HashHTMLBlocks_InMarkdown for block tags.
525 # * Calls $hash_method to convert any blocks.
526 # * Stops when the first opening tag closes.
527 # * $md_attr indicates if the use of the `markdown="1"` attribute is allowed.
528 # (it is not inside clean tags)
530 # Returns an array of the form: ( processed text , remaining text )
# NOTE(review): the $tag{1} / $original_text{0} curly-brace string offsets
# used below were deprecated in PHP 7.4 and removed in PHP 8.0; square
# brackets are the supported spelling.
532 if ($text === '') return array('', '');
534 # Regex to match `markdown` attribute inside of a tag.
535 $markdown_attr_re = '
537 \s* # Eat whitespace before the `markdown` attribute
541 (["\']) # $1: quote delimiter
542 (.*?) # $2: attribute value
543 \1 # matching delimiter
545 ([^\s>]*) # $3: unquoted attribute value
547 () # $4: make $3 always defined (avoid warnings)
550 # Regex to match any tag.
552 ( # $2: Capture whole tag.
553 </? # Any opening or closing tag.
556 (?=[\s"\'/a-zA-Z0-9]) # Allowed characters after tag name.
558 ".*?" | # Double quotes (can contain `>`)
559 \'.*?\' | # Single quotes (can contain `>`)
560 .+? # Anything but quotes and `>`.
565 <!-- .*? --> # HTML Comment
567 <\?.*?\?> | <%.*?%> # Processing instruction
569 <!\[CDATA\[.*?\]\]> # CData Block
573 $original_text = $text; # Save original text in case of failure.
575 $depth = 0; # Current depth inside the tag tree.
576 $block_text = ""; # Temporary text holder for current text.
577 $parsed = ""; # Parsed text that will be returned.
580 # Get the name of the starting tag.
581 # (This pattern makes $base_tag_name_re safe without quoting.)
583 if (preg_match('/^<([\w:$]*)\b/', $text, $matches))
584 $base_tag_name_re = $matches[1];
587 # Loop through every tag until we find the corresponding closing tag.
591 # Split the text using the first $tag_match pattern found.
592 # Text before pattern will be first in the array, text after
593 # pattern will be at the end, and between will be any catches made
596 $parts = preg_split($tag_re, $text, 2, PREG_SPLIT_DELIM_CAPTURE);
598 if (count($parts) < 3) {
600 # End of $text reached with unbalanced tag(s).
601 # In that case, we return original text unchanged and pass the
602 # first character as filtered to prevent an infinite loop in the
605 return array($original_text{0}, substr($original_text, 1));
608 $block_text .= $parts[0]; # Text before current tag.
609 $tag = $parts[1]; # Tag to handle.
610 $text = $parts[2]; # Remaining text after current tag.
613 # Check for: Auto-close tag (like <hr/>)
614 # Comments and Processing Instructions.
616 if (preg_match('{^</?(?:'.$this->auto_close_tags_re.')\b}', $tag) ||
617 $tag{1} == '!' || $tag{1} == '?')
619 # Just add the tag to the block as if it was text.
624 # Increase/decrease nested tag count. Only do so if
625 # the tag's name match base tag's.
627 if (preg_match('{^</?'.$base_tag_name_re.'\b}', $tag)) {
628 if ($tag{1} == '/') $depth--;
629 else if ($tag{strlen($tag)-2} != '/') $depth++;
633 # Check for `markdown="1"` attribute and handle it.
# NOTE(review): in the value check below the alternation is not grouped,
# so the pattern accepts any value that starts with 1, contains block, or
# ends with span, not only the three exact values. Grouping the
# alternatives between the anchors would make it exact.
636 preg_match($markdown_attr_re, $tag, $attr_m) &&
637 preg_match('/^1|block|span$/', $attr_m[2] . $attr_m[3]))
639 # Remove `markdown` attribute from opening tag.
640 $tag = preg_replace($markdown_attr_re, '', $tag);
642 # Check if text inside this tag must be parsed in span mode.
# Span mode is used when the attribute value is span, or when it is not
# block and the tag is one of contain_span_tags_re.
# NOTE(review): the value is stored on the object as $this->mode, which is
# not declared among the properties visible in this extract.
643 $this->mode = $attr_m[2] . $attr_m[3];
644 $span_mode = $this->mode == 'span' || $this->mode != 'block' &&
645 preg_match('{^<(?:'.$this->contain_span_tags_re.')\b}', $tag);
647 # Calculate indent before tag.
648 if (preg_match('/(?:^|\n)( *?)(?! ).*?$/', $block_text, $matches)) {
649 $strlen = $this->utf8_strlen;
650 $indent = $strlen($matches[1], 'UTF-8');
655 # End preceding block with this tag.
657 $parsed .= $this->$hash_method($block_text);
659 # Get enclosing tag name for the ParseMarkdown function.
660 # (This pattern makes $tag_name_re safe without quoting.)
661 preg_match('/^<([\w:$]*)\b/', $tag, $matches);
662 $tag_name_re = $matches[1];
664 # Parse the content using the HTML-in-Markdown parser.
665 list ($block_text, $text)
666 = $this->_hashHTMLBlocks_inMarkdown($text, $indent,
667 $tag_name_re, $span_mode);
669 # Outdent markdown text.
671 $block_text = preg_replace("/^[ ]{1,$indent}/m", "",
675 # Append tag content to parsed text.
676 if (!$span_mode) $parsed .= "\n\n$block_text\n\n";
677 else $parsed .= "$block_text";
679 # Start over with a new block.
682 else $block_text .= $tag;
685 } while ($depth > 0);
688 # Hash last block text that wasn't processed inside the loop.
690 $parsed .= $this->$hash_method($block_text);
692 return array($parsed, $text);
696 protected function hashClean($text) {
698 # Called whenever a tag must be hashed when a function inserts a "clean" tag
699 # in $text, it passes through this function and is automatically escaped,
700 # blocking invalid nested overlap.
# Delegates to hashPart() with the boundary 'C'.
702 return $this->hashPart($text, 'C');
706 protected function doAnchors($text) {
708 # Turn Markdown link shortcuts into XHTML <a> tags.
# Three passes are made over $text, in this order: reference-style links,
# inline links, then reference-style shortcuts.
# $this->in_anchor acts as a re-entrancy guard: when it is already set the
# text is returned untouched; it is set for the duration of the passes and
# cleared afterwards.
710 if ($this->in_anchor) return $text;
711 $this->in_anchor = true;
714 # First, handle reference-style links: [link text] [id]
716 $text = preg_replace_callback('{
717 ( # wrap whole match in $1
719 ('.$this->nested_brackets_re.') # link text = $2
722 [ ]? # one optional space
723 (?:\n[ ]*)? # one optional newline followed by spaces
730 array($this, '_doAnchors_reference_callback'), $text);
733 # Next, inline-style links: [link text](url "optional title")
735 $text = preg_replace_callback('{
736 ( # wrap whole match in $1
738 ('.$this->nested_brackets_re.') # link text = $2
745 ('.$this->nested_url_parenthesis_re.') # href = $4
749 ([\'"]) # quote char = $6
752 [ \n]* # ignore any spaces/tabs between closing quote and )
753 )? # title is optional
755 (?:[ ]? '.$this->id_class_attr_catch_re.' )? # $8 = id/class attributes
758 array($this, '_doAnchors_inline_callback'), $text);
761 # Last, handle reference-style shortcuts: [link text]
762 # These must come last in case you've also got [link text][1]
763 # or [link text](/foo)
765 $text = preg_replace_callback('{
766 ( # wrap whole match in $1
768 ([^\[\]]+) # link text = $2; can\'t contain [ or ]
772 array($this, '_doAnchors_reference_callback'), $text);
774 $this->in_anchor = false;
777 protected function _doAnchors_reference_callback($matches) {
# Builds the <a> element for a reference-style link. $1 is the whole
# match, $2 the link text and $3 the (possibly absent) link id. When the
# id is not a known key of $this->urls the original text is kept.
778 $whole_match = $matches[1];
779 $link_text = $matches[2];
# Bound by reference so that a missing $3 becomes null instead of being
# read as an undefined index.
780 $link_id =& $matches[3];
782 if ($link_id == "") {
783 # for shortcut links like [this][] or [this].
784 $link_id = $link_text;
787 # lower-case and turn embedded newlines into spaces
788 $link_id = strtolower($link_id);
789 $link_id = preg_replace('{[ ]?\n}', ' ', $link_id);
791 if (isset($this->urls[$link_id])) {
792 $url = $this->urls[$link_id];
793 $url = $this->encodeURLAttribute($url);
795 $result = "<a href=\"$url\"";
796 if ( isset( $this->titles[$link_id] ) ) {
797 $title = $this->titles[$link_id];
798 $title = $this->encodeAttribute($title);
799 $result .= " title=\"$title\"";
# Extra id/class attributes recorded with the link definition, if any.
801 if (isset($this->ref_attr[$link_id]))
802 $result .= $this->ref_attr[$link_id];
804 $link_text = $this->runSpanGamut($link_text);
805 $result .= ">$link_text</a>";
806 $result = $this->hashPart($result);
# Unknown link id: leave the text as it was.
809 $result = $whole_match;
813 protected function _doAnchors_inline_callback($matches) {
# Builds the hashed <a> element for an inline link. The URL is taken from
# $3 or, when $3 is empty, from $4; $7 is the optional title and $8 the
# optional id/class attribute block.
# NOTE(review): the link text goes through runSpanGamut() twice, once when
# it is read from $2 and again just before it is written into the tag.
# Confirm whether the second pass is intentional.
814 $whole_match = $matches[1];
815 $link_text = $this->runSpanGamut($matches[2]);
816 $url = $matches[3] == '' ? $matches[4] : $matches[3];
817 $title =& $matches[7];
818 $attr = $this->doExtraAttributes("a", $dummy =& $matches[8]);
820 // if the URL was of the form <s p a c e s> it got caught by the HTML
821 // tag parser and hashed. Need to reverse the process before using the URL.
822 $unhashed = $this->unhash($url);
823 if ($unhashed != $url)
824 $url = preg_replace('/^<(.*)>$/', '\1', $unhashed);
826 $url = $this->encodeURLAttribute($url);
828 $result = "<a href=\"$url\"";
830 $title = $this->encodeAttribute($title);
831 $result .= " title=\"$title\"";
835 $link_text = $this->runSpanGamut($link_text);
836 $result .= ">$link_text</a>";
838 return $this->hashPart($result);
842 protected function doImages($text) {
844 # Turn Markdown image shortcuts into <img> tags.
# Two passes are made over $text: reference-style images first, then
# inline images.
847 # First, handle reference-style labeled images: ![alt text][id]
849 $text = preg_replace_callback('{
850 ( # wrap whole match in $1
852 ('.$this->nested_brackets_re.') # alt text = $2
855 [ ]? # one optional space
856 (?:\n[ ]*)? # one optional newline followed by spaces
864 array($this, '_doImages_reference_callback'), $text);
867 # Next, handle inline images: ![alt text](url "optional title")
868 # Don't forget: encode * and _
870 $text = preg_replace_callback('{
871 ( # wrap whole match in $1
873 ('.$this->nested_brackets_re.') # alt text = $2
875 \s? # One optional whitespace character
879 <(\S*)> # src url = $3
881 ('.$this->nested_url_parenthesis_re.') # src url = $4
885 ([\'"]) # quote char = $6
889 )? # title is optional
891 (?:[ ]? '.$this->id_class_attr_catch_re.' )? # $8 = id/class attributes
894 array($this, '_doImages_inline_callback'), $text);
898 protected function _doImages_reference_callback($matches) {
# Builds the hashed <img> element for a reference-style image. $1 is the
# whole match, $2 the alt text and $3 the link id; an empty id falls back
# to the lower-cased alt text.
899 $whole_match = $matches[1];
900 $alt_text = $matches[2];
901 $link_id = strtolower($matches[3]);
903 if ($link_id == "") {
904 $link_id = strtolower($alt_text); # for shortcut links like ![this][].
907 $alt_text = $this->encodeAttribute($alt_text);
908 if (isset($this->urls[$link_id])) {
909 $url = $this->encodeURLAttribute($this->urls[$link_id]);
910 $result = "<img src=\"$url\" alt=\"$alt_text\"";
911 if (isset($this->titles[$link_id])) {
912 $title = $this->titles[$link_id];
913 $title = $this->encodeAttribute($title);
914 $result .= " title=\"$title\"";
# Extra id/class attributes recorded with the link definition, if any.
916 if (isset($this->ref_attr[$link_id]))
917 $result .= $this->ref_attr[$link_id];
918 $result .= $this->empty_element_suffix;
919 $result = $this->hashPart($result);
922 # If there's no such link ID, leave intact:
923 $result = $whole_match;
928 protected function _doImages_inline_callback($matches) {
# Builds the hashed <img> element for an inline image. The URL is taken
# from $3 or, when $3 is empty, from $4; $7 is the optional title and $8
# the optional id/class attribute block.
929 $whole_match = $matches[1];
930 $alt_text = $matches[2];
931 $url = $matches[3] == '' ? $matches[4] : $matches[3];
932 $title =& $matches[7];
933 $attr = $this->doExtraAttributes("img", $dummy =& $matches[8]);
935 $alt_text = $this->encodeAttribute($alt_text);
936 $url = $this->encodeURLAttribute($url);
937 $result = "<img src=\"$url\" alt=\"$alt_text\"";
939 $title = $this->encodeAttribute($title);
940 $result .= " title=\"$title\""; # $title was encoded on the previous line
943 $result .= $this->empty_element_suffix;
945 return $this->hashPart($result);
949 protected function doHeaders($text) {
951 # Redefined to add id and class attribute support.
953 # Setext-style headers:
954 # Header 1 {#header1}
957 # Header 2 {#header2 .class1 .class2}
# For the setext pattern the callback reads the header text from $1, the
# id/class attributes from $2 and the underline from $3.
960 $text = preg_replace_callback(
962 (^.+?) # $1: Header text
963 (?:[ ]+ '.$this->id_class_attr_catch_re.' )? # $3 = id/class attributes
964 [ ]*\n(=+|-+)[ ]*\n+ # $3: Header footer
966 array($this, '_doHeaders_callback_setext'), $text);
969 # # Header 1 {#header1}
970 # ## Header 2 {#header2}
971 # ## Header 2 with closing hashes ## {#header3.class1.class2}
973 # ###### Header 6 {.class2}
975 $text = preg_replace_callback('{
976 ^(\#{1,6}) # $1 = string of #\'s
978 (.+?) # $2 = Header text
980 \#* # optional closing #\'s (not counted)
981 (?:[ ]+ '.$this->id_class_attr_catch_re.' )? # $3 = id/class attributes
985 array($this, '_doHeaders_callback_atx'), $text);
989 protected function _doHeaders_callback_setext($matches) {
# Builds the hashed header block for a setext-style header. $1 is the
# header text, $2 the optional id/class attributes and $3 the underline.
# The first condition special-cases a single-dash underline under a line
# that starts with a dash and a space (its body is not visible in this
# extract).
# NOTE(review): the $matches[3]{0} curly-brace string offset was removed
# in PHP 8.0; square brackets are the supported spelling.
990 if ($matches[3] == '-' && preg_match('{^- }', $matches[1]))
# An underline of = gives level 1, anything else level 2.
993 $level = $matches[3]{0} == '=' ? 1 : 2;
# Ask the optional header_id_func callable for a default id.
995 $defaultId = is_callable($this->header_id_func) ? call_user_func($this->header_id_func, $matches[1]) : null;
997 $attr = $this->doExtraAttributes("h$level", $dummy =& $matches[2], $defaultId);
998 $block = "<h$level$attr>".$this->runSpanGamut($matches[1])."</h$level>";
999 return "\n" . $this->hashBlock($block) . "\n\n";
1001 protected function _doHeaders_callback_atx($matches) {
# Builds the hashed header block for an atx-style header. The level is
# the number of leading # characters in $1, $2 is the header text and $3
# the optional id/class attributes.
1002 $level = strlen($matches[1]);
# Ask the optional header_id_func callable for a default id.
1004 $defaultId = is_callable($this->header_id_func) ? call_user_func($this->header_id_func, $matches[2]) : null;
1005 $attr = $this->doExtraAttributes("h$level", $dummy =& $matches[3], $defaultId);
1006 $block = "<h$level$attr>".$this->runSpanGamut($matches[2])."</h$level>";
1007 return "\n" . $this->hashBlock($block) . "\n\n";
1011 protected function doTables($text) {
# Converts pipe tables in $text. Tables whose rows start with a pipe are
# handled first, then tables without a leading pipe.
1015 $less_than_tab = $this->tab_width - 1;
1017 # Find tables with leading pipe.
1019 # | Header 1 | Header 2
1020 # | -------- | --------
1024 $text = preg_replace_callback('
1027 [ ]{0,'.$less_than_tab.'} # Allowed whitespace.
1028 [|] # Optional leading pipe (present)
1029 (.+) \n # $1: Header row (at least one pipe)
1031 [ ]{0,'.$less_than_tab.'} # Allowed whitespace.
1032 [|] ([ ]*[-:]+[-| :]*) \n # $2: Header underline
1036 [ ]* # Allowed whitespace.
1037 [|] .* \n # Row content.
1040 (?=\n|\Z) # Stop at final double newline.
1042 array($this, '_doTable_leadingPipe_callback'), $text);
1045 # Find tables without leading pipe.
1047 # Header 1 | Header 2
1048 # -------- | --------
# NOTE(review): the callback below is named _DoTable_callback while the
# method is declared as _doTable_callback. PHP method names are
# case-insensitive so this resolves, but the spelling is inconsistent.
1052 $text = preg_replace_callback('
1055 [ ]{0,'.$less_than_tab.'} # Allowed whitespace.
1056 (\S.*[|].*) \n # $1: Header row (at least one pipe)
1058 [ ]{0,'.$less_than_tab.'} # Allowed whitespace.
1059 ([-:]+[ ]*[|][-| :]*) \n # $2: Header underline
1063 .* [|] .* \n # Row content
1066 (?=\n|\Z) # Stop at final double newline.
1068 array($this, '_DoTable_callback'), $text);
1072 protected function _doTable_leadingPipe_callback($matches) {
# Adapter for tables written with a leading pipe: strips that pipe from
# every body row and forwards to _doTable_callback with the same layout
# of matches (whole match, header row, underline, body).
1073 $head = $matches[1];
1074 $underline = $matches[2];
1075 $content = $matches[3];
1077 # Remove leading pipe for each row.
1078 $content = preg_replace('/^ *[|]/m', '', $content);
1080 return $this->_doTable_callback(array($matches[0], $head, $underline, $content));
1082 protected function _doTable_makeAlignAttr($alignname)
# Returns the attribute (with a leading space) that aligns a table cell.
# With an empty table_align_class_tmpl an align attribute is produced;
# otherwise a class attribute whose name is the template with %% replaced
# by $alignname.
1084 if (empty($this->table_align_class_tmpl))
1085 return " align=\"$alignname\"";
1087 $classname = str_replace('%%', $alignname, $this->table_align_class_tmpl);
1088 return " class=\"$classname\"";
1090 protected function _doTable_callback($matches) {
# Builds the hashed <table> block from a matched table. $1 is the header
# row, $2 the header underline and $3 the body rows.
# NOTE(review): the handling of columns without an alignment marker and
# the row wrapper tags are not visible in this extract.
1091 $head = $matches[1];
1092 $underline = $matches[2];
1093 $content = $matches[3];
1095 # Remove any trailing pipes for each line.
1096 $head = preg_replace('/[|] *$/m', '', $head);
1097 $underline = preg_replace('/[|] *$/m', '', $underline);
1098 $content = preg_replace('/[|] *$/m', '', $content);
1100 # Reading alignment from header underline.
# A colon on the right means right, on both sides center, on the left left.
1101 $separators = preg_split('/ *[|] */', $underline);
1102 foreach ($separators as $n => $s) {
1103 if (preg_match('/^ *-+: *$/', $s))
1104 $attr[$n] = $this->_doTable_makeAlignAttr('right');
1105 else if (preg_match('/^ *:-+: *$/', $s))
1106 $attr[$n] = $this->_doTable_makeAlignAttr('center');
1107 else if (preg_match('/^ *:-+ *$/', $s))
1108 $attr[$n] = $this->_doTable_makeAlignAttr('left');
1113 # Parsing span elements, including code spans, character escapes,
1114 # and inline HTML tags, so that pipes inside those get ignored.
1115 $head = $this->parseSpan($head);
1116 $headers = preg_split('/ *[|] */', $head);
1117 $col_count = count($headers);
# Make sure there is one attribute entry per header column.
1118 $attr = array_pad($attr, $col_count, '');
1120 # Write column headers.
1121 $text = "<table>\n";
1122 $text .= "<thead>\n";
1124 foreach ($headers as $n => $header)
1125 $text .= " <th$attr[$n]>".$this->runSpanGamut(trim($header))."</th>\n";
1127 $text .= "</thead>\n";
1129 # Split content by row.
1130 $rows = explode("\n", trim($content, "\n"));
1132 $text .= "<tbody>\n";
1133 foreach ($rows as $row) {
1134 # Parsing span elements, including code spans, character escapes,
1135 # and inline HTML tags, so that pipes inside those get ignored.
1136 $row = $this->parseSpan($row);
1138 # Split row by cell.
# The limit keeps a row from producing more cells than there are header
# columns (surplus pipes stay in the last cell); short rows are padded
# with empty cells.
1139 $row_cells = preg_split('/ *[|] */', $row, $col_count);
1140 $row_cells = array_pad($row_cells, $col_count, '');
1143 foreach ($row_cells as $n => $cell)
1144 $text .= " <td$attr[$n]>".$this->runSpanGamut(trim($cell))."</td>\n";
1147 $text .= "</tbody>\n";
1148 $text .= "</table>";
1150 return $this->hashBlock($text) . "\n";
1154 protected function doDefLists($text) {
1156 # Form HTML definition lists.
# Every whole definition list found in $text is handed to
# _doDefLists_callback.
1158 $less_than_tab = $this->tab_width - 1;
1160 # Re-usable pattern to match any entire dl list:
1161 $whole_list_re = '(?>
1164 [ ]{0,'.$less_than_tab.'}
1165 ((?>.*\S.*\n)+) # $3 = defined term
1167 [ ]{0,'.$less_than_tab.'}:[ ]+ # colon starting definition
1175 (?! # Negative lookahead for another term
1176 [ ]{0,'.$less_than_tab.'}
1177 (?: \S.*\n )+? # defined term
1179 [ ]{0,'.$less_than_tab.'}:[ ]+ # colon starting definition
1181 (?! # Negative lookahead for another definition
1182 [ ]{0,'.$less_than_tab.'}:[ ]+ # colon starting definition
1188 $text = preg_replace_callback('{
1192 array($this, '_doDefLists_callback'), $text);
1196 protected function _doDefLists_callback($matches) {
# Wraps one matched definition list ($1) in a hashed <dl> block after its
# items have been converted by processDefListItems().
# NOTE(review): the two original comments below do not describe the code
# that follows them (no list-marker patterns are defined and no newlines
# are added here); they look like leftovers and could be removed.
1197 # Re-usable patterns to match list item bullets and number markers:
1198 $list = $matches[1];
1200 # Turn double returns into triple returns, so that we can make a
1201 # paragraph for the last item in a list, if necessary:
1202 $result = trim($this->processDefListItems($list));
1203 $result = "<dl>\n" . $result . "\n</dl>";
1204 return $this->hashBlock($result) . "\n\n";
1208 protected function processDefListItems($list_str) {
1210 # Process the contents of a single definition list, splitting it
1211 # into individual term and definition list items.
# Terms are converted first (by _processDefListItems_callback_dt), then
# definitions (by _processDefListItems_callback_dd).
1213 $less_than_tab = $this->tab_width - 1;
1215 # trim trailing blank lines:
1216 $list_str = preg_replace("/\n{2,}\\z/", "\n", $list_str);
1218 # Process definition terms.
1219 $list_str = preg_replace_callback('{
1220 (?>\A\n?|\n\n+) # leading line
1221 ( # definition terms = $1
1222 [ ]{0,'.$less_than_tab.'} # leading whitespace
1223 (?!\:[ ]|[ ]) # negative lookahead for a definition
1224 # mark (colon) or more whitespace.
1225 (?> \S.* \n)+? # actual term (not whitespace).
1227 (?=\n?[ ]{0,3}:[ ]) # lookahead for following line feed
1228 # with a definition mark.
1230 array($this, '_processDefListItems_callback_dt'), $list_str);
1232 # Process actual definitions.
1233 $list_str = preg_replace_callback('{
1234 \n(\n+)? # leading line = $1
1235 ( # marker space = $2
1236 [ ]{0,'.$less_than_tab.'} # whitespace before colon
1237 \:[ ]+ # definition mark (colon)
1239 ((?s:.+?)) # definition text = $3
1240 (?= \n+ # stop at next definition mark,
1241 (?: # next term or end of text
1242 [ ]{0,'.$less_than_tab.'} \:[ ] |
1247 array($this, '_processDefListItems_callback_dd'), $list_str);
# Callback for the term pass of processDefListItems().
#
# Capture group 1 is trimmed and split on newlines; each resulting line is
# trimmed, run through the span gamut and emitted as its own <dt> element
# preceded by a newline.
#
# Returns the accumulated <dt> markup followed by a newline.
1251 protected function _processDefListItems_callback_dt($matches) {
1252 $terms = explode("\n", trim($matches[1]));
1254 foreach ($terms as $term) {
1255 $term = $this->runSpanGamut(trim($term));
1256 $text .= "\n<dt>" . $term . "</dt>";
1258 return $text . "\n";
# Callback for the definition pass of processDefListItems().
#
# $matches[1] - blank line(s) that preceded the definition marker, if any
# $matches[2] - the marker itself (indentation, colon and following spaces)
#
# When a blank line preceded the marker, or the definition text contains two
# or more consecutive newlines, the marker is replaced by spaces of the same
# width and the outdented text is run through the block gamut. The span-gamut
# call further down is presumably the branch for compact single-paragraph
# definitions - TODO confirm the branch structure.
#
# Returns the definition wrapped in a <dd> element with a newline on each
# side.
1260 protected function _processDefListItems_callback_dd($matches) {
1261 $leading_line = $matches[1];
1262 $marker_space = $matches[2];
1265 if ($leading_line || preg_match('/\n{2,}/', $def)) {
1266 # Replace marker with the appropriate whitespace indentation
1267 $def = str_repeat(' ', strlen($marker_space)) . $def;
1268 $def = $this->runBlockGamut($this->outdent($def . "\n\n"));
1269 $def = "\n". $def ."\n";
1273 $def = $this->runSpanGamut($this->outdent($def));
1276 return "\n<dd>" . $def . "</dd>\n";
# Find fenced code blocks in $text and convert them to HTML.
#
# A fence is a run of three or more tildes or backticks. The opening fence
# may be followed by a standalone class name (with an optional leading dot)
# and by extra attributes. Every match is handed to
# _doFencedCodeBlocks_callback().
#
# $text - string to scan for fenced code blocks
1280 protected function doFencedCodeBlocks($text) {
1282 # Adding the fenced code block syntax to regular Markdown:
# NOTE(review): unlike the other methods in this class, this assigns the
# full tab_width rather than tab_width - 1 despite the variable name, and the
# variable does not appear in the pattern lines shown here - confirm whether
# it is used at all.
1288 $less_than_tab = $this->tab_width;
1290 $text = preg_replace_callback('{
1294 (?:~{3,}|`{3,}) # 3 or more tildes/backticks.
1298 \.?([-_:a-zA-Z0-9]+) # 2: standalone class name
1302 '.$this->id_class_attr_catch_re.' # 3: Extra attributes
1304 [ ]* \n # Whitespace and newline following marker.
1309 (?!\1 [ ]* \n) # Not a closing marker.
1317 array($this, '_doFencedCodeBlocks_callback'), $text);
# Callback for doFencedCodeBlocks(): turn one matched fenced block into a
# <pre><code> element.
#
# $matches[2] - standalone class name from the opening fence (may be empty)
# $matches[3] - extra attribute text from the opening fence
# $matches[4] - content of the code block
#
# Depending on code_attr_on_pre, the attribute string built by
# doExtraAttributes() is placed on the <pre> tag or on the <code> tag.
#
# Returns the element passed through hashBlock(), with two newlines on each
# side.
1321 protected function _doFencedCodeBlocks_callback($matches) {
1322 $classname =& $matches[2];
1323 $attrs =& $matches[3];
1324 $codeblock = $matches[4];
# A configured content callback receives the code and the class name;
# the htmlspecialchars() call escapes the code while leaving quotes as-is.
1326 if ($this->code_block_content_func) {
1327 $codeblock = call_user_func($this->code_block_content_func, $codeblock, $classname);
1329 $codeblock = htmlspecialchars($codeblock, ENT_NOQUOTES);
# Leading newlines become explicit line-break tags.
1332 $codeblock = preg_replace_callback('/^\n+/',
1333 array($this, '_doFencedCodeBlocks_newlines'), $codeblock);
1336 if ($classname != "") {
# Bracket offset syntax is required here: the curly-brace form is
# deprecated since PHP 7.4 and is a parse error from PHP 8.0 on.
1337 if ($classname[0] == '.')
1338 $classname = substr($classname, 1);
1339 $classes[] = $this->code_class_prefix.$classname;
1341 $attr_str = $this->doExtraAttributes($this->code_attr_on_pre ? "pre" : "code", $attrs, null, $classes);
1342 $pre_attr_str = $this->code_attr_on_pre ? $attr_str : '';
1343 $code_attr_str = $this->code_attr_on_pre ? '' : $attr_str;
1344 $codeblock = "<pre$pre_attr_str><code$code_attr_str>$codeblock</code></pre>";
1346 return "\n\n".$this->hashBlock($codeblock)."\n\n";
# Callback used on the leading newlines of a fenced code block.
#
# Returns one line-break tag (the text <br followed by empty_element_suffix)
# for every character of the matched run, so that the blank lines at the top
# of the block are kept as explicit breaks.
1348 protected function _doFencedCodeBlocks_newlines($matches) {
1349 return str_repeat("<br$this->empty_element_suffix",
1350 strlen($matches[0]));
1355 # Redefining emphasis markers so that emphasis by underscore does not
1356 # work in the middle of a word.
#
# Single-marker (em) patterns:
#  - empty-string key: a lone asterisk, or an underscore that is not preceded
#    by a letter, digit or underscore; neither may be followed by optional
#    punctuation plus whitespace.
#  - asterisk key: a lone asterisk not preceded by whitespace or an asterisk.
#  - underscore key: an underscore not preceded by whitespace or an
#    underscore and not followed by a letter, digit or underscore.
# Presumably the empty-string key is the opening pattern and the other keys
# are the closing patterns for a span opened with that marker - TODO confirm
# against the parent class.
1358 protected $em_relist = array(
1359 '' => '(?:(?<!\*)\*(?!\*)|(?<![a-zA-Z0-9_])_(?!_))(?![\.,:;]?\s)',
1360 '*' => '(?<![\s*])\*(?!\*)',
1361 '_' => '(?<![\s_])_(?![a-zA-Z0-9_])',
# Double-marker (strong) counterparts of the em patterns: the same
# look-around rules applied to a pair of asterisks or a pair of underscores,
# so a double underscore next to a letter, digit or underscore is not
# treated as a marker.
1363 protected $strong_relist = array(
1364 '' => '(?:(?<!\*)\*\*(?!\*)|(?<![a-zA-Z0-9_])__(?!_))(?![\.,:;]?\s)',
1365 '**' => '(?<![\s*])\*\*(?!\*)',
1366 '__' => '(?<![\s_])__(?![a-zA-Z0-9_])',
# Triple-marker (combined em and strong) counterparts of the em patterns:
# the same look-around rules applied to three asterisks or three
# underscores.
1368 protected $em_strong_relist = array(
1369 '' => '(?:(?<!\*)\*\*\*(?!\*)|(?<![a-zA-Z0-9_])___(?!_))(?![\.,:;]?\s)',
1370 '***' => '(?<![\s*])\*\*\*(?!\*)',
1371 '___' => '(?<![\s_])___(?![a-zA-Z0-9_])',
# Split $text into paragraphs and wrap the plain ones in <p> tags.
#
# Each chunk is run through the span gamut and trimmed. A regex test on the
# hash tokens decides whether the chunk counts as a paragraph ($is_p). The
# chunks are then joined with blank lines and the joined text is passed
# through unhash().
1375 protected function formParagraphs($text) {
1378 # $text - string to process with html <p> tags
1380 # Strip leading and trailing lines:
1381 $text = preg_replace('/\A\n+|\n+\z/', '', $text);
# Split on runs of two or more newlines, dropping empty pieces.
1383 $grafs = preg_split('/\n{2,}/', $text, -1, PREG_SPLIT_NO_EMPTY);
1386 # Wrap <p> tags and unhashify HTML blocks
1388 foreach ($grafs as $key => $value) {
1389 $value = trim($this->runSpanGamut($value));
1391 # Check if this should be enclosed in a paragraph.
1392 # Clean tag hashes & block tag hashes are left alone.
# NOTE(review): because alternation binds loosest, the trailing $ anchors
# only the C-token alternative; a value that merely starts with a B-token
# also fails the paragraph test - confirm this asymmetry is intended.
1393 $is_p = !preg_match('/^B\x1A[0-9]+B|^C\x1A[0-9]+C$/', $value);
1396 $value = "<p>$value</p>";
1398 $grafs[$key] = $value;
1401 # Join grafs in one text, then unhash HTML tags.
1402 $text = implode("\n\n", $grafs);
1404 # Finish by removing any tag hashes still present in $text.
1405 $text = $this->unhash($text);
# Find footnote definitions in $text and hand each one to
# _stripFootnotes_callback(), which records the footnote and supplies the
# replacement text.
#
# A definition starts with at most tab_width - 1 spaces, then [^id],
# an optional space and a colon.
#
# $text - string to scan for footnote definitions
1413 protected function stripFootnotes($text) {
1415 # Strips footnote definitions from text, stores the footnote text in
1418 $less_than_tab = $this->tab_width - 1;
1420 # Footnote defs are in the form: [^id]: footnote text
1421 $text = preg_replace_callback('{
1422 ^[ ]{0,'.$less_than_tab.'}\[\^(.+?)\][ ]?: # note_id = $1
1424 \n? # maybe *one* newline
1425 ( # text = $2 (no blank lines allowed)
1430 (?!\[.+?\][ ]?:\s)# negative lookahead for footnote or link definition marker.
1431 (?!\n+[ ]{0,3}\S)# ensure line is not blank and followed
1432 # by non-indented content
1436 array($this, '_stripFootnotes_callback'),
# Callback for stripFootnotes(): record one footnote definition.
#
# The note id (capture group 1) is prefixed with fn_id_prefix and used as the
# key under which the outdented footnote text (capture group 2) is stored in
# $this->footnotes.
#
# Returns an empty string, so the definition is removed from the text.
1440 protected function _stripFootnotes_callback($matches) {
1441 $note_id = $this->fn_id_prefix . $matches[1];
1442 $this->footnotes[$note_id] = $this->outdent($matches[2]);
1443 return ''; # String that will replace the block
# Replace footnote references of the form [^id] with placeholder tokens.
#
# The token is the letter F, byte 0x1A, the text fn: followed by the id,
# byte 0x1A and a colon; appendFootnotes() matches the same token shape
# later. Nothing is replaced while $this->in_anchor is set.
#
# $text - string to scan for footnote references
1447 protected function doFootnotes($text) {
1449 # Replace footnote references in $text [^id] with a special text-token
1450 # which will be replaced by the actual footnote marker in appendFootnotes.
1452 if (!$this->in_anchor) {
1453 $text = preg_replace('{\[\^(.+?)\]}', "F\x1Afn:\\1\x1A:", $text);
# Resolve footnote marker tokens in $text and append the footnote list.
#
# Every marker token is first passed to _appendFootnotes_callback(), which
# queues referenced footnotes in $this->footnotes_ordered. When that queue is
# not empty, a <div class="footnotes"> section with a rule and an ordered
# list is appended. Each queued footnote is removed from the bookkeeping
# arrays, run through the block gamut, has its own marker tokens resolved
# (the callback may queue further footnotes, which is why the queue is
# drained with a while loop) and receives one backlink per reference.
#
# $text - document text containing footnote marker tokens
1459 protected function appendFootnotes($text) {
1461 # Append footnote list to text.
1463 $text = preg_replace_callback('{F\x1Afn:(.*?)\x1A:}',
1464 array($this, '_appendFootnotes_callback'), $text);
1466 if (!empty($this->footnotes_ordered)) {
1468 $text .= "<div class=\"footnotes\">\n";
1469 $text .= "<hr". $this->empty_element_suffix ."\n";
1470 $text .= "<ol>\n\n";
# Attribute template shared by all backlinks; it may contain the %%
# placeholder, which is filled in per footnote inside the loop.
1473 if ($this->fn_backlink_class != "") {
1474 $class = $this->fn_backlink_class;
1475 $class = $this->encodeAttribute($class);
1476 $attr .= " class=\"$class\"";
1478 if ($this->fn_backlink_title != "") {
1479 $title = $this->fn_backlink_title;
1480 $title = $this->encodeAttribute($title);
1481 $attr .= " title=\"$title\"";
1483 $backlink_text = $this->fn_backlink_html;
1486 while (!empty($this->footnotes_ordered)) {
1487 $footnote = reset($this->footnotes_ordered);
1488 $note_id = key($this->footnotes_ordered);
1489 unset($this->footnotes_ordered[$note_id]);
1490 $ref_count = $this->footnotes_ref_count[$note_id];
1491 unset($this->footnotes_ref_count[$note_id]);
1492 unset($this->footnotes[$note_id]);
1494 $footnote .= "\n"; # Need to append newline before parsing.
1495 $footnote = $this->runBlockGamut("$footnote\n");
1496 $footnote = preg_replace_callback('{F\x1Afn:(.*?)\x1A:}',
1497 array($this, '_appendFootnotes_callback'), $footnote);
# Fill the placeholder into a per-note copy: overwriting $attr itself
# would consume the %% on the first footnote and make every later
# backlink reuse that first number.
1499 $note_attr = str_replace("%%", ++$num, $attr);
1500 $note_id = $this->encodeAttribute($note_id);
1502 # Prepare backlink, multiple backlinks if multiple references
1503 $backlink = "<a href=\"#fnref:$note_id\"$note_attr>$backlink_text</a>";
1504 for ($ref_num = 2; $ref_num <= $ref_count; ++$ref_num) {
1505 $backlink .= " <a href=\"#fnref$ref_num:$note_id\"$note_attr>$backlink_text</a>";
1507 # Add backlink to last paragraph; create new paragraph if needed.
1508 if (preg_match('{</p>$}', $footnote)) {
1509 $footnote = substr($footnote, 0, -4) . " $backlink</p>";
1511 $footnote .= "\n\n<p>$backlink</p>";
1514 $text .= "<li id=\"fn:$note_id\">\n";
1515 $text .= $footnote . "\n";
1516 $text .= "</li>\n\n";
# Callback for appendFootnotes(): turn one footnote marker token into its
# superscript reference link.
#
# The id from capture group 1 is prefixed with fn_id_prefix. When a footnote
# with that id exists in $this->footnotes, its content is copied into
# $this->footnotes_ordered with a reference count of 1 and a number taken
# from footnote_counter; the line that increments the reference count is
# presumably the branch for repeated references to an already queued
# footnote - TODO confirm. The link attributes come from fn_link_class and
# fn_link_title, with %% replaced by the footnote number.
#
# When no footnote exists for the id, the original [^id] text is returned.
1524 protected function _appendFootnotes_callback($matches) {
1525 $node_id = $this->fn_id_prefix . $matches[1];
1527 # Create footnote marker only if it has a corresponding footnote *and*
1528 # the footnote hasn't been used by another marker.
1529 if (isset($this->footnotes[$node_id])) {
1530 $num =& $this->footnotes_numbers[$node_id];
1532 # Transfer footnote content to the ordered list and give it its
1534 $this->footnotes_ordered[$node_id] = $this->footnotes[$node_id];
1535 $this->footnotes_ref_count[$node_id] = 1;
1536 $num = $this->footnote_counter++;
1537 $ref_count_mark = '';
1539 $ref_count_mark = $this->footnotes_ref_count[$node_id] += 1;
1543 if ($this->fn_link_class != "") {
1544 $class = $this->fn_link_class;
1545 $class = $this->encodeAttribute($class);
1546 $attr .= " class=\"$class\"";
1548 if ($this->fn_link_title != "") {
1549 $title = $this->fn_link_title;
1550 $title = $this->encodeAttribute($title);
1551 $attr .= " title=\"$title\"";
1554 $attr = str_replace("%%", $num, $attr);
1555 $node_id = $this->encodeAttribute($node_id);
1558 "<sup id=\"fnref$ref_count_mark:$node_id\">".
1559 "<a href=\"#fn:$node_id\"$attr>$num</a>".
1563 return "[^".$matches[1]."]";
1567 ### Abbreviations ###
# Find abbreviation definitions in $text and hand each one to
# _stripAbbreviations_callback(), which records the abbreviation and supplies
# the replacement text.
#
# $text - string to scan for abbreviation definitions
1569 protected function stripAbbreviations($text) {
1571 # Strips abbreviations from text, stores titles in hash references.
1573 $less_than_tab = $this->tab_width - 1;
1575 # Abbreviation defs are in the form: *[id]: description
1576 $text = preg_replace_callback('{
1577 ^[ ]{0,'.$less_than_tab.'}\*\[(.+?)\][ ]?: # abbr_id = $1
1578 (.*) # text = $2 (no blank lines allowed)
1580 array($this, '_stripAbbreviations_callback'),
# Callback for stripAbbreviations(): record one abbreviation definition.
#
# The abbreviated word (capture group 1) is regex-quoted and appended to the
# abbr_word_re alternation, separated by a pipe when the alternation is not
# empty. The trimmed description (capture group 2) is stored under the word
# in $this->abbr_desciptions (the property name is spelled this way
# everywhere it is used in this class).
#
# Returns an empty string, so the definition is removed from the text.
1584 protected function _stripAbbreviations_callback($matches) {
1585 $abbr_word = $matches[1];
1586 $abbr_desc = $matches[2];
1587 if ($this->abbr_word_re)
1588 $this->abbr_word_re .= '|';
1589 $this->abbr_word_re .= preg_quote($abbr_word);
1590 $this->abbr_desciptions[$abbr_word] = trim($abbr_desc);
1591 return ''; # String that will replace the block
# Wrap known abbreviations found in $text in abbr elements.
#
# Does nothing unless abbr_word_re is non-empty. Otherwise a pattern is
# built around the abbr_word_re alternation and every match is handed to
# _doAbbreviations_callback().
#
# $text - string to scan for abbreviations
1595 protected function doAbbreviations($text) {
1597 # Find defined abbreviations in text and wrap them in <abbr> elements.
1599 if ($this->abbr_word_re) {
1600 // cannot use the /x modifier because abbr_word_re may
1601 // contain significant spaces:
1602 $text = preg_replace_callback('{'.
1604 '(?:'.$this->abbr_word_re.')'.
1607 array($this, '_doAbbreviations_callback'), $text);
# Callback for doAbbreviations(): wrap one matched abbreviation.
#
# The whole match is looked up in $this->abbr_desciptions. Two outputs are
# produced below, both passed through hashPart(): a bare abbr element, and
# an abbr element whose title attribute carries the description encoded with
# encodeAttribute(). Presumably the bare form is used when the stored
# description is empty - TODO confirm the condition.
1611 protected function _doAbbreviations_callback($matches) {
1612 $abbr = $matches[0];
1613 if (isset($this->abbr_desciptions[$abbr])) {
1614 $desc = $this->abbr_desciptions[$abbr];
1616 return $this->hashPart("<abbr>$abbr</abbr>");
1618 $desc = $this->encodeAttribute($desc);
1619 return $this->hashPart("<abbr title=\"$desc\">$abbr</abbr>");