3 # Markdown - A text-to-HTML conversion tool for web writers
6 # Copyright (c) 2004-2014 Michel Fortin
7 # <http://michelf.com/projects/php-markdown/>
10 # Copyright (c) 2004-2006 John Gruber
11 # <http://daringfireball.net/projects/markdown/>
17 # Markdown Parser Class
20 class Markdown implements MarkdownInterface {
24 const MARKDOWNLIB_VERSION = "1.4.1";
26 ### Simple Function Interface ###
public static function defaultTransform($text) {
    #
    # Simple function interface: transform $text with a parser of the
    # class this method was called on and return the result of that
    # parser's transform method. This works for derived classes too.
    #
    # One parser instance per called class is kept in a static list and
    # reused by later calls instead of being rebuilt every time.
    #
    static $parser_list = array();

    # Take parser class on which this function was called.
    $parser_class = \get_called_class();

    # Create the parser if not already set.
    if (!isset($parser_list[$parser_class])) {
        $parser_list[$parser_class] = new $parser_class;
    }

    # Transform text using parser.
    return $parser_list[$parser_class]->transform($text);
}
48 ### Configuration Variables ###
50 # Change to ">" for HTML output.
51 public $empty_element_suffix = " />";
52 public $tab_width = 4;
54 # Change to `true` to disallow markup or entities.
55 public $no_markup = false;
56 public $no_entities = false;
58 # Predefined urls and titles for reference links and images.
59 public $predef_urls = array();
60 public $predef_titles = array();
62 # Optional filter function for URLs
63 public $url_filter_func = null;
66 ### Parser Implementation ###
68 # Regex to match balanced [brackets].
69 # Needed to insert a maximum bracket depth while converting to PHP.
70 protected $nested_brackets_depth = 6;
71 protected $nested_brackets_re;
73 protected $nested_url_parenthesis_depth = 4;
74 protected $nested_url_parenthesis_re;
76 # Table of hash values for escaped characters:
77 protected $escape_chars = '\`*_{}[]()>#+-.!';
78 protected $escape_chars_re;
public function __construct() {
    #
    # Constructor. Prepares the emphasis token expressions, builds the
    # depth-limited regular expressions for nested brackets and nested
    # URL parentheses, builds the escape character class, and sorts
    # each gamut by priority.
    #
    $this->prepareItalicsAndBold();

    # Balanced [brackets], nested up to nested_brackets_depth levels.
    $bracket_depth = $this->nested_brackets_depth;
    $bracket_open  = str_repeat('(?>[^\[\]]+|\[', $bracket_depth);
    $bracket_close = str_repeat('\])*', $bracket_depth);
    $this->nested_brackets_re = $bracket_open . $bracket_close;

    # Balanced (parentheses) in URLs, nested up to the configured depth.
    $paren_depth = $this->nested_url_parenthesis_depth;
    $paren_open  = str_repeat('(?>[^()\s]+|\(', $paren_depth);
    $paren_close = str_repeat('(?>\)))*', $paren_depth);
    $this->nested_url_parenthesis_re = $paren_open . $paren_close;

    # Character class matching any escapable character.
    $quoted_escapes = preg_quote($this->escape_chars);
    $this->escape_chars_re = '[' . $quoted_escapes . ']';

    # Sort document, block, and span gamut in ascending priority order.
    asort($this->document_gamut);
    asort($this->block_gamut);
    asort($this->span_gamut);
}
105 # Internal hashes used during transformation.
106 protected $urls = array();
107 protected $titles = array();
108 protected $html_hashes = array();
110 # Status flag to avoid invalid nesting.
111 protected $in_anchor = false;
protected function setup() {
    #
    # Called before the transformation process starts to set up the
    # parser state: the anchor flag and HTML hash table are reset and
    # the URL/title tables are seeded from the predefined references.
    #
    $this->in_anchor   = false;
    $this->html_hashes = array();

    # Start from the predefined reference links and images.
    $this->titles = $this->predef_titles;
    $this->urls   = $this->predef_urls;
}
protected function teardown() {
    #
    # Called after the transformation process to clear any variable
    # which may be taking up memory unnecessarily.
    #
    $this->html_hashes = array();
    $this->titles      = array();
    $this->urls        = array();
}
public function transform($text) {
    #
    # Main function. Performs some preprocessing on the input text
    # and passes it through the document gamut.
    #
    # Returns the transformed text followed by a newline.
    #
    # Reset per-document state before doing anything else.
    $this->setup();

    # Remove UTF-8 BOM and marker character in input, if present.
    $text = preg_replace('{^\xEF\xBB\xBF|\x1A}', '', $text);

    # Standardize line endings:
    #   DOS to Unix and Mac to Unix
    $text = preg_replace('{\r\n?}', "\n", $text);

    # Make sure $text ends with a couple of newlines:
    $text .= "\n\n";

    # Convert all tabs to spaces.
    $text = $this->detab($text);

    # Turn block-level HTML blocks into hash entries
    $text = $this->hashHTMLBlocks($text);

    # Strip any lines consisting only of spaces (tabs are already gone).
    # This makes subsequent regexen easier to write, because we can
    # match consecutive blank lines with /\n+/ instead of something
    # contorted like /[ ]*\n+/ .
    $text = preg_replace('/^[ ]+$/m', '', $text);

    # Run document gamut methods.
    foreach ($this->document_gamut as $method => $priority) {
        $text = $this->$method($text);
    }

    # Release the per-document tables.
    $this->teardown();

    return $text . "\n";
}
177 protected $document_gamut = array(
178 # Strip link definitions, store in hashes.
179 "stripLinkDefinitions" => 20,
181 "runBasicBlockGamut" => 30,
185 protected function stripLinkDefinitions($text) {
187 # Strips link definitions from text, stores the URLs and titles in
190 $less_than_tab = $this->tab_width - 1;
192 # Link defs are in the form: ^[id]: url "optional title"
193 $text = preg_replace_callback('{
194 ^[ ]{0,'.$less_than_tab.'}\[(.+)\][ ]?: # id = $1
196 \n? # maybe *one* newline
204 \n? # maybe one newline
207 (?<=\s) # lookbehind for whitespace
212 )? # title is optional
215 array($this, '_stripLinkDefinitions_callback'),
protected function _stripLinkDefinitions_callback($matches) {
    #
    # Callback for stripLinkDefinitions: records one link definition.
    # The lower-cased id ($1) indexes the URL and title tables; the URL
    # is $2 when non-empty, otherwise $3; the title is bound to $4.
    # Returns the empty string, which removes the definition from the text.
    #
    $id = strtolower($matches[1]);

    if ($matches[2] == '') {
        $target = $matches[3];
    } else {
        $target = $matches[2];
    }

    $this->urls[$id] = $target;
    # Bound by reference so a missing title group leaves a null entry.
    $this->titles[$id] =& $matches[4];

    return ''; # String that will replace the block
}
228 protected function hashHTMLBlocks($text) {
229 if ($this->no_markup) return $text;
231 $less_than_tab = $this->tab_width - 1;
233 # Hashify HTML blocks:
234 # We only want to do this for block-level HTML tags, such as headers,
235 # lists, and tables. That's because we still want to wrap <p>s around
236 # "paragraphs" that are wrapped in non-block-level tags, such as anchors,
237 # phrase emphasis, and spans. The list of tags we're looking for is
240 # * List "a" is made of tags which can be both inline or block-level.
241 # These will be treated block-level when the start tag is alone on
242 # its line, otherwise they're not matched here and will be taken as
244 # * List "b" is made of tags which are always block-level;
246 $block_tags_a_re = 'ins|del';
247 $block_tags_b_re = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|'.
248 'script|noscript|style|form|fieldset|iframe|math|svg|'.
249 'article|section|nav|aside|hgroup|header|footer|'.
252 # Regular expression for the content of a block tag.
253 $nested_tags_level = 4;
255 (?> # optional tag attributes
256 \s # starts with whitespace
258 [^>"/]+ # text outside quotes
260 /+(?!>) # slash not followed by ">"
262 "[^"]*" # text inside double quotes (tolerate ">")
264 \'[^\']*\' # text inside single quotes (tolerate ">")
271 [^<]+ # content without tag
273 <\2 # nested opening tag
274 '.$attr.' # attributes
278 >', $nested_tags_level). # end of opening tag
279 '.*?'. # last level nested tag content
281 </\2\s*> # closing nested tag
284 <(?!/\2\s*> # other tags with a different name
288 $content2 = str_replace('\2', '\3', $content);
290 # First, look for nested blocks, e.g.:
293 # tags for inner block must be indented.
297 # The outermost tags must start at the left margin for this to match, and
298 # the inner nested divs must be indented.
299 # We need to do this before the next, more liberal match, because the next
300 # match will start at the first `<div>` and stop at the first `</div>`.
301 $text = preg_replace_callback('{(?>
303 (?<=\n) # Starting on its own line
305 \A\n? # the at beginning of the doc
309 # Match from `\n<tag>` to `</tag>\n`, handling nested tags
312 [ ]{0,'.$less_than_tab.'}
313 <('.$block_tags_b_re.')# start tag = $2
314 '.$attr.'> # attributes followed by > and \n
315 '.$content.' # content, support nesting
316 </\2> # the matching end tag
317 [ ]* # trailing spaces/tabs
318 (?=\n+|\Z) # followed by a newline or end of document
320 | # Special version for tags of group a.
322 [ ]{0,'.$less_than_tab.'}
323 <('.$block_tags_a_re.')# start tag = $3
324 '.$attr.'>[ ]*\n # attributes followed by >
325 '.$content2.' # content, support nesting
326 </\3> # the matching end tag
327 [ ]* # trailing spaces/tabs
328 (?=\n+|\Z) # followed by a newline or end of document
330 | # Special case just for <hr />. It was easier to make a special
331 # case than to make the other regex more complicated.
333 [ ]{0,'.$less_than_tab.'}
334 <(hr) # start tag = $2
335 '.$attr.' # attributes
336 /?> # the matching end tag
338 (?=\n{2,}|\Z) # followed by a blank line or end of document
340 | # Special case for standalone HTML comments:
342 [ ]{0,'.$less_than_tab.'}
347 (?=\n{2,}|\Z) # followed by a blank line or end of document
349 | # PHP and ASP-style processor instructions (<? and <%)
351 [ ]{0,'.$less_than_tab.'}
358 (?=\n{2,}|\Z) # followed by a blank line or end of document
362 array($this, '_hashHTMLBlocks_callback'),
protected function _hashHTMLBlocks_callback($matches) {
    #
    # Callback for hashHTMLBlocks: stores the matched HTML block ($1)
    # in the hash table and returns its token surrounded by blank lines
    # so that it stands as a block of its own.
    #
    $text = $matches[1];
    $key  = $this->hashBlock($text);
    return "\n\n$key\n\n";
}
protected function hashPart($text, $boundary = 'X') {
    #
    # Called whenever a tag must be hashed when a function inserts an atomic
    # element in the text stream. Passing $text through this function gives
    # a unique text-token which will be reverted back when calling unhash.
    #
    # The $boundary argument specifies what character should be used to
    # surround the token. By convention, "B" is used for block elements that
    # need not be wrapped into paragraph tags at the end, ":" is used for
    # elements that are word separators and "X" is used in the general case.
    #
    # Returns the token that replaces $text in the stream.
    #
    # Counter shared by every call, so that each token is distinct.
    static $i = 0;

    # Swap back any tag hash found in $text so we do not have to `unhash`
    # multiple times at the end.
    $text = $this->unhash($text);

    # Then hash the block.
    $key = "$boundary\x1A" . ++$i . $boundary;
    $this->html_hashes[$key] = $text;
    return $key; # String that will replace the tag.
}
protected function hashBlock($text) {
    #
    # Shortcut for hashPart using the block-level boundary character "B".
    #
    $block_token = $this->hashPart($text, 'B');
    return $block_token;
}
405 protected $block_gamut = array(
407 # These are all the transformations that form block-level
408 # tags like paragraphs, headers, and list items.
411 "doHorizontalRules" => 20,
414 "doCodeBlocks" => 50,
415 "doBlockQuotes" => 60,
protected function runBlockGamut($text) {
    #
    # Run block gamut transformations.
    #
    # Raw HTML in the Markdown source has to be hashed before anything
    # else, and this must happen for each block rather than only once at
    # the beginning: hashed blocks can be part of list items and could
    # have been indented, in which case an earlier pass of hashHTMLBlocks
    # would have seen them as code blocks.
    #
    $hashed = $this->hashHTMLBlocks($text);
    return $this->runBasicBlockGamut($hashed);
}
protected function runBasicBlockGamut($text) {
    #
    # Run block gamut transformations, without hashing HTML blocks. This is
    # useful when HTML blocks are known to be already hashed, like in the
    # first whole-document pass.
    #
    # Returns the text after every block gamut method and formParagraphs
    # have been applied.
    #
    foreach ($this->block_gamut as $method => $priority) {
        $text = $this->$method($text);
    }

    # Finally form paragraphs and restore hashed blocks.
    $text = $this->formParagraphs($text);

    return $text;
}
449 protected function doHorizontalRules($text) {
450 # Do Horizontal Rules:
453 ^[ ]{0,3} # Leading space
454 ([-*_]) # $1: First marker
455 (?> # Repeated marker group
456 [ ]{0,2} # Zero, one, or two spaces.
457 \1 # Marker character
458 ){2,} # Group repeated at least twice
459 [ ]* # Tailing spaces
462 "\n".$this->hashBlock("<hr$this->empty_element_suffix")."\n",
467 protected $span_gamut = array(
469 # These are all the transformations that occur *within* block-level
470 # tags like paragraphs, headers, and list items.
472 # Process character escapes, code spans, and inline HTML
476 # Process anchor and image tags. Images must come first,
477 # because ![foo][f] looks like an anchor.
481 # Make links out of things like `<http://example.com/>`
482 # Must come after doAnchors, because you can use < and >
483 # delimiters in inline links like [this](<url>).
485 "encodeAmpsAndAngles" => 40,
487 "doItalicsAndBold" => 50,
488 "doHardBreaks" => 60,
protected function runSpanGamut($text) {
    #
    # Run span gamut transformations: each method named in
    # $this->span_gamut is applied to $text in the array's order.
    #
    # Returns the transformed text.
    #
    foreach ($this->span_gamut as $method => $priority) {
        $text = $this->$method($text);
    }

    return $text;
}
protected function doHardBreaks($text) {
    #
    # Do hard breaks: two or more spaces followed by a newline are
    # replaced through _doHardBreaks_callback.
    #
    $break_re = '/ {2,}\n/';
    $handler  = array($this, '_doHardBreaks_callback');

    return preg_replace_callback($break_re, $handler, $text);
}
protected function _doHardBreaks_callback($matches) {
    #
    # Callback for doHardBreaks: returns the hash token for a <br> tag
    # (closed with the configured empty element suffix) plus a newline.
    #
    $br = "<br" . $this->empty_element_suffix . "\n";
    return $this->hashPart($br);
}
513 protected function doAnchors($text) {
515 # Turn Markdown link shortcuts into XHTML <a> tags.
517 if ($this->in_anchor) return $text;
518 $this->in_anchor = true;
521 # First, handle reference-style links: [link text] [id]
523 $text = preg_replace_callback('{
524 ( # wrap whole match in $1
526 ('.$this->nested_brackets_re.') # link text = $2
529 [ ]? # one optional space
530 (?:\n[ ]*)? # one optional newline followed by spaces
537 array($this, '_doAnchors_reference_callback'), $text);
540 # Next, inline-style links: [link text](url "optional title")
542 $text = preg_replace_callback('{
543 ( # wrap whole match in $1
545 ('.$this->nested_brackets_re.') # link text = $2
552 ('.$this->nested_url_parenthesis_re.') # href = $4
556 ([\'"]) # quote char = $6
559 [ \n]* # ignore any spaces/tabs between closing quote and )
560 )? # title is optional
564 array($this, '_doAnchors_inline_callback'), $text);
567 # Last, handle reference-style shortcuts: [link text]
568 # These must come last in case you've also got [link text][1]
569 # or [link text](/foo)
571 $text = preg_replace_callback('{
572 ( # wrap whole match in $1
574 ([^\[\]]+) # link text = $2; can\'t contain [ or ]
578 array($this, '_doAnchors_reference_callback'), $text);
580 $this->in_anchor = false;
protected function _doAnchors_reference_callback($matches) {
    #
    # Callback for reference-style links. $1 is the whole match, $2 the
    # link text and $3 the link id (absent or empty for shortcut links).
    #
    # Returns the hash token of the generated <a> element when the id is
    # known in $this->urls; otherwise returns the whole match untouched.
    #
    $whole_match = $matches[1];
    $link_text   = $matches[2];
    $link_id     = isset($matches[3]) ? $matches[3] : '';

    if ($link_id == "") {
        # for shortcut links like [this][] or [this].
        $link_id = $link_text;
    }

    # lower-case and turn embedded newlines into spaces
    $link_id = strtolower($link_id);
    $link_id = preg_replace('{[ ]?\n}', ' ', $link_id);

    if (!isset($this->urls[$link_id])) {
        # No such link id: leave the text intact.
        return $whole_match;
    }

    $url = $this->urls[$link_id];
    $url = $this->encodeURLAttribute($url);

    $result = "<a href=\"$url\"";
    if (isset($this->titles[$link_id])) {
        $title = $this->titles[$link_id];
        $title = $this->encodeAttribute($title);
        $result .= " title=\"$title\"";
    }

    $link_text = $this->runSpanGamut($link_text);
    $result .= ">$link_text</a>";

    return $this->hashPart($result);
}
protected function _doAnchors_inline_callback($matches) {
    #
    # Callback for inline-style links. $2 is the link text, the URL is
    # $3 when non-empty and $4 otherwise, and $7 is the optional title.
    #
    # Returns the hash token of the generated <a> element.
    #
    # The span gamut is applied to the link text exactly once: running
    # it a second time on its own output would encode "&" and "<" twice
    # when the no_entities mode is enabled.
    #
    $link_text = $this->runSpanGamut($matches[2]);
    $url       = $matches[3] == '' ? $matches[4] : $matches[3];

    // if the URL was of the form <s p a c e s> it got caught by the HTML
    // tag parser and hashed. Need to reverse the process before using the URL.
    $unhashed = $this->unhash($url);
    if ($unhashed != $url) {
        $url = preg_replace('/^<(.*)>$/', '\1', $unhashed);
    }

    $url = $this->encodeURLAttribute($url);

    $result = "<a href=\"$url\"";
    # The title group is absent from $matches when no title was given.
    if (isset($matches[7])) {
        $title = $this->encodeAttribute($matches[7]);
        $result .= " title=\"$title\"";
    }

    $result .= ">$link_text</a>";

    return $this->hashPart($result);
}
644 protected function doImages($text) {
646 # Turn Markdown image shortcuts into <img> tags.
649 # First, handle reference-style labeled images: ![alt text][id]
651 $text = preg_replace_callback('{
652 ( # wrap whole match in $1
654 ('.$this->nested_brackets_re.') # alt text = $2
657 [ ]? # one optional space
658 (?:\n[ ]*)? # one optional newline followed by spaces
666 array($this, '_doImages_reference_callback'), $text);
669 # Next, handle inline images: ![alt text](url "optional title")
670 # Don't forget: encode * and _
672 $text = preg_replace_callback('{
673 ( # wrap whole match in $1
675 ('.$this->nested_brackets_re.') # alt text = $2
677 \s? # One optional whitespace character
681 <(\S*)> # src url = $3
683 ('.$this->nested_url_parenthesis_re.') # src url = $4
687 ([\'"]) # quote char = $6
691 )? # title is optional
695 array($this, '_doImages_inline_callback'), $text);
protected function _doImages_reference_callback($matches) {
    #
    # Callback for reference-style images. $1 is the whole match, $2 the
    # alt text and $3 the link id (empty for shortcuts like ![this][]).
    #
    # Returns the hash token of the generated <img> element when the id
    # is known in $this->urls; otherwise returns the whole match untouched.
    #
    $whole_match = $matches[1];
    $alt_text    = $matches[2];
    $link_id     = strtolower($matches[3]);

    if ($link_id == "") {
        $link_id = strtolower($alt_text); # for shortcut links like ![this][].
    }

    if (!isset($this->urls[$link_id])) {
        # If there's no such link ID, leave intact:
        return $whole_match;
    }

    $alt_text = $this->encodeAttribute($alt_text);
    $url = $this->encodeURLAttribute($this->urls[$link_id]);

    $result = "<img src=\"$url\" alt=\"$alt_text\"";
    if (isset($this->titles[$link_id])) {
        $title = $this->titles[$link_id];
        $title = $this->encodeAttribute($title);
        $result .= " title=\"$title\"";
    }
    $result .= $this->empty_element_suffix;

    return $this->hashPart($result);
}
protected function _doImages_inline_callback($matches) {
    #
    # Callback for inline images. $2 is the alt text, the source URL is
    # $3 when non-empty and $4 otherwise, and $7 is the optional title.
    #
    # Returns the hash token of the generated <img> element.
    #
    $alt_text = $this->encodeAttribute($matches[2]);
    $url      = $matches[3] == '' ? $matches[4] : $matches[3];
    $url      = $this->encodeURLAttribute($url);

    $result = "<img src=\"$url\" alt=\"$alt_text\"";
    # The title group is absent from $matches when no title was given.
    if (isset($matches[7])) {
        $title = $this->encodeAttribute($matches[7]);
        $result .= " title=\"$title\"";
    }
    $result .= $this->empty_element_suffix;

    return $this->hashPart($result);
}
746 protected function doHeaders($text) {
747 # Setext-style headers:
754 $text = preg_replace_callback('{ ^(.+?)[ ]*\n(=+|-+)[ ]*\n+ }mx',
755 array($this, '_doHeaders_callback_setext'), $text);
760 # ## Header 2 with closing hashes ##
764 $text = preg_replace_callback('{
765 ^(\#{1,6}) # $1 = string of #\'s
767 (.+?) # $2 = Header text
769 \#* # optional closing #\'s (not counted)
772 array($this, '_doHeaders_callback_atx'), $text);
protected function _doHeaders_callback_setext($matches) {
    #
    # Callback for Setext-style headers. $1 is the header text and $2
    # the underline: "=" characters give <h1>, "-" characters give <h2>.
    #
    # Returns the hash token of the header block surrounded by newlines,
    # or the match untouched when it is really an empty list item.
    #
    # Terrible hack to check we haven't found an empty list item.
    if ($matches[2] == '-' && preg_match('{^-(?: |$)}', $matches[1])) {
        return $matches[0];
    }

    # Square-bracket offset: the curly-brace form is gone in PHP 8.
    $level = $matches[2][0] == '=' ? 1 : 2;
    $block = "<h$level>".$this->runSpanGamut($matches[1])."</h$level>";
    return "\n" . $this->hashBlock($block) . "\n\n";
}
protected function _doHeaders_callback_atx($matches) {
    #
    # Callback for atx-style headers. The header level is the length of
    # the leading string of #'s ($1); $2 is the header text.
    #
    $depth   = strlen($matches[1]);
    $content = $this->runSpanGamut($matches[2]);
    $header  = "<h$depth>" . $content . "</h$depth>";

    return "\n" . $this->hashBlock($header) . "\n\n";
}
792 protected function doLists($text) {
794 # Form HTML ordered (numbered) and unordered (bulleted) lists.
796 $less_than_tab = $this->tab_width - 1;
798 # Re-usable patterns to match list item bullets and number markers:
799 $marker_ul_re = '[*+-]';
800 $marker_ol_re = '\d+[\.]';
802 $markers_relist = array(
803 $marker_ul_re => $marker_ol_re,
804 $marker_ol_re => $marker_ul_re,
807 foreach ($markers_relist as $marker_re => $other_marker_re) {
808 # Re-usable pattern to match any entire ul or ol list:
812 ([ ]{0,'.$less_than_tab.'}) # $3 = number of spaces
813 ('.$marker_re.') # $4 = first list item marker
822 (?! # Negative lookahead for another list item marker
827 (?= # Lookahead for another kind of list
829 \3 # Must have the same indentation
830 '.$other_marker_re.'[ ]+
836 # We use a different prefix before nested lists than top-level lists.
837 # See extended comment in processListItems().
839 if ($this->list_level) {
840 $text = preg_replace_callback('{
844 array($this, '_doLists_callback'), $text);
847 $text = preg_replace_callback('{
848 (?:(?<=\n)\n|\A\n?) # Must eat the newline
851 array($this, '_doLists_callback'), $text);
857 protected function _doLists_callback($matches) {
858 # Re-usable patterns to match list item bullets and number markers:
859 $marker_ul_re = '[*+-]';
860 $marker_ol_re = '\d+[\.]';
861 $marker_any_re = "(?:$marker_ul_re|$marker_ol_re)";
864 $list_type = preg_match("/$marker_ul_re/", $matches[4]) ? "ul" : "ol";
866 $marker_any_re = ( $list_type == "ul" ? $marker_ul_re : $marker_ol_re );
869 $result = $this->processListItems($list, $marker_any_re);
871 $result = $this->hashBlock("<$list_type>\n" . $result . "</$list_type>");
872 return "\n". $result ."\n\n";
875 protected $list_level = 0;
protected function processListItems($list_str, $marker_any_re) {
    #
    # Process the contents of a single ordered or unordered list, splitting it
    # into individual list items.
    #
    # $list_str is the text of the list; $marker_any_re is the regular
    # expression fragment matching one item marker. Returns $list_str with
    # each item replaced by the output of _processListItems_callback.
    #
    # The $this->list_level counter keeps track of when we're inside a list.
    # Each time we enter a list, we increment it; when we leave a list,
    # we decrement. If it's zero, we're not in a list anymore.
    #
    # We do this because when we're not inside a list, we want to treat
    # something like this:
    #
    #       I recommend upgrading to version
    #       8. Oops, now this line is treated
    #
    # As a single paragraph, despite the fact that the second line starts
    # with a digit-period-space sequence.
    #
    # Whereas when we're inside a list (or sub-list), that line will be
    # treated as the start of a sub-list. What a kludge, huh? This is
    # an aspect of Markdown's syntax that's hard to parse perfectly
    # without resorting to mind-reading. Perhaps the solution is to
    # change the syntax rules such that sub-lists must start with a
    # starting cardinal number; e.g. "1." or "a.".
    #
    $this->list_level++;

    # trim trailing blank lines:
    $list_str = preg_replace("/\n{2,}\\z/", "\n", $list_str);

    $list_str = preg_replace_callback('{
        (\n)?                           # leading line = $1
        (^[ ]*)                         # leading whitespace = $2
        ('.$marker_any_re.'             # list marker and space = $3
            (?:[ ]+|(?=\n))             # space only required if item is not empty
        )
        ((?s:.*?))                      # list item text = $4
        (?:(\n+(?=\n))|\n)              # tailing blank line = $5
        (?= \n* (\z | \2 ('.$marker_any_re.') (?:[ ]+|(?=\n))))
        }xm',
        array($this, '_processListItems_callback'), $list_str);

    $this->list_level--;
    return $list_str;
}
protected function _processListItems_callback($matches) {
    #
    # Callback for processListItems: turns one list item into <li> markup.
    # $1 is the leading line, $2 the leading whitespace, $3 the marker
    # with its trailing space, $4 the item text and $5 the trailing
    # blank line.
    #
    # An item that is preceded or followed by a blank line, or that
    # contains one, goes through the block gamut; any other item only
    # gets sub-list and span processing.
    #
    $item               = $matches[4];
    $leading_line       = isset($matches[1]) ? $matches[1] : '';
    $leading_space      = isset($matches[2]) ? $matches[2] : '';
    $marker_space       = $matches[3];
    $tailing_blank_line = isset($matches[5]) ? $matches[5] : '';

    if ($leading_line || $tailing_blank_line ||
        preg_match('/\n{2,}/', $item))
    {
        # Replace marker with the appropriate whitespace indentation
        $item = $leading_space . str_repeat(' ', strlen($marker_space)) . $item;
        $item = $this->runBlockGamut($this->outdent($item)."\n");
    }
    else {
        # Recursion for sub-lists:
        $item = $this->doLists($this->outdent($item));
        $item = preg_replace('/\n+$/', '', $item);
        $item = $this->runSpanGamut($item);
    }

    return "<li>" . $item . "</li>\n";
}
948 protected function doCodeBlocks($text) {
950 # Process Markdown `<pre><code>` blocks.
952 $text = preg_replace_callback('{
954 ( # $1 = the code block -- one or more lines, starting with a space/tab
956 [ ]{'.$this->tab_width.'} # Lines must start with a tab or a tab-width of spaces
960 ((?=^[ ]{0,'.$this->tab_width.'}\S)|\Z) # Lookahead for non-space at line-start, or end of doc
962 array($this, '_doCodeBlocks_callback'), $text);
protected function _doCodeBlocks_callback($matches) {
    #
    # Callback for doCodeBlocks: outdents the matched code ($1), escapes
    # its HTML special characters (quotes left alone), trims surrounding
    # newlines and returns the hash token of the <pre><code> block
    # surrounded by blank lines.
    #
    $code = $this->outdent($matches[1]);
    $code = htmlspecialchars($code, ENT_NOQUOTES);

    # trim leading newlines and trailing newlines
    $code = preg_replace('/\A\n+|\n+\z/', '', $code);

    $html = "<pre><code>" . $code . "\n</code></pre>";
    return "\n\n" . $this->hashBlock($html) . "\n\n";
}
protected function makeCodeSpan($code) {
    #
    # Create a code span markup for $code. Called from handleSpanToken.
    # The code is trimmed and HTML-escaped (quotes left alone) before
    # being wrapped in <code> and hashed.
    #
    $escaped = htmlspecialchars(trim($code), ENT_NOQUOTES);
    $markup  = "<code>" . $escaped . "</code>";

    return $this->hashPart($markup);
}
989 protected $em_relist = array(
990 '' => '(?:(?<!\*)\*(?!\*)|(?<!_)_(?!_))(?![\.,:;]?\s)',
991 '*' => '(?<![\s*])\*(?!\*)',
992 '_' => '(?<![\s_])_(?!_)',
994 protected $strong_relist = array(
995 '' => '(?:(?<!\*)\*\*(?!\*)|(?<!_)__(?!_))(?![\.,:;]?\s)',
996 '**' => '(?<![\s*])\*\*(?!\*)',
997 '__' => '(?<![\s_])__(?!_)',
999 protected $em_strong_relist = array(
1000 '' => '(?:(?<!\*)\*\*\*(?!\*)|(?<!_)___(?!_))(?![\.,:;]?\s)',
1001 '***' => '(?<![\s*])\*\*\*(?!\*)',
1002 '___' => '(?<![\s_])___(?!_)',
1004 protected $em_strong_prepared_relist;
protected function prepareItalicsAndBold() {
    #
    # Prepare the regular expressions used to search for emphasis tokens.
    # One expression is stored in $this->em_strong_prepared_relist for
    # every combination of an em_relist key and a strong_relist key,
    # indexed by the concatenation of the two keys.
    #
    foreach ($this->em_relist as $em => $em_re) {
        foreach ($this->strong_relist as $strong => $strong_re) {
            $context = "$em$strong";

            # Allowed token expressions: em and strong, preceded by the
            # combined em+strong expression when one exists for this context.
            $alternatives = array($em_re, $strong_re);
            if (isset($this->em_strong_relist[$context])) {
                array_unshift($alternatives, $this->em_strong_relist[$context]);
            }

            # Construct master expression from list.
            $this->em_strong_prepared_relist[$context] =
                '{(' . implode('|', $alternatives) . ')}';
        }
    }
}
1028 protected function doItalicsAndBold($text) {
1029 $token_stack = array('');
1030 $text_stack = array('');
1033 $tree_char_em = false;
1037 # Get prepared regular expression for searching emphasis tokens
1038 # in current context.
1040 $token_re = $this->em_strong_prepared_relist["$em$strong"];
1043 # Each loop iteration search for the next emphasis token.
1044 # Each token is then passed to handleSpanToken.
1046 $parts = preg_split($token_re, $text, 2, PREG_SPLIT_DELIM_CAPTURE);
1047 $text_stack[0] .= $parts[0];
1048 $token =& $parts[1];
1051 if (empty($token)) {
1052 # Reached end of text span: empty stack without emitting.
1053 # any more emphasis.
1054 while ($token_stack[0]) {
1055 $text_stack[1] .= array_shift($token_stack);
1056 $text_stack[0] .= array_shift($text_stack);
1061 $token_len = strlen($token);
1062 if ($tree_char_em) {
1063 # Reached closing marker while inside a three-char emphasis.
1064 if ($token_len == 3) {
1065 # Three-char closing marker, close em and strong.
1066 array_shift($token_stack);
1067 $span = array_shift($text_stack);
1068 $span = $this->runSpanGamut($span);
1069 $span = "<strong><em>$span</em></strong>";
1070 $text_stack[0] .= $this->hashPart($span);
1074 # Other closing marker: close one em or strong and
1075 # change current token state to match the other
1076 $token_stack[0] = str_repeat($token{0}, 3-$token_len);
1077 $tag = $token_len == 2 ? "strong" : "em";
1078 $span = $text_stack[0];
1079 $span = $this->runSpanGamut($span);
1080 $span = "<$tag>$span</$tag>";
1081 $text_stack[0] = $this->hashPart($span);
1082 $$tag = ''; # $$tag stands for $em or $strong
1084 $tree_char_em = false;
1085 } else if ($token_len == 3) {
1087 # Reached closing marker for both em and strong.
1088 # Closing strong marker:
1089 for ($i = 0; $i < 2; ++$i) {
1090 $shifted_token = array_shift($token_stack);
1091 $tag = strlen($shifted_token) == 2 ? "strong" : "em";
1092 $span = array_shift($text_stack);
1093 $span = $this->runSpanGamut($span);
1094 $span = "<$tag>$span</$tag>";
1095 $text_stack[0] .= $this->hashPart($span);
1096 $$tag = ''; # $$tag stands for $em or $strong
1099 # Reached opening three-char emphasis marker. Push on token
1100 # stack; will be handled by the special condition above.
1103 array_unshift($token_stack, $token);
1104 array_unshift($text_stack, '');
1105 $tree_char_em = true;
1107 } else if ($token_len == 2) {
1109 # Unwind any dangling emphasis marker:
1110 if (strlen($token_stack[0]) == 1) {
1111 $text_stack[1] .= array_shift($token_stack);
1112 $text_stack[0] .= array_shift($text_stack);
1114 # Closing strong marker:
1115 array_shift($token_stack);
1116 $span = array_shift($text_stack);
1117 $span = $this->runSpanGamut($span);
1118 $span = "<strong>$span</strong>";
1119 $text_stack[0] .= $this->hashPart($span);
1122 array_unshift($token_stack, $token);
1123 array_unshift($text_stack, '');
1127 # Here $token_len == 1
1129 if (strlen($token_stack[0]) == 1) {
1130 # Closing emphasis marker:
1131 array_shift($token_stack);
1132 $span = array_shift($text_stack);
1133 $span = $this->runSpanGamut($span);
1134 $span = "<em>$span</em>";
1135 $text_stack[0] .= $this->hashPart($span);
1138 $text_stack[0] .= $token;
1141 array_unshift($token_stack, $token);
1142 array_unshift($text_stack, '');
1147 return $text_stack[0];
1151 protected function doBlockQuotes($text) {
1152 $text = preg_replace_callback('/
1153 ( # Wrap whole match in $1
1155 ^[ ]*>[ ]? # ">" at the start of a line
1156 .+\n # rest of the first line
1157 (.+\n)* # subsequent consecutive lines
1162 array($this, '_doBlockQuotes_callback'), $text);
protected function _doBlockQuotes_callback($matches) {
    #
    # Callback for doBlockQuotes: strips one level of quoting from the
    # matched block ($1), runs the block gamut on what is left, indents
    # the result and returns the hash token of the <blockquote> element
    # surrounded by newlines.
    #
    $bq = $matches[1];

    # trim one level of quoting - trim whitespace-only lines
    $bq = preg_replace('/^[ ]*>[ ]?|^[ ]+$/m', '', $bq);
    $bq = $this->runBlockGamut($bq); # recurse

    $bq = preg_replace('/^/m', " ", $bq);
    # These leading spaces cause problem with <pre> content,
    # so we need to fix that:
    $bq = preg_replace_callback('{(\s*<pre>.+?</pre>)}sx',
        array($this, '_doBlockQuotes_callback2'), $bq);

    return "\n". $this->hashBlock("<blockquote>\n$bq\n</blockquote>")."\n\n";
}
protected function _doBlockQuotes_callback2($matches) {
    #
    # Callback used on <pre> sections inside a blockquote: removes the
    # leading indentation from every line of the matched text ($1) and
    # returns the result.
    #
    $pre = $matches[1];
    $pre = preg_replace('/^ /m', '', $pre);
    return $pre;
}
protected function formParagraphs($text) {
    #
    # Params:
    #   $text - string to process with html <p> tags
    #
    # The text is split on blank lines. A chunk consisting solely of a
    # block hash token is replaced by the HTML stored for that token;
    # any other chunk goes through the span gamut and is wrapped in
    # <p> tags. Returns the chunks joined by blank lines.
    #
    # Strip leading and trailing lines:
    $text = preg_replace('/\A\n+|\n+\z/', '', $text);

    $grafs = preg_split('/\n{2,}/', $text, -1, PREG_SPLIT_NO_EMPTY);

    #
    # Wrap <p> tags and unhashify HTML blocks
    #
    foreach ($grafs as $key => $value) {
        if (!preg_match('/^B\x1A[0-9]+B$/', $value)) {
            # Is a paragraph.
            $value = $this->runSpanGamut($value);
            $value = preg_replace('/^([ ]*)/', "<p>", $value);
            $value .= "</p>";
            $grafs[$key] = $this->unhash($value);
        }
        else {
            # Is a block: put the stored HTML back in place of its token.
            $grafs[$key] = $this->html_hashes[$value];
        }
    }

    return implode("\n\n", $grafs);
}
protected function encodeAttribute($text) {
    #
    # Encode text for a double-quoted HTML attribute. This function
    # is *not* suitable for attributes enclosed in single quotes.
    #
    # Ampersands and angle brackets are handled by encodeAmpsAndAngles;
    # double quotes are then turned into the &quot; entity.
    #
    $text = $this->encodeAmpsAndAngles($text);
    $text = str_replace('"', '&quot;', $text);
    return $text;
}
protected function encodeURLAttribute($url, &$text = null) {
    #
    # Encode text for a double-quoted HTML attribute containing a URL,
    # applying the URL filter if set. Also generates the textual
    # representation for the URL (removing mailto: or tel:) storing it in $text.
    # This function is *not* suitable for attributes enclosed in single quotes.
    #
    # Returns the encoded URL.
    #
    if ($this->url_filter_func) {
        $url = call_user_func($this->url_filter_func, $url);
    }

    if (preg_match('{^mailto:}i', $url)) {
        # Obfuscated; $text receives the output minus the 7-char "mailto:".
        $url = $this->encodeEntityObfuscatedAttribute($url, $text, 7);
    }
    else if (preg_match('{^tel:}i', $url)) {
        $url  = $this->encodeAttribute($url);
        # Drop the 4-char "tel:" prefix from the displayed text.
        $text = substr($url, 4);
    }
    else {
        $url  = $this->encodeAttribute($url);
        $text = $url;
    }

    return $url;
}
protected function encodeAmpsAndAngles($text) {
    #
    # Smart processing for ampersands and angle brackets that need to
    # be encoded. Valid character entities are left alone unless the
    # no-entities mode is set.
    #
    # Returns $text with "&" encoded as &amp; and "<" encoded as &lt;.
    #
    if ($this->no_entities) {
        $text = str_replace('&', '&amp;', $text);
    } else {
        # Ampersand-encoding based entirely on Nat Irons's Amputator
        # MT plugin: <http://bumppo.net/projects/amputator/>
        $text = preg_replace('/&(?!#?[xX]?(?:[0-9a-fA-F]+|\w+);)/',
            '&amp;', $text);
    }

    # Encode remaining <'s
    $text = str_replace('<', '&lt;', $text);

    return $text;
}
# Turn autolinks written between angle brackets into anchors.
# Two passes run over $text: the first matches `<scheme:...>` for the
# http, https, ftp, dict and tel schemes (case-insensitive) and hands each
# match to _doAutoLinks_url_callback; the second matches e-mail addresses
# and hands them to _doAutoLinks_email_callback.
1316 protected function doAutoLinks($text) {
1317 $text = preg_replace_callback('{<((https?|ftp|dict|tel):[^\'">\s]+)>}i',
1318 array($this, '_doAutoLinks_url_callback'), $text);
1320 # Email addresses: <address@domain.foo>
1321 $text = preg_replace_callback('{
1326 [-!#$%&\'*+/=?^_`.{|}~\w\x80-\xFF]+
1332 [-a-z0-9\x80-\xFF]+(\.[-a-z0-9\x80-\xFF]+)*\.[a-z]+
1334 \[[\d.a-fA-F:]+\] # IPv4 & IPv6
1339 array($this, '_doAutoLinks_email_callback'), $text);
protected function _doAutoLinks_url_callback($matches) {
	#
	# Callback for URL autolinks. $matches[1] is the URL found between the
	# angle brackets; encodeURLAttribute() returns the href value and fills
	# $label with the text to display. The finished anchor is hashed so
	# later passes leave it alone.
	#
	$label = null;
	$href = $this->encodeURLAttribute($matches[1], $label);
	return $this->hashPart("<a href=\"$href\">$label</a>");
}
protected function _doAutoLinks_email_callback($matches) {
	#
	# Callback for e-mail autolinks. $matches[1] is the address; it is
	# prefixed with "mailto:" so encodeURLAttribute() takes its mailto
	# branch, which returns the href and fills $label with the display text.
	#
	$label = null;
	$href = $this->encodeURLAttribute("mailto:" . $matches[1], $label);
	return $this->hashPart("<a href=\"$href\">$label</a>");
}
protected function encodeEntityObfuscatedAttribute($text, &$tail = null, $head_length = 0) {
	#
	# Input: some text to obfuscate, e.g. "mailto:foo@example.com"
	#
	# Output: the same text with most ASCII characters rewritten as either
	# a decimal or a hexadecimal character entity, in the hope of foiling
	# address-harvesting spam bots. Characters with a byte value of 128 or
	# more are passed through untouched.
	#
	# $tail receives the same output minus its first $head_length
	# characters (the whole output when $head_length is 0).
	#
	# Based on a filter by Matthew Wickline, posted to BBEdit-Talk.
	# With some optimizations by Milian Wolff. Forced encoding of HTML
	# attribute special characters by Allan Odgaard.
	#
	if ($text == "") return $tail = "";

	# One array element per character of the input.
	$pieces = preg_split('/(?<!^)(?!$)/', $text);
	# Seed derived from the text itself, so output is deterministic.
	$seed = (int)abs(crc32($text) / strlen($text));

	foreach ($pieces as $idx => $piece) {
		$code = ord($piece);
		if ($code >= 128) continue;          # leave non-ASCII alone

		$roll = ($seed * (1 + $idx)) % 100;  # pseudo-random 0..99
		# Roughly 10% raw, 45% hex, 45% decimal. `@`, `"`, `&` and `>`
		# are never left raw.
		$may_stay_raw = strpos('@"&>', $piece) === false;
		if ($roll > 90 && $may_stay_raw) continue;

		$pieces[$idx] = ($roll < 45)
			? '&#x'.dechex($code).';'
			: '&#'.$code.';';
	}

	$encoded = implode('', $pieces);
	$tail = $head_length ? implode('', array_slice($pieces, $head_length)) : $encoded;

	return $encoded;
}
# Tokenizer for span-level text. The pattern built below recognises a
# backslash escape, a run of backticks and (unless $no_markup is set) HTML
# comments, processing instructions and tags. The text is split on the
# first match, the text before it is appended to the output as-is, and the
# matched token is handed to handleSpanToken together with the text that
# follows it.
1401 protected function parseSpan($str) {
1403 # Take the string $str and parse it into tokens, hashing embedded HTML,
1404 # escaped characters and handling code spans.
1410 \\\\'.$this->escape_chars_re.'
1413 `+ # code span marker
1414 '.( $this->no_markup ? '' : '
1416 <!-- .*? --> # comment
1418 <\?.*?\?> | <%.*?%> # processing instruction
1420 <[!$]?[-a-zA-Z0-9:_]+ # regular tags
1423 (?>[^"\'>]+|"[^"]*"|\'[^\']*\')*
1427 <[-a-zA-Z0-9:_]+\s*/> # xml-style empty tag
1429 </[-a-zA-Z0-9:_]+\s*> # closing tag
1436 # Each loop iteration searches for either the next tag, the next
1437 # opening code span marker, or the next escaped character.
1438 # Each token is then passed to handleSpanToken.
1440 $parts = preg_split($span_re, $str, 2, PREG_SPLIT_DELIM_CAPTURE);
1442 # Create token from text preceding tag.
1443 if ($parts[0] != "") {
1444 $output .= $parts[0];
1447 # Check if we reach the end.
1448 if (isset($parts[1])) {
1449 $output .= $this->handleSpanToken($parts[1], $parts[2]);
protected function handleSpanToken($token, &$str) {
	#
	# Handle $token provided by parseSpan by determining its nature and
	# returning the corresponding value that should replace it.
	#
	# $token: the delimiter matched by parseSpan (escape, backticks or tag).
	# $str:   by reference, the text following the token; when a code span
	#         is closed it is advanced past the closing marker.
	#
	# Square-bracket offsets are used because the `$token{0}` form was
	# removed in PHP 8.0.
	#
	switch ($token[0]) {
		case "\\":
			# Backslash escape: emit the escaped character as a numeric
			# entity and hash it so no later pass treats it as syntax.
			return $this->hashPart("&#". ord($token[1]). ";");
		case "`":
			# Search for end marker in remaining text.
			if (preg_match('/^(.*?[^`])'.preg_quote($token).'(?!`)(.*)$/sm',
				$str, $matches))
			{
				$str = $matches[2];
				$codespan = $this->makeCodeSpan($matches[1]);
				return $this->hashPart($codespan);
			}
			return $token; // return as text since no ending marker found.
		default:
			# Any other token is inline HTML: protect it verbatim.
			return $this->hashPart($token);
	}
}
protected function outdent($text) {
	#
	# Strip one level of indentation from the start of every line: either
	# a single tab or between one and $tab_width spaces.
	#
	$one_level_re = '/^(\t|[ ]{1,'.$this->tab_width.'})/m';
	return preg_replace($one_level_re, '', $text);
}
1493 # String length function for detab. `_initDetab` will create a function to
1494 # handle UTF-8 if the default function does not exist.
1495 protected $utf8_strlen = 'mb_strlen';
protected function detab($text) {
	#
	# Replace tabs with the appropriate amount of space.
	#
	# Only lines that actually contain a tab are touched; each of them is
	# rebuilt by _detab_callback, which pads every tab-delimited block out
	# to the next tab stop.
	#
	return preg_replace_callback(
		'/^.*\t.*$/m',
		array($this, '_detab_callback'),
		$text
	);
}
protected function _detab_callback($matches) {
	#
	# Expand the tabs of one line ($matches[0]). The line is cut at each
	# tab; before every block after the first, enough spaces are added to
	# bring the rebuilt line to the next multiple of $tab_width, measured
	# with the callable stored in $utf8_strlen.
	#
	$count_chars = $this->utf8_strlen;
	$stop = $this->tab_width;

	$segments = explode("\t", $matches[0]);
	$rebuilt = array_shift($segments);   # first block needs no padding

	foreach ($segments as $segment) {
		$padding = $stop - $count_chars($rebuilt, 'UTF-8') % $stop;
		$rebuilt .= str_repeat(" ", $padding) . $segment;
	}
	return $rebuilt;
}
protected function _initDetab() {
	#
	# Check for the availability of the function in the `utf8_strlen` property
	# (initially `mb_strlen`). If the function is not available, install a
	# fallback that loosely counts the number of UTF-8 characters with a
	# regular expression: one match per single byte below 0xC0, or per lead
	# byte followed by its continuation bytes.
	#
	# The fallback is a closure because create_function() was removed in
	# PHP 8.0. Callers invoke it as $strlen($text, 'UTF-8'); the extra
	# encoding argument is simply ignored by the closure.
	#
	if (function_exists($this->utf8_strlen)) return;
	$this->utf8_strlen = function ($text) {
		return preg_match_all(
			'/[\x00-\xBF]|[\xC0-\xFF][\x80-\xBF]*/',
			$text, $m);
	};
}
protected function unhash($text) {
	#
	# Swap back in everything that was replaced by a hash token. A token is
	# a boundary character, the byte 0x1A, a run of digits and the same
	# boundary character again; each one is looked up by _unhash_callback.
	#
	$token_re = '/(.)\x1A[0-9]+\1/';
	return preg_replace_callback($token_re, array($this, '_unhash_callback'), $text);
}
protected function _unhash_callback($matches) {
	# The whole matched token is the key into the html_hashes table.
	$token = $matches[0];
	return $this->html_hashes[$token];
}
1556 # Temporary Markdown Extra Parser Implementation Class
1558 # NOTE: DON'T USE THIS CLASS
1559 # Currently the implementation of Extra resides here in this temporary class.
1560 # This makes it easier to propagate the changes between the three different
1561 # packaging styles of PHP Markdown. When this issue is resolved, this
1562 # MarkdownExtra_TmpImpl class here will disappear and \Michelf\MarkdownExtra
1563 # will contain the code. So please use \Michelf\MarkdownExtra and ignore this
1567 abstract class _MarkdownExtra_TmpImpl extends \Michelf\Markdown {
# Public settings of the Extra parser. All are plain properties with the
# defaults shown; callers change them on the parser instance.
1569 ### Configuration Variables ###
1571 # Prefix for footnote ids.
1572 public $fn_id_prefix = "";
1574 # Optional title attribute for footnote links and backlinks.
1575 public $fn_link_title = "";
1576 public $fn_backlink_title = "";
1578 # Optional class attribute for footnote links and backlinks.
1579 public $fn_link_class = "footnote-ref";
1580 public $fn_backlink_class = "footnote-backref";
1582 # Class name for table cell alignment (%% replaced left/center/right)
1583 # For instance: 'go-%%' becomes 'go-left' or 'go-right' or 'go-center'
1584 # If empty, the align attribute is used instead of a class name.
1585 public $table_align_class_tmpl = '';
1587 # Optional class prefix for fenced code block.
1588 public $code_class_prefix = "";
1589 # Class attribute for code blocks goes on the `code` tag;
1590 # setting this to true will put attributes on the `pre` tag instead.
1591 public $code_attr_on_pre = false;
1593 # Predefined abbreviations.
# setup() iterates this as word => description pairs.
1594 public $predef_abbr = array();
1597 ### Parser Implementation ###
# Registers the Extra transformations and then defers to the parent
# constructor. Everything here runs *before* parent::__construct() so the
# parent sees the extended escape characters and gamut tables.
1599 public function __construct() {
1601 # Constructor function. Initialize the parser object.
1603 # Add extra escapable characters before parent constructor
1604 # initialize the table.
1605 $this->escape_chars .= ':|';
1607 # Insert extra document, block, and span transformations.
1608 # Parent constructor will do the sorting.
# `+=` on arrays keeps an existing key, so an entry already present in a
# gamut is not overwritten by the value listed here.
1609 $this->document_gamut += array(
1610 "doFencedCodeBlocks" => 5,
1611 "stripFootnotes" => 15,
1612 "stripAbbreviations" => 25,
1613 "appendFootnotes" => 50,
1615 $this->block_gamut += array(
1616 "doFencedCodeBlocks" => 5,
1620 $this->span_gamut += array(
1622 "doAbbreviations" => 70,
1625 parent::__construct();
1629 # Extra variables used during extra transformations.
# All of these are reset by setup(); teardown() clears them again except
# for $footnote_counter.
1630 protected $footnotes = array();
1631 protected $footnotes_ordered = array();
1632 protected $footnotes_ref_count = array();
1633 protected $footnotes_numbers = array();
1634 protected $abbr_desciptions = array();
# Alternation of preg_quote()d abbreviation words, joined with `|`
# (built in setup()); empty when there are no abbreviations.
1635 protected $abbr_word_re = '';
1637 # Give the current footnote number.
1638 protected $footnote_counter = 1;
# Resets the per-document Extra state and then seeds the abbreviation
# tables from $predef_abbr.
1641 protected function setup() {
1643 # Setting up Extra-specific variables.
1647 $this->footnotes = array();
1648 $this->footnotes_ordered = array();
1649 $this->footnotes_ref_count = array();
1650 $this->footnotes_numbers = array();
1651 $this->abbr_desciptions = array();
1652 $this->abbr_word_re = '';
1653 $this->footnote_counter = 1;
# Each predefined word is appended to the `|`-separated alternation in
# $abbr_word_re (regex-quoted) and its trimmed description is stored
# under the word itself.
1655 foreach ($this->predef_abbr as $abbr_word => $abbr_desc) {
1656 if ($this->abbr_word_re)
1657 $this->abbr_word_re .= '|';
1658 $this->abbr_word_re .= preg_quote($abbr_word);
1659 $this->abbr_desciptions[$abbr_word] = trim($abbr_desc);
# Empties the footnote and abbreviation tables populated while a document
# was being transformed. $footnote_counter is not touched here; setup()
# resets it to 1.
1663 protected function teardown() {
1665 # Clearing Extra-specific variables.
1667 $this->footnotes = array();
1668 $this->footnotes_ordered = array();
1669 $this->footnotes_ref_count = array();
1670 $this->footnotes_numbers = array();
1671 $this->abbr_desciptions = array();
1672 $this->abbr_word_re = '';
1678 ### Extra Attribute Parser ###
# Both patterns match a `{ ... }` block holding one or more tokens. A token
# starts with `#`, `.` or a lowercase letter and continues with one or more
# of: letters, digits, `-`, `_`, `:` and `=`. Tokens may be preceded by
# spaces. The two patterns differ only in whether the token list is captured.
1680 # Expression to use to catch attributes (includes the braces)
1681 protected $id_class_attr_catch_re = '\{((?:[ ]*[#.a-z][-_:a-zA-Z0-9=]+){1,})[ ]*\}';
1682 # Expression to use when parsing in a context when no capture is desired
1683 protected $id_class_attr_nocatch_re = '\{(?:[ ]*[#.a-z][-_:a-zA-Z0-9=]+){1,}[ ]*\}';
protected function doExtraAttributes($tag_name, $attr) {
	#
	# Parse attributes caught by the $this->id_class_attr_catch_re expression
	# and return the HTML-formatted list of attributes.
	#
	# Supported tokens are `.class`, `#id` (only the first id is kept) and
	# `key=value`. The latter is emitted only when $no_markup is off.
	#
	# $tag_name: name of the element being built (not used by this method).
	# $attr:     token list without the surrounding braces, or empty/null.
	# Returns a string that is either empty or starts with a space.
	#
	if (empty($attr)) return "";

	# Split on components
	preg_match_all('/[#.a-z][-_:a-zA-Z0-9=]+/', $attr, $matches);
	$elements = $matches[0];

	# handle classes and ids (only first id taken into account)
	$classes = array();
	$attributes = array();
	$id = false;
	foreach ($elements as $element) {
		# Square-bracket offsets: the `$element{0}` form was removed in PHP 8.0.
		if ($element[0] == '.') {
			$classes[] = substr($element, 1);
		} else if ($element[0] == '#') {
			if ($id === false) $id = substr($element, 1);
		} else if (strpos($element, '=') > 0) {
			# `> 0` also rejects a token that starts with `=`.
			$parts = explode('=', $element, 2);
			$attributes[] = $parts[0] . '="' . $parts[1] . '"';
		}
	}

	# compose attributes as string
	$attr_str = "";
	if (!empty($id)) {
		$attr_str .= ' id="'.$id.'"';
	}
	if (!empty($classes)) {
		$attr_str .= ' class="'.implode(" ", $classes).'"';
	}
	if (!$this->no_markup && !empty($attributes)) {
		$attr_str .= ' '.implode(" ", $attributes);
	}
	return $attr_str;
}
# Extra version of the link-definition stripper: the pattern additionally
# accepts a trailing `{...}` attribute block (captured as $5). Every match
# is passed to _stripLinkDefinitions_callback.
1728 protected function stripLinkDefinitions($text) {
1730 # Strips link definitions from text, stores the URLs and titles in
# A definition may be indented by at most tab_width - 1 spaces.
1733 $less_than_tab = $this->tab_width - 1;
1735 # Link defs are in the form: ^[id]: url "optional title"
1736 $text = preg_replace_callback('{
1737 ^[ ]{0,'.$less_than_tab.'}\[(.+)\][ ]?: # id = $1
1739 \n? # maybe *one* newline
1747 \n? # maybe one newline
1750 (?<=\s) # lookbehind for whitespace
1755 )? # title is optional
1756 (?:[ ]* '.$this->id_class_attr_catch_re.' )? # $5 = extra id & class attr
1759 array($this, '_stripLinkDefinitions_callback'),
protected function _stripLinkDefinitions_callback($matches) {
	#
	# Record one link definition, keyed by its lower-cased id:
	#   $1 = id, $2 or $3 = url (whichever is non-empty), $4 = title,
	#   $5 = extra attribute block.
	# The reference assignments tolerate captures that are absent from
	# $matches. The definition itself is removed from the text.
	#
	$key = strtolower($matches[1]);

	$this->urls[$key]     =  ($matches[2] == '') ? $matches[3] : $matches[2];
	$this->titles[$key]   =& $matches[4];
	$this->ref_attr[$key] =  $this->doExtraAttributes("", $dummy =& $matches[5]);

	return ''; # String that will replace the block
}
1773 ### HTML Block Parser ###
# Each list below is a `|`-separated alternation that is interpolated
# directly into regular expressions, e.g. '{^<(?:'.$this->block_tags_re.')\b}'.
1775 # Tags that are always treated as block tags:
1776 protected $block_tags_re = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|form|fieldset|iframe|hr|legend|article|section|nav|aside|hgroup|header|footer|figcaption|figure';
1778 # Tags treated as block tags only if the opening tag is alone on its line:
1779 protected $context_block_tags_re = 'script|noscript|style|ins|del|iframe|object|source|track|param|math|svg|canvas|audio|video';
1781 # Tags where markdown="1" default to span mode:
1782 protected $contain_span_tags_re = 'p|h[1-6]|li|dd|dt|td|th|legend|address';
1784 # Tags which must not have their contents modified, no matter where
1786 protected $clean_tags_re = 'script|style|math|svg';
1788 # Tags that do not need to be closed.
1789 protected $auto_close_tags_re = 'hr|img|param|source|track';
protected function hashHTMLBlocks($text) {
	#
	# Hashify HTML blocks and "clean tags".
	#
	# Only block-level HTML (headers, lists, tables, ...) is protected
	# here, because text wrapped in non-block-level tags such as anchors,
	# phrase emphasis and spans must still end up inside <p> elements.
	#
	# The work is done by two mutually recursive helpers:
	# _hashHTMLBlocks_inMarkdown scans Markdown text and calls
	# _hashHTMLBlocks_inHTML when it meets a block tag; the latter calls
	# back into the former for the content of a tag that carries a
	# markdown="1" attribute.
	#
	# When $no_markup is set the text is returned untouched.
	#
	if ($this->no_markup) return $text;

	# The helper returns array(processed text, remaining text); only the
	# processed part matters at the top level.
	$result = $this->_hashHTMLBlocks_inMarkdown($text);
	return $result[0];
}
# Markdown-side half of the HTML block hasher. Repeatedly splits $text on
# the next "interesting" token (block/context/clean tag, the enclosing tag,
# HTML comment, processing instruction, CDATA block, indented or fenced code
# block, code span marker) and decides what to do with it. Code is passed
# through unchanged, block and clean tags are handed to
# _hashHTMLBlocks_inHTML, and tags named like the enclosing tag adjust the
# nesting depth.
1817 protected function _hashHTMLBlocks_inMarkdown($text, $indent = 0,
1818 $enclosing_tag_re = '', $span = false)
1821 # Parse markdown text, calling _HashHTMLBlocks_InHTML for block tags.
1823 # * $indent is the number of space to be ignored when checking for code
1824 # blocks. This is important because if we don't take the indent into
1825 # account, something like this (which looks right) won't work as expected:
1828 # <div markdown="1">
1829 # Hello World. <-- Is this a Markdown code block or text?
1830 # </div> <-- Is this a Markdown code block or a real tag?
1833 # If you don't like this, just don't indent the tag on which
1834 # you apply the markdown="1" attribute.
1836 # * If $enclosing_tag_re is not empty, stops at the first unmatched closing
1837 # tag with that name. Nested tags supported.
1839 # * If $span is true, text inside must treated as span. So any double
1840 # newline will be replaced by a single newline so that it does not create
1843 # Returns an array of that form: ( processed text , remaining text )
1845 if ($text === '') return array('', '');
1847 # Regex to check for the presence of newlines around a block tag.
1848 $newline_before_re = '/(?:^\n?|\n\n)*$/';
1851 ^ # Start of text following the tag.
1852 (?>[ ]*<!--.*?-->)? # Optional comment.
1853 [ ]*\n # Must be followed by newline.
1856 # Regex to match any tag.
1859 ( # $2: Capture whole tag.
1860 </? # Any opening or closing tag.
1862 '.$this->block_tags_re.' |
1863 '.$this->context_block_tags_re.' |
1864 '.$this->clean_tags_re.' |
1865 (?!\s)'.$enclosing_tag_re.'
1868 (?=[\s"\'/a-zA-Z0-9]) # Allowed characters after tag name.
1870 ".*?" | # Double quotes (can contain `>`)
1871 \'.*?\' | # Single quotes (can contain `>`)
1872 .+? # Anything but quotes and `>`.
1877 <!-- .*? --> # HTML Comment
1879 <\?.*?\?> | <%.*?%> # Processing instruction
1881 <!\[CDATA\[.*?\]\]> # CData Block
1882 '. ( !$span ? ' # If not in span.
1884 # Indented code block
1885 (?: ^[ ]*\n | ^ | \n[ ]*\n )
1886 [ ]{'.($indent+4).'}[^\n]* \n
1888 (?: [ ]{'.($indent+4).'}[^\n]* | [ ]* ) \n
1891 # Fenced code block marker
1893 [ ]{0,'.($indent+3).'}(?:~{3,}|`{3,})
1896 \.?[-_:a-zA-Z0-9]+ # standalone class name
1898 '.$this->id_class_attr_nocatch_re.' # extra attributes
1902 ' : '' ). ' # End (if not is span).
1905 # Note, this regex needs to go after backtick fenced
1906 # code blocks but it should also be kept outside of the
1907 # "if not in span" condition adding backticks to the parser
1913 $depth = 0; # Current depth inside the tag tree.
1914 $parsed = ""; # Parsed text that will be returned.
1917 # Loop through every tag until we find the closing tag of the parent
1918 # or loop until reaching the end of text if no parent tag specified.
1922 # Split the text using the first $tag_match pattern found.
1923 # Text before pattern will be first in the array, text after
1924 # pattern will be at the end, and between will be any catches made
1927 $parts = preg_split($block_tag_re, $text, 2,
1928 PREG_SPLIT_DELIM_CAPTURE);
1930 # If in Markdown span mode, add a empty-string span-level hash
1931 # after each newline to prevent triggering any block element.
1933 $void = $this->hashPart("", ':');
1934 $newline = "$void\n";
1935 $parts[0] = $void . str_replace("\n", $newline, $parts[0]) . $void;
1938 $parsed .= $parts[0]; # Text before current tag.
1940 # If end of $text has been reached. Stop loop.
1941 if (count($parts) < 3) {
1946 $tag = $parts[1]; # Tag to handle.
1947 $text = $parts[2]; # Remaining text after current tag.
1948 $tag_re = preg_quote($tag); # For use in a regular expression.
1951 # Check for: Fenced code block marker.
1952 # Note: need to recheck the whole tag to disambiguate backtick
1953 # fences from code spans
1955 if (preg_match('{^\n?([ ]{0,'.($indent+3).'})(~{3,}|`{3,})[ ]*(?:\.?[-_:a-zA-Z0-9]+|'.$this->id_class_attr_nocatch_re.')?[ ]*\n?$}', $tag, $capture)) {
1956 # Fenced code block marker: find matching end marker.
1957 $fence_indent = strlen($capture[1]); # use captured indent in re
1958 $fence_re = $capture[2]; # use captured fence in re
1959 if (preg_match('{^(?>.*\n)*?[ ]{'.($fence_indent).'}'.$fence_re.'[ ]*(?:\n|$)}', $text,
1962 # End marker found: pass text unchanged until marker.
1963 $parsed .= $tag . $matches[0];
1964 $text = substr($text, strlen($matches[0]));
1967 # No end marker: just skip it.
1972 # Check for: Indented code block.
# NOTE(review): `$tag{0}` is curly-brace string offset syntax, which was
# removed in PHP 8.0; `$tag[0]` is the equivalent. The same applies to
# every `$tag{...}` in this method.
1974 else if ($tag{0} == "\n" || $tag{0} == " ") {
1975 # Indented code block: pass it unchanged, will be handled
1980 # Check for: Code span marker
1981 # Note: need to check this after backtick fenced code blocks
1983 else if ($tag{0} == "`") {
1984 # Find corresponding end marker.
1985 $tag_re = preg_quote($tag);
1986 if (preg_match('{^(?>.+?|\n(?!\n))*?(?<!`)'.$tag_re.'(?!`)}',
1989 # End marker found: pass text unchanged until marker.
1990 $parsed .= $tag . $matches[0];
1991 $text = substr($text, strlen($matches[0]));
1994 # Unmatched marker: just skip it.
1999 # Check for: Opening Block level tag or
2000 # Opening Context Block tag (like ins and del)
2001 # used as a block tag (tag is alone on its line).
2003 else if (preg_match('{^<(?:'.$this->block_tags_re.')\b}', $tag) ||
2004 ( preg_match('{^<(?:'.$this->context_block_tags_re.')\b}', $tag) &&
2005 preg_match($newline_before_re, $parsed) &&
2006 preg_match($newline_after_re, $text) )
2009 # Need to parse tag and following text using the HTML parser.
2010 list($block_text, $text) =
2011 $this->_hashHTMLBlocks_inHTML($tag . $text, "hashBlock", true);
2013 # Make sure it stays outside of any paragraph by adding newlines.
2014 $parsed .= "\n\n$block_text\n\n";
2017 # Check for: Clean tag (like script, math)
2018 # HTML Comments, processing instructions.
2020 else if (preg_match('{^<(?:'.$this->clean_tags_re.')\b}', $tag) ||
2021 $tag{1} == '!' || $tag{1} == '?')
2023 # Need to parse tag and following text using the HTML parser.
2024 # (don't check for markdown attribute)
2025 list($block_text, $text) =
2026 $this->_hashHTMLBlocks_inHTML($tag . $text, "hashClean", false);
2028 $parsed .= $block_text;
2031 # Check for: Tag with same name as enclosing tag.
2033 else if ($enclosing_tag_re !== '' &&
2034 # Same name as enclosing tag.
2035 preg_match('{^</?(?:'.$enclosing_tag_re.')\b}', $tag))
2038 # Increase/decrease nested tag count.
# A closing tag lowers the depth; an opening tag raises it unless the
# character before its final `>` is `/` (self-closing form).
2040 if ($tag{1} == '/') $depth--;
2041 else if ($tag{strlen($tag)-2} != '/') $depth++;
2045 # Going out of parent element. Clean up and break so we
2046 # return to the calling function.
2048 $text = $tag . $text;
2057 } while ($depth >= 0);
2059 return array($parsed, $text);
# HTML-side half of the HTML block hasher. Starting at an opening tag, it
# walks tag by tag, tracking the nesting depth of tags with the same name as
# the first one, until that first tag is closed. The accumulated block text
# is converted with $hash_method. When $md_attr is true, a tag carrying a
# `markdown` attribute has its content handed back to
# _hashHTMLBlocks_inMarkdown.
2061 protected function _hashHTMLBlocks_inHTML($text, $hash_method, $md_attr) {
2063 # Parse HTML, calling _HashHTMLBlocks_InMarkdown for block tags.
2065 # * Calls $hash_method to convert any blocks.
2066 # * Stops when the first opening tag closes.
2067 # * $md_attr indicate if the use of the `markdown="1"` attribute is allowed.
2068 # (it is not inside clean tags)
2070 # Returns an array of that form: ( processed text , remaining text )
2072 if ($text === '') return array('', '');
2074 # Regex to match `markdown` attribute inside of a tag.
2075 $markdown_attr_re = '
2077 \s* # Eat whitespace before the `markdown` attribute
2081 (["\']) # $1: quote delimiter
2082 (.*?) # $2: attribute value
2083 \1 # matching delimiter
2085 ([^\s>]*) # $3: unquoted attribute value
2087 () # $4: make $3 always defined (avoid warnings)
2090 # Regex to match any tag.
2092 ( # $2: Capture whole tag.
2093 </? # Any opening or closing tag.
2096 (?=[\s"\'/a-zA-Z0-9]) # Allowed characters after tag name.
2098 ".*?" | # Double quotes (can contain `>`)
2099 \'.*?\' | # Single quotes (can contain `>`)
2100 .+? # Anything but quotes and `>`.
2105 <!-- .*? --> # HTML Comment
2107 <\?.*?\?> | <%.*?%> # Processing instruction
2109 <!\[CDATA\[.*?\]\]> # CData Block
2113 $original_text = $text; # Save original text in case of failure.
2115 $depth = 0; # Current depth inside the tag tree.
2116 $block_text = ""; # Temporary text holder for current text.
2117 $parsed = ""; # Parsed text that will be returned.
2120 # Get the name of the starting tag.
2121 # (This pattern makes $base_tag_name_re safe without quoting.)
2123 if (preg_match('/^<([\w:$]*)\b/', $text, $matches))
2124 $base_tag_name_re = $matches[1];
2127 # Loop through every tag until we find the corresponding closing tag.
2131 # Split the text using the first $tag_match pattern found.
2132 # Text before pattern will be first in the array, text after
2133 # pattern will be at the end, and between will be any catches made
2136 $parts = preg_split($tag_re, $text, 2, PREG_SPLIT_DELIM_CAPTURE);
2138 if (count($parts) < 3) {
2140 # End of $text reached with unbalanced tag(s).
2141 # In that case, we return original text unchanged and pass the
2142 # first character as filtered to prevent an infinite loop in the
# NOTE(review): `$original_text{0}` is curly-brace string offset syntax,
# removed in PHP 8.0; `$original_text[0]` is the equivalent. The same
# applies to every `$tag{...}` below.
2145 return array($original_text{0}, substr($original_text, 1));
2148 $block_text .= $parts[0]; # Text before current tag.
2149 $tag = $parts[1]; # Tag to handle.
2150 $text = $parts[2]; # Remaining text after current tag.
2153 # Check for: Auto-close tag (like <hr/>)
2154 # Comments and Processing Instructions.
2156 if (preg_match('{^</?(?:'.$this->auto_close_tags_re.')\b}', $tag) ||
2157 $tag{1} == '!' || $tag{1} == '?')
2159 # Just add the tag to the block as if it was text.
2160 $block_text .= $tag;
2164 # Increase/decrease nested tag count. Only do so if
2165 # the tag's name match base tag's.
2167 if (preg_match('{^</?'.$base_tag_name_re.'\b}', $tag)) {
2168 if ($tag{1} == '/') $depth--;
2169 else if ($tag{strlen($tag)-2} != '/') $depth++;
2173 # Check for `markdown="1"` attribute and handle it.
# NOTE(review): in '/^1|block|span$/' the anchors bind to the first and
# last alternatives only, so the pattern accepts any value that starts
# with "1", contains "block", or ends with "span". If only the three exact
# values are intended, the pattern should be '/^(?:1|block|span)$/'.
2176 preg_match($markdown_attr_re, $tag, $attr_m) &&
2177 preg_match('/^1|block|span$/', $attr_m[2] . $attr_m[3]))
2179 # Remove `markdown` attribute from opening tag.
2180 $tag = preg_replace($markdown_attr_re, '', $tag);
2182 # Check if text inside this tag must be parsed in span mode.
# Span mode applies when the attribute value is "span", or when it is not
# "block" and the tag is one of $contain_span_tags_re (`&&` binds tighter
# than `||`).
2183 $this->mode = $attr_m[2] . $attr_m[3];
2184 $span_mode = $this->mode == 'span' || $this->mode != 'block' &&
2185 preg_match('{^<(?:'.$this->contain_span_tags_re.')\b}', $tag);
2187 # Calculate indent before tag.
2188 if (preg_match('/(?:^|\n)( *?)(?! ).*?$/', $block_text, $matches)) {
2189 $strlen = $this->utf8_strlen;
2190 $indent = $strlen($matches[1], 'UTF-8');
2195 # End preceding block with this tag.
2196 $block_text .= $tag;
2197 $parsed .= $this->$hash_method($block_text);
2199 # Get enclosing tag name for the ParseMarkdown function.
2200 # (This pattern makes $tag_name_re safe without quoting.)
2201 preg_match('/^<([\w:$]*)\b/', $tag, $matches);
2202 $tag_name_re = $matches[1];
2204 # Parse the content using the HTML-in-Markdown parser.
2205 list ($block_text, $text)
2206 = $this->_hashHTMLBlocks_inMarkdown($text, $indent,
2207 $tag_name_re, $span_mode);
2209 # Outdent markdown text.
2211 $block_text = preg_replace("/^[ ]{1,$indent}/m", "",
2215 # Append tag content to parsed text.
2216 if (!$span_mode) $parsed .= "\n\n$block_text\n\n";
2217 else $parsed .= "$block_text";
2219 # Start over with a new block.
2222 else $block_text .= $tag;
2225 } while ($depth > 0);
2228 # Hash last block text that wasn't processed inside the loop.
2230 $parsed .= $this->$hash_method($block_text);
2232 return array($parsed, $text);
protected function hashClean($text) {
	#
	# Hash a "clean" tag block. The text is stored through hashPart() with
	# the boundary character 'C', so it is replaced by a token and its
	# content is no longer visible to the remaining transformations.
	#
	$boundary = 'C';
	return $this->hashPart($text, $boundary);
}
# Extra version of the anchor pass: the inline-link pattern additionally
# accepts a trailing `{...}` attribute block ($8). Three replacements run
# in order: reference links, inline links, then bare [shortcut] references.
# $in_anchor is set for the duration of the call, and the method returns
# $text untouched when it is already set.
2246 protected function doAnchors($text) {
2248 # Turn Markdown link shortcuts into XHTML <a> tags.
2250 if ($this->in_anchor) return $text;
2251 $this->in_anchor = true;
2254 # First, handle reference-style links: [link text] [id]
2256 $text = preg_replace_callback('{
2257 ( # wrap whole match in $1
2259 ('.$this->nested_brackets_re.') # link text = $2
2262 [ ]? # one optional space
2263 (?:\n[ ]*)? # one optional newline followed by spaces
2270 array($this, '_doAnchors_reference_callback'), $text);
2273 # Next, inline-style links: [link text](url "optional title")
2275 $text = preg_replace_callback('{
2276 ( # wrap whole match in $1
2278 ('.$this->nested_brackets_re.') # link text = $2
2285 ('.$this->nested_url_parenthesis_re.') # href = $4
2289 ([\'"]) # quote char = $6
2292 [ \n]* # ignore any spaces/tabs between closing quote and )
2293 )? # title is optional
2295 (?:[ ]? '.$this->id_class_attr_catch_re.' )? # $8 = id/class attributes
2298 array($this, '_doAnchors_inline_callback'), $text);
2301 # Last, handle reference-style shortcuts: [link text]
2302 # These must come last in case you've also got [link text][1]
2303 # or [link text](/foo)
2305 $text = preg_replace_callback('{
2306 ( # wrap whole match in $1
2308 ([^\[\]]+) # link text = $2; can\'t contain [ or ]
2312 array($this, '_doAnchors_reference_callback'), $text);
2314 $this->in_anchor = false;
protected function _doAnchors_reference_callback($matches) {
	#
	# Callback for reference-style links: $1 = whole match, $2 = link text,
	# $3 = link id (empty or absent for [text][] and [text] shortcuts, in
	# which case the link text itself is the id).
	#
	# The id is lower-cased and embedded newlines become single spaces
	# before the lookup. An unknown id leaves the original text untouched.
	#
	$label =  $matches[2];
	$ref   =& $matches[3];   # by reference: the capture may be missing

	if ($ref == "") {
		$ref = $label;
	}
	$ref = preg_replace('{[ ]?\n}', ' ', strtolower($ref));

	if (!isset($this->urls[$ref])) {
		return $matches[1];
	}

	$href = $this->encodeURLAttribute($this->urls[$ref]);
	$html = "<a href=\"$href\"";

	if (isset($this->titles[$ref])) {
		$tip = $this->encodeAttribute($this->titles[$ref]);
		$html .= " title=\"$tip\"";
	}
	if (isset($this->ref_attr[$ref])) {
		# Extra attributes recorded with the link definition.
		$html .= $this->ref_attr[$ref];
	}

	$label = $this->runSpanGamut($label);
	$html .= ">$label</a>";

	return $this->hashPart($html);
}
protected function _doAnchors_inline_callback($matches) {
	#
	# Callback for inline-style links: [link text](url "title"){attributes}
	#   $2 = link text, $3 or $4 = url (whichever is non-empty),
	#   $7 = title (optional), $8 = extra attribute block (optional).
	# Returns the hashed <a> element.
	#
	$link_text =  $this->runSpanGamut($matches[2]);
	$url       =  $matches[3] == '' ? $matches[4] : $matches[3];
	$title     =& $matches[7];   # by reference: the capture may be missing
	$attr      =  $this->doExtraAttributes("a", $dummy =& $matches[8]);

	// if the URL was of the form <s p a c e s> it got caught by the HTML
	// tag parser and hashed. Need to reverse the process before using the URL.
	$unhashed = $this->unhash($url);
	if ($unhashed != $url)
		$url = preg_replace('/^<(.*)>$/', '\1', $unhashed);

	$url = $this->encodeURLAttribute($url);

	$result = "<a href=\"$url\"";
	if (isset($title)) {
		$title = $this->encodeAttribute($title);
		$result .= " title=\"$title\"";
	}
	# Emit the id/class/key=value attributes parsed above.
	$result .= $attr;

	# The link text goes through the span gamut a second time here, as it
	# did before this change.
	$link_text = $this->runSpanGamut($link_text);
	$result .= ">$link_text</a>";

	return $this->hashPart($result);
}
# Extra version of the image pass: the inline-image pattern additionally
# accepts a trailing `{...}` attribute block ($8). Reference-style images
# are handled first, then inline images.
2382 protected function doImages($text) {
2384 # Turn Markdown image shortcuts into <img> tags.
2387 # First, handle reference-style labeled images: ![alt text][id]
2389 $text = preg_replace_callback('{
2390 ( # wrap whole match in $1
2392 ('.$this->nested_brackets_re.') # alt text = $2
2395 [ ]? # one optional space
2396 (?:\n[ ]*)? # one optional newline followed by spaces
2404 array($this, '_doImages_reference_callback'), $text);
2407 # Next, handle inline images: ![alt text](url "optional title")
2408 # Don't forget: encode * and _
2410 $text = preg_replace_callback('{
2411 ( # wrap whole match in $1
2413 ('.$this->nested_brackets_re.') # alt text = $2
2415 \s? # One optional whitespace character
2419 <(\S*)> # src url = $3
2421 ('.$this->nested_url_parenthesis_re.') # src url = $4
2425 ([\'"]) # quote char = $6
2429 )? # title is optional
2431 (?:[ ]? '.$this->id_class_attr_catch_re.' )? # $8 = id/class attributes
2434 array($this, '_doImages_inline_callback'), $text);
protected function _doImages_reference_callback($matches) {
	#
	# Callback for reference-style images: $1 = whole match, $2 = alt text,
	# $3 = link id (empty for the ![alt][] shortcut, in which case the
	# lower-cased alt text is the id).
	#
	# If there's no such link id, the original text is left intact.
	#
	$alt = $matches[2];
	$ref = strtolower($matches[3]);
	if ($ref == "") {
		$ref = strtolower($alt);
	}

	$alt = $this->encodeAttribute($alt);
	if (!isset($this->urls[$ref])) {
		return $matches[1];
	}

	$src = $this->encodeURLAttribute($this->urls[$ref]);
	$html = "<img src=\"$src\" alt=\"$alt\"";

	if (isset($this->titles[$ref])) {
		$tip = $this->encodeAttribute($this->titles[$ref]);
		$html .= " title=\"$tip\"";
	}
	if (isset($this->ref_attr[$ref])) {
		# Extra attributes recorded with the link definition.
		$html .= $this->ref_attr[$ref];
	}
	$html .= $this->empty_element_suffix;

	return $this->hashPart($html);
}
protected function _doImages_inline_callback($matches) {
	#
	# Callback for inline images: ![alt text](url "title"){attributes}
	#   $2 = alt text, $3 or $4 = src url (whichever is non-empty),
	#   $7 = title (optional), $8 = extra attribute block (optional).
	# Returns the hashed <img> element, closed with $empty_element_suffix.
	#
	$alt_text =  $matches[2];
	$url      =  $matches[3] == '' ? $matches[4] : $matches[3];
	$title    =& $matches[7];   # by reference: the capture may be missing
	$attr     =  $this->doExtraAttributes("img", $dummy =& $matches[8]);

	$alt_text = $this->encodeAttribute($alt_text);
	$url = $this->encodeURLAttribute($url);
	$result = "<img src=\"$url\" alt=\"$alt_text\"";
	if (isset($title)) {
		$title = $this->encodeAttribute($title);
		$result .= " title=\"$title\""; # $title already quoted
	}
	# Emit the id/class/key=value attributes parsed above.
	$result .= $attr;
	$result .= $this->empty_element_suffix;

	return $this->hashPart($result);
}
# Converts Setext-style (underlined) and atx-style (`#`-prefixed) headers,
# each optionally followed by a `{...}` attribute block. Matches are passed
# to _doHeaders_callback_setext and _doHeaders_callback_atx respectively.
2489 protected function doHeaders($text) {
2491 # Redefined to add id and class attribute support.
2493 # Setext-style headers:
2494 # Header 1 {#header1}
2497 # Header 2 {#header2 .class1 .class2}
2500 $text = preg_replace_callback(
2502 (^.+?) # $1: Header text
2503 (?:[ ]+ '.$this->id_class_attr_catch_re.' )? # $3 = id/class attributes
2504 [ ]*\n(=+|-+)[ ]*\n+ # $3: Header footer
2506 array($this, '_doHeaders_callback_setext'), $text);
2508 # atx-style headers:
2509 # # Header 1 {#header1}
2510 # ## Header 2 {#header2}
2511 # ## Header 2 with closing hashes ## {#header3.class1.class2}
2513 # ###### Header 6 {.class2}
2515 $text = preg_replace_callback('{
2516 ^(\#{1,6}) # $1 = string of #\'s
2518 (.+?) # $2 = Header text
2520 \#* # optional closing #\'s (not counted)
2521 (?:[ ]+ '.$this->id_class_attr_catch_re.' )? # $3 = id/class attributes
2525 array($this, '_doHeaders_callback_atx'), $text);
#
# Callback for one setext-style header match.
#
# $matches[1] is the header text, $matches[2] the optional id/class attribute
# string and $matches[3] the underline ("=" run -> <h1>, "-" run -> <h2>).
# Returns the hashed header block surrounded by newlines, or the untouched
# match when the guard below applies.
#
protected function _doHeaders_callback_setext($matches) {
	# A single "-" underline beneath text starting with "- " is left as-is
	# (presumably because it reads as a bullet list, not a header).
	if ($matches[3] == '-' && preg_match('{^- }', $matches[1]))
		return $matches[0];

	# Square-bracket offset: the `$str{0}` form was deprecated in PHP 7.4 and
	# removed in PHP 8.0, where it is a parse error.
	$level = $matches[3][0] == '=' ? 1 : 2;

	# $dummy =& ... keeps an absent attribute group from raising a notice.
	$attr  = $this->doExtraAttributes("h$level", $dummy =& $matches[2]);
	$block = "<h$level$attr>".$this->runSpanGamut($matches[1])."</h$level>";
	return "\n" . $this->hashBlock($block) . "\n\n";
}
#
# Callback for one atx-style header match.
#
# The number of "#" characters in $matches[1] gives the header level,
# $matches[2] is the header text and $matches[3] the optional id/class
# attribute string. Returns the hashed header block surrounded by newlines.
#
protected function _doHeaders_callback_atx($matches) {
	$tag = 'h' . strlen($matches[1]);

	# Reference binding: a missing attribute group reads as null, no notice.
	$extra =& $matches[3];
	$attributes = $this->doExtraAttributes($tag, $extra);

	$inner = $this->runSpanGamut($matches[2]);
	$html = '<' . $tag . $attributes . '>' . $inner . '</' . $tag . '>';

	return "\n" . $this->hashBlock($html) . "\n\n";
}
# Convert pipe-delimited tables in $text in two regex passes: tables whose
# rows start with a pipe go to _doTable_leadingPipe_callback, tables without
# a leading pipe go straight to the table callback.
# Leading indentation of up to tab_width - 1 spaces is accepted on the header
# and underline rows.
# NOTE(review): the second pass names its callback '_DoTable_callback' while
# the method is declared as _doTable_callback. PHP resolves method names
# case-insensitively, so this works, but the spelling is inconsistent.
2545 protected function doTables($text) {
2549 $less_than_tab = $this->tab_width - 1;
2551 # Find tables with leading pipe.
2553 # | Header 1 | Header 2
2554 # | -------- | --------
2558 $text = preg_replace_callback('
2561 [ ]{0,'.$less_than_tab.'} # Allowed whitespace.
2562 [|] # Optional leading pipe (present)
2563 (.+) \n # $1: Header row (at least one pipe)
2565 [ ]{0,'.$less_than_tab.'} # Allowed whitespace.
2566 [|] ([ ]*[-:]+[-| :]*) \n # $2: Header underline
2570 [ ]* # Allowed whitespace.
2571 [|] .* \n # Row content.
2574 (?=\n|\Z) # Stop at final double newline.
2576 array($this, '_doTable_leadingPipe_callback'), $text);
2579 # Find tables without leading pipe.
2581 # Header 1 | Header 2
2582 # -------- | --------
2586 $text = preg_replace_callback('
2589 [ ]{0,'.$less_than_tab.'} # Allowed whitespace.
2590 (\S.*[|].*) \n # $1: Header row (at least one pipe)
2592 [ ]{0,'.$less_than_tab.'} # Allowed whitespace.
2593 ([-:]+[ ]*[|][-| :]*) \n # $2: Header underline
2597 .* [|] .* \n # Row content
2600 (?=\n|\Z) # Stop at final double newline.
2602 array($this, '_DoTable_callback'), $text);
#
# Callback for tables written with a leading pipe on every row.
#
# Strips the leading pipe (and any spaces before it) from each body row in
# $matches[3], then delegates to _doTable_callback with the same match layout:
# [whole match, header row, underline row, body rows].
#
protected function _doTable_leadingPipe_callback($matches) {
	$body_without_pipes = preg_replace('/^ *[|]/m', '', $matches[3]);

	$normalized = array(
		$matches[0],
		$matches[1],
		$matches[2],
		$body_without_pipes,
	);

	return $this->_doTable_callback($normalized);
}
#
# Build the HTML attribute string that aligns a table cell.
#
# $alignname is the alignment keyword (the table callback passes 'left',
# 'center' or 'right'). When $this->table_align_class_tmpl is non-empty, its
# "%%" placeholder is replaced by the keyword and a class attribute is
# returned; otherwise a plain align attribute is returned. Both results start
# with a space so they can be appended directly after a tag name.
#
protected function _doTable_makeAlignAttr($alignname)
{
	if (!empty($this->table_align_class_tmpl)) {
		$class_value = str_replace('%%', $alignname, $this->table_align_class_tmpl);
		return ' class="' . $class_value . '"';
	}

	return ' align="' . $alignname . '"';
}
# Callback that renders one table match as HTML.
# $matches[1] is the header row, $matches[2] the underline row and
# $matches[3] the body rows. Column alignment is read from the underline,
# header and body cells are split on pipes and run through runSpanGamut,
# and the finished <table> is passed to hashBlock(); the return value is
# that hash followed by a newline.
2624 protected function _doTable_callback($matches) {
2625 $head = $matches[1];
2626 $underline = $matches[2];
2627 $content = $matches[3];
2629 # Remove any trailing pipes for each line.
2630 $head = preg_replace('/[|] *$/m', '', $head);
2631 $underline = preg_replace('/[|] *$/m', '', $underline);
2632 $content = preg_replace('/[|] *$/m', '', $content);
2634 # Reading alignment from header underline.
2635 $separators = preg_split('/ *[|] */', $underline);
2636 foreach ($separators as $n => $s) {
# Colon on the right only -> right; on both sides -> center; on the left
# only -> left.
2637 if (preg_match('/^ *-+: *$/', $s))
2638 $attr[$n] = $this->_doTable_makeAlignAttr('right');
2639 else if (preg_match('/^ *:-+: *$/', $s))
2640 $attr[$n] = $this->_doTable_makeAlignAttr('center');
2641 else if (preg_match('/^ *:-+ *$/', $s))
2642 $attr[$n] = $this->_doTable_makeAlignAttr('left');
2647 # Parsing span elements, including code spans, character escapes,
2648 # and inline HTML tags, so that pipes inside those get ignored.
2649 $head = $this->parseSpan($head);
2650 $headers = preg_split('/ *[|] */', $head);
2651 $col_count = count($headers);
# Make sure every header column has an attribute entry (empty by default).
2652 $attr = array_pad($attr, $col_count, '');
2654 # Write column headers.
2655 $text = "<table>\n";
2656 $text .= "<thead>\n";
2658 foreach ($headers as $n => $header)
2659 $text .= " <th$attr[$n]>".$this->runSpanGamut(trim($header))."</th>\n";
2661 $text .= "</thead>\n";
2663 # Split content by row.
2664 $rows = explode("\n", trim($content, "\n"));
2666 $text .= "<tbody>\n";
2667 foreach ($rows as $row) {
2668 # Parsing span elements, including code spans, character escapes,
2669 # and inline HTML tags, so that pipes inside those get ignored.
2670 $row = $this->parseSpan($row);
2672 # Split row by cell.
# The limit keeps a row from producing more cells than there are header
# columns; the padding fills rows that have fewer.
2673 $row_cells = preg_split('/ *[|] */', $row, $col_count);
2674 $row_cells = array_pad($row_cells, $col_count, '');
2677 foreach ($row_cells as $n => $cell)
2678 $text .= " <td$attr[$n]>".$this->runSpanGamut(trim($cell))."</td>\n";
2681 $text .= "</tbody>\n";
2682 $text .= "</table>";
2684 return $this->hashBlock($text) . "\n";
# Find definition lists in $text and hand each one to _doDefLists_callback.
# $whole_list_re is assembled from string pieces so that the allowed leading
# indentation (tab_width - 1 spaces) can be interpolated into the pattern.
2688 protected function doDefLists($text) {
2690 # Form HTML definition lists.
2692 $less_than_tab = $this->tab_width - 1;
2694 # Re-usable pattern to match any entire dl list:
2695 $whole_list_re = '(?>
2698 [ ]{0,'.$less_than_tab.'}
2699 ((?>.*\S.*\n)+) # $3 = defined term
2701 [ ]{0,'.$less_than_tab.'}:[ ]+ # colon starting definition
2709 (?! # Negative lookahead for another term
2710 [ ]{0,'.$less_than_tab.'}
2711 (?: \S.*\n )+? # defined term
2713 [ ]{0,'.$less_than_tab.'}:[ ]+ # colon starting definition
2715 (?! # Negative lookahead for another definition
2716 [ ]{0,'.$less_than_tab.'}:[ ]+ # colon starting definition
2722 $text = preg_replace_callback('{
2726 array($this, '_doDefLists_callback'), $text);
#
# Callback that renders one whole definition list.
#
# $matches[1] holds the raw list text. Its items are converted by
# processDefListItems(), the trimmed result is wrapped in <dl>...</dl>, and
# the hashed block is returned followed by a blank line.
#
protected function _doDefLists_callback($matches) {
	$items_html = trim($this->processDefListItems($matches[1]));
	$dl_html = "<dl>\n{$items_html}\n</dl>";

	return $this->hashBlock($dl_html) . "\n\n";
}
# Convert the body of one definition list in two passes over $list_str:
# terms are replaced via _processDefListItems_callback_dt, then definitions
# via _processDefListItems_callback_dd.
# NOTE(review): the term lookahead hard-codes [ ]{0,3} where the surrounding
# pattern pieces interpolate $less_than_tab; the two agree only while
# tab_width is 4 — confirm whether that is intended.
2742 protected function processDefListItems($list_str) {
2744 # Process the contents of a single definition list, splitting it
2745 # into individual term and definition list items.
2747 $less_than_tab = $this->tab_width - 1;
2749 # trim trailing blank lines:
2750 $list_str = preg_replace("/\n{2,}\\z/", "\n", $list_str);
2752 # Process definition terms.
2753 $list_str = preg_replace_callback('{
2754 (?>\A\n?|\n\n+) # leading line
2755 ( # definition terms = $1
2756 [ ]{0,'.$less_than_tab.'} # leading whitespace
2757 (?!\:[ ]|[ ]) # negative lookahead for a definition
2758 # mark (colon) or more whitespace.
2759 (?> \S.* \n)+? # actual term (not whitespace).
2761 (?=\n?[ ]{0,3}:[ ]) # lookahead for following line feed
2762 # with a definition mark.
2764 array($this, '_processDefListItems_callback_dt'), $list_str);
2766 # Process actual definitions.
2767 $list_str = preg_replace_callback('{
2768 \n(\n+)? # leading line = $1
2769 ( # marker space = $2
2770 [ ]{0,'.$less_than_tab.'} # whitespace before colon
2771 \:[ ]+ # definition mark (colon)
2773 ((?s:.+?)) # definition text = $3
2774 (?= \n+ # stop at next definition mark,
2775 (?: # next term or end of text
2776 [ ]{0,'.$less_than_tab.'} \:[ ] |
2781 array($this, '_processDefListItems_callback_dd'), $list_str);
# Callback for a run of definition terms.
# $matches[1] may hold several terms, one per line; each is trimmed, run
# through runSpanGamut and emitted as its own <dt> element on a new line.
# Returns the accumulated markup followed by a newline.
2785 protected function _processDefListItems_callback_dt($matches) {
2786 $terms = explode("\n", trim($matches[1]));
2788 foreach ($terms as $term) {
2789 $term = $this->runSpanGamut(trim($term));
2790 $text .= "\n<dt>" . $term . "</dt>";
2792 return $text . "\n";
# Callback for one definition; returns it wrapped in <dd>...</dd>.
# $matches[1] is the optional blank line(s) before the definition and
# $matches[2] the indentation-plus-colon marker. $def is presumably the
# definition text from $matches[3] — its assignment is not visible in this
# excerpt; confirm.
2794 protected function _processDefListItems_callback_dd($matches) {
2795 $leading_line = $matches[1];
2796 $marker_space = $matches[2];
# A definition preceded by a blank line, or containing one, is processed as
# block-level content; the remaining path below uses the span gamut only.
2799 if ($leading_line || preg_match('/\n{2,}/', $def)) {
2800 # Replace marker with the appropriate whitespace indentation
2801 $def = str_repeat(' ', strlen($marker_space)) . $def;
2802 $def = $this->runBlockGamut($this->outdent($def . "\n\n"));
2803 $def = "\n". $def ."\n";
2807 $def = $this->runSpanGamut($this->outdent($def));
2810 return "\n<dd>" . $def . "</dd>\n";
# Find fenced code blocks in $text and hand each one to
# _doFencedCodeBlocks_callback. A fence is three or more tildes or backticks,
# optionally followed by a standalone class name or an extra-attribute block.
# NOTE(review): $less_than_tab is set to tab_width here (sibling methods use
# tab_width - 1) and is not referenced in the pattern lines visible in this
# excerpt — confirm whether it is still needed.
2814 protected function doFencedCodeBlocks($text) {
2816 # Adding the fenced code block syntax to regular Markdown:
2822 $less_than_tab = $this->tab_width;
2824 $text = preg_replace_callback('{
2828 (?:~{3,}|`{3,}) # 3 or more tildes/backticks.
2832 \.?([-_:a-zA-Z0-9]+) # 2: standalone class name
2834 '.$this->id_class_attr_catch_re.' # 3: Extra attributes
2836 [ ]* \n # Whitespace and newline following marker.
2841 (?!\1 [ ]* \n) # Not a closing marker.
2849 array($this, '_doFencedCodeBlocks_callback'), $text);
#
# Callback that renders one fenced code block.
#
# $matches[2] is the optional standalone class name, $matches[3] the optional
# extra-attribute string and $matches[4] the code itself. The code is
# HTML-escaped (quotes left alone), leading blank lines are converted by
# _doFencedCodeBlocks_newlines, and the attribute string is placed on <pre>
# or <code> depending on $this->code_attr_on_pre. Returns the hashed block
# surrounded by blank lines.
#
protected function _doFencedCodeBlocks_callback($matches) {
	# Bound by reference so absent groups read as null without a notice.
	$classname =& $matches[2];
	$attrs     =& $matches[3];
	$codeblock = $matches[4];
	$codeblock = htmlspecialchars($codeblock, ENT_NOQUOTES);
	$codeblock = preg_replace_callback('/^\n+/',
		array($this, '_doFencedCodeBlocks_newlines'), $codeblock);

	if ($classname != "") {
		# Square-bracket offset: the `$str{0}` form was deprecated in
		# PHP 7.4 and removed in PHP 8.0, where it is a parse error.
		if ($classname[0] == '.')
			$classname = substr($classname, 1);
		$attr_str = ' class="'.$this->code_class_prefix.$classname.'"';
	} else {
		$attr_str = $this->doExtraAttributes($this->code_attr_on_pre ? "pre" : "code", $attrs);
	}
	$pre_attr_str  = $this->code_attr_on_pre ? $attr_str : '';
	$code_attr_str = $this->code_attr_on_pre ? '' : $attr_str;
	$codeblock = "<pre$pre_attr_str><code$code_attr_str>$codeblock</code></pre>";

	return "\n\n".$this->hashBlock($codeblock)."\n\n";
}
#
# Callback replacing a run of leading newlines in a fenced code block.
#
# Emits one <br> element (closed with $this->empty_element_suffix) for every
# character of the matched run in $matches[0].
#
protected function _doFencedCodeBlocks_newlines($matches) {
	$line_break = '<br' . $this->empty_element_suffix;
	$newline_count = strlen($matches[0]);

	return str_repeat($line_break, $newline_count);
}
2881 # Redefining emphasis markers so that emphasis by underscore does not
2882 # work in the middle of a word.
# Each table is keyed by marker string. The '' entry matches a marker that
# is not immediately followed by whitespace (optionally after one of . , : ;);
# for underscores it must also not follow a letter, digit or underscore.
# The entries keyed by a marker match that marker when it does not follow
# whitespace; for underscores it must also not precede a letter, digit or
# underscore. Presumably '' is the opening pattern and the keyed entries are
# the matching closing patterns — verify against prepareItalicsAndBold().
2884 protected $em_relist = array(
2885 '' => '(?:(?<!\*)\*(?!\*)|(?<![a-zA-Z0-9_])_(?!_))(?![\.,:;]?\s)',
2886 '*' => '(?<![\s*])\*(?!\*)',
2887 '_' => '(?<![\s_])_(?![a-zA-Z0-9_])',
# Same scheme for the two-character markers ** and __.
2889 protected $strong_relist = array(
2890 '' => '(?:(?<!\*)\*\*(?!\*)|(?<![a-zA-Z0-9_])__(?!_))(?![\.,:;]?\s)',
2891 '**' => '(?<![\s*])\*\*(?!\*)',
2892 '__' => '(?<![\s_])__(?![a-zA-Z0-9_])',
# Same scheme for the three-character markers *** and ___.
2894 protected $em_strong_relist = array(
2895 '' => '(?:(?<!\*)\*\*\*(?!\*)|(?<![a-zA-Z0-9_])___(?!_))(?![\.,:;]?\s)',
2896 '***' => '(?<![\s*])\*\*\*(?!\*)',
2897 '___' => '(?<![\s_])___(?![a-zA-Z0-9_])',
# Split $text into chunks on blank lines, run each chunk through the span
# gamut, wrap chunks in <p> tags as decided by $is_p, re-join them with blank
# lines and finally unhash the result.
# NOTE(review): in the $is_p pattern the trailing "$" belongs to the second
# alternative only, so any chunk that merely starts with a B-hash is treated
# as a non-paragraph — confirm that this asymmetry is intended.
2901 protected function formParagraphs($text) {
2904 # $text - string to process with html <p> tags
2906 # Strip leading and trailing lines:
2907 $text = preg_replace('/\A\n+|\n+\z/', '', $text);
2909 $grafs = preg_split('/\n{2,}/', $text, -1, PREG_SPLIT_NO_EMPTY);
2912 # Wrap <p> tags and unhashify HTML blocks
2914 foreach ($grafs as $key => $value) {
2915 $value = trim($this->runSpanGamut($value));
2917 # Check if this should be enclosed in a paragraph.
2918 # Clean tag hashes & block tag hashes are left alone.
2919 $is_p = !preg_match('/^B\x1A[0-9]+B|^C\x1A[0-9]+C$/', $value);
2922 $value = "<p>$value</p>";
2924 $grafs[$key] = $value;
2927 # Join grafs in one text, then unhash HTML tags.
2928 $text = implode("\n\n", $grafs);
2930 # Finish by removing any tag hashes still present in $text.
2931 $text = $this->unhash($text);
# Find footnote definitions in $text and pass each one to
# _stripFootnotes_callback, which records the note and returns the
# replacement text. A definition may be indented by up to tab_width - 1
# spaces.
# NOTE(review): one lookahead in the pattern hard-codes [ ]{0,3} instead of
# interpolating $less_than_tab; the two agree only while tab_width is 4.
2939 protected function stripFootnotes($text) {
2941 # Strips footnote definitions from text; the callback stores each note body.
2944 $less_than_tab = $this->tab_width - 1;
2946 # Footnote defs are in the form: [^id]: footnote text
2947 $text = preg_replace_callback('{
2948 ^[ ]{0,'.$less_than_tab.'}\[\^(.+?)\][ ]?: # note_id = $1
2950 \n? # maybe *one* newline
2951 ( # text = $2 (no blank lines allowed)
2956 (?!\[.+?\][ ]?:\s)# negative lookahead for footnote or link definition marker.
2957 (?!\n+[ ]{0,3}\S)# ensure line is not blank and followed
2958 # by non-indented content
2962 array($this, '_stripFootnotes_callback'),
#
# Callback recording one footnote definition.
#
# The note id ($matches[1]) is prefixed with $this->fn_id_prefix and used as
# the key under which the outdented note text ($matches[2]) is stored in
# $this->footnotes. Returns an empty string, so the definition disappears
# from the document text.
#
protected function _stripFootnotes_callback($matches) {
	$key = $this->fn_id_prefix . $matches[1];
	$body = $this->outdent($matches[2]);

	$this->footnotes[$key] = $body;

	return '';
}
# Span-level pass for footnote references.
# Outside of anchors ($this->in_anchor is false) every [^id] in $text is
# rewritten to the token "F\x1Afn:" + id + "\x1A:", the form that
# appendFootnotes later searches for.
2973 protected function doFootnotes($text) {
2975 # Replace footnote references in $text [^id] with a special text-token
2976 # which will be replaced by the actual footnote marker in appendFootnotes.
2978 if (!$this->in_anchor) {
2979 $text = preg_replace('{\[\^(.+?)\]}', "F\x1Afn:\\1\x1A:", $text);
#
# Append the footnote list to $text.
#
# Every footnote token (F\x1Afn:<id>\x1A:) is first replaced via
# _appendFootnotes_callback. If that queued anything in
# $this->footnotes_ordered, a <div class="footnotes"> containing an ordered
# list is appended: one <li id="fn:<id>"> per queued note, each ending with
# one backlink per reference. Notes are removed from footnotes_ordered,
# footnotes_ref_count and footnotes as they are emitted.
# Returns the resulting text.
#
protected function appendFootnotes($text) {
	$text = preg_replace_callback('{F\x1Afn:(.*?)\x1A:}',
		array($this, '_appendFootnotes_callback'), $text);

	if (!empty($this->footnotes_ordered)) {
		$text .= "\n\n";
		$text .= "<div class=\"footnotes\">\n";
		$text .= "<hr". $this->empty_element_suffix ."\n";
		$text .= "<ol>\n\n";

		# Backlink attribute template shared by all notes. It may contain
		# the "%%" placeholder, which is expanded per note below.
		$attr = "";
		if ($this->fn_backlink_class != "") {
			$class = $this->fn_backlink_class;
			$class = $this->encodeAttribute($class);
			$attr .= " class=\"$class\"";
		}
		if ($this->fn_backlink_title != "") {
			$title = $this->fn_backlink_title;
			$title = $this->encodeAttribute($title);
			$attr .= " title=\"$title\"";
		}
		$num = 0;

		# Drain the queue one entry at a time instead of iterating over it:
		# processing a note body below can queue further footnotes.
		while (!empty($this->footnotes_ordered)) {
			$footnote = reset($this->footnotes_ordered);
			$note_id = key($this->footnotes_ordered);
			unset($this->footnotes_ordered[$note_id]);
			$ref_count = $this->footnotes_ref_count[$note_id];
			unset($this->footnotes_ref_count[$note_id]);
			unset($this->footnotes[$note_id]);

			$footnote .= "\n"; # Need to append newline before parsing.
			$footnote = $this->runBlockGamut("$footnote\n");
			$footnote = preg_replace_callback('{F\x1Afn:(.*?)\x1A:}',
				array($this, '_appendFootnotes_callback'), $footnote);

			# Expand "%%" into a per-note copy. Writing the result back into
			# $attr would consume the placeholder on the first note and stamp
			# every later backlink with that first number.
			$note_attr = str_replace("%%", (string) ++$num, $attr);
			$note_id = $this->encodeAttribute($note_id);

			# Prepare backlink, multiple backlinks if multiple references
			$backlink = "<a href=\"#fnref:$note_id\"$note_attr>↩</a>";
			for ($ref_num = 2; $ref_num <= $ref_count; ++$ref_num) {
				$backlink .= " <a href=\"#fnref$ref_num:$note_id\"$note_attr>↩</a>";
			}
			# Add backlink to last paragraph; create new paragraph if needed.
			if (preg_match('{</p>$}', $footnote)) {
				$footnote = substr($footnote, 0, -4) . " $backlink</p>";
			} else {
				$footnote .= "\n\n<p>$backlink</p>";
			}

			$text .= "<li id=\"fn:$note_id\">\n";
			$text .= $footnote . "\n";
			$text .= "</li>\n\n";
		}

		$text .= "</ol>\n";
		$text .= "</div>";
	}
	return $text;
}
# Callback turning one footnote token into its reference marker.
# $matches[1] is the note id as written in the text; it is prefixed with
# $this->fn_id_prefix to form the lookup key. When a footnote with that key
# exists, a <sup><a href="#fn:..."> marker is built, with optional class and
# title attributes taken from fn_link_class / fn_link_title (a "%%" in them
# is replaced by the footnote number). Otherwise the original "[^id]" text
# is returned.
3049 protected function _appendFootnotes_callback($matches) {
3050 $node_id = $this->fn_id_prefix . $matches[1];
3052 # Create footnote marker only if it has a corresponding footnote.
3054 if (isset($this->footnotes[$node_id])) {
# Reference into the per-note number table, so assigning $num below also
# records the number for later references to the same note.
3055 $num =& $this->footnotes_numbers[$node_id];
3057 # Transfer footnote content to the ordered list and give it its number.
3059 $this->footnotes_ordered[$node_id] = $this->footnotes[$node_id];
3060 $this->footnotes_ref_count[$node_id] = 1;
3061 $num = $this->footnote_counter++;
3062 $ref_count_mark = '';
# Repeat reference: bump the count; the new count becomes part of the
# marker id ("fnref2:...", "fnref3:...").
3064 $ref_count_mark = $this->footnotes_ref_count[$node_id] += 1;
3068 if ($this->fn_link_class != "") {
3069 $class = $this->fn_link_class;
3070 $class = $this->encodeAttribute($class);
3071 $attr .= " class=\"$class\"";
3073 if ($this->fn_link_title != "") {
3074 $title = $this->fn_link_title;
3075 $title = $this->encodeAttribute($title);
3076 $attr .= " title=\"$title\"";
3079 $attr = str_replace("%%", $num, $attr);
3080 $node_id = $this->encodeAttribute($node_id);
3083 "<sup id=\"fnref$ref_count_mark:$node_id\">".
3084 "<a href=\"#fn:$node_id\"$attr>$num</a>".
3088 return "[^".$matches[1]."]";
3092 ### Abbreviations ###
# Find abbreviation definitions in $text and pass each one to
# _stripAbbreviations_callback, which records the abbreviation and returns
# the replacement text. A definition may be indented by up to tab_width - 1
# spaces.
3094 protected function stripAbbreviations($text) {
3096 # Strips abbreviations from text, stores titles in hash references.
3098 $less_than_tab = $this->tab_width - 1;
3100 # Abbreviation defs are in the form: *[id]: description
3101 $text = preg_replace_callback('{
3102 ^[ ]{0,'.$less_than_tab.'}\*\[(.+?)\][ ]?: # abbr_id = $1
3103 (.*) # text = $2 (no blank lines allowed)
3105 array($this, '_stripAbbreviations_callback'),
#
# Callback recording one abbreviation definition.
#
# The abbreviation ($matches[1]) is regex-quoted and added as a further
# alternative to $this->abbr_word_re (alternatives are separated by "|"),
# and its trimmed description ($matches[2]) is stored in
# $this->abbr_desciptions under the unquoted abbreviation. Returns an empty
# string, so the definition disappears from the document text.
#
protected function _stripAbbreviations_callback($matches) {
	list(, $word, $description) = $matches;

	# Only put a separator in front when alternatives already exist.
	$separator = $this->abbr_word_re ? '|' : '';
	$this->abbr_word_re .= $separator . preg_quote($word);

	$this->abbr_desciptions[$word] = trim($description);

	return '';
}
# Span-level pass for abbreviations.
# Does nothing unless $this->abbr_word_re is non-empty; otherwise every
# occurrence matched by the pattern built around abbr_word_re is handed to
# _doAbbreviations_callback. The pattern is concatenated from string pieces
# rather than written in /x style, for the reason given below.
3120 protected function doAbbreviations($text) {
3122 # Find defined abbreviations in text and wrap them in <abbr> elements.
3124 if ($this->abbr_word_re) {
3125 // cannot use the /x modifier because abbr_word_re may
3126 // contain significant spaces:
3127 $text = preg_replace_callback('{'.
3129 '(?:'.$this->abbr_word_re.')'.
3132 array($this, '_doAbbreviations_callback'), $text);
3136 protected function _doAbbreviations_callback($matches) {
3137 $abbr = $matches[0];
3138 if (isset($this->abbr_desciptions[$abbr])) {
3139 $desc = $this->abbr_desciptions[$abbr];
3141 return $this->hashPart("<abbr>$abbr</abbr>");
3143 $desc = $this->encodeAttribute($desc);
3144 return $this->hashPart("<abbr title=\"$desc\">$abbr</abbr>");