3 # Markdown - A text-to-HTML conversion tool for web writers
6 # Copyright (c) 2004-2008 Michel Fortin
7 # <http://www.michelf.com/projects/php-markdown/>
10 # Copyright (c) 2004-2006 John Gruber
11 # <http://daringfireball.net/projects/markdown/>
15 define( 'MARKDOWN_VERSION', "1.0.1m" ); # Sat 21 Jun 2008
19 # Global default settings:
22 # Change to ">" for HTML output
23 @define( 'MARKDOWN_EMPTY_ELEMENT_SUFFIX', " />");
25 # Define the width of a tab for code blocks.
26 @define( 'MARKDOWN_TAB_WIDTH', 4 );
33 # Change to false to remove Markdown from posts and/or comments.
34 @define( 'MARKDOWN_WP_POSTS', true );
35 @define( 'MARKDOWN_WP_COMMENTS', true );
39 ### Standard Function Interface ###
41 @define( 'MARKDOWN_PARSER_CLASS', 'Markdown_Parser' );
function Markdown($text) {
#
# Initialize the parser and return the result of its transform method.
#
	# Setup static parser variable. The `static` declaration is what keeps
	# the parser alive between calls; without it $parser is a fresh local
	# each time and a new parser object is built on every call.
	static $parser;
	if (!isset($parser)) {
		# The class name comes from a constant so it can be overridden
		# before this file is loaded.
		$parser_class = MARKDOWN_PARSER_CLASS;
		$parser = new $parser_class;
	}

	# Transform text using parser.
	return $parser->transform($text);
}
59 ### WordPress Plugin Interface ###
63 Plugin URI: http://www.michelf.com/projects/php-markdown/
64 Description: <a href="http://daringfireball.net/projects/markdown/syntax">Markdown syntax</a> allows you to write using an easy-to-read, easy-to-write plain text format. Based on the original Perl version by <a href="http://daringfireball.net/">John Gruber</a>. <a href="http://www.michelf.com/projects/php-markdown/">More...</a>
67 Author URI: http://www.michelf.com/
70 if (isset($wp_version)) {
71 # More details about how it works here:
72 # <http://www.michelf.com/weblog/2005/wordpress-text-flow-vs-markdown/>
74 # Post content and excerpts
75 # - Remove WordPress paragraph generator.
76 # - Run Markdown on excerpt, then remove all tags.
77 # - Add paragraph tag around the excerpt, but remove it for the excerpt rss.
78 if (MARKDOWN_WP_POSTS) {
79 remove_filter('the_content', 'wpautop');
80 remove_filter('the_content_rss', 'wpautop');
81 remove_filter('the_excerpt', 'wpautop');
82 add_filter('the_content', 'Markdown', 6);
83 add_filter('the_content_rss', 'Markdown', 6);
84 add_filter('get_the_excerpt', 'Markdown', 6);
85 add_filter('get_the_excerpt', 'trim', 7);
86 add_filter('the_excerpt', 'mdwp_add_p');
87 add_filter('the_excerpt_rss', 'mdwp_strip_p');
89 remove_filter('content_save_pre', 'balanceTags', 50);
90 remove_filter('excerpt_save_pre', 'balanceTags', 50);
91 add_filter('the_content', 'balanceTags', 50);
92 add_filter('get_the_excerpt', 'balanceTags', 9);
96 # - Remove WordPress paragraph generator.
97 # - Remove WordPress auto-link generator.
98 # - Scramble important tags before passing them to the kses filter.
99 # - Run Markdown on excerpt then remove paragraph tags.
100 if (MARKDOWN_WP_COMMENTS) {
101 remove_filter('comment_text', 'wpautop', 30);
102 remove_filter('comment_text', 'make_clickable');
103 add_filter('pre_comment_content', 'Markdown', 6);
104 add_filter('pre_comment_content', 'mdwp_hide_tags', 8);
105 add_filter('pre_comment_content', 'mdwp_show_tags', 12);
106 add_filter('get_comment_text', 'Markdown', 6);
107 add_filter('get_comment_excerpt', 'Markdown', 6);
108 add_filter('get_comment_excerpt', 'mdwp_strip_p', 7);
110 global $mdwp_hidden_tags, $mdwp_placeholders;
111 $mdwp_hidden_tags = explode(' ',
112 '<p> </p> <pre> </pre> <ol> </ol> <ul> </ul> <li> </li>');
113 $mdwp_placeholders = explode(' ', str_rot13(
114 'pEj07ZbbBZ U1kqgh4w4p pre2zmeN6K QTi31t9pre ol0MP1jzJR '.
115 'ML5IjmbRol ulANi1NsGY J7zRLJqPul liA8ctl16T K9nhooUHli'));
function mdwp_add_p($text) {
#
# Wrap $text in paragraph tags unless it is empty or already starts with
# one of the block-level tags p, ul, ol, dl, pre or blockquote. Runs of
# two or more newlines inside the wrapped text become paragraph breaks.
# Returns the resulting text.
#
	if (!preg_match('{^$|^<(p|ul|ol|dl|pre|blockquote)>}i', $text)) {
		$text = '<p>'.$text.'</p>';
		$text = preg_replace('{\n{2,}}', "</p>\n\n<p>", $text);
	}
	# A filter must hand the text back, otherwise the caller gets nothing.
	return $text;
}
function mdwp_strip_p($t) {
	# Remove every bare <p> and </p> tag, regardless of letter case.
	$without_p = preg_replace('{</?p>}i', '', $t);
	return $without_p;
}
function mdwp_hide_tags($text) {
#
# Replace each tag listed in the global $mdwp_hidden_tags with the entry
# at the same position in the global $mdwp_placeholders.
#
	global $mdwp_hidden_tags, $mdwp_placeholders;
	$scrambled = str_replace($mdwp_hidden_tags, $mdwp_placeholders, $text);
	return $scrambled;
}
function mdwp_show_tags($text) {
#
# Reverse of mdwp_hide_tags: replace each entry of the global
# $mdwp_placeholders with the tag at the same position in the global
# $mdwp_hidden_tags.
#
	global $mdwp_hidden_tags, $mdwp_placeholders;
	$restored = str_replace($mdwp_placeholders, $mdwp_hidden_tags, $text);
	return $restored;
}
139 ### bBlog Plugin Info ###
141 function identify_modifier_markdown() {
143 'name' => 'markdown',
144 'type' => 'modifier',
145 'nicename' => 'Markdown',
146 'description' => 'A text-to-HTML conversion tool for web writers',
147 'authors' => 'Michel Fortin and John Gruber',
148 'licence' => 'BSD-like',
149 'version' => MARKDOWN_VERSION,
150 'help' => '<a href="http://daringfireball.net/projects/markdown/syntax">Markdown syntax</a> allows you to write using an easy-to-read, easy-to-write plain text format. Based on the original Perl version by <a href="http://daringfireball.net/">John Gruber</a>. <a href="http://www.michelf.com/projects/php-markdown/">More...</a>'
155 ### Smarty Modifier Interface ###
function smarty_modifier_markdown($text) {
	# Smarty modifier entry point: delegates straight to Markdown().
	$html = Markdown($text);
	return $html;
}
162 ### Textile Compatibility Mode ###
164 # Rename this file to "classTextile.php" and it can replace Textile everywhere.
166 if (strcasecmp(substr(__FILE__, -16), "classTextile.php") == 0) {
167 # Try to include PHP SmartyPants. Should be in the same directory.
168 @include_once 'smartypants.php';
169 # Fake Textile class. It calls Markdown instead.
171 function TextileThis($text, $lite='', $encode='') {
172 if ($lite == '' && $encode == '') $text = Markdown($text);
173 if (function_exists('SmartyPants')) $text = SmartyPants($text);
176 # Fake restricted version: restrictions are not supported for now.
177 function TextileRestricted($text, $lite='', $noimage='') {
178 return $this->TextileThis($text, $lite);
180 # Workaround to ensure compatibility with TextPattern 4.0.3.
181 function blockLite($text) { return $text; }
188 # Markdown Parser Class
191 class Markdown_Parser {
193 # Regex to match balanced [brackets].
194 # Needed to insert a maximum bracket depth while converting to PHP.
195 var $nested_brackets_depth = 6;
196 var $nested_brackets_re;
198 var $nested_url_parenthesis_depth = 4;
199 var $nested_url_parenthesis_re;
201 # Table of hash values for escaped characters:
202 var $escape_chars = '\`*_{}[]()>#+-.!';
203 var $escape_chars_re;
205 # Change to ">" for HTML output.
206 var $empty_element_suffix = MARKDOWN_EMPTY_ELEMENT_SUFFIX;
207 var $tab_width = MARKDOWN_TAB_WIDTH;
209 # Change to `true` to disallow markup or entities.
210 var $no_markup = false;
211 var $no_entities = false;
213 # Predefined urls and titles for reference links and images.
214 var $predef_urls = array();
215 var $predef_titles = array();
218 function Markdown_Parser() {
220 # Constructor function. Initialize appropriate member variables.
223 $this->prepareItalicsAndBold();
225 $this->nested_brackets_re =
226 str_repeat('(?>[^\[\]]+|\[', $this->nested_brackets_depth).
227 str_repeat('\])*', $this->nested_brackets_depth);
229 $this->nested_url_parenthesis_re =
230 str_repeat('(?>[^()\s]+|\(', $this->nested_url_parenthesis_depth).
231 str_repeat('(?>\)))*', $this->nested_url_parenthesis_depth);
233 $this->escape_chars_re = '['.preg_quote($this->escape_chars).']';
235 # Sort document, block, and span gamut in ascending priority order.
236 asort($this->document_gamut);
237 asort($this->block_gamut);
238 asort($this->span_gamut);
242 # Internal hashes used during transformation.
244 var $titles = array();
245 var $html_hashes = array();
247 # Status flag to avoid invalid nesting.
248 var $in_anchor = false;
253 # Called before the transformation process starts to setup parser
256 # Clear global hashes.
257 $this->urls = $this->predef_urls;
258 $this->titles = $this->predef_titles;
259 $this->html_hashes = array();
function teardown() {
#
# Called after the transformation process to reset the per-document
# tables so they do not keep taking up memory unnecessarily.
#
	$this->html_hashes = array();
	$this->titles = array();
	$this->urls = array();
}
275 function transform($text) {
277 # Main function. Performs some preprocessing on the input text
278 # and pass it through the document gamut.
282 # Remove UTF-8 BOM and marker character in input, if present.
283 $text = preg_replace('{^\xEF\xBB\xBF|\x1A}', '', $text);
285 # Standardize line endings:
286 # DOS to Unix and Mac to Unix
287 $text = preg_replace('{\r\n?}', "\n", $text);
289 # Make sure $text ends with a couple of newlines:
292 # Convert all tabs to spaces.
293 $text = $this->detab($text);
295 # Turn block-level HTML blocks into hash entries
296 $text = $this->hashHTMLBlocks($text);
298 # Strip any lines consisting only of spaces and tabs.
299 # This makes subsequent regexen easier to write, because we can
300 # match consecutive blank lines with /\n+/ instead of something
301 # contorted like /[ ]*\n+/ .
302 $text = preg_replace('/^[ ]+$/m', '', $text);
304 # Run document gamut methods.
305 foreach ($this->document_gamut as $method => $priority) {
306 $text = $this->$method($text);
314 var $document_gamut = array(
315 # Strip link definitions, store in hashes.
316 "stripLinkDefinitions" => 20,
318 "runBasicBlockGamut" => 30,
322 function stripLinkDefinitions($text) {
324 # Strips link definitions from text, stores the URLs and titles in
327 $less_than_tab = $this->tab_width - 1;
329 # Link defs are in the form: ^[id]: url "optional title"
330 $text = preg_replace_callback('{
331 ^[ ]{0,'.$less_than_tab.'}\[(.+)\][ ]?: # id = $1
333 \n? # maybe *one* newline
335 <?(\S+?)>? # url = $2
337 \n? # maybe one newline
340 (?<=\s) # lookbehind for whitespace
345 )? # title is optional
348 array(&$this, '_stripLinkDefinitions_callback'),
352 function _stripLinkDefinitions_callback($matches) {
353 $link_id = strtolower($matches[1]);
354 $this->urls[$link_id] = $matches[2];
355 $this->titles[$link_id] =& $matches[3];
356 return ''; # String that will replace the block
360 function hashHTMLBlocks($text) {
361 if ($this->no_markup) return $text;
363 $less_than_tab = $this->tab_width - 1;
365 # Hashify HTML blocks:
366 # We only want to do this for block-level HTML tags, such as headers,
367 # lists, and tables. That's because we still want to wrap <p>s around
368 # "paragraphs" that are wrapped in non-block-level tags, such as anchors,
369 # phrase emphasis, and spans. The list of tags we're looking for is
372 # * List "a" is made of tags which can be both inline or block-level.
373 # These will be treated block-level when the start tag is alone on
374 # its line, otherwise they're not matched here and will be taken as
376 # * List "b" is made of tags which are always block-level;
378 $block_tags_a_re = 'ins|del';
379 $block_tags_b_re = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|'.
380 'script|noscript|form|fieldset|iframe|math';
382 # Regular expression for the content of a block tag.
383 $nested_tags_level = 4;
385 (?> # optional tag attributes
386 \s # starts with whitespace
388 [^>"/]+ # text outside quotes
390 /+(?!>) # slash not followed by ">"
392 "[^"]*" # text inside double quotes (tolerate ">")
394 \'[^\']*\' # text inside single quotes (tolerate ">")
401 [^<]+ # content without tag
403 <\2 # nested opening tag
404 '.$attr.' # attributes
408 >', $nested_tags_level). # end of opening tag
409 '.*?'. # last level nested tag content
411 </\2\s*> # closing nested tag
414 <(?!/\2\s*> # other tags with a different name
418 $content2 = str_replace('\2', '\3', $content);
420 # First, look for nested blocks, e.g.:
423 # tags for inner block must be indented.
427 # The outermost tags must start at the left margin for this to match, and
428 # the inner nested divs must be indented.
429 # We need to do this before the next, more liberal match, because the next
430 # match will start at the first `<div>` and stop at the first `</div>`.
431 $text = preg_replace_callback('{(?>
433 (?<=\n\n) # Starting after a blank line
435 \A\n? # the beginning of the doc
439 # Match from `\n<tag>` to `</tag>\n`, handling nested tags
442 [ ]{0,'.$less_than_tab.'}
443 <('.$block_tags_b_re.')# start tag = $2
444 '.$attr.'> # attributes followed by > and \n
445 '.$content.' # content, support nesting
446 </\2> # the matching end tag
447 [ ]* # trailing spaces/tabs
448 (?=\n+|\Z) # followed by a newline or end of document
450 | # Special version for tags of group a.
452 [ ]{0,'.$less_than_tab.'}
453 <('.$block_tags_a_re.')# start tag = $3
454 '.$attr.'>[ ]*\n # attributes followed by >
455 '.$content2.' # content, support nesting
456 </\3> # the matching end tag
457 [ ]* # trailing spaces/tabs
458 (?=\n+|\Z) # followed by a newline or end of document
460 | # Special case just for <hr />. It was easier to make a special
461 # case than to make the other regex more complicated.
463 [ ]{0,'.$less_than_tab.'}
464 <(hr) # start tag = $2
465 '.$attr.' # attributes
466 /?> # the matching end tag
468 (?=\n{2,}|\Z) # followed by a blank line or end of document
470 | # Special case for standalone HTML comments:
472 [ ]{0,'.$less_than_tab.'}
477 (?=\n{2,}|\Z) # followed by a blank line or end of document
479 | # PHP and ASP-style processor instructions (<? and <%)
481 [ ]{0,'.$less_than_tab.'}
488 (?=\n{2,}|\Z) # followed by a blank line or end of document
492 array(&$this, '_hashHTMLBlocks_callback'),
497 function _hashHTMLBlocks_callback($matches) {
499 $key = $this->hashBlock($text);
500 return "\n\n$key\n\n";
function hashPart($text, $boundary = 'X') {
#
# Called whenever a tag must be hashed when a function inserts an atomic
# element in the text stream. Passing $text through this function gives
# a unique text-token which will be reverted back when calling unhash.
#
# The $boundary argument specifies what character should be used to
# surround the token. By convention, "B" is used for block elements that
# do not need to be wrapped into paragraph tags at the end, ":" is used
# for elements that are word separators and "X" is used in the general
# case.
#
	# Swap back any tag hash found in $text so we do not have to `unhash`
	# multiple times at the end.
	$text = $this->unhash($text);

	# Then hash the block. The counter must be static: it is what makes
	# each key different from the keys issued by earlier calls.
	static $i = 0;
	$key = "$boundary\x1A" . ++$i . $boundary;
	$this->html_hashes[$key] = $text;
	return $key; # String that will replace the tag.
}
function hashBlock($text) {
#
# Convenience wrapper: hashPart using the block-level boundary "B".
#
	$block_key = $this->hashPart($text, 'B');
	return $block_key;
}
535 var $block_gamut = array(
537 # These are all the transformations that form block-level
538 # tags like paragraphs, headers, and list items.
541 "doHorizontalRules" => 20,
544 "doCodeBlocks" => 50,
545 "doBlockQuotes" => 60,
function runBlockGamut($text) {
#
# Run block gamut transformations, hashing raw HTML blocks first.
#
	# Raw HTML in the Markdown source has to be escaped before anything
	# else happens. This is repeated for every block rather than done once
	# at the beginning in the Markdown function, because hashed blocks can
	# sit inside list items where they were indented; an indented block
	# would have been taken for a code block by an earlier pass of
	# hashHTMLBlocks.
	$hashed = $this->hashHTMLBlocks($text);

	return $this->runBasicBlockGamut($hashed);
}
function runBasicBlockGamut($text) {
#
# Run block gamut transformations, without hashing HTML blocks. This is
# useful when HTML blocks are known to be already hashed, like in the first
# whole-document pass. Returns the transformed text.
#
	# Each key of block_gamut is the name of a method on this object; the
	# priority value is not used here.
	foreach ($this->block_gamut as $method => $priority) {
		$text = $this->$method($text);
	}

	# Finally form paragraph and restore hashed blocks.
	$text = $this->formParagraphs($text);

	return $text;
}
579 function doHorizontalRules($text) {
580 # Do Horizontal Rules:
583 ^[ ]{0,3} # Leading space
584 ([-*_]) # $1: First marker
585 (?> # Repeated marker group
586 [ ]{0,2} # Zero, one, or two spaces.
587 \1 # Marker character
588 ){2,} # Group repeated at least twice
589 [ ]* # Tailing spaces
592 "\n".$this->hashBlock("<hr$this->empty_element_suffix")."\n",
597 var $span_gamut = array(
599 # These are all the transformations that occur *within* block-level
600 # tags like paragraphs, headers, and list items.
602 # Process character escapes, code spans, and inline HTML
606 # Process anchor and image tags. Images must come first,
607 # because ![foo][f] looks like an anchor.
611 # Make links out of things like `<http://example.com/>`
612 # Must come after doAnchors, because you can use < and >
613 # delimiters in inline links like [this](<url>).
615 "encodeAmpsAndAngles" => 40,
617 "doItalicsAndBold" => 50,
618 "doHardBreaks" => 60,
function runSpanGamut($text) {
#
# Run span gamut transformations. Returns the transformed text.
#
	# Each key of span_gamut is the name of a method on this object; the
	# priority value is not used here.
	foreach ($this->span_gamut as $method => $priority) {
		$text = $this->$method($text);
	}

	return $text;
}
function doHardBreaks($text) {
	# Do hard breaks: two or more spaces followed by a newline are handed
	# to the callback for replacement.
	$callback = array(&$this, '_doHardBreaks_callback');
	return preg_replace_callback('/ {2,}\n/', $callback, $text);
}
function _doHardBreaks_callback($matches) {
	# The matched text itself is not needed; every hard break becomes the
	# same hashed <br> tag followed by a newline.
	$break_tag = "<br$this->empty_element_suffix\n";
	return $this->hashPart($break_tag);
}
643 function doAnchors($text) {
645 # Turn Markdown link shortcuts into XHTML <a> tags.
647 if ($this->in_anchor) return $text;
648 $this->in_anchor = true;
651 # First, handle reference-style links: [link text] [id]
653 $text = preg_replace_callback('{
654 ( # wrap whole match in $1
656 ('.$this->nested_brackets_re.') # link text = $2
659 [ ]? # one optional space
660 (?:\n[ ]*)? # one optional newline followed by spaces
667 array(&$this, '_doAnchors_reference_callback'), $text);
670 # Next, inline-style links: [link text](url "optional title")
672 $text = preg_replace_callback('{
673 ( # wrap whole match in $1
675 ('.$this->nested_brackets_re.') # link text = $2
682 ('.$this->nested_url_parenthesis_re.') # href = $4
686 ([\'"]) # quote char = $6
689 [ ]* # ignore any spaces/tabs between closing quote and )
690 )? # title is optional
694 array(&$this, '_DoAnchors_inline_callback'), $text);
697 # Last, handle reference-style shortcuts: [link text]
698 # These must come last in case you've also got [link test][1]
699 # or [link test](/foo)
701 // $text = preg_replace_callback('{
702 // ( # wrap whole match in $1
704 // ([^\[\]]+) # link text = $2; can\'t contain [ or ]
708 // array(&$this, '_doAnchors_reference_callback'), $text);
710 $this->in_anchor = false;
function _doAnchors_reference_callback($matches) {
#
# Callback for reference-style links. $matches[1] is the whole match,
# $matches[2] the link text and $matches[3] the (possibly absent) link id.
# Returns a hashed <a> tag when the id is known, or the original text
# unchanged when it is not.
#
	$whole_match = $matches[1];
	$link_text = $matches[2];
	# Bound by reference so that a missing index does not raise a notice.
	$link_id =& $matches[3];

	if ($link_id == "") {
		# for shortcut links like [this][] or [this].
		$link_id = $link_text;
	}

	# lower-case and turn embedded newlines into spaces
	$link_id = strtolower($link_id);
	$link_id = preg_replace('{[ ]?\n}', ' ', $link_id);

	if (isset($this->urls[$link_id])) {
		$url = $this->urls[$link_id];
		$url = $this->encodeAttribute($url);

		$result = "<a href=\"$url\"";
		if ( isset( $this->titles[$link_id] ) ) {
			$title = $this->titles[$link_id];
			$title = $this->encodeAttribute($title);
			$result .= " title=\"$title\"";
		}

		$link_text = $this->runSpanGamut($link_text);
		$result .= ">$link_text</a>";
		$result = $this->hashPart($result);
	}
	else {
		# Unknown link id: leave the source text intact.
		$result = $whole_match;
	}
	return $result;
}
function _doAnchors_inline_callback($matches) {
#
# Callback for inline-style links. $matches[2] is the link text, the url
# is $matches[3] when that is non-empty and $matches[4] otherwise, and
# $matches[7] is the optional title. Returns a hashed <a> tag.
#
	$link_text = $matches[2];
	$url = $matches[3] == '' ? $matches[4] : $matches[3];
	# Bound by reference so that a missing index does not raise a notice.
	$title =& $matches[7];

	$url = $this->encodeAttribute($url);

	$result = "<a href=\"$url\"";
	# Only emit a title attribute when a title was actually captured.
	if (isset($title)) {
		$title = $this->encodeAttribute($title);
		$result .= " title=\"$title\"";
	}

	# Run the span gamut once only; a second pass over text that has
	# already been processed is redundant work.
	$link_text = $this->runSpanGamut($link_text);
	$result .= ">$link_text</a>";

	return $this->hashPart($result);
}
768 function doImages($text) {
770 # Turn Markdown image shortcuts into <img> tags.
773 # First, handle reference-style labeled images: ![alt text][id]
775 $text = preg_replace_callback('{
776 ( # wrap whole match in $1
778 ('.$this->nested_brackets_re.') # alt text = $2
781 [ ]? # one optional space
782 (?:\n[ ]*)? # one optional newline followed by spaces
790 array(&$this, '_doImages_reference_callback'), $text);
793 # Next, handle inline images: ![alt text](url "optional title")
794 # Don't forget: encode * and _
796 $text = preg_replace_callback('{
797 ( # wrap whole match in $1
799 ('.$this->nested_brackets_re.') # alt text = $2
801 \s? # One optional whitespace character
805 <(\S*)> # src url = $3
807 ('.$this->nested_url_parenthesis_re.') # src url = $4
811 ([\'"]) # quote char = $6
815 )? # title is optional
819 array(&$this, '_doImages_inline_callback'), $text);
function _doImages_reference_callback($matches) {
#
# Callback for reference-style images. $matches[1] is the whole match,
# $matches[2] the alt text and $matches[3] the link id. Returns a hashed
# <img> tag when the id is known, or the original text unchanged when it
# is not.
#
	$whole_match = $matches[1];
	$alt_text = $matches[2];
	$link_id = strtolower($matches[3]);

	if ($link_id == "") {
		$link_id = strtolower($alt_text); # for shortcut links like ![this][].
	}

	$alt_text = $this->encodeAttribute($alt_text);
	if (isset($this->urls[$link_id])) {
		$url = $this->encodeAttribute($this->urls[$link_id]);
		$result = "<img src=\"$url\" alt=\"$alt_text\"";
		if (isset($this->titles[$link_id])) {
			$title = $this->titles[$link_id];
			$title = $this->encodeAttribute($title);
			$result .= " title=\"$title\"";
		}
		$result .= $this->empty_element_suffix;
		$result = $this->hashPart($result);
	}
	else {
		# If there's no such link ID, leave intact:
		$result = $whole_match;
	}

	return $result;
}
function _doImages_inline_callback($matches) {
#
# Callback for inline images. $matches[2] is the alt text, the src url is
# $matches[3] when that is non-empty and $matches[4] otherwise, and
# $matches[7] is the optional title. Returns a hashed <img> tag.
#
	$alt_text = $matches[2];
	$url = $matches[3] == '' ? $matches[4] : $matches[3];
	# Bound by reference so that a missing index does not raise a notice.
	$title =& $matches[7];

	$alt_text = $this->encodeAttribute($alt_text);
	$url = $this->encodeAttribute($url);
	$result = "<img src=\"$url\" alt=\"$alt_text\"";
	# Only emit a title attribute when a title was actually captured.
	if (isset($title)) {
		$title = $this->encodeAttribute($title);
		$result .= " title=\"$title\"";
	}
	$result .= $this->empty_element_suffix;

	return $this->hashPart($result);
}
870 function doHeaders($text) {
871 # Setext-style headers:
878 $text = preg_replace_callback('{ ^(.+?)[ ]*\n(=+|-+)[ ]*\n+ }mx',
879 array(&$this, '_doHeaders_callback_setext'), $text);
884 # ## Header 2 with closing hashes ##
888 $text = preg_replace_callback('{
889 ^(\#{1,6}) # $1 = string of #\'s
891 (.+?) # $2 = Header text
893 \#* # optional closing #\'s (not counted)
896 array(&$this, '_doHeaders_callback_atx'), $text);
function _doHeaders_callback_setext($matches) {
#
# Callback for setext-style headers. $matches[1] is the header text and
# $matches[2] the underline made of "=" or "-" characters. An underline
# starting with "=" gives <h1>, anything else gives <h2>.
#
	# Terrible hack to check we haven't found an empty list item: in that
	# case hand the matched text back unchanged.
	if ($matches[2] == '-' && preg_match('{^-(?: |$)}', $matches[1]))
		return $matches[0];

	# Square-bracket offset; the curly-brace form `$str{0}` is deprecated
	# in PHP 7.4 and no longer parses in PHP 8.
	$level = $matches[2][0] == '=' ? 1 : 2;
	$block = "<h$level>".$this->runSpanGamut($matches[1])."</h$level>";
	return "\n" . $this->hashBlock($block) . "\n\n";
}
function _doHeaders_callback_atx($matches) {
#
# Callback for atx-style headers. The number of "#" characters captured
# in $matches[1] is the header level; $matches[2] is the header text.
#
	$depth = strlen($matches[1]);
	$inner = $this->runSpanGamut($matches[2]);
	$header = "<h$depth>" . $inner . "</h$depth>";
	return "\n" . $this->hashBlock($header) . "\n\n";
}
916 function doLists($text) {
918 # Form HTML ordered (numbered) and unordered (bulleted) lists.
920 $less_than_tab = $this->tab_width - 1;
922 # Re-usable patterns to match list item bullets and number markers:
923 $marker_ul_re = '[*+-]';
924 $marker_ol_re = '\d+[.]';
925 $marker_any_re = "(?:$marker_ul_re|$marker_ol_re)";
927 $markers_relist = array($marker_ul_re, $marker_ol_re);
929 foreach ($markers_relist as $marker_re) {
930 # Re-usable pattern to match any entire ul or ol list:
934 [ ]{0,'.$less_than_tab.'}
935 ('.$marker_re.') # $3 = first list item marker
944 (?! # Negative lookahead for another list item marker
952 # We use a different prefix before nested lists than top-level lists.
953 # See extended comment in _ProcessListItems().
955 if ($this->list_level) {
956 $text = preg_replace_callback('{
960 array(&$this, '_doLists_callback'), $text);
963 $text = preg_replace_callback('{
964 (?:(?<=\n)\n|\A\n?) # Must eat the newline
967 array(&$this, '_doLists_callback'), $text);
973 function _doLists_callback($matches) {
974 # Re-usable patterns to match list item bullets and number markers:
975 $marker_ul_re = '[*+-]';
976 $marker_ol_re = '\d+[.]';
977 $marker_any_re = "(?:$marker_ul_re|$marker_ol_re)";
980 $list_type = preg_match("/$marker_ul_re/", $matches[3]) ? "ul" : "ol";
982 $marker_any_re = ( $list_type == "ul" ? $marker_ul_re : $marker_ol_re );
985 $result = $this->processListItems($list, $marker_any_re);
987 $result = $this->hashBlock("<$list_type>\n" . $result . "</$list_type>");
988 return "\n". $result ."\n\n";
993 function processListItems($list_str, $marker_any_re) {
995 # Process the contents of a single ordered or unordered list, splitting it
996 # into individual list items.
998 # The $this->list_level global keeps track of when we're inside a list.
999 # Each time we enter a list, we increment it; when we leave a list,
1000 # we decrement. If it's zero, we're not in a list anymore.
1002 # We do this because when we're not inside a list, we want to treat
1003 # something like this:
1005 # I recommend upgrading to version
1006 # 8. Oops, now this line is treated
1009 # As a single paragraph, despite the fact that the second line starts
1010 # with a digit-period-space sequence.
1012 # Whereas when we're inside a list (or sub-list), that line will be
1013 # treated as the start of a sub-list. What a kludge, huh? This is
1014 # an aspect of Markdown's syntax that's hard to parse perfectly
1015 # without resorting to mind-reading. Perhaps the solution is to
1016 # change the syntax rules such that sub-lists must start with a
1017 # starting cardinal number; e.g. "1." or "a.".
1019 $this->list_level++;
1021 # trim trailing blank lines:
1022 $list_str = preg_replace("/\n{2,}\\z/", "\n", $list_str);
1024 $list_str = preg_replace_callback('{
1025 (\n)? # leading line = $1
1026 (^[ ]*) # leading whitespace = $2
1027 ('.$marker_any_re.' # list marker and space = $3
1028 (?:[ ]+|(?=\n)) # space only required if item is not empty
1030 ((?s:.*?)) # list item text = $4
1031 (?:(\n+(?=\n))|\n) # tailing blank line = $5
1032 (?= \n* (\z | \2 ('.$marker_any_re.') (?:[ ]+|(?=\n))))
1034 array(&$this, '_processListItems_callback'), $list_str);
1036 $this->list_level--;
1039 function _processListItems_callback($matches) {
1040 $item = $matches[4];
1041 $leading_line =& $matches[1];
1042 $leading_space =& $matches[2];
1043 $marker_space = $matches[3];
1044 $tailing_blank_line =& $matches[5];
1046 if ($leading_line || $tailing_blank_line ||
1047 preg_match('/\n{2,}/', $item))
1049 # Replace marker with the appropriate whitespace indentation
1050 $item = $leading_space . str_repeat(' ', strlen($marker_space)) . $item;
1051 $item = $this->runBlockGamut($this->outdent($item)."\n");
1054 # Recursion for sub-lists:
1055 $item = $this->doLists($this->outdent($item));
1056 $item = preg_replace('/\n+$/', '', $item);
1057 $item = $this->runSpanGamut($item);
1060 return "<li>" . $item . "</li>\n";
1064 function doCodeBlocks($text) {
1066 # Process Markdown `<pre><code>` blocks.
1068 $text = preg_replace_callback('{
1070 ( # $1 = the code block -- one or more lines, starting with a space/tab
1072 [ ]{'.$this->tab_width.'} # Lines must start with a tab or a tab-width of spaces
1076 ((?=^[ ]{0,'.$this->tab_width.'}\S)|\Z) # Lookahead for non-space at line-start, or end of doc
1078 array(&$this, '_doCodeBlocks_callback'), $text);
function _doCodeBlocks_callback($matches) {
#
# Callback for code blocks: outdent the captured block, escape it, and
# return it as a hashed <pre><code> element surrounded by blank lines.
#
	$code = $this->outdent($matches[1]);
	$code = htmlspecialchars($code, ENT_NOQUOTES);

	# trim leading newlines and trailing newlines
	$code = preg_replace('/\A\n+|\n+\z/', '', $code);

	$html = "<pre><code>$code\n</code></pre>";
	return "\n\n" . $this->hashBlock($html) . "\n\n";
}
function makeCodeSpan($code) {
#
# Create a code span markup for $code. Called from handleSpanToken.
#
	$escaped = htmlspecialchars(trim($code), ENT_NOQUOTES);
	$span = "<code>" . $escaped . "</code>";
	return $this->hashPart($span);
}
1105 var $em_relist = array(
1106 '' => '(?:(?<!\*)\*(?!\*)|(?<!_)_(?!_))(?=\S)(?![.,:;]\s)',
1107 '*' => '(?<=\S)(?<!\*)\*(?!\*)',
1108 '_' => '(?<=\S)(?<!_)_(?!_)',
1110 var $strong_relist = array(
1111 '' => '(?:(?<!\*)\*\*(?!\*)|(?<!_)__(?!_))(?=\S)(?![.,:;]\s)',
1112 '**' => '(?<=\S)(?<!\*)\*\*(?!\*)',
1113 '__' => '(?<=\S)(?<!_)__(?!_)',
1115 var $em_strong_relist = array(
1116 '' => '(?:(?<!\*)\*\*\*(?!\*)|(?<!_)___(?!_))(?=\S)(?![.,:;]\s)',
1117 '***' => '(?<=\S)(?<!\*)\*\*\*(?!\*)',
1118 '___' => '(?<=\S)(?<!_)___(?!_)',
1120 var $em_strong_prepared_relist;
function prepareItalicsAndBold() {
#
# Prepare regular expressions for searching emphasis tokens in any
# context. One master expression is stored in em_strong_prepared_relist
# for every combination of an em_relist key and a strong_relist key.
#
	foreach ($this->em_relist as $em => $em_re) {
		foreach ($this->strong_relist as $strong => $strong_re) {
			$context = "$em$strong";

			# Construct list of allowed token expressions. The combined
			# em+strong expression, when one exists for this context,
			# goes first.
			$alternatives = array();
			if (isset($this->em_strong_relist[$context])) {
				$alternatives[] = $this->em_strong_relist[$context];
			}
			$alternatives[] = $em_re;
			$alternatives[] = $strong_re;

			# Construct master expression from list.
			$this->em_strong_prepared_relist[$context] =
				'{(' . implode('|', $alternatives) . ')}';
		}
	}
}
1144 function doItalicsAndBold($text) {
1145 $token_stack = array('');
1146 $text_stack = array('');
1149 $tree_char_em = false;
1153 # Get prepared regular expression for searching emphasis tokens
1154 # in current context.
1156 $token_re = $this->em_strong_prepared_relist["$em$strong"];
1159 # Each loop iteration searches for the next emphasis token.
1160 # Each token is then passed to handleSpanToken.
1162 $parts = preg_split($token_re, $text, 2, PREG_SPLIT_DELIM_CAPTURE);
1163 $text_stack[0] .= $parts[0];
1164 $token =& $parts[1];
1167 if (empty($token)) {
1168 # Reached end of text span: empty stack without emitting.
1169 # any more emphasis.
1170 while ($token_stack[0]) {
1171 $text_stack[1] .= array_shift($token_stack);
1172 $text_stack[0] .= array_shift($text_stack);
1177 $token_len = strlen($token);
1178 if ($tree_char_em) {
1179 # Reached closing marker while inside a three-char emphasis.
1180 if ($token_len == 3) {
1181 # Three-char closing marker, close em and strong.
1182 array_shift($token_stack);
1183 $span = array_shift($text_stack);
1184 $span = $this->runSpanGamut($span);
1185 $span = "<strong><em>$span</em></strong>";
1186 $text_stack[0] .= $this->hashPart($span);
1190 # Other closing marker: close one em or strong and
1191 # change current token state to match the other
1192 $token_stack[0] = str_repeat($token{0}, 3-$token_len);
1193 $tag = $token_len == 2 ? "strong" : "em";
1194 $span = $text_stack[0];
1195 $span = $this->runSpanGamut($span);
1196 $span = "<$tag>$span</$tag>";
1197 $text_stack[0] = $this->hashPart($span);
1198 $$tag = ''; # $$tag stands for $em or $strong
1200 $tree_char_em = false;
1201 } else if ($token_len == 3) {
1203 # Reached closing marker for both em and strong.
1204 # Closing strong marker:
1205 for ($i = 0; $i < 2; ++$i) {
1206 $shifted_token = array_shift($token_stack);
1207 $tag = strlen($shifted_token) == 2 ? "strong" : "em";
1208 $span = array_shift($text_stack);
1209 $span = $this->runSpanGamut($span);
1210 $span = "<$tag>$span</$tag>";
1211 $text_stack[0] .= $this->hashPart($span);
1212 $$tag = ''; # $$tag stands for $em or $strong
1215 # Reached opening three-char emphasis marker. Push on token
1216 # stack; will be handled by the special condition above.
1219 array_unshift($token_stack, $token);
1220 array_unshift($text_stack, '');
1221 $tree_char_em = true;
1223 } else if ($token_len == 2) {
1225 # Unwind any dangling emphasis marker:
1226 if (strlen($token_stack[0]) == 1) {
1227 $text_stack[1] .= array_shift($token_stack);
1228 $text_stack[0] .= array_shift($text_stack);
1230 # Closing strong marker:
1231 array_shift($token_stack);
1232 $span = array_shift($text_stack);
1233 $span = $this->runSpanGamut($span);
1234 $span = "<strong>$span</strong>";
1235 $text_stack[0] .= $this->hashPart($span);
1238 array_unshift($token_stack, $token);
1239 array_unshift($text_stack, '');
1243 # Here $token_len == 1
1245 if (strlen($token_stack[0]) == 1) {
1246 # Closing emphasis marker:
1247 array_shift($token_stack);
1248 $span = array_shift($text_stack);
1249 $span = $this->runSpanGamut($span);
1250 $span = "<em>$span</em>";
1251 $text_stack[0] .= $this->hashPart($span);
1254 $text_stack[0] .= $token;
1257 array_unshift($token_stack, $token);
1258 array_unshift($text_stack, '');
1263 return $text_stack[0];
function doBlockQuotes($text) {
#
# Find every run of blockquote lines in $text and hand each run to
# _doBlockQuotes_callback, returning the text with the runs replaced.
#
	# A run starts with a line whose first non-space character is ">",
	# continues through the consecutive non-empty lines that follow it,
	# and swallows the blank lines after them; several such groups in a
	# row form one match.
	$blockquote_re = '/
		(						# Wrap whole match in $1
			(?>
				^[ ]*>[ ]?		# ">" at the start of a line
				.+\n			# rest of the first line
				(.+\n)*			# subsequent consecutive lines
				\n*				# blanks
			)+
		)
		/xm';

	return preg_replace_callback($blockquote_re,
		array(&$this, '_doBlockQuotes_callback'), $text);
}
function _doBlockQuotes_callback($matches) {
#
# Turn one matched blockquote run into a hashed <blockquote> element.
#
	# Trim one level of quoting and empty out whitespace-only lines.
	$quoted = preg_replace('/^[ ]*>[ ]?|^[ ]+$/m', '', $matches[1]);

	# Recurse: the quoted text is itself block-level Markdown.
	$inner = $this->runBlockGamut($quoted);

	# Indent every line of the result...
	$inner = preg_replace('/^/m', " ", $inner);

	# ...then undo that indentation inside <pre> elements, where leading
	# spaces are significant. PHP method names are case-insensitive, so
	# the differently-cased callback name below still resolves to
	# _doBlockQuotes_callback2.
	$inner = preg_replace_callback('{(\s*<pre>.+?</pre>)}sx',
		array(&$this, '_DoBlockQuotes_callback2'), $inner);

	$html = "<blockquote>\n$inner\n</blockquote>";
	return "\n". $this->hashBlock($html)."\n\n";
}
function _doBlockQuotes_callback2($matches) {
#
# Strip the per-line indentation that _doBlockQuotes_callback added from
# a matched <pre> element and return the result.
#
	return preg_replace('/^ /m', '', $matches[1]);
}
function formParagraphs($text) {
#
#	Params:
#		$text - string to process with html <p> tags
#
#	Returns the paragraphs and restored HTML blocks joined by blank lines.
#
	# Strip leading and trailing newlines, then split on blank lines.
	$text  = preg_replace('/\A\n+|\n+\z/', '', $text);
	$grafs = preg_split('/\n{2,}/', $text, -1, PREG_SPLIT_NO_EMPTY);

	foreach ($grafs as $key => $value) {
		if (preg_match('/^B\x1A[0-9]+B$/', $value)) {
			# The chunk is exactly one block placeholder: swap the stored
			# block from $this->html_hashes back in, without <p> tags.
			# (Historical note: re-processing the content of
			# <div markdown="..."> blocks was sketched at this point as
			# commented-out code and is not enabled.)
			$grafs[$key] = $this->html_hashes[$value];
			continue;
		}

		# Ordinary paragraph: run span-level processing, replace any
		# leading spaces with the opening tag, close the paragraph, and
		# restore the placeholders inside it.
		$para = $this->runSpanGamut($value);
		$para = preg_replace('/^([ ]*)/', "<p>", $para) . "</p>";
		$grafs[$key] = $this->unhash($para);
	}

	return implode("\n\n", $grafs);
}
function encodeAttribute($text) {
#
# Encode text for a double-quoted HTML attribute. This function
# is *not* suitable for attributes enclosed in single quotes, because
# single quotes are left untouched.
#
# Returns $text with ampersands and angle brackets encoded by
# encodeAmpsAndAngles and every double quote replaced by &quot;.
#
	$text = $this->encodeAmpsAndAngles($text);

	# A raw double quote would end the attribute value early, so it has to
	# be emitted as an entity.
	return str_replace('"', '&quot;', $text);
}
function encodeAmpsAndAngles($text) {
#
# Smart processing for ampersands and angle brackets that need to
# be encoded. Valid character entities are left alone unless the
# no-entities mode is set.
#
# Returns $text with the affected "&" turned into &amp; and every "<"
# turned into &lt;.
#
	if ($this->no_entities) {
		# No-entities mode: every ampersand is encoded, even one that
		# starts something shaped like an entity.
		$text = str_replace('&', '&amp;', $text);
	} else {
		# Ampersand-encoding based entirely on Nat Irons's Amputator
		# MT plugin: <http://bumppo.net/projects/amputator/>
		# The lookahead skips ampersands that already begin a named or
		# numeric entity terminated by ";".
		$text = preg_replace('/&(?!#?[xX]?(?:[0-9a-fA-F]+|\w+);)/',
							'&amp;', $text);
	}

	# Encode remaining <'s
	$text = str_replace('<', '&lt;', $text);

	return $text;
}
function doAutoLinks($text) {
#
# Convert <scheme:...> and <address@domain.foo> shortcuts in $text by
# passing each match to the URL or e-mail callback, and return the
# resulting text.
#
	# URLs: <http://...>, <https://...>, <ftp://...>, <dict:...>
	$url_re = '{<((https?|ftp|dict):[^\'">\s]+)>}i';

	# Email addresses: <address@domain.foo>
	$email_re = '{
		<
		(?:mailto:)?
		(
			[-.\w\x80-\xFF]+
			\@
			[-a-z0-9\x80-\xFF]+(\.[-a-z0-9\x80-\xFF]+)*\.[a-z]+
		)
		>
		}xi';

	$text = preg_replace_callback($url_re,
		array(&$this, '_doAutoLinks_url_callback'), $text);

	return preg_replace_callback($email_re,
		array(&$this, '_doAutoLinks_email_callback'), $text);
}
function _doAutoLinks_url_callback($matches) {
#
# Build the anchor for one matched URL. The attribute-encoded URL is used
# both as the href and as the link text; the anchor is returned through
# hashPart.
#
	$href = $this->encodeAttribute($matches[1]);
	return $this->hashPart('<a href="' . $href . '">' . $href . '</a>');
}
function _doAutoLinks_email_callback($matches) {
#
# Build the mailto link for one matched e-mail address (capture group 1)
# with encodeEmailAddress and return it through hashPart.
#
	return $this->hashPart($this->encodeEmailAddress($matches[1]));
}
function encodeEmailAddress($addr) {
#
#	Input: an email address, e.g. "foo@example.com"
#
#	Output: the email address as a mailto link, with each character
#		of the address encoded as either a decimal or hex entity, in
#		the hopes of foiling most address harvesting spam bots. For
#		instance "f" may come out as &#x66; or &#102; and "@" is always
#		written as an entity.
#
#	Based on a filter by Matthew Wickline, posted to BBEdit-Talk.
#	With some optimizations by Milian Wolff.
#
	$addr  = "mailto:" . $addr;
	$chars = preg_split('/(?<!^)(?!$)/', $addr);

	# Deterministic seed: the same address always encodes the same way.
	$seed = (int)abs(crc32($addr) / strlen($addr));

	foreach ($chars as $pos => $char) {
		$code = ord($char);

		# Ignore non-ascii chars.
		if ($code >= 128) continue;

		# Pseudo-random value in 0..99 picks the representation:
		# roughly 10% raw, 45% hex, 45% dec.
		$roll = ($seed * (1 + $pos)) % 100;

		# '@' *must* be encoded, so it never takes the raw path.
		if ($roll > 90 && $char != '@') continue;

		$chars[$pos] = ($roll < 45)
			? '&#x'.dechex($code).';'
			: '&#'.$code.';';
	}

	$href  = implode('', $chars);
	$label = implode('', array_slice($chars, 7)); # text without `mailto:`

	return "<a href=\"$href\">$label</a>";
}
function parseSpan($str) {
#
# Take the string $str and parse it into tokens, hashing embedded HTML,
# escaped characters and handling code spans. Returns the text with each
# token replaced by whatever handleSpanToken returned for it.
#
	# Alternatives for inline markup; left out entirely when the parser
	# runs in no-markup mode.
	$markup_re = $this->no_markup ? '' : '
			|
				<!--    .*?     -->		# comment
			|
				<\?.*?\?> | <%.*?%>		# processing instruction
			|
				<[/!$]?[-a-zA-Z0-9:]+	# regular tags
				(?>
					\s
					(?>[^"\'>]+|"[^"]*"|\'[^\']*\')*
				)?
				>
		';

	$span_re = '{
			(
				\\\\'.$this->escape_chars_re.'
			|
				(?<![`\\\\])
				`+						# code span marker
		'.$markup_re.'
			)
			}xs';

	$output = '';
	do {
		# Each pass searches for the next tag, the next opening code span
		# marker, or the next escaped character, splitting $str into:
		# text before the token, the token itself, and the remainder.
		$parts = preg_split($span_re, $str, 2, PREG_SPLIT_DELIM_CAPTURE);

		# Text preceding the token is copied through unchanged.
		if ($parts[0] != "") {
			$output .= $parts[0];
		}

		# No token means the end of the string has been reached.
		$found = isset($parts[1]);
		if ($found) {
			# handleSpanToken receives the remainder by reference and may
			# shorten it (it does so when it consumes a code span).
			$output .= $this->handleSpanToken($parts[1], $parts[2]);
			$str = $parts[2];
		}
	} while ($found);

	return $output;
}
function handleSpanToken($token, &$str) {
#
# Handle $token provided by parseSpan by determining its nature and
# returning the corresponding value that should replace it.
#
#	$token - the matched token: a backslash escape, a run of backticks,
#	         or anything else matched by the span regex.
#	$str   - text following the token, passed by reference; when a code
#	         span is found, it is cut down to the text after the closing
#	         marker.
#
	# Square-bracket offsets are used here: the curly-brace form
	# ($token{0}) is deprecated in PHP 7.4 and a parse error in PHP 8.
	switch ($token[0]) {
		case "\\":
			# Escaped character: emit it as a numeric entity.
			return $this->hashPart("&#". ord($token[1]). ";");
		case "`":
			# Search for end marker in remaining text: the same run of
			# backticks, not followed by another backtick.
			if (preg_match('/^(.*?[^`])'.preg_quote($token).'(?!`)(.*)$/sm',
				$str, $matches))
			{
				$str = $matches[2];
				$codespan = $this->makeCodeSpan($matches[1]);
				return $this->hashPart($codespan);
			}
			return $token; // return as text since no ending marker found.
		default:
			return $this->hashPart($token);
	}
}
function outdent($text) {
#
# Remove one level of line-leading indentation from every line of $text:
# either a single tab or between one and $this->tab_width spaces.
#
	$max_spaces = $this->tab_width;
	$indent_re  = '/^(\t|[ ]{1,'.$max_spaces.'})/m';

	return preg_replace($indent_re, '', $text);
}
# Name of the string length function used by detab. `_initDetab` will
# replace it with a UTF-8 aware substitute if the default function does
# not exist.
var $utf8_strlen = 'mb_strlen';

function detab($text) {
#
# Replace tabs with the appropriate amount of space.
#
	# Only lines that contain a tab are touched. Each of them is handed
	# to _detab_callback, which splits the line into blocks delimited by
	# tab characters and rebuilds it with the appropriate number of
	# spaces between the blocks.
	return preg_replace_callback('/^.*\t.*$/m',
		array(&$this, '_detab_callback'), $text);
}
function _detab_callback($matches) {
#
# Expand the tabs of one matched line ($matches[0]) into spaces and
# return the rebuilt line.
#
	$count_chars = $this->utf8_strlen; # strlen function for UTF-8.
	$width       = $this->tab_width;

	# Split in blocks; the first block starts the rebuilt line as-is.
	$segments = explode("\t", $matches[0]);
	$rebuilt  = array_shift($segments);

	foreach ($segments as $segment) {
		# Pad up to the next multiple of the tab width (always at least
		# one space), then append the block that followed the tab.
		$padding  = $width - $count_chars($rebuilt, 'UTF-8') % $width;
		$rebuilt .= str_repeat(" ", $padding) . $segment;
	}

	return $rebuilt;
}
function _initDetab() {
#
# Check for the availability of the function in the `utf8_strlen` property
# (initially `mb_strlen`). If the function is not available, point the
# property at a fallback that loosely counts the number of UTF-8
# characters with a regular expression.
#
	if (function_exists($this->utf8_strlen)) return;

	# The fallback is declared as a named function rather than through
	# create_function(), which is deprecated in PHP 7.2 and removed in
	# PHP 8. The guard keeps a second parser instance from declaring it
	# twice.
	if (!function_exists('_markdown_utf8_strlen_fallback')) {
		function _markdown_utf8_strlen_fallback($text) {
			# A match is either a single byte below 0xC0 or a lead byte
			# followed by its continuation bytes; the number of matches
			# is the character count.
			return preg_match_all(
				'/[\x00-\xBF]|[\xC0-\xFF][\x80-\xBF]*/',
				$text, $m);
		}
	}

	$this->utf8_strlen = '_markdown_utf8_strlen_fallback';
}
function unhash($text) {
#
# Swap the hashed content back in: every placeholder in $text (any
# character, \x1A, a run of digits, then the same character again) is
# replaced by whatever _unhash_callback returns for it.
#
	$placeholder_re = '/(.)\x1A[0-9]+\1/';
	$restore        = array(&$this, '_unhash_callback');

	return preg_replace_callback($placeholder_re, $restore, $text);
}
function _unhash_callback($matches) {
#
# Look up the full matched placeholder in $this->html_hashes and return
# the value stored under it.
#
	$placeholder = $matches[0];
	return $this->html_hashes[$placeholder];
}
1634 This is a PHP translation of the original Markdown formatter written in
1635 Perl by John Gruber.
1637 Markdown is a text-to-HTML filter; it translates an easy-to-read /
1638 easy-to-write structured text format into HTML. Markdown's text format
1639 is most similar to that of plain text email, and supports features such
1640 as headers, *emphasis*, code blocks, blockquotes, and links.
1642 Markdown's syntax is designed not as a generic markup language, but
1643 specifically to serve as a front-end to (X)HTML. You can use span-level
1644 HTML tags anywhere in a Markdown document, and you can use block level
HTML tags (like <div> and <table>) as well.
1647 For more information about Markdown's syntax, see:
1649 <http://daringfireball.net/projects/markdown/>
1655 To file bug reports please send email to:
1657 <michel.fortin@michelf.com>
1659 Please include with your report: (1) the example input; (2) the output you
1660 expected; (3) the output Markdown actually produced.
1666 See the readme file for detailed release notes for this version.
1669 Copyright and License
1670 ---------------------
1673 Copyright (c) 2004-2008 Michel Fortin
1674 <http://www.michelf.com/>
1675 All rights reserved.
1678 Copyright (c) 2003-2006 John Gruber
1679 <http://daringfireball.net/>
1680 All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
1686 * Redistributions of source code must retain the above copyright notice,
1687 this list of conditions and the following disclaimer.
1689 * Redistributions in binary form must reproduce the above copyright
1690 notice, this list of conditions and the following disclaimer in the
1691 documentation and/or other materials provided with the distribution.
1693 * Neither the name "Markdown" nor the names of its contributors may
1694 be used to endorse or promote products derived from this software
1695 without specific prior written permission.
1697 This software is provided by the copyright holders and contributors "as
1698 is" and any express or implied warranties, including, but not limited
1699 to, the implied warranties of merchantability and fitness for a
1700 particular purpose are disclaimed. In no event shall the copyright owner
1701 or contributors be liable for any direct, indirect, incidental, special,
1702 exemplary, or consequential damages (including, but not limited to,
1703 procurement of substitute goods or services; loss of use, data, or
1704 profits; or business interruption) however caused and on any theory of
1705 liability, whether in contract, strict liability, or tort (including
1706 negligence or otherwise) arising in any way out of the use of this
1707 software, even if advised of the possibility of such damage.