extlib/markdown.php

   1 <?php
   2 #
   3 # Markdown  -  A text-to-HTML conversion tool for web writers
   4 #
   5 # PHP Markdown
   6 # Copyright (c) 2004-2008 Michel Fortin
   7 # <http://www.michelf.com/projects/php-markdown/>
   8 #
   9 # Original Markdown
  10 # Copyright (c) 2004-2006 John Gruber
  11 # <http://daringfireball.net/projects/markdown/>
  12 #
  13
  14
  15 define( 'MARKDOWN_VERSION',  "1.0.1m" ); # Sat 21 Jun 2008
  16
  17
  18 #
  19 # Global default settings:
  20 #
  21
  22 # Change to ">" for HTML output
  23 @define( 'MARKDOWN_EMPTY_ELEMENT_SUFFIX',  " />");
  24
  25 # Define the width of a tab for code blocks.
  26 @define( 'MARKDOWN_TAB_WIDTH',     4 );
  27
  28
  29 #
  30 # WordPress settings:
  31 #
  32
  33 # Change to false to remove Markdown from posts and/or comments.
  34 @define( 'MARKDOWN_WP_POSTS',      true );
  35 @define( 'MARKDOWN_WP_COMMENTS',   true );
  36
  37
  38
  39 ### Standard Function Interface ###
  40
  41 @define( 'MARKDOWN_PARSER_CLASS',  'Markdown_Parser' );
  42
  43 function Markdown($text) {
  44 #
  45 # Initialize the parser and return the result of its transform method.
  46 #
  47         # Setup static parser variable.
  48         static $parser;
  49         if (!isset($parser)) {
  50                 $parser_class = MARKDOWN_PARSER_CLASS;
  51                 $parser = new $parser_class;
  52         }
  53
  54         # Transform text using parser.
  55         return $parser->transform($text);
  56 }
  57
  58
  59 ### WordPress Plugin Interface ###
  60
  61 /*
  62 Plugin Name: Markdown
  63 Plugin URI: http://www.michelf.com/projects/php-markdown/
  64 Description: <a href="http://daringfireball.net/projects/markdown/syntax">Markdown syntax</a> allows you to write using an easy-to-read, easy-to-write plain text format. Based on the original Perl version by <a href="http://daringfireball.net/">John Gruber</a>. <a href="http://www.michelf.com/projects/php-markdown/">More...</a>
  65 Version: 1.0.1m
  66 Author: Michel Fortin
  67 Author URI: http://www.michelf.com/
  68 */
  69
  70 if (isset($wp_version)) {
  71         # More details about how it works here:
  72         # <http://www.michelf.com/weblog/2005/wordpress-text-flow-vs-markdown/>
  73
  74         # Post content and excerpts
  75         # - Remove WordPress paragraph generator.
  76         # - Run Markdown on excerpt, then remove all tags.
  77         # - Add paragraph tag around the excerpt, but remove it for the excerpt rss.
  78         if (MARKDOWN_WP_POSTS) {
  79                 remove_filter('the_content',     'wpautop');
  80         remove_filter('the_content_rss', 'wpautop');
  81                 remove_filter('the_excerpt',     'wpautop');
  82                 add_filter('the_content',     'Markdown', 6);
  83         add_filter('the_content_rss', 'Markdown', 6);
  84                 add_filter('get_the_excerpt', 'Markdown', 6);
  85                 add_filter('get_the_excerpt', 'trim', 7);
  86                 add_filter('the_excerpt',     'mdwp_add_p');
  87                 add_filter('the_excerpt_rss', 'mdwp_strip_p');
  88
  89                 remove_filter('content_save_pre',  'balanceTags', 50);
  90                 remove_filter('excerpt_save_pre',  'balanceTags', 50);
  91                 add_filter('the_content',         'balanceTags', 50);
  92                 add_filter('get_the_excerpt', 'balanceTags', 9);
  93         }
  94
  95         # Comments
  96         # - Remove WordPress paragraph generator.
  97         # - Remove WordPress auto-link generator.
  98         # - Scramble important tags before passing them to the kses filter.
  99         # - Run Markdown on excerpt then remove paragraph tags.
 100         if (MARKDOWN_WP_COMMENTS) {
 101                 remove_filter('comment_text', 'wpautop', 30);
 102                 remove_filter('comment_text', 'make_clickable');
 103                 add_filter('pre_comment_content', 'Markdown', 6);
 104                 add_filter('pre_comment_content', 'mdwp_hide_tags', 8);
 105                 add_filter('pre_comment_content', 'mdwp_show_tags', 12);
 106                 add_filter('get_comment_text',    'Markdown', 6);
 107                 add_filter('get_comment_excerpt', 'Markdown', 6);
 108                 add_filter('get_comment_excerpt', 'mdwp_strip_p', 7);
 109
 110                 global $mdwp_hidden_tags, $mdwp_placeholders;
 111                 $mdwp_hidden_tags = explode(' ',
 112                         '<p> </p> <pre> </pre> <ol> </ol> <ul> </ul> <li> </li>');
 113                 $mdwp_placeholders = explode(' ', str_rot13(
 114                         'pEj07ZbbBZ U1kqgh4w4p pre2zmeN6K QTi31t9pre ol0MP1jzJR '.
 115                         'ML5IjmbRol ulANi1NsGY J7zRLJqPul liA8ctl16T K9nhooUHli'));
 116         }
 117
 118         function mdwp_add_p($text) {
 119                 if (!preg_match('{^$|^<(p|ul|ol|dl|pre|blockquote)>}i', $text)) {
 120                         $text = '<p>'.$text.'</p>';
 121                         $text = preg_replace('{\n{2,}}', "</p>\n\n<p>", $text);
 122                 }
 123                 return $text;
 124         }
 125
 126         function mdwp_strip_p($t) { return preg_replace('{</?p>}i', '', $t); }
 127
 128         function mdwp_hide_tags($text) {
 129                 global $mdwp_hidden_tags, $mdwp_placeholders;
 130                 return str_replace($mdwp_hidden_tags, $mdwp_placeholders, $text);
 131         }
 132         function mdwp_show_tags($text) {
 133                 global $mdwp_hidden_tags, $mdwp_placeholders;
 134                 return str_replace($mdwp_placeholders, $mdwp_hidden_tags, $text);
 135         }
 136 }
 137
 138
 139 ### bBlog Plugin Info ###
 140
 141 function identify_modifier_markdown() {
 142         return array(
 143                 'name'                  => 'markdown',
 144                 'type'                  => 'modifier',
 145                 'nicename'              => 'Markdown',
 146                 'description'   => 'A text-to-HTML conversion tool for web writers',
 147                 'authors'               => 'Michel Fortin and John Gruber',
 148                 'licence'               => 'BSD-like',
 149                 'version'               => MARKDOWN_VERSION,
 150                 'help'                  => '<a href="http://daringfireball.net/projects/markdown/syntax">Markdown syntax</a> allows you to write using an easy-to-read, easy-to-write plain text format. Based on the original Perl version by <a href="http://daringfireball.net/">John Gruber</a>. <a href="http://www.michelf.com/projects/php-markdown/">More...</a>'
 151         );
 152 }
 153
 154
 155 ### Smarty Modifier Interface ###
 156
 157 function smarty_modifier_markdown($text) {
 158         return Markdown($text);
 159 }
 160
 161
 162 ### Textile Compatibility Mode ###
 163
 164 # Rename this file to "classTextile.php" and it can replace Textile everywhere.
 165
 166 if (strcasecmp(substr(__FILE__, -16), "classTextile.php") == 0) {
 167         # Try to include PHP SmartyPants. Should be in the same directory.
 168         @include_once 'smartypants.php';
 169         # Fake Textile class. It calls Markdown instead.
 170         class Textile {
 171                 function TextileThis($text, $lite='', $encode='') {
 172                         if ($lite == '' && $encode == '')    $text = Markdown($text);
 173                         if (function_exists('SmartyPants'))  $text = SmartyPants($text);
 174                         return $text;
 175                 }
 176                 # Fake restricted version: restrictions are not supported for now.
 177                 function TextileRestricted($text, $lite='', $noimage='') {
 178                         return $this->TextileThis($text, $lite);
 179                 }
 180                 # Workaround to ensure compatibility with TextPattern 4.0.3.
 181                 function blockLite($text) { return $text; }
 182         }
 183 }
 184
 185
 186
 187 #
 188 # Markdown Parser Class
 189 #
 190
 191 class Markdown_Parser {
 192
 193         # Regex to match balanced [brackets].
 194         # Needed to insert a maximum bracket depth while converting to PHP.
 195         var $nested_brackets_depth = 6;
 196         var $nested_brackets_re;
 197
 198         var $nested_url_parenthesis_depth = 4;
 199         var $nested_url_parenthesis_re;
 200
 201         # Table of hash values for escaped characters:
 202         var $escape_chars = '\`*_{}[]()>#+-.!';
 203         var $escape_chars_re;
 204
 205         # Change to ">" for HTML output.
 206         var $empty_element_suffix = MARKDOWN_EMPTY_ELEMENT_SUFFIX;
 207         var $tab_width = MARKDOWN_TAB_WIDTH;
 208
 209         # Change to `true` to disallow markup or entities.
 210         var $no_markup = false;
 211         var $no_entities = false;
 212
 213         # Predefined urls and titles for reference links and images.
 214         var $predef_urls = array();
 215         var $predef_titles = array();
 216
 217
 218         function Markdown_Parser() {
 219         #
 220         # Constructor function. Initialize appropriate member variables.
 221         #
 222                 $this->_initDetab();
 223                 $this->prepareItalicsAndBold();
 224
 225                 $this->nested_brackets_re =
 226                         str_repeat('(?>[^\[\]]+|\[', $this->nested_brackets_depth).
 227                         str_repeat('\])*', $this->nested_brackets_depth);
 228
 229                 $this->nested_url_parenthesis_re =
 230                         str_repeat('(?>[^()\s]+|\(', $this->nested_url_parenthesis_depth).
 231                         str_repeat('(?>\)))*', $this->nested_url_parenthesis_depth);
 232
 233                 $this->escape_chars_re = '['.preg_quote($this->escape_chars).']';
 234
 235                 # Sort document, block, and span gamut in ascendent priority order.
 236                 asort($this->document_gamut);
 237                 asort($this->block_gamut);
 238                 asort($this->span_gamut);
 239         }
 240
 241
 242         # Internal hashes used during transformation.
 243         var $urls = array();
 244         var $titles = array();
 245         var $html_hashes = array();
 246
 247         # Status flag to avoid invalid nesting.
 248         var $in_anchor = false;
 249
 250
 251         function setup() {
 252         #
 253         # Called before the transformation process starts to setup parser
 254         # states.
 255         #
 256                 # Clear global hashes.
 257                 $this->urls = $this->predef_urls;
 258                 $this->titles = $this->predef_titles;
 259                 $this->html_hashes = array();
 260
 261                 $in_anchor = false;
 262         }
 263
 264         function teardown() {
 265         #
 266         # Called after the transformation process to clear any variable
 267         # which may be taking up memory unnecessarly.
 268         #
 269                 $this->urls = array();
 270                 $this->titles = array();
 271                 $this->html_hashes = array();
 272         }
 273
 274
 275         function transform($text) {
 276         #
 277         # Main function. Performs some preprocessing on the input text
 278         # and pass it through the document gamut.
 279         #
 280                 $this->setup();
 281
 282                 # Remove UTF-8 BOM and marker character in input, if present.
 283                 $text = preg_replace('{^\xEF\xBB\xBF|\x1A}', '', $text);
 284
 285                 # Standardize line endings:
 286                 #   DOS to Unix and Mac to Unix
 287                 $text = preg_replace('{\r\n?}', "\n", $text);
 288
 289                 # Make sure $text ends with a couple of newlines:
 290                 $text .= "\n\n";
 291
 292                 # Convert all tabs to spaces.
 293                 $text = $this->detab($text);
 294
 295                 # Turn block-level HTML blocks into hash entries
 296                 $text = $this->hashHTMLBlocks($text);
 297
 298                 # Strip any lines consisting only of spaces and tabs.
 299                 # This makes subsequent regexen easier to write, because we can
 300                 # match consecutive blank lines with /\n+/ instead of something
 301                 # contorted like /[ ]*\n+/ .
 302                 $text = preg_replace('/^[ ]+$/m', '', $text);
 303
 304                 # Run document gamut methods.
 305                 foreach ($this->document_gamut as $method => $priority) {
 306                         $text = $this->$method($text);
 307                 }
 308
 309                 $this->teardown();
 310
 311                 return $text . "\n";
 312         }
 313
 314         var $document_gamut = array(
 315                 # Strip link definitions, store in hashes.
 316                 "stripLinkDefinitions" => 20,
 317
 318                 "runBasicBlockGamut"   => 30,
 319                 );
 320
 321
 322         function stripLinkDefinitions($text) {
 323         #
 324         # Strips link definitions from text, stores the URLs and titles in
 325         # hash references.
 326         #
 327                 $less_than_tab = $this->tab_width - 1;
 328
 329                 # Link defs are in the form: ^[id]: url "optional title"
 330                 $text = preg_replace_callback('{
 331                                                         ^[ ]{0,'.$less_than_tab.'}\[(.+)\][ ]?: # id = $1
 332                                                           [ ]*
 333                                                           \n?                           # maybe *one* newline
 334                                                           [ ]*
 335                                                         <?(\S+?)>?                      # url = $2
 336                                                           [ ]*
 337                                                           \n?                           # maybe one newline
 338                                                           [ ]*
 339                                                         (?:
 340                                                                 (?<=\s)                 # lookbehind for whitespace
 341                                                                 ["(]
 342                                                                 (.*?)                   # title = $3
 343                                                                 [")]
 344                                                                 [ ]*
 345                                                         )?      # title is optional
 346                                                         (?:\n+|\Z)
 347                         }xm',
 348                         array(&$this, '_stripLinkDefinitions_callback'),
 349                         $text);
 350                 return $text;
 351         }
 352         function _stripLinkDefinitions_callback($matches) {
 353                 $link_id = strtolower($matches[1]);
 354                 $this->urls[$link_id] = $matches[2];
 355                 $this->titles[$link_id] =& $matches[3];
 356                 return ''; # String that will replace the block
 357         }
 358
 359
 360         function hashHTMLBlocks($text) {
 361                 if ($this->no_markup)  return $text;
 362
 363                 $less_than_tab = $this->tab_width - 1;
 364
 365                 # Hashify HTML blocks:
 366                 # We only want to do this for block-level HTML tags, such as headers,
 367                 # lists, and tables. That's because we still want to wrap <p>s around
 368                 # "paragraphs" that are wrapped in non-block-level tags, such as anchors,
 369                 # phrase emphasis, and spans. The list of tags we're looking for is
 370                 # hard-coded:
 371                 #
 372                 # *  List "a" is made of tags which can be both inline or block-level.
 373                 #    These will be treated block-level when the start tag is alone on
 374                 #    its line, otherwise they're not matched here and will be taken as
 375                 #    inline later.
 376                 # *  List "b" is made of tags which are always block-level;
 377                 #
 378                 $block_tags_a_re = 'ins|del';
 379                 $block_tags_b_re = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|'.
 380                                                    'script|noscript|form|fieldset|iframe|math';
 381
 382                 # Regular expression for the content of a block tag.
 383                 $nested_tags_level = 4;
 384                 $attr = '
 385                         (?>                             # optional tag attributes
 386                           \s                    # starts with whitespace
 387                           (?>
 388                                 [^>"/]+         # text outside quotes
 389                           |
 390                                 /+(?!>)         # slash not followed by ">"
 391                           |
 392                                 "[^"]*"         # text inside double quotes (tolerate ">")
 393                           |
 394                                 \'[^\']*\'      # text inside single quotes (tolerate ">")
 395                           )*
 396                         )?
 397                         ';
 398                 $content =
 399                         str_repeat('
 400                                 (?>
 401                                   [^<]+                 # content without tag
 402                                 |
 403                                   <\2                   # nested opening tag
 404                                         '.$attr.'       # attributes
 405                                         (?>
 406                                           />
 407                                         |
 408                                           >', $nested_tags_level).      # end of opening tag
 409                                           '.*?'.                                        # last level nested tag content
 410                         str_repeat('
 411                                           </\2\s*>      # closing nested tag
 412                                         )
 413                                   |
 414                                         <(?!/\2\s*>     # other tags with a different name
 415                                   )
 416                                 )*',
 417                                 $nested_tags_level);
 418                 $content2 = str_replace('\2', '\3', $content);
 419
 420                 # First, look for nested blocks, e.g.:
 421                 #       <div>
 422                 #               <div>
 423                 #               tags for inner block must be indented.
 424                 #               </div>
 425                 #       </div>
 426                 #
 427                 # The outermost tags must start at the left margin for this to match, and
 428                 # the inner nested divs must be indented.
 429                 # We need to do this before the next, more liberal match, because the next
 430                 # match will start at the first `<div>` and stop at the first `</div>`.
 431                 $text = preg_replace_callback('{(?>
 432                         (?>
 433                                 (?<=\n\n)               # Starting after a blank line
 434                                 |                               # or
 435                                 \A\n?                   # the beginning of the doc
 436                         )
 437                         (                                               # save in $1
 438
 439                           # Match from `\n<tag>` to `</tag>\n`, handling nested tags
 440                           # in between.
 441
 442                                                 [ ]{0,'.$less_than_tab.'}
 443                                                 <('.$block_tags_b_re.')# start tag = $2
 444                                                 '.$attr.'>                      # attributes followed by > and \n
 445                                                 '.$content.'            # content, support nesting
 446                                                 </\2>                           # the matching end tag
 447                                                 [ ]*                            # trailing spaces/tabs
 448                                                 (?=\n+|\Z)      # followed by a newline or end of document
 449
 450                         | # Special version for tags of group a.
 451
 452                                                 [ ]{0,'.$less_than_tab.'}
 453                                                 <('.$block_tags_a_re.')# start tag = $3
 454                                                 '.$attr.'>[ ]*\n        # attributes followed by >
 455                                                 '.$content2.'           # content, support nesting
 456                                                 </\3>                           # the matching end tag
 457                                                 [ ]*                            # trailing spaces/tabs
 458                                                 (?=\n+|\Z)      # followed by a newline or end of document
 459
 460                         | # Special case just for <hr />. It was easier to make a special
 461                           # case than to make the other regex more complicated.
 462
 463                                                 [ ]{0,'.$less_than_tab.'}
 464                                                 <(hr)                           # start tag = $2
 465                                                 '.$attr.'                       # attributes
 466                                                 /?>                                     # the matching end tag
 467                                                 [ ]*
 468                                                 (?=\n{2,}|\Z)           # followed by a blank line or end of document
 469
 470                         | # Special case for standalone HTML comments:
 471
 472                                         [ ]{0,'.$less_than_tab.'}
 473                                         (?s:
 474                                                 <!-- .*? -->
 475                                         )
 476                                         [ ]*
 477                                         (?=\n{2,}|\Z)           # followed by a blank line or end of document
 478
 479                         | # PHP and ASP-style processor instructions (<? and <%)
 480
 481                                         [ ]{0,'.$less_than_tab.'}
 482                                         (?s:
 483                                                 <([?%])                 # $2
 484                                                 .*?
 485                                                 \2>
 486                                         )
 487                                         [ ]*
 488                                         (?=\n{2,}|\Z)           # followed by a blank line or end of document
 489
 490                         )
 491                         )}Sxmi',
 492                         array(&$this, '_hashHTMLBlocks_callback'),
 493                         $text);
 494
 495                 return $text;
 496         }
 497         function _hashHTMLBlocks_callback($matches) {
 498                 $text = $matches[1];
 499                 $key  = $this->hashBlock($text);
 500                 return "\n\n$key\n\n";
 501         }
 502
 503
 504         function hashPart($text, $boundary = 'X') {
 505         #
 506         # Called whenever a tag must be hashed when a function insert an atomic
 507         # element in the text stream. Passing $text to through this function gives
 508         # a unique text-token which will be reverted back when calling unhash.
 509         #
 510         # The $boundary argument specify what character should be used to surround
 511         # the token. By convension, "B" is used for block elements that needs not
 512         # to be wrapped into paragraph tags at the end, ":" is used for elements
 513         # that are word separators and "X" is used in the general case.
 514         #
 515                 # Swap back any tag hash found in $text so we do not have to `unhash`
 516                 # multiple times at the end.
 517                 $text = $this->unhash($text);
 518
 519                 # Then hash the block.
 520                 static $i = 0;
 521                 $key = "$boundary\x1A" . ++$i . $boundary;
 522                 $this->html_hashes[$key] = $text;
 523                 return $key; # String that will replace the tag.
 524         }
 525
 526
 527         function hashBlock($text) {
 528         #
 529         # Shortcut function for hashPart with block-level boundaries.
 530         #
 531                 return $this->hashPart($text, 'B');
 532         }
 533
 534
 535         var $block_gamut = array(
 536         #
 537         # These are all the transformations that form block-level
 538         # tags like paragraphs, headers, and list items.
 539         #
 540                 "doHeaders"         => 10,
 541                 "doHorizontalRules" => 20,
 542
 543                 "doLists"           => 40,
 544                 "doCodeBlocks"      => 50,
 545                 "doBlockQuotes"     => 60,
 546                 );
 547
 548         function runBlockGamut($text) {
 549         #
 550         # Run block gamut tranformations.
 551         #
 552                 # We need to escape raw HTML in Markdown source before doing anything
 553                 # else. This need to be done for each block, and not only at the
 554                 # begining in the Markdown function since hashed blocks can be part of
 555                 # list items and could have been indented. Indented blocks would have
 556                 # been seen as a code block in a previous pass of hashHTMLBlocks.
 557                 $text = $this->hashHTMLBlocks($text);
 558
 559                 return $this->runBasicBlockGamut($text);
 560         }
 561
 562         function runBasicBlockGamut($text) {
 563         #
 564         # Run block gamut tranformations, without hashing HTML blocks. This is
 565         # useful when HTML blocks are known to be already hashed, like in the first
 566         # whole-document pass.
 567         #
 568                 foreach ($this->block_gamut as $method => $priority) {
 569                         $text = $this->$method($text);
 570                 }
 571
 572                 # Finally form paragraph and restore hashed blocks.
 573                 $text = $this->formParagraphs($text);
 574
 575                 return $text;
 576         }
 577
 578
 579         function doHorizontalRules($text) {
 580                 # Do Horizontal Rules:
 581                 return preg_replace(
 582                         '{
 583                                 ^[ ]{0,3}       # Leading space
 584                                 ([-*_])         # $1: First marker
 585                                 (?>                     # Repeated marker group
 586                                         [ ]{0,2}        # Zero, one, or two spaces.
 587                                         \1                      # Marker character
 588                                 ){2,}           # Group repeated at least twice
 589                                 [ ]*            # Tailing spaces
 590                                 $                       # End of line.
 591                         }mx',
 592                         "\n".$this->hashBlock("<hr$this->empty_element_suffix")."\n",
 593                         $text);
 594         }
 595
 596
 597         var $span_gamut = array(
 598         #
 599         # These are all the transformations that occur *within* block-level
 600         # tags like paragraphs, headers, and list items.
 601         #
 602                 # Process character escapes, code spans, and inline HTML
 603                 # in one shot.
 604                 "parseSpan"           => -30,
 605
 606                 # Process anchor and image tags. Images must come first,
 607                 # because ![foo][f] looks like an anchor.
 608                 "doImages"            =>  10,
 609                 "doAnchors"           =>  20,
 610
 611                 # Make links out of things like `<http://example.com/>`
 612                 # Must come after doAnchors, because you can use < and >
 613                 # delimiters in inline links like [this](<url>).
 614                 "doAutoLinks"         =>  30,
 615                 "encodeAmpsAndAngles" =>  40,
 616
 617                 "doItalicsAndBold"    =>  50,
 618                 "doHardBreaks"        =>  60,
 619                 );
 620
 621         function runSpanGamut($text) {
 622         #
 623         # Run span gamut tranformations.
 624         #
 625                 foreach ($this->span_gamut as $method => $priority) {
 626                         $text = $this->$method($text);
 627                 }
 628
 629                 return $text;
 630         }
 631
 632
 633         function doHardBreaks($text) {
 634                 # Do hard breaks:
 635                 return preg_replace_callback('/ {2,}\n/',
 636                         array(&$this, '_doHardBreaks_callback'), $text);
 637         }
 638         function _doHardBreaks_callback($matches) {
 639                 return $this->hashPart("<br$this->empty_element_suffix\n");
 640         }
 641
 642
 643         function doAnchors($text) {
 644         #
 645         # Turn Markdown link shortcuts into XHTML <a> tags.
 646         #
 647                 if ($this->in_anchor) return $text;
 648                 $this->in_anchor = true;
 649
 650                 #
 651                 # First, handle reference-style links: [link text] [id]
 652                 #
 653                 $text = preg_replace_callback('{
 654                         (                                       # wrap whole match in $1
 655                           \[
 656                                 ('.$this->nested_brackets_re.') # link text = $2
 657                           \]
 658
 659                           [ ]?                          # one optional space
 660                           (?:\n[ ]*)?           # one optional newline followed by spaces
 661
 662                           \[
 663                                 (.*?)           # id = $3
 664                           \]
 665                         )
 666                         }xs',
 667                         array(&$this, '_doAnchors_reference_callback'), $text);
 668
 669                 #
 670                 # Next, inline-style links: [link text](url "optional title")
 671                 #
 672                 $text = preg_replace_callback('{
 673                         (                               # wrap whole match in $1
 674                           \[
 675                                 ('.$this->nested_brackets_re.') # link text = $2
 676                           \]
 677                           \(                    # literal paren
 678                                 [ ]*
 679                                 (?:
 680                                         <(\S*)> # href = $3
 681                                 |
 682                                         ('.$this->nested_url_parenthesis_re.')  # href = $4
 683                                 )
 684                                 [ ]*
 685                                 (                       # $5
 686                                   ([\'"])       # quote char = $6
 687                                   (.*?)         # Title = $7
 688                                   \6            # matching quote
 689                                   [ ]*  # ignore any spaces/tabs between closing quote and )
 690                                 )?                      # title is optional
 691                           \)
 692                         )
 693                         }xs',
 694                         array(&$this, '_DoAnchors_inline_callback'), $text);
 695
 696                 #
 697                 # Last, handle reference-style shortcuts: [link text]
 698                 # These must come last in case you've also got [link test][1]
 699                 # or [link test](/foo)
 700                 #
 701 //              $text = preg_replace_callback('{
 702 //                      (                                       # wrap whole match in $1
 703 //                        \[
 704 //                              ([^\[\]]+)              # link text = $2; can\'t contain [ or ]
 705 //                        \]
 706 //                      )
 707 //                      }xs',
 708 //                      array(&$this, '_doAnchors_reference_callback'), $text);
 709
 710                 $this->in_anchor = false;
 711                 return $text;
 712         }
 713         function _doAnchors_reference_callback($matches) {
 714                 $whole_match =  $matches[1];
 715                 $link_text   =  $matches[2];
 716                 $link_id     =& $matches[3];
 717
 718                 if ($link_id == "") {
 719                         # for shortcut links like [this][] or [this].
 720                         $link_id = $link_text;
 721                 }
 722
 723                 # lower-case and turn embedded newlines into spaces
 724                 $link_id = strtolower($link_id);
 725                 $link_id = preg_replace('{[ ]?\n}', ' ', $link_id);
 726
 727                 if (isset($this->urls[$link_id])) {
 728                         $url = $this->urls[$link_id];
 729                         $url = $this->encodeAttribute($url);
 730
 731                         $result = "<a href=\"$url\"";
 732                         if ( isset( $this->titles[$link_id] ) ) {
 733                                 $title = $this->titles[$link_id];
 734                                 $title = $this->encodeAttribute($title);
 735                                 $result .=  " title=\"$title\"";
 736                         }
 737
 738                         $link_text = $this->runSpanGamut($link_text);
 739                         $result .= ">$link_text</a>";
 740                         $result = $this->hashPart($result);
 741                 }
 742                 else {
 743                         $result = $whole_match;
 744                 }
 745                 return $result;
 746         }
 747         function _doAnchors_inline_callback($matches) {
 748                 $whole_match    =  $matches[1];
 749                 $link_text              =  $this->runSpanGamut($matches[2]);
 750                 $url                    =  $matches[3] == '' ? $matches[4] : $matches[3];
 751                 $title                  =& $matches[7];
 752
 753                 $url = $this->encodeAttribute($url);
 754
 755                 $result = "<a href=\"$url\"";
 756                 if (isset($title)) {
 757                         $title = $this->encodeAttribute($title);
 758                         $result .=  " title=\"$title\"";
 759                 }
 760
 761                 $link_text = $this->runSpanGamut($link_text);
 762                 $result .= ">$link_text</a>";
 763
 764                 return $this->hashPart($result);
 765         }
 766
 767
 768         function doImages($text) {
 769         #
 770         # Turn Markdown image shortcuts into <img> tags.
 771         #
 772                 #
 773                 # First, handle reference-style labeled images: ![alt text][id]
 774                 #
 775                 $text = preg_replace_callback('{
 776                         (                               # wrap whole match in $1
 777                           !\[
 778                                 ('.$this->nested_brackets_re.')         # alt text = $2
 779                           \]
 780
 781                           [ ]?                          # one optional space
 782                           (?:\n[ ]*)?           # one optional newline followed by spaces
 783
 784                           \[
 785                                 (.*?)           # id = $3
 786                           \]
 787
 788                         )
 789                         }xs',
 790                         array(&$this, '_doImages_reference_callback'), $text);
 791
 792                 #
 793                 # Next, handle inline images:  ![alt text](url "optional title")
 794                 # Don't forget: encode * and _
 795                 #
 796                 $text = preg_replace_callback('{
 797                         (                               # wrap whole match in $1
 798                           !\[
 799                                 ('.$this->nested_brackets_re.')         # alt text = $2
 800                           \]
 801                           \s?                   # One optional whitespace character
 802                           \(                    # literal paren
 803                                 [ ]*
 804                                 (?:
 805                                         <(\S*)> # src url = $3
 806                                 |
 807                                         ('.$this->nested_url_parenthesis_re.')  # src url = $4
 808                                 )
 809                                 [ ]*
 810                                 (                       # $5
 811                                   ([\'"])       # quote char = $6
 812                                   (.*?)         # title = $7
 813                                   \6            # matching quote
 814                                   [ ]*
 815                                 )?                      # title is optional
 816                           \)
 817                         )
 818                         }xs',
 819                         array(&$this, '_doImages_inline_callback'), $text);
 820
 821                 return $text;
 822         }
 823         function _doImages_reference_callback($matches) {
 824                 $whole_match = $matches[1];
 825                 $alt_text    = $matches[2];
 826                 $link_id     = strtolower($matches[3]);
 827
 828                 if ($link_id == "") {
 829                         $link_id = strtolower($alt_text); # for shortcut links like ![this][].
 830                 }
 831
 832                 $alt_text = $this->encodeAttribute($alt_text);
 833                 if (isset($this->urls[$link_id])) {
 834                         $url = $this->encodeAttribute($this->urls[$link_id]);
 835                         $result = "<img src=\"$url\" alt=\"$alt_text\"";
 836                         if (isset($this->titles[$link_id])) {
 837                                 $title = $this->titles[$link_id];
 838                                 $title = $this->encodeAttribute($title);
 839                                 $result .=  " title=\"$title\"";
 840                         }
 841                         $result .= $this->empty_element_suffix;
 842                         $result = $this->hashPart($result);
 843                 }
 844                 else {
 845                         # If there's no such link ID, leave intact:
 846                         $result = $whole_match;
 847                 }
 848
 849                 return $result;
 850         }
 851         function _doImages_inline_callback($matches) {
 852                 $whole_match    = $matches[1];
 853                 $alt_text               = $matches[2];
 854                 $url                    = $matches[3] == '' ? $matches[4] : $matches[3];
 855                 $title                  =& $matches[7];
 856
 857                 $alt_text = $this->encodeAttribute($alt_text);
 858                 $url = $this->encodeAttribute($url);
 859                 $result = "<img src=\"$url\" alt=\"$alt_text\"";
 860                 if (isset($title)) {
 861                         $title = $this->encodeAttribute($title);
 862                         $result .=  " title=\"$title\""; # $title already quoted
 863                 }
 864                 $result .= $this->empty_element_suffix;
 865
 866                 return $this->hashPart($result);
 867         }
 868
 869
 870         function doHeaders($text) {
 871                 # Setext-style headers:
 872                 #         Header 1
 873                 #         ========
 874                 #
 875                 #         Header 2
 876                 #         --------
 877                 #
 878                 $text = preg_replace_callback('{ ^(.+?)[ ]*\n(=+|-+)[ ]*\n+ }mx',
 879                         array(&$this, '_doHeaders_callback_setext'), $text);
 880
 881                 # atx-style headers:
 882                 #       # Header 1
 883                 #       ## Header 2
 884                 #       ## Header 2 with closing hashes ##
 885                 #       ...
 886                 #       ###### Header 6
 887                 #
 888                 $text = preg_replace_callback('{
 889                                 ^(\#{1,6})      # $1 = string of #\'s
 890                                 [ ]*
 891                                 (.+?)           # $2 = Header text
 892                                 [ ]*
 893                                 \#*                     # optional closing #\'s (not counted)
 894                                 \n+
 895                         }xm',
 896                         array(&$this, '_doHeaders_callback_atx'), $text);
 897
 898                 return $text;
 899         }
 900         function _doHeaders_callback_setext($matches) {
 901                 # Terrible hack to check we haven't found an empty list item.
 902                 if ($matches[2] == '-' && preg_match('{^-(?: |$)}', $matches[1]))
 903                         return $matches[0];
 904
 905                 $level = $matches[2]{0} == '=' ? 1 : 2;
 906                 $block = "<h$level>".$this->runSpanGamut($matches[1])."</h$level>";
 907                 return "\n" . $this->hashBlock($block) . "\n\n";
 908         }
 909         function _doHeaders_callback_atx($matches) {
 910                 $level = strlen($matches[1]);
 911                 $block = "<h$level>".$this->runSpanGamut($matches[2])."</h$level>";
 912                 return "\n" . $this->hashBlock($block) . "\n\n";
 913         }
 914
 915
 916         function doLists($text) {
 917         #
 918         # Form HTML ordered (numbered) and unordered (bulleted) lists.
 919         #
 920                 $less_than_tab = $this->tab_width - 1;
 921
 922                 # Re-usable patterns to match list item bullets and number markers:
 923                 $marker_ul_re  = '[*+-]';
 924                 $marker_ol_re  = '\d+[.]';
 925                 $marker_any_re = "(?:$marker_ul_re|$marker_ol_re)";
 926
 927                 $markers_relist = array($marker_ul_re, $marker_ol_re);
 928
 929                 foreach ($markers_relist as $marker_re) {
 930                         # Re-usable pattern to match any entirel ul or ol list:
 931                         $whole_list_re = '
 932                                 (                                                               # $1 = whole list
 933                                   (                                                             # $2
 934                                         [ ]{0,'.$less_than_tab.'}
 935                                         ('.$marker_re.')                        # $3 = first list item marker
 936                                         [ ]+
 937                                   )
 938                                   (?s:.+?)
 939                                   (                                                             # $4
 940                                           \z
 941                                         |
 942                                           \n{2,}
 943                                           (?=\S)
 944                                           (?!                                           # Negative lookahead for another list item marker
 945                                                 [ ]*
 946                                                 '.$marker_re.'[ ]+
 947                                           )
 948                                   )
 949                                 )
 950                         '; // mx
 951
 952                         # We use a different prefix before nested lists than top-level lists.
 953                         # See extended comment in _ProcessListItems().
 954
 955                         if ($this->list_level) {
 956                                 $text = preg_replace_callback('{
 957                                                 ^
 958                                                 '.$whole_list_re.'
 959                                         }mx',
 960                                         array(&$this, '_doLists_callback'), $text);
 961                         }
 962                         else {
 963                                 $text = preg_replace_callback('{
 964                                                 (?:(?<=\n)\n|\A\n?) # Must eat the newline
 965                                                 '.$whole_list_re.'
 966                                         }mx',
 967                                         array(&$this, '_doLists_callback'), $text);
 968                         }
 969                 }
 970
 971                 return $text;
 972         }
 973         function _doLists_callback($matches) {
 974                 # Re-usable patterns to match list item bullets and number markers:
 975                 $marker_ul_re  = '[*+-]';
 976                 $marker_ol_re  = '\d+[.]';
 977                 $marker_any_re = "(?:$marker_ul_re|$marker_ol_re)";
 978
 979                 $list = $matches[1];
 980                 $list_type = preg_match("/$marker_ul_re/", $matches[3]) ? "ul" : "ol";
 981
 982                 $marker_any_re = ( $list_type == "ul" ? $marker_ul_re : $marker_ol_re );
 983
 984                 $list .= "\n";
 985                 $result = $this->processListItems($list, $marker_any_re);
 986
 987                 $result = $this->hashBlock("<$list_type>\n" . $result . "</$list_type>");
 988                 return "\n". $result ."\n\n";
 989         }
 990
 991         var $list_level = 0;
 992
 993         function processListItems($list_str, $marker_any_re) {
 994         #
 995         #       Process the contents of a single ordered or unordered list, splitting it
 996         #       into individual list items.
 997         #
 998                 # The $this->list_level global keeps track of when we're inside a list.
 999                 # Each time we enter a list, we increment it; when we leave a list,
1000                 # we decrement. If it's zero, we're not in a list anymore.
1001                 #
1002                 # We do this because when we're not inside a list, we want to treat
1003                 # something like this:
1004                 #
1005                 #               I recommend upgrading to version
1006                 #               8. Oops, now this line is treated
1007                 #               as a sub-list.
1008                 #
1009                 # As a single paragraph, despite the fact that the second line starts
1010                 # with a digit-period-space sequence.
1011                 #
1012                 # Whereas when we're inside a list (or sub-list), that line will be
1013                 # treated as the start of a sub-list. What a kludge, huh? This is
1014                 # an aspect of Markdown's syntax that's hard to parse perfectly
1015                 # without resorting to mind-reading. Perhaps the solution is to
1016                 # change the syntax rules such that sub-lists must start with a
1017                 # starting cardinal number; e.g. "1." or "a.".
1018
1019                 $this->list_level++;
1020
1021                 # trim trailing blank lines:
1022                 $list_str = preg_replace("/\n{2,}\\z/", "\n", $list_str);
1023
1024                 $list_str = preg_replace_callback('{
1025                         (\n)?                                                   # leading line = $1
1026                         (^[ ]*)                                                 # leading whitespace = $2
1027                         ('.$marker_any_re.'                             # list marker and space = $3
1028                                 (?:[ ]+|(?=\n)) # space only required if item is not empty
1029                         )
1030                         ((?s:.*?))                                              # list item text   = $4
1031                         (?:(\n+(?=\n))|\n)                              # tailing blank line = $5
1032                         (?= \n* (\z | \2 ('.$marker_any_re.') (?:[ ]+|(?=\n))))
1033                         }xm',
1034                         array(&$this, '_processListItems_callback'), $list_str);
1035
1036                 $this->list_level--;
1037                 return $list_str;
1038         }
1039         function _processListItems_callback($matches) {
1040                 $item = $matches[4];
1041                 $leading_line =& $matches[1];
1042                 $leading_space =& $matches[2];
1043                 $marker_space = $matches[3];
1044                 $tailing_blank_line =& $matches[5];
1045
1046                 if ($leading_line || $tailing_blank_line ||
1047                         preg_match('/\n{2,}/', $item))
1048                 {
1049                         # Replace marker with the appropriate whitespace indentation
1050                         $item = $leading_space . str_repeat(' ', strlen($marker_space)) . $item;
1051                         $item = $this->runBlockGamut($this->outdent($item)."\n");
1052                 }
1053                 else {
1054                         # Recursion for sub-lists:
1055                         $item = $this->doLists($this->outdent($item));
1056                         $item = preg_replace('/\n+$/', '', $item);
1057                         $item = $this->runSpanGamut($item);
1058                 }
1059
1060                 return "<li>" . $item . "</li>\n";
1061         }
1062
1063
1064         function doCodeBlocks($text) {
1065         #
1066         #       Process Markdown `<pre><code>` blocks.
1067         #
1068                 $text = preg_replace_callback('{
1069                                 (?:\n\n|\A\n?)
1070                                 (                   # $1 = the code block -- one or more lines, starting with a space/tab
1071                                   (?>
1072                                         [ ]{'.$this->tab_width.'}  # Lines must start with a tab or a tab-width of spaces
1073                                         .*\n+
1074                                   )+
1075                                 )
1076                                 ((?=^[ ]{0,'.$this->tab_width.'}\S)|\Z) # Lookahead for non-space at line-start, or end of doc
1077                         }xm',
1078                         array(&$this, '_doCodeBlocks_callback'), $text);
1079
1080                 return $text;
1081         }
1082         function _doCodeBlocks_callback($matches) {
1083                 $codeblock = $matches[1];
1084
1085                 $codeblock = $this->outdent($codeblock);
1086                 $codeblock = htmlspecialchars($codeblock, ENT_NOQUOTES);
1087
1088                 # trim leading newlines and trailing newlines
1089                 $codeblock = preg_replace('/\A\n+|\n+\z/', '', $codeblock);
1090
1091                 $codeblock = "<pre><code>$codeblock\n</code></pre>";
1092                 return "\n\n".$this->hashBlock($codeblock)."\n\n";
1093         }
1094
1095
1096         function makeCodeSpan($code) {
1097         #
1098         # Create a code span markup for $code. Called from handleSpanToken.
1099         #
1100                 $code = htmlspecialchars(trim($code), ENT_NOQUOTES);
1101                 return $this->hashPart("<code>$code</code>");
1102         }
1103
1104
1105         var $em_relist = array(
1106                 ''  => '(?:(?<!\*)\*(?!\*)|(?<!_)_(?!_))(?=\S)(?![.,:;]\s)',
1107                 '*' => '(?<=\S)(?<!\*)\*(?!\*)',
1108                 '_' => '(?<=\S)(?<!_)_(?!_)',
1109                 );
1110         var $strong_relist = array(
1111                 ''   => '(?:(?<!\*)\*\*(?!\*)|(?<!_)__(?!_))(?=\S)(?![.,:;]\s)',
1112                 '**' => '(?<=\S)(?<!\*)\*\*(?!\*)',
1113                 '__' => '(?<=\S)(?<!_)__(?!_)',
1114                 );
1115         var $em_strong_relist = array(
1116                 ''    => '(?:(?<!\*)\*\*\*(?!\*)|(?<!_)___(?!_))(?=\S)(?![.,:;]\s)',
1117                 '***' => '(?<=\S)(?<!\*)\*\*\*(?!\*)',
1118                 '___' => '(?<=\S)(?<!_)___(?!_)',
1119                 );
1120         var $em_strong_prepared_relist;
1121
1122         function prepareItalicsAndBold() {
1123         #
1124         # Prepare regular expressions for seraching emphasis tokens in any
1125         # context.
1126         #
1127                 foreach ($this->em_relist as $em => $em_re) {
1128                         foreach ($this->strong_relist as $strong => $strong_re) {
1129                                 # Construct list of allowed token expressions.
1130                                 $token_relist = array();
1131                                 if (isset($this->em_strong_relist["$em$strong"])) {
1132                                         $token_relist[] = $this->em_strong_relist["$em$strong"];
1133                                 }
1134                                 $token_relist[] = $em_re;
1135                                 $token_relist[] = $strong_re;
1136
1137                                 # Construct master expression from list.
1138                                 $token_re = '{('. implode('|', $token_relist) .')}';
1139                                 $this->em_strong_prepared_relist["$em$strong"] = $token_re;
1140                         }
1141                 }
1142         }
1143
1144         function doItalicsAndBold($text) {
1145                 $token_stack = array('');
1146                 $text_stack = array('');
1147                 $em = '';
1148                 $strong = '';
1149                 $tree_char_em = false;
1150
1151                 while (1) {
1152                         #
1153                         # Get prepared regular expression for seraching emphasis tokens
1154                         # in current context.
1155                         #
1156                         $token_re = $this->em_strong_prepared_relist["$em$strong"];
1157
1158                         #
1159                         # Each loop iteration seach for the next emphasis token.
1160                         # Each token is then passed to handleSpanToken.
1161                         #
1162                         $parts = preg_split($token_re, $text, 2, PREG_SPLIT_DELIM_CAPTURE);
1163                         $text_stack[0] .= $parts[0];
1164                         $token =& $parts[1];
1165                         $text =& $parts[2];
1166
1167                         if (empty($token)) {
1168                                 # Reached end of text span: empty stack without emitting.
1169                                 # any more emphasis.
1170                                 while ($token_stack[0]) {
1171                                         $text_stack[1] .= array_shift($token_stack);
1172                                         $text_stack[0] .= array_shift($text_stack);
1173                                 }
1174                                 break;
1175                         }
1176
1177                         $token_len = strlen($token);
1178                         if ($tree_char_em) {
1179                                 # Reached closing marker while inside a three-char emphasis.
1180                                 if ($token_len == 3) {
1181                                         # Three-char closing marker, close em and strong.
1182                                         array_shift($token_stack);
1183                                         $span = array_shift($text_stack);
1184                                         $span = $this->runSpanGamut($span);
1185                                         $span = "<strong><em>$span</em></strong>";
1186                                         $text_stack[0] .= $this->hashPart($span);
1187                                         $em = '';
1188                                         $strong = '';
1189                                 } else {
1190                                         # Other closing marker: close one em or strong and
1191                                         # change current token state to match the other
1192                                         $token_stack[0] = str_repeat($token{0}, 3-$token_len);
1193                                         $tag = $token_len == 2 ? "strong" : "em";
1194                                         $span = $text_stack[0];
1195                                         $span = $this->runSpanGamut($span);
1196                                         $span = "<$tag>$span</$tag>";
1197                                         $text_stack[0] = $this->hashPart($span);
1198                                         $$tag = ''; # $$tag stands for $em or $strong
1199                                 }
1200                                 $tree_char_em = false;
1201                         } else if ($token_len == 3) {
1202                                 if ($em) {
1203                                         # Reached closing marker for both em and strong.
1204                                         # Closing strong marker:
1205                                         for ($i = 0; $i < 2; ++$i) {
1206                                                 $shifted_token = array_shift($token_stack);
1207                                                 $tag = strlen($shifted_token) == 2 ? "strong" : "em";
1208                                                 $span = array_shift($text_stack);
1209                                                 $span = $this->runSpanGamut($span);
1210                                                 $span = "<$tag>$span</$tag>";
1211                                                 $text_stack[0] .= $this->hashPart($span);
1212                                                 $$tag = ''; # $$tag stands for $em or $strong
1213                                         }
1214                                 } else {
1215                                         # Reached opening three-char emphasis marker. Push on token
1216                                         # stack; will be handled by the special condition above.
1217                                         $em = $token{0};
1218                                         $strong = "$em$em";
1219                                         array_unshift($token_stack, $token);
1220                                         array_unshift($text_stack, '');
1221                                         $tree_char_em = true;
1222                                 }
1223                         } else if ($token_len == 2) {
1224                                 if ($strong) {
1225                                         # Unwind any dangling emphasis marker:
1226                                         if (strlen($token_stack[0]) == 1) {
1227                                                 $text_stack[1] .= array_shift($token_stack);
1228                                                 $text_stack[0] .= array_shift($text_stack);
1229                                         }
1230                                         # Closing strong marker:
1231                                         array_shift($token_stack);
1232                                         $span = array_shift($text_stack);
1233                                         $span = $this->runSpanGamut($span);
1234                                         $span = "<strong>$span</strong>";
1235                                         $text_stack[0] .= $this->hashPart($span);
1236                                         $strong = '';
1237                                 } else {
1238                                         array_unshift($token_stack, $token);
1239                                         array_unshift($text_stack, '');
1240                                         $strong = $token;
1241                                 }
1242                         } else {
1243                                 # Here $token_len == 1
1244                                 if ($em) {
1245                                         if (strlen($token_stack[0]) == 1) {
1246                                                 # Closing emphasis marker:
1247                                                 array_shift($token_stack);
1248                                                 $span = array_shift($text_stack);
1249                                                 $span = $this->runSpanGamut($span);
1250                                                 $span = "<em>$span</em>";
1251                                                 $text_stack[0] .= $this->hashPart($span);
1252                                                 $em = '';
1253                                         } else {
1254                                                 $text_stack[0] .= $token;
1255                                         }
1256                                 } else {
1257                                         array_unshift($token_stack, $token);
1258                                         array_unshift($text_stack, '');
1259                                         $em = $token;
1260                                 }
1261                         }
1262                 }
1263                 return $text_stack[0];
1264         }
1265
1266
1267         function doBlockQuotes($text) {
1268                 $text = preg_replace_callback('/
1269                           (                                                             # Wrap whole match in $1
1270                                 (?>
1271                                   ^[ ]*>[ ]?                    # ">" at the start of a line
1272                                         .+\n                                    # rest of the first line
1273                                   (.+\n)*                                       # subsequent consecutive lines
1274                                   \n*                                           # blanks
1275                                 )+
1276                           )
1277                         /xm',
1278                         array(&$this, '_doBlockQuotes_callback'), $text);
1279
1280                 return $text;
1281         }
function _doBlockQuotes_callback($matches) {
#
# Build the replacement for one blockquote run found by doBlockQuotes.
#
#   Params:
#       $matches - regex match array; $matches[1] is the whole quoted run
#
#   Returns the value of hashBlock() for the generated <blockquote>
#   markup, preceded by one newline and followed by two.
#
    # Remove one level of ">" quoting and empty out whitespace-only lines,
    # then run the block gamut over what is left (this is the recursion
    # that handles nested content).
    $inner = preg_replace('/^[ ]*>[ ]?|^[ ]+$/m', '', $matches[1]);
    $inner = $this->runBlockGamut($inner);

    # Indent every line by two spaces...
    $inner = preg_replace('/^/m', "  ", $inner);
    # ...except inside <pre> elements, where the added spaces would show
    # up in the output; the second callback strips them again.
    $inner = preg_replace_callback('{(\s*<pre>.+?</pre>)}sx',
        array(&$this, '_DoBlockQuotes_callback2'), $inner);

    $hashed = $this->hashBlock("<blockquote>\n$inner\n</blockquote>");
    return "\n". $hashed ."\n\n";
}
function _doBlockQuotes_callback2($matches) {
#
# Undo the blockquote indentation inside a <pre> element: remove two
# leading spaces from every line of $matches[1] that starts with them.
#
    return preg_replace('/^  /m', '', $matches[1]);
}
1301
1302
function formParagraphs($text) {
#
# Split $text into chunks separated by blank lines and turn each chunk
# into its final form.
#
#   Params:
#       $text - string to process with html <p> tags
#
#   Returns the processed chunks joined by a blank line.
#
    # Drop newlines at the very beginning and very end, then cut the text
    # wherever two or more newlines occur in a row.
    $text  = preg_replace('/\A\n+|\n+\z/', '', $text);
    $grafs = preg_split('/\n{2,}/', $text, -1, PREG_SPLIT_NO_EMPTY);

    foreach ($grafs as $index => $chunk) {
        if (preg_match('/^B\x1A[0-9]+B$/', $chunk)) {
            # The chunk is nothing but a block placeholder: put back the
            # content stored for it in $this->html_hashes, unchanged.
            # (A disabled experiment that re-processed the contents of
            # <div markdown="1"> blocks used to sit here; it never ran.)
            $grafs[$index] = $this->html_hashes[$chunk];
            continue;
        }

        # Anything else is a paragraph: run the span gamut, replace any
        # leading spaces with the opening tag, close the tag, and swap
        # placeholders inside the paragraph back in.
        $html = $this->runSpanGamut($chunk);
        $html = preg_replace('/^([ ]*)/', "<p>", $html) . "</p>";
        $grafs[$index] = $this->unhash($html);
    }

    return implode("\n\n", $grafs);
}
1371
1372
function encodeAttribute($text) {
#
# Prepare $text for use as the value of a double-quoted HTML attribute:
# it goes through encodeAmpsAndAngles() first, then every double quote
# becomes &quot;. Single quotes are left untouched, so the result is
# *not* safe inside an attribute delimited by single quotes.
#
    $encoded = $this->encodeAmpsAndAngles($text);
    return str_replace('"', '&quot;', $encoded);
}
1382
1383
function encodeAmpsAndAngles($text) {
#
# Escape ampersands and "<" characters in $text.
#
# When $this->no_entities is set, every "&" becomes "&amp;". Otherwise
# an "&" is left alone when it starts something shaped like a character
# entity (named, decimal or hexadecimal, ending in ";") and escaped in
# all other cases. Every "<" becomes "&lt;" in both modes.
#
    # The pattern for the entity-aware branch is based entirely on
    # Nat Irons's Amputator MT plugin:
    # <http://bumppo.net/projects/amputator/>
    $text = $this->no_entities
        ? str_replace('&', '&amp;', $text)
        : preg_replace('/&(?!#?[xX]?(?:[0-9a-fA-F]+|\w+);)/', '&amp;', $text);

    # Encode remaining <'s
    return str_replace('<', '&lt;', $text);
}
1403
1404
function doAutoLinks($text) {
#
# Replace <scheme:...> URLs and <user@host> e-mail addresses written
# between angle brackets with the output of their respective callbacks.
# URLs are handled first, e-mail addresses second.
#
    # Accepted URL schemes: http, https, ftp and dict (case-insensitive).
    $url_re = '{<((https?|ftp|dict):[^\'">\s]+)>}i';

    # Email addresses: <address@domain.foo>, with an optional "mailto:"
    # prefix that is not part of the captured address.
    $email_re = '{
        <
        (?:mailto:)?
        (
            [-.\w\x80-\xFF]+
            \@
            [-a-z0-9\x80-\xFF]+(\.[-a-z0-9\x80-\xFF]+)*\.[a-z]+
        )
        >
        }xi';

    $text = preg_replace_callback($url_re,
        array(&$this, '_doAutoLinks_url_callback'), $text);

    return preg_replace_callback($email_re,
        array(&$this, '_doAutoLinks_email_callback'), $text);
}
function _doAutoLinks_url_callback($matches) {
#
# Build the anchor for one automatic URL link. $matches[1] is the URL
# without its angle brackets; its attribute-encoded form serves as both
# the href and the link text. The anchor is returned via hashPart().
#
    $href = $this->encodeAttribute($matches[1]);
    return $this->hashPart('<a href="' . $href . '">' . $href . '</a>');
}
function _doAutoLinks_email_callback($matches) {
#
# Build the anchor for one automatic e-mail link. $matches[1] is the
# bare address; the link produced by encodeEmailAddress() is returned
# via hashPart().
#
    return $this->hashPart($this->encodeEmailAddress($matches[1]));
}
1434
1435
function encodeEmailAddress($addr) {
#
#   Input: an email address, e.g. "foo@example.com"
#
#   Output: the email address as a mailto link, with most characters
#       of the address written as a decimal or hexadecimal character
#       reference, in the hopes of foiling most address harvesting
#       spam bots. E.g.:
#
#         <a href="&#109;&#x61;&#105;&#x6c;&#116;&#x6f;&#58;&#x66;o&#111;
#        &#x40;&#101;&#x78;&#97;&#x6d;&#112;&#x6c;&#101;&#46;&#x63;&#111;
#        &#x6d;">&#x66;o&#111;&#x40;&#101;&#x78;&#97;&#x6d;&#112;&#x6c;
#        &#101;&#46;&#x63;&#111;&#x6d;</a>
#
#   The choice of encoding per character is derived from a checksum of
#   the address, so the same address always yields the same output.
#
#   Based on a filter by Matthew Wickline, posted to BBEdit-Talk.
#   With some optimizations by Milian Wolff.
#
    $addr  = "mailto:" . $addr;
    $chars = preg_split('/(?<!^)(?!$)/', $addr);
    $seed  = (int)abs(crc32($addr) / strlen($addr)); # Deterministic seed.

    foreach ($chars as $pos => $char) {
        $code = ord($char);

        # Bytes outside the ASCII range are passed through untouched.
        if ($code >= 128) continue;

        # Pseudo-random value in 0..99 that depends on seed and position.
        $roll = ($seed * (1 + $pos)) % 100;

        # Roughly 10% of characters stay raw -- but never '@', which
        # *must* be encoded.
        if ($roll > 90 && $char != '@') continue;

        # The rest is split roughly evenly between hex and decimal.
        $chars[$pos] = ($roll < 45)
            ? '&#x'.dechex($code).';'
            : '&#'.$code.';';
    }

    $href  = implode('', $chars);
    $label = implode('', array_slice($chars, 7)); # text without `mailto:`

    return "<a href=\"$href\">$label</a>";
}
1475
1476
function parseSpan($str) {
#
# Walk through $str token by token. A token is a backslash escape, an
# opening code span marker (a run of backticks) or -- unless
# $this->no_markup is set -- an HTML comment, processing instruction or
# tag. Text between tokens is copied to the output as is; each token is
# replaced by whatever handleSpanToken() returns for it.
#
    # Alternatives that recognise embedded markup; left out entirely in
    # no-markup mode.
    $markup_alternatives = $this->no_markup ? '' : '
            |
                <!--    .*?     -->     # comment
            |
                <\?.*?\?> | <%.*?%>     # processing instruction
            |
                <[/!$]?[-a-zA-Z0-9:]+   # regular tags
                (?>
                    \s
                    (?>[^"\'>]+|"[^"]*"|\'[^\']*\')*
                )?
                >
        ';

    $span_re = '{
            (
                \\\\'.$this->escape_chars_re.'
            |
                (?<![`\\\\])
                `+                      # code span marker
        '.$markup_alternatives.'
            )
            }xs';

    $output = '';
    for (;;) {
        # Cut at the first token only: $parts[0] is the text before it,
        # $parts[1] the token itself, $parts[2] everything after it.
        $parts = preg_split($span_re, $str, 2, PREG_SPLIT_DELIM_CAPTURE);

        # Plain text preceding the token (possibly empty).
        $output .= $parts[0];

        # No token left: the end of the string has been reached.
        if (!isset($parts[1])) break;

        # The remainder is passed by reference, so the handler may
        # consume part of it (e.g. up to a closing code span marker)
        # before the next iteration continues from what is left.
        $output .= $this->handleSpanToken($parts[1], $parts[2]);
        $str = $parts[2];
    }

    return $output;
}
1531
1532
function handleSpanToken($token, &$str) {
#
# Handle $token provided by parseSpan by determining its nature and
# returning the corresponding value that should replace it.
#
#   Params:
#       $token - the matched token (backslash escape, run of backticks,
#                or markup); expected to be non-empty
#       $str   - text following the token, passed by reference; when a
#                code span is closed, the consumed part is removed
#
#   Returns the replacement for the token.
#
    # Square brackets are used for string offsets: the curly-brace form
    # ($token{0}) is deprecated since PHP 7.4 and a parse error in PHP 8.
    switch ($token[0]) {
        case "\\":
            # Escaped character: emit it as a decimal character reference.
            return $this->hashPart("&#". ord($token[1]). ";");
        case "`":
            # Search for end marker in remaining text: the same number of
            # backticks, preceded by a non-backtick and not followed by
            # another backtick.
            if (preg_match('/^(.*?[^`])'.preg_quote($token).'(?!`)(.*)$/sm',
                $str, $matches))
            {
                $str = $matches[2];
                $codespan = $this->makeCodeSpan($matches[1]);
                return $this->hashPart($codespan);
            }
            return $token; // return as text since no ending marker found.
        default:
            return $this->hashPart($token);
    }
}
1555
1556
function outdent($text) {
#
# Strip one level of indentation from the start of every line of $text:
# either a single tab, or between one and $this->tab_width spaces.
#
    $indent_re = '/^(\t|[ ]{1,'.$this->tab_width.'})/m';
    return preg_replace($indent_re, '', $text);
}
1563
1564
1565         # String length function for detab. `_initDetab` will create a function to
1566         # handle UTF-8 if the default function does not exist.
1567         var $utf8_strlen = 'mb_strlen';
1568
function detab($text) {
#
# Replace tabs with the appropriate amount of space.
#
# Only lines that actually contain a tab are touched; each of them is
# handed to _detab_callback, which rebuilds the line with spaces.
#
    return preg_replace_callback('/^.*\t.*$/m',
        array(&$this, '_detab_callback'), $text);
}
function _detab_callback($matches) {
#
# Expand the tabs of a single line ($matches[0]). The line is cut at
# every tab and glued back together, padding with spaces up to the next
# multiple of $this->tab_width each time.
#
    $strlen = $this->utf8_strlen; # strlen function for UTF-8.

    # The first segment starts the rebuilt line; the remaining ones each
    # followed a tab in the original.
    $segments = explode("\t", $matches[0]);
    $line = array_shift($segments);

    foreach ($segments as $segment) {
        # Distance from the current line length to the next tab stop.
        $padding = $this->tab_width -
            $strlen($line, 'UTF-8') % $this->tab_width;
        $line .= str_repeat(" ", $padding) . $segment;
    }

    return $line;
}
function _initDetab() {
#
# Make sure the `utf8_strlen` property (initially `mb_strlen`) holds
# something callable. If it does not, install a fallback that loosely
# counts the number of UTF-8 characters with a regular expression.
#
# The fallback is an anonymous function: create_function(), used here
# previously, is deprecated since PHP 7.2 and removed in PHP 8.0.
#
    # is_callable() rather than function_exists(): it accepts both a
    # function name and the closure installed below, so calling this
    # method a second time is harmless.
    if (is_callable($this->utf8_strlen)) return;

    $this->utf8_strlen = function ($text) {
        # Count one per single byte in 00-BF and one per lead byte C0-FF
        # together with the continuation bytes (80-BF) that follow it.
        # Any extra argument passed by the caller (the encoding name) is
        # ignored.
        return preg_match_all(
            '/[\x00-\xBF]|[\xC0-\xFF][\x80-\xBF]*/',
            $text, $m);
    };
}
1611
1612
function unhash($text) {
#
# Swap the stored content back in for every placeholder found in $text.
# A placeholder is any character, the \x1A byte, one or more digits and
# the same character again; the replacement comes from _unhash_callback.
#
    $placeholder_re = '/(.)\x1A[0-9]+\1/';
    return preg_replace_callback($placeholder_re,
        array(&$this, '_unhash_callback'), $text);
}
function _unhash_callback($matches) {
#
# Return the entry of $this->html_hashes stored under the complete
# placeholder that was matched ($matches[0]).
#
    $placeholder = $matches[0];
    return $this->html_hashes[$placeholder];
}
1623
1624 }
1625
1626 /*
1627
1628 PHP Markdown
1629 ============
1630
1631 Description
1632 -----------
1633
1634 This is a PHP translation of the original Markdown formatter written in
1635 Perl by John Gruber.
1636
1637 Markdown is a text-to-HTML filter; it translates an easy-to-read /
1638 easy-to-write structured text format into HTML. Markdown's text format
1639 is most similar to that of plain text email, and supports features such
1640 as headers, *emphasis*, code blocks, blockquotes, and links.
1641
1642 Markdown's syntax is designed not as a generic markup language, but
1643 specifically to serve as a front-end to (X)HTML. You can use span-level
1644 HTML tags anywhere in a Markdown document, and you can use block-level
1645 HTML tags (like <div> and <table>) as well.
1646
1647 For more information about Markdown's syntax, see:
1648
1649 <http://daringfireball.net/projects/markdown/>
1650
1651
1652 Bugs
1653 ----
1654
1655 To file bug reports please send email to:
1656
1657 <michel.fortin@michelf.com>
1658
1659 Please include with your report: (1) the example input; (2) the output you
1660 expected; (3) the output Markdown actually produced.
1661
1662
1663 Version History
1664 ---------------
1665
1666 See the readme file for detailed release notes for this version.
1667
1668
1669 Copyright and License
1670 ---------------------
1671
1672 PHP Markdown
1673 Copyright (c) 2004-2008 Michel Fortin
1674 <http://www.michelf.com/>
1675 All rights reserved.
1676
1677 Based on Markdown
1678 Copyright (c) 2003-2006 John Gruber
1679 <http://daringfireball.net/>
1680 All rights reserved.
1681
1682 Redistribution and use in source and binary forms, with or without
1683 modification, are permitted provided that the following conditions are
1684 met:
1685
1686 *       Redistributions of source code must retain the above copyright notice,
1687         this list of conditions and the following disclaimer.
1688
1689 *       Redistributions in binary form must reproduce the above copyright
1690         notice, this list of conditions and the following disclaimer in the
1691         documentation and/or other materials provided with the distribution.
1692
1693 *       Neither the name "Markdown" nor the names of its contributors may
1694         be used to endorse or promote products derived from this software
1695         without specific prior written permission.
1696
1697 This software is provided by the copyright holders and contributors "as
1698 is" and any express or implied warranties, including, but not limited
1699 to, the implied warranties of merchantability and fitness for a
1700 particular purpose are disclaimed. In no event shall the copyright owner
1701 or contributors be liable for any direct, indirect, incidental, special,
1702 exemplary, or consequential damages (including, but not limited to,
1703 procurement of substitute goods or services; loss of use, data, or
1704 profits; or business interruption) however caused and on any theory of
1705 liability, whether in contract, strict liability, or tort (including
1706 negligence or otherwise) arising in any way out of the use of this
1707 software, even if advised of the possibility of such damage.
1708
1709 */
1710 ?>