]> git.mxchange.org Git - quix0rs-gnu-social.git/blob - extlib/markdown.php
DEBUG HACK: error checking for xml_parse in XMPPHP input stream
[quix0rs-gnu-social.git] / extlib / markdown.php
1 <?php
2 #
3 # Markdown  -  A text-to-HTML conversion tool for web writers
4 #
5 # PHP Markdown
6 # Copyright (c) 2004-2008 Michel Fortin  
7 # <http://www.michelf.com/projects/php-markdown/>
8 #
9 # Original Markdown
10 # Copyright (c) 2004-2006 John Gruber  
11 # <http://daringfireball.net/projects/markdown/>
12 #
13
14
define( 'MARKDOWN_VERSION',  "1.0.1m" ); # Sat 21 Jun 2008


#
# Global default settings:
#
# Each setting below can be predefined by the including application before
# this file is loaded; an already defined constant is left untouched.
# The explicit defined() guard is used instead of "@define(...)": the "@"
# operator only hides the "constant already defined" error from the output,
# a custom error handler would still be invoked for it.
#

# Change to ">" for HTML output
defined('MARKDOWN_EMPTY_ELEMENT_SUFFIX') or define( 'MARKDOWN_EMPTY_ELEMENT_SUFFIX',  " />");

# Define the width of a tab for code blocks.
defined('MARKDOWN_TAB_WIDTH') or define( 'MARKDOWN_TAB_WIDTH',     4 );


#
# WordPress settings:
#

# Change to false to remove Markdown from posts and/or comments.
defined('MARKDOWN_WP_POSTS') or define( 'MARKDOWN_WP_POSTS',      true );
defined('MARKDOWN_WP_COMMENTS') or define( 'MARKDOWN_WP_COMMENTS',   true );



### Standard Function Interface ###

# Name of the class that Markdown() instantiates as its parser.
defined('MARKDOWN_PARSER_CLASS') or define( 'MARKDOWN_PARSER_CLASS',  'Markdown_Parser' );
42
function Markdown($text) {
#
# Transform $text with the configured parser and return the result.
#
# The parser object (an instance of the class named by
# MARKDOWN_PARSER_CLASS) is created on the first call only and then kept
# in a static variable so later calls reuse it.
#
        static $parser = null;

        if ($parser === null) {
                $class_name = MARKDOWN_PARSER_CLASS;
                $parser = new $class_name();
        }

        $result = $parser->transform($text);
        return $result;
}
57
58
59 ### WordPress Plugin Interface ###
60
61 /*
62 Plugin Name: Markdown
63 Plugin URI: http://www.michelf.com/projects/php-markdown/
64 Description: <a href="http://daringfireball.net/projects/markdown/syntax">Markdown syntax</a> allows you to write using an easy-to-read, easy-to-write plain text format. Based on the original Perl version by <a href="http://daringfireball.net/">John Gruber</a>. <a href="http://www.michelf.com/projects/php-markdown/">More...</a>
65 Version: 1.0.1m
66 Author: Michel Fortin
67 Author URI: http://www.michelf.com/
68 */
69
if (isset($wp_version)) {
        # This whole section is skipped unless $wp_version is set.
        # More details about how it works here:
        # <http://www.michelf.com/weblog/2005/wordpress-text-flow-vs-markdown/>

        # Post content and excerpts
        # - Remove WordPress paragraph generator.
        # - Run Markdown on excerpt, then remove all tags.
        # - Add paragraph tag around the excerpt, but remove it for the excerpt rss.
        if (MARKDOWN_WP_POSTS) {
                remove_filter('the_content', 'wpautop');
                remove_filter('the_content_rss', 'wpautop');
                remove_filter('the_excerpt', 'wpautop');

                add_filter('the_content', 'Markdown', 6);
                add_filter('the_content_rss', 'Markdown', 6);
                add_filter('get_the_excerpt', 'Markdown', 6);
                add_filter('get_the_excerpt', 'trim', 7);
                add_filter('the_excerpt', 'mdwp_add_p');
                add_filter('the_excerpt_rss', 'mdwp_strip_p');

                # balanceTags is detached from the *_save_pre hooks and
                # attached to the_content / get_the_excerpt instead.
                remove_filter('content_save_pre', 'balanceTags', 50);
                remove_filter('excerpt_save_pre', 'balanceTags', 50);
                add_filter('the_content', 'balanceTags', 50);
                add_filter('get_the_excerpt', 'balanceTags', 9);
        }

        # Comments
        # - Remove WordPress paragraph generator.
        # - Remove WordPress auto-link generator.
        # - Scramble important tags before passing them to the kses filter.
        # - Run Markdown on excerpt then remove paragraph tags.
        if (MARKDOWN_WP_COMMENTS) {
                remove_filter('comment_text', 'wpautop', 30);
                remove_filter('comment_text', 'make_clickable');

                add_filter('pre_comment_content', 'Markdown', 6);
                add_filter('pre_comment_content', 'mdwp_hide_tags', 8);
                add_filter('pre_comment_content', 'mdwp_show_tags', 12);
                add_filter('get_comment_text', 'Markdown', 6);
                add_filter('get_comment_excerpt', 'Markdown', 6);
                add_filter('get_comment_excerpt', 'mdwp_strip_p', 7);

                # Parallel lists: the tag at index N is swapped with the
                # placeholder at index N by mdwp_hide_tags / mdwp_show_tags.
                global $mdwp_hidden_tags, $mdwp_placeholders;
                $mdwp_hidden_tags = explode(
                        ' ',
                        '<p> </p> <pre> </pre> <ol> </ol> <ul> </ul> <li> </li>'
                );
                $mdwp_placeholders = explode(
                        ' ',
                        str_rot13(
                                'pEj07ZbbBZ U1kqgh4w4p pre2zmeN6K QTi31t9pre ol0MP1jzJR '.
                                'ML5IjmbRol ulANi1NsGY J7zRLJqPul liA8ctl16T K9nhooUHli'
                        )
                );
        }

        # Wrap $text in <p> tags, splitting on runs of two or more newlines,
        # unless it is empty or already begins with a block-level tag.
        function mdwp_add_p($text) {
                if (preg_match('{^$|^<(p|ul|ol|dl|pre|blockquote)>}i', $text)) {
                        return $text;
                }

                $wrapped = '<p>'.$text.'</p>';
                return preg_replace('{\n{2,}}', "</p>\n\n<p>", $wrapped);
        }

        # Remove every opening and closing <p> tag (case-insensitive).
        function mdwp_strip_p($t) {
                $without_p = preg_replace('{</?p>}i', '', $t);
                return $without_p;
        }

        # Replace each protected tag with its placeholder.
        function mdwp_hide_tags($text) {
                global $mdwp_hidden_tags, $mdwp_placeholders;

                $hidden = str_replace($mdwp_hidden_tags, $mdwp_placeholders, $text);
                return $hidden;
        }

        # Replace each placeholder with the tag it stands for.
        function mdwp_show_tags($text) {
                global $mdwp_hidden_tags, $mdwp_placeholders;

                $restored = str_replace($mdwp_placeholders, $mdwp_hidden_tags, $text);
                return $restored;
        }
}
137
138
139 ### bBlog Plugin Info ###
140
function identify_modifier_markdown() {
#
# Return the plugin description array for this modifier (bBlog plugin
# info). The version entry is taken from MARKDOWN_VERSION.
#
        $info = array();

        $info['name']        = 'markdown';
        $info['type']        = 'modifier';
        $info['nicename']    = 'Markdown';
        $info['description'] = 'A text-to-HTML conversion tool for web writers';
        $info['authors']     = 'Michel Fortin and John Gruber';
        $info['licence']     = 'BSD-like';
        $info['version']     = MARKDOWN_VERSION;
        $info['help']        = '<a href="http://daringfireball.net/projects/markdown/syntax">Markdown syntax</a> allows you to write using an easy-to-read, easy-to-write plain text format. Based on the original Perl version by <a href="http://daringfireball.net/">John Gruber</a>. <a href="http://www.michelf.com/projects/php-markdown/">More...</a>';

        return $info;
}
153
154
155 ### Smarty Modifier Interface ###
156
function smarty_modifier_markdown($text) {
#
# Smarty modifier entry point: passes $text to Markdown() and returns
# whatever it returns.
#
        $converted = Markdown($text);
        return $converted;
}
160
161
162 ### Textile Compatibility Mode ###
163
164 # Rename this file to "classTextile.php" and it can replace Textile everywhere.
165
if (strcasecmp(substr(__FILE__, -16), "classTextile.php") === 0) {
        # Only active when this file has been renamed to "classTextile.php"
        # (compared case-insensitively against the end of __FILE__).

        # Try to include PHP SmartyPants. Should be in the same directory.
        @include_once 'smartypants.php';

        # Stand-in for the Textile class; it runs Markdown instead.
        class Textile {
                # Convert $text with Markdown when neither $lite nor $encode
                # is given, then apply SmartyPants if that function exists.
                function TextileThis($text, $lite='', $encode='') {
                        $use_markdown = ($lite == '' && $encode == '');
                        if ($use_markdown) {
                                $text = Markdown($text);
                        }
                        if (function_exists('SmartyPants')) {
                                $text = SmartyPants($text);
                        }
                        return $text;
                }

                # Fake restricted version: restrictions are not supported for
                # now, so $noimage is ignored and TextileThis does the work.
                function TextileRestricted($text, $lite='', $noimage='') {
                        $converted = $this->TextileThis($text, $lite);
                        return $converted;
                }

                # Workaround to ensure compatibility with TextPattern 4.0.3:
                # returns $text unchanged.
                function blockLite($text) {
                        return $text;
                }
        }
}
184
185
186
187 #
188 # Markdown Parser Class
189 #
190
191 class Markdown_Parser {
192
193         # Regex to match balanced [brackets].
194         # Needed to insert a maximum bracked depth while converting to PHP.
195         var $nested_brackets_depth = 6;
196         var $nested_brackets_re;
197         
198         var $nested_url_parenthesis_depth = 4;
199         var $nested_url_parenthesis_re;
200
201         # Table of hash values for escaped characters:
202         var $escape_chars = '\`*_{}[]()>#+-.!';
203         var $escape_chars_re;
204
205         # Change to ">" for HTML output.
206         var $empty_element_suffix = MARKDOWN_EMPTY_ELEMENT_SUFFIX;
207         var $tab_width = MARKDOWN_TAB_WIDTH;
208         
209         # Change to `true` to disallow markup or entities.
210         var $no_markup = false;
211         var $no_entities = false;
212         
213         # Predefined urls and titles for reference links and images.
214         var $predef_urls = array();
215         var $predef_titles = array();
216
217
218         function Markdown_Parser() {
219         #
220         # Constructor function. Initialize appropriate member variables.
221         #
222                 $this->_initDetab();
223                 $this->prepareItalicsAndBold();
224         
225                 $this->nested_brackets_re = 
226                         str_repeat('(?>[^\[\]]+|\[', $this->nested_brackets_depth).
227                         str_repeat('\])*', $this->nested_brackets_depth);
228         
229                 $this->nested_url_parenthesis_re = 
230                         str_repeat('(?>[^()\s]+|\(', $this->nested_url_parenthesis_depth).
231                         str_repeat('(?>\)))*', $this->nested_url_parenthesis_depth);
232                 
233                 $this->escape_chars_re = '['.preg_quote($this->escape_chars).']';
234                 
235                 # Sort document, block, and span gamut in ascendent priority order.
236                 asort($this->document_gamut);
237                 asort($this->block_gamut);
238                 asort($this->span_gamut);
239         }
240
241
242         # Internal hashes used during transformation.
243         var $urls = array();
244         var $titles = array();
245         var $html_hashes = array();
246         
247         # Status flag to avoid invalid nesting.
248         var $in_anchor = false;
249         
250         
251         function setup() {
252         #
253         # Called before the transformation process starts to setup parser 
254         # states.
255         #
256                 # Clear global hashes.
257                 $this->urls = $this->predef_urls;
258                 $this->titles = $this->predef_titles;
259                 $this->html_hashes = array();
260                 
261                 $in_anchor = false;
262         }
263         
264         function teardown() {
265         #
266         # Called after the transformation process to clear any variable 
267         # which may be taking up memory unnecessarly.
268         #
269                 $this->urls = array();
270                 $this->titles = array();
271                 $this->html_hashes = array();
272         }
273
274
275         function transform($text) {
276         #
277         # Main function. Performs some preprocessing on the input text
278         # and pass it through the document gamut.
279         #
280                 $this->setup();
281         
282                 # Remove UTF-8 BOM and marker character in input, if present.
283                 $text = preg_replace('{^\xEF\xBB\xBF|\x1A}', '', $text);
284
285                 # Standardize line endings:
286                 #   DOS to Unix and Mac to Unix
287                 $text = preg_replace('{\r\n?}', "\n", $text);
288
289                 # Make sure $text ends with a couple of newlines:
290                 $text .= "\n\n";
291
292                 # Convert all tabs to spaces.
293                 $text = $this->detab($text);
294
295                 # Turn block-level HTML blocks into hash entries
296                 $text = $this->hashHTMLBlocks($text);
297
298                 # Strip any lines consisting only of spaces and tabs.
299                 # This makes subsequent regexen easier to write, because we can
300                 # match consecutive blank lines with /\n+/ instead of something
301                 # contorted like /[ ]*\n+/ .
302                 $text = preg_replace('/^[ ]+$/m', '', $text);
303
304                 # Run document gamut methods.
305                 foreach ($this->document_gamut as $method => $priority) {
306                         $text = $this->$method($text);
307                 }
308                 
309                 $this->teardown();
310
311                 return $text . "\n";
312         }
313         
314         var $document_gamut = array(
315                 # Strip link definitions, store in hashes.
316                 "stripLinkDefinitions" => 20,
317                 
318                 "runBasicBlockGamut"   => 30,
319                 );
320
321
322         function stripLinkDefinitions($text) {
323         #
324         # Strips link definitions from text, stores the URLs and titles in
325         # hash references.
326         #
327                 $less_than_tab = $this->tab_width - 1;
328
329                 # Link defs are in the form: ^[id]: url "optional title"
330                 $text = preg_replace_callback('{
331                                                         ^[ ]{0,'.$less_than_tab.'}\[(.+)\][ ]?: # id = $1
332                                                           [ ]*
333                                                           \n?                           # maybe *one* newline
334                                                           [ ]*
335                                                         <?(\S+?)>?                      # url = $2
336                                                           [ ]*
337                                                           \n?                           # maybe one newline
338                                                           [ ]*
339                                                         (?:
340                                                                 (?<=\s)                 # lookbehind for whitespace
341                                                                 ["(]
342                                                                 (.*?)                   # title = $3
343                                                                 [")]
344                                                                 [ ]*
345                                                         )?      # title is optional
346                                                         (?:\n+|\Z)
347                         }xm',
348                         array(&$this, '_stripLinkDefinitions_callback'),
349                         $text);
350                 return $text;
351         }
352         function _stripLinkDefinitions_callback($matches) {
353                 $link_id = strtolower($matches[1]);
354                 $this->urls[$link_id] = $matches[2];
355                 $this->titles[$link_id] =& $matches[3];
356                 return ''; # String that will replace the block
357         }
358
359
360         function hashHTMLBlocks($text) {
361                 if ($this->no_markup)  return $text;
362
363                 $less_than_tab = $this->tab_width - 1;
364
365                 # Hashify HTML blocks:
366                 # We only want to do this for block-level HTML tags, such as headers,
367                 # lists, and tables. That's because we still want to wrap <p>s around
368                 # "paragraphs" that are wrapped in non-block-level tags, such as anchors,
369                 # phrase emphasis, and spans. The list of tags we're looking for is
370                 # hard-coded:
371                 #
372                 # *  List "a" is made of tags which can be both inline or block-level.
373                 #    These will be treated block-level when the start tag is alone on 
374                 #    its line, otherwise they're not matched here and will be taken as 
375                 #    inline later.
376                 # *  List "b" is made of tags which are always block-level;
377                 #
378                 $block_tags_a_re = 'ins|del';
379                 $block_tags_b_re = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|'.
380                                                    'script|noscript|form|fieldset|iframe|math';
381
382                 # Regular expression for the content of a block tag.
383                 $nested_tags_level = 4;
384                 $attr = '
385                         (?>                             # optional tag attributes
386                           \s                    # starts with whitespace
387                           (?>
388                                 [^>"/]+         # text outside quotes
389                           |
390                                 /+(?!>)         # slash not followed by ">"
391                           |
392                                 "[^"]*"         # text inside double quotes (tolerate ">")
393                           |
394                                 \'[^\']*\'      # text inside single quotes (tolerate ">")
395                           )*
396                         )?      
397                         ';
398                 $content =
399                         str_repeat('
400                                 (?>
401                                   [^<]+                 # content without tag
402                                 |
403                                   <\2                   # nested opening tag
404                                         '.$attr.'       # attributes
405                                         (?>
406                                           />
407                                         |
408                                           >', $nested_tags_level).      # end of opening tag
409                                           '.*?'.                                        # last level nested tag content
410                         str_repeat('
411                                           </\2\s*>      # closing nested tag
412                                         )
413                                   |                             
414                                         <(?!/\2\s*>     # other tags with a different name
415                                   )
416                                 )*',
417                                 $nested_tags_level);
418                 $content2 = str_replace('\2', '\3', $content);
419
420                 # First, look for nested blocks, e.g.:
421                 #       <div>
422                 #               <div>
423                 #               tags for inner block must be indented.
424                 #               </div>
425                 #       </div>
426                 #
427                 # The outermost tags must start at the left margin for this to match, and
428                 # the inner nested divs must be indented.
429                 # We need to do this before the next, more liberal match, because the next
430                 # match will start at the first `<div>` and stop at the first `</div>`.
431                 $text = preg_replace_callback('{(?>
432                         (?>
433                                 (?<=\n\n)               # Starting after a blank line
434                                 |                               # or
435                                 \A\n?                   # the beginning of the doc
436                         )
437                         (                                               # save in $1
438
439                           # Match from `\n<tag>` to `</tag>\n`, handling nested tags 
440                           # in between.
441                                         
442                                                 [ ]{0,'.$less_than_tab.'}
443                                                 <('.$block_tags_b_re.')# start tag = $2
444                                                 '.$attr.'>                      # attributes followed by > and \n
445                                                 '.$content.'            # content, support nesting
446                                                 </\2>                           # the matching end tag
447                                                 [ ]*                            # trailing spaces/tabs
448                                                 (?=\n+|\Z)      # followed by a newline or end of document
449
450                         | # Special version for tags of group a.
451
452                                                 [ ]{0,'.$less_than_tab.'}
453                                                 <('.$block_tags_a_re.')# start tag = $3
454                                                 '.$attr.'>[ ]*\n        # attributes followed by >
455                                                 '.$content2.'           # content, support nesting
456                                                 </\3>                           # the matching end tag
457                                                 [ ]*                            # trailing spaces/tabs
458                                                 (?=\n+|\Z)      # followed by a newline or end of document
459                                         
460                         | # Special case just for <hr />. It was easier to make a special 
461                           # case than to make the other regex more complicated.
462                         
463                                                 [ ]{0,'.$less_than_tab.'}
464                                                 <(hr)                           # start tag = $2
465                                                 '.$attr.'                       # attributes
466                                                 /?>                                     # the matching end tag
467                                                 [ ]*
468                                                 (?=\n{2,}|\Z)           # followed by a blank line or end of document
469                         
470                         | # Special case for standalone HTML comments:
471                         
472                                         [ ]{0,'.$less_than_tab.'}
473                                         (?s:
474                                                 <!-- .*? -->
475                                         )
476                                         [ ]*
477                                         (?=\n{2,}|\Z)           # followed by a blank line or end of document
478                         
479                         | # PHP and ASP-style processor instructions (<? and <%)
480                         
481                                         [ ]{0,'.$less_than_tab.'}
482                                         (?s:
483                                                 <([?%])                 # $2
484                                                 .*?
485                                                 \2>
486                                         )
487                                         [ ]*
488                                         (?=\n{2,}|\Z)           # followed by a blank line or end of document
489                                         
490                         )
491                         )}Sxmi',
492                         array(&$this, '_hashHTMLBlocks_callback'),
493                         $text);
494
495                 return $text;
496         }
497         function _hashHTMLBlocks_callback($matches) {
498                 $text = $matches[1];
499                 $key  = $this->hashBlock($text);
500                 return "\n\n$key\n\n";
501         }
502         
503         
504         function hashPart($text, $boundary = 'X') {
505         #
506         # Called whenever a tag must be hashed when a function insert an atomic 
507         # element in the text stream. Passing $text to through this function gives
508         # a unique text-token which will be reverted back when calling unhash.
509         #
510         # The $boundary argument specify what character should be used to surround
511         # the token. By convension, "B" is used for block elements that needs not
512         # to be wrapped into paragraph tags at the end, ":" is used for elements
513         # that are word separators and "X" is used in the general case.
514         #
515                 # Swap back any tag hash found in $text so we do not have to `unhash`
516                 # multiple times at the end.
517                 $text = $this->unhash($text);
518                 
519                 # Then hash the block.
520                 static $i = 0;
521                 $key = "$boundary\x1A" . ++$i . $boundary;
522                 $this->html_hashes[$key] = $text;
523                 return $key; # String that will replace the tag.
524         }
525
526
527         function hashBlock($text) {
528         #
529         # Shortcut function for hashPart with block-level boundaries.
530         #
531                 return $this->hashPart($text, 'B');
532         }
533
534
535         var $block_gamut = array(
536         #
537         # These are all the transformations that form block-level
538         # tags like paragraphs, headers, and list items.
539         #
540                 "doHeaders"         => 10,
541                 "doHorizontalRules" => 20,
542                 
543                 "doLists"           => 40,
544                 "doCodeBlocks"      => 50,
545                 "doBlockQuotes"     => 60,
546                 );
547
548         function runBlockGamut($text) {
549         #
550         # Run block gamut tranformations.
551         #
552                 # We need to escape raw HTML in Markdown source before doing anything 
553                 # else. This need to be done for each block, and not only at the 
554                 # begining in the Markdown function since hashed blocks can be part of
555                 # list items and could have been indented. Indented blocks would have 
556                 # been seen as a code block in a previous pass of hashHTMLBlocks.
557                 $text = $this->hashHTMLBlocks($text);
558                 
559                 return $this->runBasicBlockGamut($text);
560         }
561         
562         function runBasicBlockGamut($text) {
563         #
564         # Run block gamut tranformations, without hashing HTML blocks. This is 
565         # useful when HTML blocks are known to be already hashed, like in the first
566         # whole-document pass.
567         #
568                 foreach ($this->block_gamut as $method => $priority) {
569                         $text = $this->$method($text);
570                 }
571                 
572                 # Finally form paragraph and restore hashed blocks.
573                 $text = $this->formParagraphs($text);
574
575                 return $text;
576         }
577         
578         
579         function doHorizontalRules($text) {
580                 # Do Horizontal Rules:
581                 return preg_replace(
582                         '{
583                                 ^[ ]{0,3}       # Leading space
584                                 ([-*_])         # $1: First marker
585                                 (?>                     # Repeated marker group
586                                         [ ]{0,2}        # Zero, one, or two spaces.
587                                         \1                      # Marker character
588                                 ){2,}           # Group repeated at least twice
589                                 [ ]*            # Tailing spaces
590                                 $                       # End of line.
591                         }mx',
592                         "\n".$this->hashBlock("<hr$this->empty_element_suffix")."\n", 
593                         $text);
594         }
595
596
597         var $span_gamut = array(
598         #
599         # These are all the transformations that occur *within* block-level
600         # tags like paragraphs, headers, and list items.
601         #
602                 # Process character escapes, code spans, and inline HTML
603                 # in one shot.
604                 "parseSpan"           => -30,
605
606                 # Process anchor and image tags. Images must come first,
607                 # because ![foo][f] looks like an anchor.
608                 "doImages"            =>  10,
609                 "doAnchors"           =>  20,
610                 
611                 # Make links out of things like `<http://example.com/>`
612                 # Must come after doAnchors, because you can use < and >
613                 # delimiters in inline links like [this](<url>).
614                 "doAutoLinks"         =>  30,
615                 "encodeAmpsAndAngles" =>  40,
616
617                 "doItalicsAndBold"    =>  50,
618                 "doHardBreaks"        =>  60,
619                 );
620
621         function runSpanGamut($text) {
622         #
623         # Run span gamut tranformations.
624         #
625                 foreach ($this->span_gamut as $method => $priority) {
626                         $text = $this->$method($text);
627                 }
628
629                 return $text;
630         }
631         
632         
633         function doHardBreaks($text) {
634                 # Do hard breaks:
635                 return preg_replace_callback('/ {2,}\n/', 
636                         array(&$this, '_doHardBreaks_callback'), $text);
637         }
638         function _doHardBreaks_callback($matches) {
639                 return $this->hashPart("<br$this->empty_element_suffix\n");
640         }
641
642
643         function doAnchors($text) {
644         #
645         # Turn Markdown link shortcuts into XHTML <a> tags.
646         #
647                 if ($this->in_anchor) return $text;
648                 $this->in_anchor = true;
649                 
650                 #
651                 # First, handle reference-style links: [link text] [id]
652                 #
653                 $text = preg_replace_callback('{
654                         (                                       # wrap whole match in $1
655                           \[
656                                 ('.$this->nested_brackets_re.') # link text = $2
657                           \]
658
659                           [ ]?                          # one optional space
660                           (?:\n[ ]*)?           # one optional newline followed by spaces
661
662                           \[
663                                 (.*?)           # id = $3
664                           \]
665                         )
666                         }xs',
667                         array(&$this, '_doAnchors_reference_callback'), $text);
668
669                 #
670                 # Next, inline-style links: [link text](url "optional title")
671                 #
672                 $text = preg_replace_callback('{
673                         (                               # wrap whole match in $1
674                           \[
675                                 ('.$this->nested_brackets_re.') # link text = $2
676                           \]
677                           \(                    # literal paren
678                                 [ ]*
679                                 (?:
680                                         <(\S*)> # href = $3
681                                 |
682                                         ('.$this->nested_url_parenthesis_re.')  # href = $4
683                                 )
684                                 [ ]*
685                                 (                       # $5
686                                   ([\'"])       # quote char = $6
687                                   (.*?)         # Title = $7
688                                   \6            # matching quote
689                                   [ ]*  # ignore any spaces/tabs between closing quote and )
690                                 )?                      # title is optional
691                           \)
692                         )
693                         }xs',
694                         array(&$this, '_DoAnchors_inline_callback'), $text);
695
696                 #
697                 # Last, handle reference-style shortcuts: [link text]
698                 # These must come last in case you've also got [link test][1]
699                 # or [link test](/foo)
700                 #
701 //              $text = preg_replace_callback('{
702 //                      (                                       # wrap whole match in $1
703 //                        \[
704 //                              ([^\[\]]+)              # link text = $2; can\'t contain [ or ]
705 //                        \]
706 //                      )
707 //                      }xs',
708 //                      array(&$this, '_doAnchors_reference_callback'), $text);
709
710                 $this->in_anchor = false;
711                 return $text;
712         }
713         function _doAnchors_reference_callback($matches) {
714                 $whole_match =  $matches[1];
715                 $link_text   =  $matches[2];
716                 $link_id     =& $matches[3];
717
718                 if ($link_id == "") {
719                         # for shortcut links like [this][] or [this].
720                         $link_id = $link_text;
721                 }
722                 
723                 # lower-case and turn embedded newlines into spaces
724                 $link_id = strtolower($link_id);
725                 $link_id = preg_replace('{[ ]?\n}', ' ', $link_id);
726
727                 if (isset($this->urls[$link_id])) {
728                         $url = $this->urls[$link_id];
729                         $url = $this->encodeAttribute($url);
730                         
731                         $result = "<a href=\"$url\"";
732                         if ( isset( $this->titles[$link_id] ) ) {
733                                 $title = $this->titles[$link_id];
734                                 $title = $this->encodeAttribute($title);
735                                 $result .=  " title=\"$title\"";
736                         }
737                 
738                         $link_text = $this->runSpanGamut($link_text);
739                         $result .= ">$link_text</a>";
740                         $result = $this->hashPart($result);
741                 }
742                 else {
743                         $result = $whole_match;
744                 }
745                 return $result;
746         }
747         function _doAnchors_inline_callback($matches) {
748                 $whole_match    =  $matches[1];
749                 $link_text              =  $this->runSpanGamut($matches[2]);
750                 $url                    =  $matches[3] == '' ? $matches[4] : $matches[3];
751                 $title                  =& $matches[7];
752
753                 $url = $this->encodeAttribute($url);
754
755                 $result = "<a href=\"$url\"";
756                 if (isset($title)) {
757                         $title = $this->encodeAttribute($title);
758                         $result .=  " title=\"$title\"";
759                 }
760                 
761                 $link_text = $this->runSpanGamut($link_text);
762                 $result .= ">$link_text</a>";
763
764                 return $this->hashPart($result);
765         }
766
767
768         function doImages($text) {
769         #
770         # Turn Markdown image shortcuts into <img> tags.
771         #
772                 #
773                 # First, handle reference-style labeled images: ![alt text][id]
774                 #
775                 $text = preg_replace_callback('{
776                         (                               # wrap whole match in $1
777                           !\[
778                                 ('.$this->nested_brackets_re.')         # alt text = $2
779                           \]
780
781                           [ ]?                          # one optional space
782                           (?:\n[ ]*)?           # one optional newline followed by spaces
783
784                           \[
785                                 (.*?)           # id = $3
786                           \]
787
788                         )
789                         }xs', 
790                         array(&$this, '_doImages_reference_callback'), $text);
791
792                 #
793                 # Next, handle inline images:  ![alt text](url "optional title")
794                 # Don't forget: encode * and _
795                 #
796                 $text = preg_replace_callback('{
797                         (                               # wrap whole match in $1
798                           !\[
799                                 ('.$this->nested_brackets_re.')         # alt text = $2
800                           \]
801                           \s?                   # One optional whitespace character
802                           \(                    # literal paren
803                                 [ ]*
804                                 (?:
805                                         <(\S*)> # src url = $3
806                                 |
807                                         ('.$this->nested_url_parenthesis_re.')  # src url = $4
808                                 )
809                                 [ ]*
810                                 (                       # $5
811                                   ([\'"])       # quote char = $6
812                                   (.*?)         # title = $7
813                                   \6            # matching quote
814                                   [ ]*
815                                 )?                      # title is optional
816                           \)
817                         )
818                         }xs',
819                         array(&$this, '_doImages_inline_callback'), $text);
820
821                 return $text;
822         }
823         function _doImages_reference_callback($matches) {
824                 $whole_match = $matches[1];
825                 $alt_text    = $matches[2];
826                 $link_id     = strtolower($matches[3]);
827
828                 if ($link_id == "") {
829                         $link_id = strtolower($alt_text); # for shortcut links like ![this][].
830                 }
831
832                 $alt_text = $this->encodeAttribute($alt_text);
833                 if (isset($this->urls[$link_id])) {
834                         $url = $this->encodeAttribute($this->urls[$link_id]);
835                         $result = "<img src=\"$url\" alt=\"$alt_text\"";
836                         if (isset($this->titles[$link_id])) {
837                                 $title = $this->titles[$link_id];
838                                 $title = $this->encodeAttribute($title);
839                                 $result .=  " title=\"$title\"";
840                         }
841                         $result .= $this->empty_element_suffix;
842                         $result = $this->hashPart($result);
843                 }
844                 else {
845                         # If there's no such link ID, leave intact:
846                         $result = $whole_match;
847                 }
848
849                 return $result;
850         }
851         function _doImages_inline_callback($matches) {
852                 $whole_match    = $matches[1];
853                 $alt_text               = $matches[2];
854                 $url                    = $matches[3] == '' ? $matches[4] : $matches[3];
855                 $title                  =& $matches[7];
856
857                 $alt_text = $this->encodeAttribute($alt_text);
858                 $url = $this->encodeAttribute($url);
859                 $result = "<img src=\"$url\" alt=\"$alt_text\"";
860                 if (isset($title)) {
861                         $title = $this->encodeAttribute($title);
862                         $result .=  " title=\"$title\""; # $title already quoted
863                 }
864                 $result .= $this->empty_element_suffix;
865
866                 return $this->hashPart($result);
867         }
868
869
870         function doHeaders($text) {
871                 # Setext-style headers:
872                 #         Header 1
873                 #         ========
874                 #  
875                 #         Header 2
876                 #         --------
877                 #
878                 $text = preg_replace_callback('{ ^(.+?)[ ]*\n(=+|-+)[ ]*\n+ }mx',
879                         array(&$this, '_doHeaders_callback_setext'), $text);
880
881                 # atx-style headers:
882                 #       # Header 1
883                 #       ## Header 2
884                 #       ## Header 2 with closing hashes ##
885                 #       ...
886                 #       ###### Header 6
887                 #
888                 $text = preg_replace_callback('{
889                                 ^(\#{1,6})      # $1 = string of #\'s
890                                 [ ]*
891                                 (.+?)           # $2 = Header text
892                                 [ ]*
893                                 \#*                     # optional closing #\'s (not counted)
894                                 \n+
895                         }xm',
896                         array(&$this, '_doHeaders_callback_atx'), $text);
897
898                 return $text;
899         }
900         function _doHeaders_callback_setext($matches) {
901                 # Terrible hack to check we haven't found an empty list item.
902                 if ($matches[2] == '-' && preg_match('{^-(?: |$)}', $matches[1]))
903                         return $matches[0];
904                 
905                 $level = $matches[2]{0} == '=' ? 1 : 2;
906                 $block = "<h$level>".$this->runSpanGamut($matches[1])."</h$level>";
907                 return "\n" . $this->hashBlock($block) . "\n\n";
908         }
909         function _doHeaders_callback_atx($matches) {
910                 $level = strlen($matches[1]);
911                 $block = "<h$level>".$this->runSpanGamut($matches[2])."</h$level>";
912                 return "\n" . $this->hashBlock($block) . "\n\n";
913         }
914
915
916         function doLists($text) {
917         #
918         # Form HTML ordered (numbered) and unordered (bulleted) lists.
919         #
920                 $less_than_tab = $this->tab_width - 1;
921
922                 # Re-usable patterns to match list item bullets and number markers:
923                 $marker_ul_re  = '[*+-]';
924                 $marker_ol_re  = '\d+[.]';
925                 $marker_any_re = "(?:$marker_ul_re|$marker_ol_re)";
926
927                 $markers_relist = array($marker_ul_re, $marker_ol_re);
928
929                 foreach ($markers_relist as $marker_re) {
930                         # Re-usable pattern to match any entirel ul or ol list:
931                         $whole_list_re = '
932                                 (                                                               # $1 = whole list
933                                   (                                                             # $2
934                                         [ ]{0,'.$less_than_tab.'}
935                                         ('.$marker_re.')                        # $3 = first list item marker
936                                         [ ]+
937                                   )
938                                   (?s:.+?)
939                                   (                                                             # $4
940                                           \z
941                                         |
942                                           \n{2,}
943                                           (?=\S)
944                                           (?!                                           # Negative lookahead for another list item marker
945                                                 [ ]*
946                                                 '.$marker_re.'[ ]+
947                                           )
948                                   )
949                                 )
950                         '; // mx
951                         
952                         # We use a different prefix before nested lists than top-level lists.
953                         # See extended comment in _ProcessListItems().
954                 
955                         if ($this->list_level) {
956                                 $text = preg_replace_callback('{
957                                                 ^
958                                                 '.$whole_list_re.'
959                                         }mx',
960                                         array(&$this, '_doLists_callback'), $text);
961                         }
962                         else {
963                                 $text = preg_replace_callback('{
964                                                 (?:(?<=\n)\n|\A\n?) # Must eat the newline
965                                                 '.$whole_list_re.'
966                                         }mx',
967                                         array(&$this, '_doLists_callback'), $text);
968                         }
969                 }
970
971                 return $text;
972         }
973         function _doLists_callback($matches) {
974                 # Re-usable patterns to match list item bullets and number markers:
975                 $marker_ul_re  = '[*+-]';
976                 $marker_ol_re  = '\d+[.]';
977                 $marker_any_re = "(?:$marker_ul_re|$marker_ol_re)";
978                 
979                 $list = $matches[1];
980                 $list_type = preg_match("/$marker_ul_re/", $matches[3]) ? "ul" : "ol";
981                 
982                 $marker_any_re = ( $list_type == "ul" ? $marker_ul_re : $marker_ol_re );
983                 
984                 $list .= "\n";
985                 $result = $this->processListItems($list, $marker_any_re);
986                 
987                 $result = $this->hashBlock("<$list_type>\n" . $result . "</$list_type>");
988                 return "\n". $result ."\n\n";
989         }
990
991         var $list_level = 0;
992
993         function processListItems($list_str, $marker_any_re) {
994         #
995         #       Process the contents of a single ordered or unordered list, splitting it
996         #       into individual list items.
997         #
998                 # The $this->list_level global keeps track of when we're inside a list.
999                 # Each time we enter a list, we increment it; when we leave a list,
1000                 # we decrement. If it's zero, we're not in a list anymore.
1001                 #
1002                 # We do this because when we're not inside a list, we want to treat
1003                 # something like this:
1004                 #
1005                 #               I recommend upgrading to version
1006                 #               8. Oops, now this line is treated
1007                 #               as a sub-list.
1008                 #
1009                 # As a single paragraph, despite the fact that the second line starts
1010                 # with a digit-period-space sequence.
1011                 #
1012                 # Whereas when we're inside a list (or sub-list), that line will be
1013                 # treated as the start of a sub-list. What a kludge, huh? This is
1014                 # an aspect of Markdown's syntax that's hard to parse perfectly
1015                 # without resorting to mind-reading. Perhaps the solution is to
1016                 # change the syntax rules such that sub-lists must start with a
1017                 # starting cardinal number; e.g. "1." or "a.".
1018                 
1019                 $this->list_level++;
1020
1021                 # trim trailing blank lines:
1022                 $list_str = preg_replace("/\n{2,}\\z/", "\n", $list_str);
1023
1024                 $list_str = preg_replace_callback('{
1025                         (\n)?                                                   # leading line = $1
1026                         (^[ ]*)                                                 # leading whitespace = $2
1027                         ('.$marker_any_re.'                             # list marker and space = $3
1028                                 (?:[ ]+|(?=\n)) # space only required if item is not empty
1029                         )
1030                         ((?s:.*?))                                              # list item text   = $4
1031                         (?:(\n+(?=\n))|\n)                              # tailing blank line = $5
1032                         (?= \n* (\z | \2 ('.$marker_any_re.') (?:[ ]+|(?=\n))))
1033                         }xm',
1034                         array(&$this, '_processListItems_callback'), $list_str);
1035
1036                 $this->list_level--;
1037                 return $list_str;
1038         }
1039         function _processListItems_callback($matches) {
1040                 $item = $matches[4];
1041                 $leading_line =& $matches[1];
1042                 $leading_space =& $matches[2];
1043                 $marker_space = $matches[3];
1044                 $tailing_blank_line =& $matches[5];
1045
1046                 if ($leading_line || $tailing_blank_line || 
1047                         preg_match('/\n{2,}/', $item))
1048                 {
1049                         # Replace marker with the appropriate whitespace indentation
1050                         $item = $leading_space . str_repeat(' ', strlen($marker_space)) . $item;
1051                         $item = $this->runBlockGamut($this->outdent($item)."\n");
1052                 }
1053                 else {
1054                         # Recursion for sub-lists:
1055                         $item = $this->doLists($this->outdent($item));
1056                         $item = preg_replace('/\n+$/', '', $item);
1057                         $item = $this->runSpanGamut($item);
1058                 }
1059
1060                 return "<li>" . $item . "</li>\n";
1061         }
1062
1063
1064         function doCodeBlocks($text) {
1065         #
1066         #       Process Markdown `<pre><code>` blocks.
1067         #
1068                 $text = preg_replace_callback('{
1069                                 (?:\n\n|\A\n?)
1070                                 (                   # $1 = the code block -- one or more lines, starting with a space/tab
1071                                   (?>
1072                                         [ ]{'.$this->tab_width.'}  # Lines must start with a tab or a tab-width of spaces
1073                                         .*\n+
1074                                   )+
1075                                 )
1076                                 ((?=^[ ]{0,'.$this->tab_width.'}\S)|\Z) # Lookahead for non-space at line-start, or end of doc
1077                         }xm',
1078                         array(&$this, '_doCodeBlocks_callback'), $text);
1079
1080                 return $text;
1081         }
1082         function _doCodeBlocks_callback($matches) {
1083                 $codeblock = $matches[1];
1084
1085                 $codeblock = $this->outdent($codeblock);
1086                 $codeblock = htmlspecialchars($codeblock, ENT_NOQUOTES);
1087
1088                 # trim leading newlines and trailing newlines
1089                 $codeblock = preg_replace('/\A\n+|\n+\z/', '', $codeblock);
1090
1091                 $codeblock = "<pre><code>$codeblock\n</code></pre>";
1092                 return "\n\n".$this->hashBlock($codeblock)."\n\n";
1093         }
1094
1095
1096         function makeCodeSpan($code) {
1097         #
1098         # Create a code span markup for $code. Called from handleSpanToken.
1099         #
1100                 $code = htmlspecialchars(trim($code), ENT_NOQUOTES);
1101                 return $this->hashPart("<code>$code</code>");
1102         }
1103
1104
1105         var $em_relist = array(
1106                 ''  => '(?:(?<!\*)\*(?!\*)|(?<!_)_(?!_))(?=\S)(?![.,:;]\s)',
1107                 '*' => '(?<=\S)(?<!\*)\*(?!\*)',
1108                 '_' => '(?<=\S)(?<!_)_(?!_)',
1109                 );
1110         var $strong_relist = array(
1111                 ''   => '(?:(?<!\*)\*\*(?!\*)|(?<!_)__(?!_))(?=\S)(?![.,:;]\s)',
1112                 '**' => '(?<=\S)(?<!\*)\*\*(?!\*)',
1113                 '__' => '(?<=\S)(?<!_)__(?!_)',
1114                 );
1115         var $em_strong_relist = array(
1116                 ''    => '(?:(?<!\*)\*\*\*(?!\*)|(?<!_)___(?!_))(?=\S)(?![.,:;]\s)',
1117                 '***' => '(?<=\S)(?<!\*)\*\*\*(?!\*)',
1118                 '___' => '(?<=\S)(?<!_)___(?!_)',
1119                 );
1120         var $em_strong_prepared_relist;
1121         
1122         function prepareItalicsAndBold() {
1123         #
1124         # Prepare regular expressions for seraching emphasis tokens in any
1125         # context.
1126         #
1127                 foreach ($this->em_relist as $em => $em_re) {
1128                         foreach ($this->strong_relist as $strong => $strong_re) {
1129                                 # Construct list of allowed token expressions.
1130                                 $token_relist = array();
1131                                 if (isset($this->em_strong_relist["$em$strong"])) {
1132                                         $token_relist[] = $this->em_strong_relist["$em$strong"];
1133                                 }
1134                                 $token_relist[] = $em_re;
1135                                 $token_relist[] = $strong_re;
1136                                 
1137                                 # Construct master expression from list.
1138                                 $token_re = '{('. implode('|', $token_relist) .')}';
1139                                 $this->em_strong_prepared_relist["$em$strong"] = $token_re;
1140                         }
1141                 }
1142         }
1143         
1144         function doItalicsAndBold($text) {
1145                 $token_stack = array('');
1146                 $text_stack = array('');
1147                 $em = '';
1148                 $strong = '';
1149                 $tree_char_em = false;
1150                 
1151                 while (1) {
1152                         #
1153                         # Get prepared regular expression for seraching emphasis tokens
1154                         # in current context.
1155                         #
1156                         $token_re = $this->em_strong_prepared_relist["$em$strong"];
1157                         
1158                         #
1159                         # Each loop iteration seach for the next emphasis token. 
1160                         # Each token is then passed to handleSpanToken.
1161                         #
1162                         $parts = preg_split($token_re, $text, 2, PREG_SPLIT_DELIM_CAPTURE);
1163                         $text_stack[0] .= $parts[0];
1164                         $token =& $parts[1];
1165                         $text =& $parts[2];
1166                         
1167                         if (empty($token)) {
1168                                 # Reached end of text span: empty stack without emitting.
1169                                 # any more emphasis.
1170                                 while ($token_stack[0]) {
1171                                         $text_stack[1] .= array_shift($token_stack);
1172                                         $text_stack[0] .= array_shift($text_stack);
1173                                 }
1174                                 break;
1175                         }
1176                         
1177                         $token_len = strlen($token);
1178                         if ($tree_char_em) {
1179                                 # Reached closing marker while inside a three-char emphasis.
1180                                 if ($token_len == 3) {
1181                                         # Three-char closing marker, close em and strong.
1182                                         array_shift($token_stack);
1183                                         $span = array_shift($text_stack);
1184                                         $span = $this->runSpanGamut($span);
1185                                         $span = "<strong><em>$span</em></strong>";
1186                                         $text_stack[0] .= $this->hashPart($span);
1187                                         $em = '';
1188                                         $strong = '';
1189                                 } else {
1190                                         # Other closing marker: close one em or strong and
1191                                         # change current token state to match the other
1192                                         $token_stack[0] = str_repeat($token{0}, 3-$token_len);
1193                                         $tag = $token_len == 2 ? "strong" : "em";
1194                                         $span = $text_stack[0];
1195                                         $span = $this->runSpanGamut($span);
1196                                         $span = "<$tag>$span</$tag>";
1197                                         $text_stack[0] = $this->hashPart($span);
1198                                         $$tag = ''; # $$tag stands for $em or $strong
1199                                 }
1200                                 $tree_char_em = false;
1201                         } else if ($token_len == 3) {
1202                                 if ($em) {
1203                                         # Reached closing marker for both em and strong.
1204                                         # Closing strong marker:
1205                                         for ($i = 0; $i < 2; ++$i) {
1206                                                 $shifted_token = array_shift($token_stack);
1207                                                 $tag = strlen($shifted_token) == 2 ? "strong" : "em";
1208                                                 $span = array_shift($text_stack);
1209                                                 $span = $this->runSpanGamut($span);
1210                                                 $span = "<$tag>$span</$tag>";
1211                                                 $text_stack[0] .= $this->hashPart($span);
1212                                                 $$tag = ''; # $$tag stands for $em or $strong
1213                                         }
1214                                 } else {
1215                                         # Reached opening three-char emphasis marker. Push on token 
1216                                         # stack; will be handled by the special condition above.
1217                                         $em = $token{0};
1218                                         $strong = "$em$em";
1219                                         array_unshift($token_stack, $token);
1220                                         array_unshift($text_stack, '');
1221                                         $tree_char_em = true;
1222                                 }
1223                         } else if ($token_len == 2) {
1224                                 if ($strong) {
1225                                         # Unwind any dangling emphasis marker:
1226                                         if (strlen($token_stack[0]) == 1) {
1227                                                 $text_stack[1] .= array_shift($token_stack);
1228                                                 $text_stack[0] .= array_shift($text_stack);
1229                                         }
1230                                         # Closing strong marker:
1231                                         array_shift($token_stack);
1232                                         $span = array_shift($text_stack);
1233                                         $span = $this->runSpanGamut($span);
1234                                         $span = "<strong>$span</strong>";
1235                                         $text_stack[0] .= $this->hashPart($span);
1236                                         $strong = '';
1237                                 } else {
1238                                         array_unshift($token_stack, $token);
1239                                         array_unshift($text_stack, '');
1240                                         $strong = $token;
1241                                 }
1242                         } else {
1243                                 # Here $token_len == 1
1244                                 if ($em) {
1245                                         if (strlen($token_stack[0]) == 1) {
1246                                                 # Closing emphasis marker:
1247                                                 array_shift($token_stack);
1248                                                 $span = array_shift($text_stack);
1249                                                 $span = $this->runSpanGamut($span);
1250                                                 $span = "<em>$span</em>";
1251                                                 $text_stack[0] .= $this->hashPart($span);
1252                                                 $em = '';
1253                                         } else {
1254                                                 $text_stack[0] .= $token;
1255                                         }
1256                                 } else {
1257                                         array_unshift($token_stack, $token);
1258                                         array_unshift($text_stack, '');
1259                                         $em = $token;
1260                                 }
1261                         }
1262                 }
1263                 return $text_stack[0];
1264         }
1265
1266
function doBlockQuotes($text) {
#
# Locate every run of ">"-quoted lines in $text (a quoted first line, the
# non-empty lines that follow it, and any trailing blank lines, repeated)
# and replace each run with the result of _doBlockQuotes_callback.
#
    $blockquote_re = '/
          (                             # Wrap whole match in $1
            (?>
              ^[ ]*>[ ]?                # ">" at the start of a line
                .+\n                    # rest of the first line
              (.+\n)*                   # subsequent consecutive lines
              \n*                       # blanks
            )+
          )
        /xm';

    return preg_replace_callback(
        $blockquote_re,
        array(&$this, '_doBlockQuotes_callback'),
        $text
    );
}
function _doBlockQuotes_callback($matches) {
#
# Callback for doBlockQuotes. $matches[1] is one complete quoted run;
# the return value is the hashed <blockquote> element padded with
# newlines.
#
    # Strip one level of ">" markers and empty out whitespace-only lines.
    $inner = preg_replace('/^[ ]*>[ ]?|^[ ]+$/m', '', $matches[1]);

    # Recurse: run the block gamut on the unquoted text.
    $inner = $this->runBlockGamut($inner);

    # Indent every line by two spaces...
    $inner = preg_replace('/^/m', "  ", $inner);

    # ...then take that indentation back out of <pre> content, where the
    # added leading spaces cause problems.
    $inner = preg_replace_callback(
        '{(\s*<pre>.+?</pre>)}sx',
        array(&$this, '_DoBlockQuotes_callback2'),
        $inner
    );

    $html = "<blockquote>\n$inner\n</blockquote>";
    return "\n" . $this->hashBlock($html) . "\n\n";
}
function _doBlockQuotes_callback2($matches) {
#
# Callback used by _doBlockQuotes_callback for <pre> content: removes the
# two leading spaces from the start of every line of $matches[1].
#
    return preg_replace('/^  /m', '', $matches[1]);
}
1301
1302
function formParagraphs($text) {
#
#   Params:
#       $text - string to process with html <p> tags
#
#   Returns the text with every plain chunk wrapped in <p>...</p> and
#   every block placeholder replaced by its stored HTML, the chunks being
#   joined by one blank line.
#
    # Drop newlines at the very beginning and very end.
    $text = preg_replace('/\A\n+|\n+\z/', '', $text);

    # Chunks are separated by two or more consecutive newlines.
    $chunks = preg_split('/\n{2,}/', $text, -1, PREG_SPLIT_NO_EMPTY);

    foreach ($chunks as $index => $chunk) {
        if (preg_match('/^B\x1A[0-9]+B$/', $chunk)) {
            # The chunk is a block placeholder: put the stored block back
            # as-is. (Special handling of <div markdown="1"> blocks was
            # disabled in this version, so no further processing is done.)
            $chunks[$index] = $this->html_hashes[$chunk];
            continue;
        }

        # Otherwise the chunk is a paragraph: process its spans, replace
        # any leading spaces with the opening tag, close it, and restore
        # the hashed parts.
        $paragraph = $this->runSpanGamut($chunk);
        $paragraph = preg_replace('/^([ ]*)/', "<p>", $paragraph);
        $paragraph .= "</p>";
        $chunks[$index] = $this->unhash($paragraph);
    }

    return implode("\n\n", $chunks);
}
1371
1372
function encodeAttribute($text) {
#
# Prepare $text for use as a double-quoted HTML attribute value: it is
# first run through encodeAmpsAndAngles, then every double quote is
# turned into &quot;. Single quotes are not touched, so the result is
# *not* suitable for attributes enclosed in single quotes.
#
    return str_replace('"', '&quot;', $this->encodeAmpsAndAngles($text));
}
1382         
1383         
function encodeAmpsAndAngles($text) {
#
# Encode the ampersands and "<" characters of $text.
#
# In no-entities mode every "&" becomes "&amp;". Otherwise only the
# ampersands that do not start something shaped like a character entity
# are encoded, so existing entities are left alone. Every "<" is then
# replaced by "&lt;".
#
    if (!$this->no_entities) {
        # Ampersand-encoding based entirely on Nat Irons's Amputator
        # MT plugin: <http://bumppo.net/projects/amputator/>
        $text = preg_replace(
            '/&(?!#?[xX]?(?:[0-9a-fA-F]+|\w+);)/',
            '&amp;',
            $text
        );
    } else {
        $text = str_replace('&', '&amp;', $text);
    }

    # Encode remaining <'s
    return str_replace('<', '&lt;', $text);
}
1403
1404
function doAutoLinks($text) {
#
# Turn automatic links written between angle brackets into anchors:
# first <scheme:...> URLs (http, https, ftp, dict), then email addresses,
# each kind being rewritten by its own callback.
#
    $url_re = '{<((https?|ftp|dict):[^\'">\s]+)>}i';

    # Email addresses: <address@domain.foo>
    $email_re = '{
        <
        (?:mailto:)?
        (
            [-.\w\x80-\xFF]+
            \@
            [-a-z0-9\x80-\xFF]+(\.[-a-z0-9\x80-\xFF]+)*\.[a-z]+
        )
        >
        }xi';

    $with_urls = preg_replace_callback(
        $url_re, array(&$this, '_doAutoLinks_url_callback'), $text);

    return preg_replace_callback(
        $email_re, array(&$this, '_doAutoLinks_email_callback'), $with_urls);
}
function _doAutoLinks_url_callback($matches) {
#
# Callback for doAutoLinks: $matches[1] is the URL found between the
# angle brackets. The attribute-encoded URL is used both as the href and
# as the link text; the anchor is returned as a hashed part.
#
    $href = $this->encodeAttribute($matches[1]);
    return $this->hashPart('<a href="' . $href . '">' . $href . '</a>');
}
function _doAutoLinks_email_callback($matches) {
#
# Callback for doAutoLinks: $matches[1] is the email address found
# between the angle brackets. Returns the obfuscated mailto link built by
# encodeEmailAddress, as a hashed part.
#
    return $this->hashPart($this->encodeEmailAddress($matches[1]));
}
1434
1435
function encodeEmailAddress($addr) {
#
#   Input: an email address, e.g. "foo@example.com"
#
#   Output: the email address as a mailto link, with each character
#       of the address encoded as either a decimal or hex entity, in
#       the hopes of foiling most address harvesting spam bots. E.g.:
#
#     <p><a href="&#109;&#x61;&#105;&#x6c;&#116;&#x6f;&#58;&#x66;o&#111;
#        &#x40;&#101;&#x78;&#97;&#x6d;&#112;&#x6c;&#101;&#46;&#x63;&#111;
#        &#x6d;">&#x66;o&#111;&#x40;&#101;&#x78;&#97;&#x6d;&#112;&#x6c;
#        &#101;&#46;&#x63;&#111;&#x6d;</a></p>
#
#   Based on a filter by Matthew Wickline, posted to BBEdit-Talk.
#   With some optimizations by Milian Wolff.
#
    $addr = "mailto:" . $addr;
    $chars = preg_split('/(?<!^)(?!$)/', $addr);

    # The seed is derived from the address itself, so the same address
    # always produces the same encoding.
    $seed = (int)abs(crc32($addr) / strlen($addr));

    foreach ($chars as $key => $char) {
        $ord = ord($char);

        # Non-ascii chars are passed through untouched.
        if ($ord >= 128) {
            continue;
        }

        # Pseudo-random value in 0..99 deciding how this char is written:
        # roughly 10% raw, 45% hex, 45% dec.
        $r = ($seed * (1 + $key)) % 100;

        # Leave the char raw -- except '@', which *must* be encoded.
        if ($r > 90 && $char != '@') {
            continue;
        }

        $chars[$key] = ($r < 45)
            ? '&#x'.dechex($ord).';'
            : '&#'.$ord.';';
    }

    $href = implode('', $chars);
    $text = implode('', array_slice($chars, 7)); # text without `mailto:`

    return "<a href=\"$href\">$text</a>";
}
1475
1476
function parseSpan($str) {
#
# Walk through $str token by token. A token is an escaped character, an
# opening code span marker or -- unless no_markup is set -- a piece of
# inline HTML (comment, processing instruction or tag). Plain text is
# copied to the output unchanged; each token is replaced by whatever
# handleSpanToken returns for it.
#
    # Alternatives matching inline HTML; left out entirely in no-markup mode.
    $markup_re = $this->no_markup ? '' : '
            |
                <!--    .*?     -->     # comment
            |
                <\?.*?\?> | <%.*?%>     # processing instruction
            |
                <[/!$]?[-a-zA-Z0-9:]+   # regular tags
                (?>
                    \s
                    (?>[^"\'>]+|"[^"]*"|\'[^\']*\')*
                )?
                >
        ';

    $span_re = '{
            (
                \\\\'.$this->escape_chars_re.'
            |
                (?<![`\\\\])
                `+                      # code span marker
        '.$markup_re.'
            )
            }xs';

    $output = '';

    for (;;) {
        # Split off the text before the next token, the token itself and
        # everything after it.
        $parts = preg_split($span_re, $str, 2, PREG_SPLIT_DELIM_CAPTURE);

        # Text preceding the token (possibly empty) goes straight to the
        # output.
        $output .= $parts[0];

        # No token found: the end of the string has been reached.
        if (!isset($parts[1])) {
            break;
        }

        # handleSpanToken receives the remaining text by reference and may
        # consume part of it, so continue from whatever it left.
        $output .= $this->handleSpanToken($parts[1], $parts[2]);
        $str = $parts[2];
    }

    return $output;
}
1531         
1532         
function handleSpanToken($token, &$str) {
#
# Handle $token provided by parseSpan by determining its nature and
# returning the corresponding value that should replace it.
#
#   $token - the matched token: a backslash escape, a run of backticks,
#            or anything else (hashed verbatim).
#   $str   - the text following the token, passed by reference. When a
#            code span is recognised, the span and its closing marker are
#            removed from $str.
#
# String offsets use square brackets: the curly-brace form ($token{0})
# is deprecated since PHP 7.4 and a fatal error in PHP 8.
#
    switch ($token[0]) {
        case "\\":
            # Escaped character: emit it as a decimal entity.
            return $this->hashPart("&#". ord($token[1]). ";");
        case "`":
            # Search for end marker in remaining text: the same number of
            # backticks, not followed by another backtick.
            if (preg_match('/^(.*?[^`])'.preg_quote($token).'(?!`)(.*)$/sm', 
                $str, $matches))
            {
                $str = $matches[2];
                $codespan = $this->makeCodeSpan($matches[1]);
                return $this->hashPart($codespan);
            }
            return $token; // return as text since no ending marker found.
        default:
            return $this->hashPart($token);
    }
}
1555
1556
function outdent($text) {
#
# Strip one level of indentation from the start of every line of $text:
# either a single tab or between one and tab_width spaces.
#
    $indent_re = '/^(\t|[ ]{1,' . $this->tab_width . '})/m';
    return preg_replace($indent_re, '', $text);
}
1563
1564
1565         # String length function for detab. `_initDetab` will create a function to 
1566         # handle UTF-8 if the default function does not exist.
1567         var $utf8_strlen = 'mb_strlen';
1568         
function detab($text) {
#
# Replace tabs with the appropriate amount of space.
#
# Only lines that actually contain a tab are touched; each of them is
# rebuilt by _detab_callback, which pads every tab-delimited block out to
# the next tab stop.
#
    return preg_replace_callback(
        '/^.*\t.*$/m',
        array(&$this, '_detab_callback'),
        $text
    );
}
function _detab_callback($matches) {
#
# Callback for detab: $matches[0] is one line containing at least one
# tab. Returns the line with every tab replaced by enough spaces to reach
# the next multiple of tab_width.
#
    $measure = $this->utf8_strlen; # strlen function for UTF-8.
    $width   = $this->tab_width;

    # Cut the line at every tab; the first piece starts the new line.
    $pieces = explode("\t", $matches[0]);
    $line   = array_shift($pieces);

    foreach ($pieces as $piece) {
        # Pad up to the next tab stop, then append the piece.
        $padding = $width - $measure($line, 'UTF-8') % $width;
        $line .= str_repeat(" ", $padding) . $piece;
    }

    return $line;
}
function _initDetab() {
#
# Check for the availability of the function in the `utf8_strlen` property
# (initially `mb_strlen`). If the function is not available, install a
# fallback that loosely counts the number of UTF-8 characters with a
# regular expression.
#
# The fallback is an anonymous function rather than the result of
# create_function(), which is deprecated since PHP 7.2 and removed in
# PHP 8.0.
#
    $current = $this->utf8_strlen;

    # The fallback is already installed: nothing to do. (A closure must not
    # be handed to function_exists(), which expects a function name.)
    if ($current instanceof Closure) return;

    if (function_exists($current)) return;

    # Counts one for every byte in 0x00-0xBF and one for every lead byte
    # (0xC0-0xFF) together with the continuation bytes that follow it.
    # Extra arguments passed by the caller (the encoding) are ignored.
    $this->utf8_strlen = function ($text) {
        return preg_match_all(
            '/[\x00-\xBF]|[\xC0-\xFF][\x80-\xBF]*/',
            $text, $m);
    };
}
1611
1612
function unhash($text) {
#
# Swap the hashed parts back into $text: every placeholder made of a
# character, \x1A, a run of digits and the same character again is
# replaced by what _unhash_callback returns for it.
#
    $placeholder_re = '/(.)\x1A[0-9]+\1/';

    return preg_replace_callback(
        $placeholder_re,
        array(&$this, '_unhash_callback'),
        $text
    );
}
function _unhash_callback($matches) {
#
# Callback for unhash: looks the whole matched placeholder up in
# html_hashes and returns the stored text.
#
    $placeholder = $matches[0];
    return $this->html_hashes[$placeholder];
}
1623
1624 }
1625
1626 /*
1627
1628 PHP Markdown
1629 ============
1630
1631 Description
1632 -----------
1633
1634 This is a PHP translation of the original Markdown formatter written in
1635 Perl by John Gruber.
1636
1637 Markdown is a text-to-HTML filter; it translates an easy-to-read /
1638 easy-to-write structured text format into HTML. Markdown's text format
1639 is most similar to that of plain text email, and supports features such
1640 as headers, *emphasis*, code blocks, blockquotes, and links.
1641
1642 Markdown's syntax is designed not as a generic markup language, but
1643 specifically to serve as a front-end to (X)HTML. You can use span-level
1644 HTML tags anywhere in a Markdown document, and you can use block level
1645 HTML tags (like <div> and <table>) as well.
1646
1647 For more information about Markdown's syntax, see:
1648
1649 <http://daringfireball.net/projects/markdown/>
1650
1651
1652 Bugs
1653 ----
1654
1655 To file bug reports please send email to:
1656
1657 <michel.fortin@michelf.com>
1658
1659 Please include with your report: (1) the example input; (2) the output you
1660 expected; (3) the output Markdown actually produced.
1661
1662
1663 Version History
1664 --------------- 
1665
1666 See the readme file for detailed release notes for this version.
1667
1668
1669 Copyright and License
1670 ---------------------
1671
1672 PHP Markdown
1673 Copyright (c) 2004-2008 Michel Fortin  
1674 <http://www.michelf.com/>  
1675 All rights reserved.
1676
1677 Based on Markdown
1678 Copyright (c) 2003-2006 John Gruber   
1679 <http://daringfireball.net/>   
1680 All rights reserved.
1681
1682 Redistribution and use in source and binary forms, with or without
1683 modification, are permitted provided that the following conditions are
1684 met:
1685
1686 *       Redistributions of source code must retain the above copyright notice,
1687         this list of conditions and the following disclaimer.
1688
1689 *       Redistributions in binary form must reproduce the above copyright
1690         notice, this list of conditions and the following disclaimer in the
1691         documentation and/or other materials provided with the distribution.
1692
1693 *       Neither the name "Markdown" nor the names of its contributors may
1694         be used to endorse or promote products derived from this software
1695         without specific prior written permission.
1696
1697 This software is provided by the copyright holders and contributors "as
1698 is" and any express or implied warranties, including, but not limited
1699 to, the implied warranties of merchantability and fitness for a
1700 particular purpose are disclaimed. In no event shall the copyright owner
1701 or contributors be liable for any direct, indirect, incidental, special,
1702 exemplary, or consequential damages (including, but not limited to,
1703 procurement of substitute goods or services; loss of use, data, or
1704 profits; or business interruption) however caused and on any theory of
1705 liability, whether in contract, strict liability, or tort (including
1706 negligence or otherwise) arising in any way out of the use of this
1707 software, even if advised of the possibility of such damage.
1708
1709 */
1710 ?>