# Markdown Extra - A text-to-HTML conversion tool for web writers
#
# PHP Markdown & Extra
-# Copyright (c) 2004-2008 Michel Fortin
-# <http://www.michelf.com/projects/php-markdown/>
+# Copyright (c) 2004-2012 Michel Fortin
+# <http://michelf.com/projects/php-markdown/>
#
# Original Markdown
-# Copyright (c) 2004-2006 John Gruber
+# Copyright (c) 2004-2006 John Gruber
# <http://daringfireball.net/projects/markdown/>
#
-define( 'MARKDOWN_VERSION', "1.0.1m" ); # Sat 21 Jun 2008
-define( 'MARKDOWNEXTRA_VERSION', "1.2.3" ); # Wed 31 Dec 2008
+define( 'MARKDOWN_VERSION', "1.0.1o" ); # Sun 8 Jan 2012
+define( 'MARKDOWNEXTRA_VERSION', "1.2.5" ); # Sun 8 Jan 2012
#
@define( 'MARKDOWN_FN_LINK_CLASS', "" );
@define( 'MARKDOWN_FN_BACKLINK_CLASS', "" );
-# Enables special handling for links pointing outside of the current domain.
-@define( 'MARKDOWN_EL_ENABLE', true); # Use this feature at all?
-@define( 'MARKDOWN_EL_LOCAL_DOMAIN', null); # Leave as null to autodetect
-@define( 'MARKDOWN_EL_NEW_WINDOW', true); # Open link in a new browser?
-@define( 'MARKDOWN_EL_CSS_CLASS', 'external'); # Leave as null for no class
-
-# Enables header auto-self-linking.
-@define( 'MARKDOWN_HA_ENABLE', true ); # Use this feature at all?
-@define( 'MARKDOWN_HA_CLASS', 'hidden-selflink' ); # Leave as null for no class
-@define( 'MARKDOWN_HA_TEXT', '←' ); # The text to use as the link
-
#
# WordPress settings:
/*
Plugin Name: Markdown Extra
-Plugin URI: http://www.michelf.com/projects/php-markdown/
-Description: <a href="http://daringfireball.net/projects/markdown/syntax">Markdown syntax</a> allows you to write using an easy-to-read, easy-to-write plain text format. Based on the original Perl version by <a href="http://daringfireball.net/">John Gruber</a>. <a href="http://www.michelf.com/projects/php-markdown/">More...</a>
-Version: 1.2.2
+Plugin URI: http://michelf.com/projects/php-markdown/
+Description: <a href="http://daringfireball.net/projects/markdown/syntax">Markdown syntax</a> allows you to write using an easy-to-read, easy-to-write plain text format. Based on the original Perl version by <a href="http://daringfireball.net/">John Gruber</a>. <a href="http://michelf.com/projects/php-markdown/">More...</a>
+Version: 1.2.5
Author: Michel Fortin
-Author URI: http://www.michelf.com/
+Author URI: http://michelf.com/
*/
if (isset($wp_version)) {
# More details about how it works here:
- # <http://www.michelf.com/weblog/2005/wordpress-text-flow-vs-markdown/>
-
+ # <http://michelf.com/weblog/2005/wordpress-text-flow-vs-markdown/>
+
# Post content and excerpts
# - Remove WordPress paragraph generator.
# - Run Markdown on excerpt, then remove all tags.
add_filter('get_the_excerpt', 'trim', 7);
add_filter('the_excerpt', 'mdwp_add_p');
add_filter('the_excerpt_rss', 'mdwp_strip_p');
-
+
remove_filter('content_save_pre', 'balanceTags', 50);
remove_filter('excerpt_save_pre', 'balanceTags', 50);
add_filter('the_content', 'balanceTags', 50);
add_filter('get_the_excerpt', 'balanceTags', 9);
}
-
+
# Add a footnote id prefix to posts when inside a loop.
function mdwp_MarkdownPost($text) {
static $parser;
}
return $parser->transform($text);
}
-
+
# Comments
# - Remove WordPress paragraph generator.
# - Remove WordPress auto-link generator.
add_filter('get_comment_text', 'Markdown', 6);
add_filter('get_comment_excerpt', 'Markdown', 6);
add_filter('get_comment_excerpt', 'mdwp_strip_p', 7);
-
+
global $mdwp_hidden_tags, $mdwp_placeholders;
$mdwp_hidden_tags = explode(' ',
'<p> </p> <pre> </pre> <ol> </ol> <ul> </ul> <li> </li>');
'pEj07ZbbBZ U1kqgh4w4p pre2zmeN6K QTi31t9pre ol0MP1jzJR '.
'ML5IjmbRol ulANi1NsGY J7zRLJqPul liA8ctl16T K9nhooUHli'));
}
-
+
function mdwp_add_p($text) {
	# Wrap $text in <p>...</p> unless it is empty or already opens with one
	# of the block-level tags p, ul, ol, dl, pre or blockquote (any case).
	$already_block = preg_match('{^$|^<(p|ul|ol|dl|pre|blockquote)>}i', $text);
	return $already_block ? $text : '<p>'.$text.'</p>';
}
-
+
function mdwp_strip_p($t) {
	# Remove every opening and closing <p> tag (any letter case) and keep
	# the text that was between them.
	$without_p = preg_replace('{</?p>}i', '', $t);
	return $without_p;
}
function mdwp_hide_tags($text) {
'authors' => 'Michel Fortin and John Gruber',
'licence' => 'GPL',
'version' => MARKDOWNEXTRA_VERSION,
- 'help' => '<a href="http://daringfireball.net/projects/markdown/syntax">Markdown syntax</a> allows you to write using an easy-to-read, easy-to-write plain text format. Based on the original Perl version by <a href="http://daringfireball.net/">John Gruber</a>. <a href="http://www.michelf.com/projects/php-markdown/">More...</a>',
+ 'help' => '<a href="http://daringfireball.net/projects/markdown/syntax">Markdown syntax</a> allows you to write using an easy-to-read, easy-to-write plain text format. Based on the original Perl version by <a href="http://daringfireball.net/">John Gruber</a>. <a href="http://michelf.com/projects/php-markdown/">More...</a>',
);
}
# Needed to insert a maximum bracket depth while converting to PHP.
var $nested_brackets_depth = 6;
var $nested_brackets_re;
-
+
var $nested_url_parenthesis_depth = 4;
var $nested_url_parenthesis_re;
# Change to ">" for HTML output.
var $empty_element_suffix = MARKDOWN_EMPTY_ELEMENT_SUFFIX;
var $tab_width = MARKDOWN_TAB_WIDTH;
-
+
# Change to `true` to disallow markup or entities.
var $no_markup = false;
var $no_entities = false;
-
+
# Predefined urls and titles for reference links and images.
var $predef_urls = array();
var $predef_titles = array();
#
$this->_initDetab();
$this->prepareItalicsAndBold();
-
- $this->nested_brackets_re =
+
+ $this->nested_brackets_re =
str_repeat('(?>[^\[\]]+|\[', $this->nested_brackets_depth).
str_repeat('\])*', $this->nested_brackets_depth);
-
- $this->nested_url_parenthesis_re =
+
+ $this->nested_url_parenthesis_re =
str_repeat('(?>[^()\s]+|\(', $this->nested_url_parenthesis_depth).
str_repeat('(?>\)))*', $this->nested_url_parenthesis_depth);
-
+
$this->escape_chars_re = '['.preg_quote($this->escape_chars).']';
-
+
# Sort document, block, and span gamut in ascendent priority order.
asort($this->document_gamut);
asort($this->block_gamut);
var $urls = array();
var $titles = array();
var $html_hashes = array();
-
+
# Status flag to avoid invalid nesting.
var $in_anchor = false;
-
-
+
+
function setup() {
#
- # Called before the transformation process starts to setup parser
+ # Called before the transformation process starts to setup parser
# states.
#
# Clear global hashes.
$this->urls = $this->predef_urls;
$this->titles = $this->predef_titles;
$this->html_hashes = array();
-
+
# Reset the nesting flag on the instance. A bare `$in_anchor` would only
# create a local variable and leave the property untouched.
$this->in_anchor = false;
}
-
+
function teardown() {
#
- # Called after the transformation process to clear any variable
+ # Called after the transformation process to clear any variable
# which may be taking up memory unnecessarily.
#
$this->urls = array();
# and pass it through the document gamut.
#
$this->setup();
-
+
# Remove UTF-8 BOM and marker character in input, if present.
$text = preg_replace('{^\xEF\xBB\xBF|\x1A}', '', $text);
foreach ($this->document_gamut as $method => $priority) {
$text = $this->$method($text);
}
-
+
$this->teardown();
return $text . "\n";
}
-
+
var $document_gamut = array(
# Strip link definitions, store in hashes.
"stripLinkDefinitions" => 20,
-
+
"runBasicBlockGamut" => 30,
);
[ ]*
\n? # maybe *one* newline
[ ]*
- <?(\S+?)>? # url = $2
+ (?:
+ <(.+?)> # url = $2
+ |
+ (\S+?) # url = $3
+ )
[ ]*
\n? # maybe one newline
[ ]*
(?:
(?<=\s) # lookbehind for whitespace
["(]
- (.*?) # title = $3
+ (.*?) # title = $4
[")]
[ ]*
)? # title is optional
}
function _stripLinkDefinitions_callback($matches) {
#
# Records one link definition: stores its URL and optional title under the
# lower-cased link id, then returns the empty string so that the matched
# definition is removed from the text.
#
$link_id = strtolower($matches[1]);
- $this->urls[$link_id] = $matches[2];
- $this->titles[$link_id] =& $matches[3];
# The URL is in $matches[2] when it was written as <url> and in
# $matches[3] when it was written bare; the title is $matches[4].
# NOTE(review): the title is bound by reference, presumably so that an
# absent optional title group does not raise an undefined-index notice.
+ $url = $matches[2] == '' ? $matches[3] : $matches[2];
+ $this->urls[$link_id] = $url;
+ $this->titles[$link_id] =& $matches[4];
return ''; # String that will replace the block
}
# hard-coded:
#
# * List "a" is made of tags which can be both inline or block-level.
- # These will be treated block-level when the start tag is alone on
- # its line, otherwise they're not matched here and will be taken as
+ # These will be treated block-level when the start tag is alone on
+ # its line, otherwise they're not matched here and will be taken as
# inline later.
# * List "b" is made of tags which are always block-level;
#
$block_tags_a_re = 'ins|del';
$block_tags_b_re = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|'.
- 'script|noscript|form|fieldset|iframe|math|textarea';
+ 'script|noscript|form|fieldset|iframe|math';
# Regular expression for the content of a block tag.
$nested_tags_level = 4;
|
\'[^\']*\' # text inside single quotes (tolerate ">")
)*
- )?
+ )?
';
$content =
str_repeat('
str_repeat('
</\2\s*> # closing nested tag
)
- |
+ |
<(?!/\2\s*> # other tags with a different name
)
)*',
)
( # save in $1
- # Match from `\n<tag>` to `</tag>\n`, handling nested tags
+ # Match from `\n<tag>` to `</tag>\n`, handling nested tags
# in between.
-
+
[ ]{0,'.$less_than_tab.'}
<('.$block_tags_b_re.')# start tag = $2
'.$attr.'> # attributes followed by > and \n
</\3> # the matching end tag
[ ]* # trailing spaces/tabs
(?=\n+|\Z) # followed by a newline or end of document
-
- | # Special case just for <hr />. It was easier to make a special
+
+ | # Special case just for <hr />. It was easier to make a special
# case than to make the other regex more complicated.
-
+
[ ]{0,'.$less_than_tab.'}
<(hr) # start tag = $2
'.$attr.' # attributes
/?> # the matching end tag
[ ]*
(?=\n{2,}|\Z) # followed by a blank line or end of document
-
+
| # Special case for standalone HTML comments:
-
+
[ ]{0,'.$less_than_tab.'}
(?s:
<!-- .*? -->
)
[ ]*
(?=\n{2,}|\Z) # followed by a blank line or end of document
-
+
| # PHP and ASP-style processor instructions (<? and <%)
-
+
[ ]{0,'.$less_than_tab.'}
(?s:
<([?%]) # $2
)
[ ]*
(?=\n{2,}|\Z) # followed by a blank line or end of document
-
+
)
)}Sxmi',
array(&$this, '_hashHTMLBlocks_callback'),
$key = $this->hashBlock($text);
return "\n\n$key\n\n";
}
-
-
+
+
function hashPart($text, $boundary = 'X') {
#
- # Called whenever a tag must be hashed when a function insert an atomic
+ # Called whenever a tag must be hashed when a function insert an atomic
# element in the text stream. Passing $text to through this function gives
# a unique text-token which will be reverted back when calling unhash.
#
# Swap back any tag hash found in $text so we do not have to `unhash`
# multiple times at the end.
$text = $this->unhash($text);
-
+
# Then hash the block.
static $i = 0;
$key = "$boundary\x1A" . ++$i . $boundary;
#
"doHeaders" => 10,
"doHorizontalRules" => 20,
-
+
"doLists" => 40,
"doCodeBlocks" => 50,
"doBlockQuotes" => 60,
#
# Run block gamut transformations.
#
- # We need to escape raw HTML in Markdown source before doing anything
- # else. This need to be done for each block, and not only at the
+ # We need to escape raw HTML in Markdown source before doing anything
+ # else. This need to be done for each block, and not only at the
# beginning in the Markdown function since hashed blocks can be part of
- # list items and could have been indented. Indented blocks would have
+ # list items and could have been indented. Indented blocks would have
# been seen as a code block in a previous pass of hashHTMLBlocks.
$text = $this->hashHTMLBlocks($text);
-
+
return $this->runBasicBlockGamut($text);
}
-
+
function runBasicBlockGamut($text) {
#
- # Run block gamut tranformations, without hashing HTML blocks. This is
+ # Run block gamut tranformations, without hashing HTML blocks. This is
# useful when HTML blocks are known to be already hashed, like in the first
# whole-document pass.
#
# Returns $text after each block_gamut method and formParagraphs() have
# been applied to it.
#
# Methods are called by name in the array's iteration order; the priority
# value itself is not used inside this loop.
foreach ($this->block_gamut as $method => $priority) {
$text = $this->$method($text);
}
-
+
# Finally form paragraph and restore hashed blocks.
$text = $this->formParagraphs($text);
return $text;
}
-
-
+
+
function doHorizontalRules($text) {
# Do Horizontal Rules:
return preg_replace(
[ ]* # Tailing spaces
$ # End of line.
}mx',
- "\n".$this->hashBlock("<hr$this->empty_element_suffix")."\n",
+ "\n".$this->hashBlock("<hr$this->empty_element_suffix")."\n",
$text);
}
# because ![foo][f] looks like an anchor.
"doImages" => 10,
"doAnchors" => 20,
-
+
# Make links out of things like `<http://example.com/>`
# Must come after doAnchors, because you can use < and >
# delimiters in inline links like [this](<url>).
return $text;
}
-
-
+
+
function doHardBreaks($text) {
# Do hard breaks:
# Every run of two or more spaces followed by a newline is passed to
# _doHardBreaks_callback, and the callback's return value replaces it.
# Returns the resulting text.
- return preg_replace_callback('/ {2,}\n/',
+ return preg_replace_callback('/ {2,}\n/',
array(&$this, '_doHardBreaks_callback'), $text);
}
function _doHardBreaks_callback($matches) {
#
if ($this->in_anchor) return $text;
$this->in_anchor = true;
-
+
#
# First, handle reference-style links: [link text] [id]
#
('.$this->nested_brackets_re.') # link text = $2
\]
\( # literal paren
- [ ]*
+ [ \n]*
(?:
- <(\S*)> # href = $3
+ <(.+?)> # href = $3
|
('.$this->nested_url_parenthesis_re.') # href = $4
)
- [ ]*
+ [ \n]*
( # $5
([\'"]) # quote char = $6
(.*?) # Title = $7
\6 # matching quote
- [ ]* # ignore any spaces/tabs between closing quote and )
+ [ \n]* # ignore any spaces/tabs between closing quote and )
)? # title is optional
\)
)
}xs',
- array(&$this, '_DoAnchors_inline_callback'), $text);
+ array(&$this, '_doAnchors_inline_callback'), $text);
#
# Last, handle reference-style shortcuts: [link text]
- # These must come last in case you've also got [link test][1]
- # or [link test](/foo)
+ # These must come last in case you've also got [link text][1]
+ # or [link text](/foo)
#
-// $text = preg_replace_callback('{
-// ( # wrap whole match in $1
-// \[
-// ([^\[\]]+) # link text = $2; can\'t contain [ or ]
-// \]
-// )
-// }xs',
-// array(&$this, '_doAnchors_reference_callback'), $text);
+ $text = preg_replace_callback('{
+ ( # wrap whole match in $1
+ \[
+ ([^\[\]]+) # link text = $2; can\'t contain [ or ]
+ \]
+ )
+ }xs',
+ array(&$this, '_doAnchors_reference_callback'), $text);
$this->in_anchor = false;
return $text;
# for shortcut links like [this][] or [this].
$link_id = $link_text;
}
-
+
# lower-case and turn embedded newlines into spaces
$link_id = strtolower($link_id);
$link_id = preg_replace('{[ ]?\n}', ' ', $link_id);
if (isset($this->urls[$link_id])) {
$url = $this->urls[$link_id];
$url = $this->encodeAttribute($url);
-
+
$result = "<a href=\"$url\"";
if ( isset( $this->titles[$link_id] ) ) {
$title = $this->titles[$link_id];
$title = $this->encodeAttribute($title);
$result .= " title=\"$title\"";
}
-
+
$link_text = $this->runSpanGamut($link_text);
$result .= ">$link_text</a>";
$result = $this->hashPart($result);
$title = $this->encodeAttribute($title);
$result .= " title=\"$title\"";
}
-
+
$link_text = $this->runSpanGamut($link_text);
$result .= ">$link_text</a>";
\]
)
- }xs',
+ }xs',
array(&$this, '_doImages_reference_callback'), $text);
#
\]
\s? # One optional whitespace character
\( # literal paren
- [ ]*
+ [ \n]*
(?:
<(\S*)> # src url = $3
|
('.$this->nested_url_parenthesis_re.') # src url = $4
)
- [ ]*
+ [ \n]*
( # $5
([\'"]) # quote char = $6
(.*?) # title = $7
\6 # matching quote
- [ ]*
+ [ \n]*
)? # title is optional
\)
)
# Setext-style headers:
# Header 1
# ========
- #
+ #
# Header 2
# --------
#
# Terrible hack to check we haven't found an empty list item.
if ($matches[2] == '-' && preg_match('{^-(?: |$)}', $matches[1]))
return $matches[0];
-
+
$level = $matches[2]{0} == '=' ? 1 : 2;
$block = "<h$level>".$this->runSpanGamut($matches[1])."</h$level>";
return "\n" . $this->hashBlock($block) . "\n\n";
# Re-usable patterns to match list item bullets and number markers:
$marker_ul_re = '[*+-]';
- $marker_ol_re = '\d+[.]';
+ $marker_ol_re = '\d+[\.]';
$marker_any_re = "(?:$marker_ul_re|$marker_ol_re)";
- $markers_relist = array($marker_ul_re, $marker_ol_re);
+ $markers_relist = array(
+ $marker_ul_re => $marker_ol_re,
+ $marker_ol_re => $marker_ul_re,
+ );
- foreach ($markers_relist as $marker_re) {
+ foreach ($markers_relist as $marker_re => $other_marker_re) {
# Re-usable pattern to match any entire ul or ol list:
$whole_list_re = '
( # $1 = whole list
( # $2
- [ ]{0,'.$less_than_tab.'}
- ('.$marker_re.') # $3 = first list item marker
+ ([ ]{0,'.$less_than_tab.'}) # $3 = number of spaces
+ ('.$marker_re.') # $4 = first list item marker
[ ]+
)
(?s:.+?)
- ( # $4
+ ( # $5
\z
|
\n{2,}
[ ]*
'.$marker_re.'[ ]+
)
+ |
+ (?= # Lookahead for another kind of list
+ \n
+ \3 # Must have the same indentation
+ '.$other_marker_re.'[ ]+
+ )
)
)
'; // mx
-
+
# We use a different prefix before nested lists than top-level lists.
# See extended comment in _ProcessListItems().
-
+
if ($this->list_level) {
$text = preg_replace_callback('{
^
function _doLists_callback($matches) {
#
# Turns one matched list into a hashed <ul> or <ol> block.
# $matches[1] is the whole list; the first item marker decides the type.
# Returns the hashed block surrounded by newlines.
#
# Re-usable patterns to match list item bullets and number markers:
$marker_ul_re = '[*+-]';
- $marker_ol_re = '\d+[.]';
+ $marker_ol_re = '\d+[\.]';
# NOTE(review): this combined pattern is overwritten below before it is
# ever read.
$marker_any_re = "(?:$marker_ul_re|$marker_ol_re)";
-
+
$list = $matches[1];
- $list_type = preg_match("/$marker_ul_re/", $matches[3]) ? "ul" : "ol";
-
# The first item marker is capture group 4 now that the list pattern
# captures the leading indentation as group 3.
+ $list_type = preg_match("/$marker_ul_re/", $matches[4]) ? "ul" : "ol";
+
# Only markers of the detected list type are handed to processListItems.
$marker_any_re = ( $list_type == "ul" ? $marker_ul_re : $marker_ol_re );
-
+
$list .= "\n";
$result = $this->processListItems($list, $marker_any_re);
-
+
$result = $this->hashBlock("<$list_type>\n" . $result . "</$list_type>");
return "\n". $result ."\n\n";
}
# without resorting to mind-reading. Perhaps the solution is to
# change the syntax rules such that sub-lists must start with a
# starting cardinal number; e.g. "1." or "a.".
-
+
$this->list_level++;
# trim trailing blank lines:
$marker_space = $matches[3];
$tailing_blank_line =& $matches[5];
- if ($leading_line || $tailing_blank_line ||
+ if ($leading_line || $tailing_blank_line ||
preg_match('/\n{2,}/', $item))
{
# Replace marker with the appropriate whitespace indentation
var $em_relist = array(
- '' => '(?:(?<!\*)\*(?!\*)|(?<!_)_(?!_))(?=\S)(?![.,:;]\s)',
- '*' => '(?<=\S)(?<!\*)\*(?!\*)',
- '_' => '(?<=\S)(?<!_)_(?!_)',
+ '' => '(?:(?<!\*)\*(?!\*)|(?<!_)_(?!_))(?=\S|$)(?![\.,:;]\s)',
+ '*' => '(?<=\S|^)(?<!\*)\*(?!\*)',
+ '_' => '(?<=\S|^)(?<!_)_(?!_)',
);
var $strong_relist = array(
- '' => '(?:(?<!\*)\*\*(?!\*)|(?<!_)__(?!_))(?=\S)(?![.,:;]\s)',
- '**' => '(?<=\S)(?<!\*)\*\*(?!\*)',
- '__' => '(?<=\S)(?<!_)__(?!_)',
+ '' => '(?:(?<!\*)\*\*(?!\*)|(?<!_)__(?!_))(?=\S|$)(?![\.,:;]\s)',
+ '**' => '(?<=\S|^)(?<!\*)\*\*(?!\*)',
+ '__' => '(?<=\S|^)(?<!_)__(?!_)',
);
var $em_strong_relist = array(
- '' => '(?:(?<!\*)\*\*\*(?!\*)|(?<!_)___(?!_))(?=\S)(?![.,:;]\s)',
- '***' => '(?<=\S)(?<!\*)\*\*\*(?!\*)',
- '___' => '(?<=\S)(?<!_)___(?!_)',
+ '' => '(?:(?<!\*)\*\*\*(?!\*)|(?<!_)___(?!_))(?=\S|$)(?![\.,:;]\s)',
+ '***' => '(?<=\S|^)(?<!\*)\*\*\*(?!\*)',
+ '___' => '(?<=\S|^)(?<!_)___(?!_)',
);
var $em_strong_prepared_relist;
-
+
function prepareItalicsAndBold() {
#
- # Prepare regular expressions for seraching emphasis tokens in any
+ # Prepare regular expressions for searching emphasis tokens in any
# context.
#
foreach ($this->em_relist as $em => $em_re) {
}
$token_relist[] = $em_re;
$token_relist[] = $strong_re;
-
+
# Construct master expression from list.
$token_re = '{('. implode('|', $token_relist) .')}';
$this->em_strong_prepared_relist["$em$strong"] = $token_re;
}
}
}
-
+
function doItalicsAndBold($text) {
$token_stack = array('');
$text_stack = array('');
$em = '';
$strong = '';
$tree_char_em = false;
-
+
while (1) {
#
# Get prepared regular expression for searching emphasis tokens
# in current context.
#
$token_re = $this->em_strong_prepared_relist["$em$strong"];
-
+
#
- # Each loop iteration seach for the next emphasis token.
+ # Each loop iteration search for the next emphasis token.
# Each token is then passed to handleSpanToken.
#
$parts = preg_split($token_re, $text, 2, PREG_SPLIT_DELIM_CAPTURE);
$text_stack[0] .= $parts[0];
$token =& $parts[1];
$text =& $parts[2];
-
+
if (empty($token)) {
# Reached end of text span: empty stack without emitting.
# any more emphasis.
}
break;
}
-
+
$token_len = strlen($token);
if ($tree_char_em) {
# Reached closing marker while inside a three-char emphasis.
$$tag = ''; # $$tag stands for $em or $strong
}
} else {
- # Reached opening three-char emphasis marker. Push on token
+ # Reached opening three-char emphasis marker. Push on token
# stack; will be handled by the special condition above.
$em = $token{0};
$strong = "$em$em";
$bq = $this->runBlockGamut($bq); # recurse
$bq = preg_replace('/^/m', " ", $bq);
- # These leading spaces cause problem with <pre> content,
+ # These leading spaces cause problem with <pre> content,
# so we need to fix that:
- $bq = preg_replace_callback('{(\s*<pre>.+?</pre>)}sx',
- array(&$this, '_DoBlockQuotes_callback2'), $bq);
+ $bq = preg_replace_callback('{(\s*<pre>.+?</pre>)}sx',
+ array(&$this, '_doBlockQuotes_callback2'), $bq);
return "\n". $this->hashBlock("<blockquote>\n$bq\n</blockquote>")."\n\n";
}
// # We can't call Markdown(), because that resets the hash;
// # that initialization code should be pulled into its own sub, though.
// $div_content = $this->hashHTMLBlocks($div_content);
-//
+//
// # Run document gamut methods on the content.
// foreach ($this->document_gamut as $method => $priority) {
// $div_content = $this->$method($div_content);
$text = str_replace('"', '"', $text);
return $text;
}
-
-
+
+
function encodeAmpsAndAngles($text) {
#
- # Smart processing for ampersands and angle brackets that need to
+ # Smart processing for ampersands and angle brackets that need to
# be encoded. Valid character entities are left alone unless the
# no-entities mode is set.
#
} else {
# Ampersand-encoding based entirely on Nat Irons's Amputator
# MT plugin: <http://bumppo.net/projects/amputator/>
- $text = preg_replace('/&(?!#?[xX]?(?:[0-9a-fA-F]+|\w+);)/',
+ $text = preg_replace('/&(?!#?[xX]?(?:[0-9a-fA-F]+|\w+);)/',
'&', $text);;
}
# Encode remaining <'s
function doAutoLinks($text) {
- $text = preg_replace_callback('{<((https?|ftp|dict):[^\'">\s]+)>}i',
+ $text = preg_replace_callback('{<((https?|ftp|dict):[^\'">\s]+)>}i',
array(&$this, '_doAutoLinks_url_callback'), $text);
# Email addresses: <address@domain.foo>
<
(?:mailto:)?
(
- [-.\w\x80-\xFF]+
+ (?:
+ [-!#$%&\'*+/=?^_`.{|}~\w\x80-\xFF]+
+ |
+ ".*?"
+ )
\@
- [-a-z0-9\x80-\xFF]+(\.[-a-z0-9\x80-\xFF]+)*\.[a-z]+
+ (?:
+ [-a-z0-9\x80-\xFF]+(\.[-a-z0-9\x80-\xFF]+)*\.[a-z]+
+ |
+ \[[\d.a-fA-F:]+\] # IPv4 & IPv6
+ )
)
>
}xi',
$addr = "mailto:" . $addr;
$chars = preg_split('/(?<!^)(?!$)/', $addr);
$seed = (int)abs(crc32($addr) / strlen($addr)); # Deterministic seed.
-
+
foreach ($chars as $key => $char) {
$ord = ord($char);
# Ignore non-ascii chars.
else $chars[$key] = '&#'.$ord.';';
}
}
-
+
$addr = implode('', $chars);
$text = implode('', array_slice($chars, 7)); # text without `mailto:`
$addr = "<a href=\"$addr\">$text</a>";
# escaped characters and handling code spans.
#
$output = '';
-
+
$span_re = '{
(
\\\\'.$this->escape_chars_re.'
while (1) {
#
- # Each loop iteration seach for either the next tag, the next
- # openning code span marker, or the next escaped character.
+ # Each loop iteration seach for either the next tag, the next
+ # openning code span marker, or the next escaped character.
# Each token is then passed to handleSpanToken.
#
$parts = preg_split($span_re, $str, 2, PREG_SPLIT_DELIM_CAPTURE);
-
+
# Create token from text preceding tag.
if ($parts[0] != "") {
$output .= $parts[0];
}
-
+
# Check if we reach the end.
if (isset($parts[1])) {
$output .= $this->handleSpanToken($parts[1], $parts[2]);
break;
}
}
-
+
return $output;
}
-
-
+
+
function handleSpanToken($token, &$str) {
#
- # Handle $token provided by parseSpan by determining its nature and
+ # Handle $token provided by parseSpan by determining its nature and
# returning the corresponding value that should replace it.
#
switch ($token{0}) {
return $this->hashPart("&#". ord($token{1}). ";");
case "`":
# Search for end marker in remaining text.
- if (preg_match('/^(.*?[^`])'.preg_quote($token).'(?!`)(.*)$/sm',
+ if (preg_match('/^(.*?[^`])'.preg_quote($token).'(?!`)(.*)$/sm',
$str, $matches))
{
$str = $matches[2];
}
- # String length function for detab. `_initDetab` will create a function to
+ # String length function for detab. `_initDetab` will create a function to
# handle UTF-8 if the default function does not exist.
var $utf8_strlen = 'mb_strlen';
-
+
function detab($text) {
#
# Replace tabs with the appropriate amount of space.
#
# For each line we separate the line in blocks delimited by
- # tab characters. Then we reconstruct every line by adding the
+ # tab characters. Then we reconstruct every line by adding the
# appropriate number of space between each blocks.
-
+
$text = preg_replace_callback('/^.*\t.*$/m',
array(&$this, '_detab_callback'), $text);
function _detab_callback($matches) {
$line = $matches[0];
$strlen = $this->utf8_strlen; # strlen function for UTF-8.
-
+
# Split in blocks.
$blocks = explode("\t", $line);
# Add each blocks to the line.
unset($blocks[0]); # Do not add first block twice.
foreach ($blocks as $block) {
# Calculate amount of space, insert spaces, insert block.
- $amount = $this->tab_width -
+ $amount = $this->tab_width -
$strlen($line, 'UTF-8') % $this->tab_width;
$line .= str_repeat(" ", $amount) . $block;
}
function _initDetab() {
#
# Check for the availability of the function in the `utf8_strlen` property
- # (initially `mb_strlen`). If the function is not available, create a
+ # (initially `mb_strlen`). If the function is not available, create a
# function that will loosely count the number of UTF-8 characters with a
# regular expression.
#
# Nothing to do when the configured function already exists.
if (function_exists($this->utf8_strlen)) return;
# The fallback returns preg_match_all()'s match count, where each match is
# either one byte in 00-BF or a byte in C0-FF plus any following 80-BF bytes.
# NOTE(review): _detab_callback calls it as $strlen($line, 'UTF-8'); the
# fallback declares only $text, so that second argument is ignored.
# NOTE(review): create_function() is deprecated as of PHP 7.2 and removed
# in PHP 8.0 -- confirm which PHP versions must be supported.
$this->utf8_strlen = create_function('$text', 'return preg_match_all(
- "/[\\\\x00-\\\\xBF]|[\\\\xC0-\\\\xFF][\\\\x80-\\\\xBF]*/",
+ "/[\\\\x00-\\\\xBF]|[\\\\xC0-\\\\xFF][\\\\x80-\\\\xBF]*/",
$text, $m);');
}
#
# Swap back in all the tags hashed by _HashHTMLBlocks.
#
- return preg_replace_callback('/(.)\x1A[0-9]+\1/',
+ return preg_replace_callback('/(.)\x1A[0-9]+\1/',
array(&$this, '_unhash_callback'), $text);
}
function _unhash_callback($matches) {
# Prefix for footnote ids.
var $fn_id_prefix = "";
-
+
# Optional title attribute for footnote links and backlinks.
var $fn_link_title = MARKDOWN_FN_LINK_TITLE;
var $fn_backlink_title = MARKDOWN_FN_BACKLINK_TITLE;
-
+
# Optional class attribute for footnote links and backlinks.
var $fn_link_class = MARKDOWN_FN_LINK_CLASS;
var $fn_backlink_class = MARKDOWN_FN_BACKLINK_CLASS;
-
- var $el_enable = MARKDOWN_EL_ENABLE;
- var $el_local_domain = MARKDOWN_EL_LOCAL_DOMAIN;
- var $el_new_window = MARKDOWN_EL_NEW_WINDOW;
- var $el_css_class = MARKDOWN_EL_CSS_CLASS;
-
- var $ha_enable = MARKDOWN_HA_ENABLE;
- var $ha_class = MARKDOWN_HA_CLASS;
- var $ha_text = MARKDOWN_HA_TEXT;
-
+
# Predefined abbreviations.
var $predef_abbr = array();
#
# Constructor function. Initialize the parser object.
#
- # Add extra escapable characters before parent constructor
+ # Add extra escapable characters before parent constructor
# initialize the table.
$this->escape_chars .= ':|';
-
- if ($this->el_local_domain === null) {
- $this->el_local_domain = $_SERVER['SERVER_NAME'];
- }
-
- # Insert extra document, block, and span transformations.
+
+ # Insert extra document, block, and span transformations.
# Parent constructor will do the sorting.
$this->document_gamut += array(
"doFencedCodeBlocks" => 5,
"doFootnotes" => 5,
"doAbbreviations" => 70,
);
-
+
parent::Markdown_Parser();
}
-
-
+
+
# Extra variables used during extra transformations.
var $footnotes = array();
var $footnotes_ordered = array();
var $abbr_desciptions = array();
var $abbr_word_re = '';
-
+
# Give the current footnote number.
var $footnote_counter = 1;
-
-
+
+
function setup() {
#
# Setting up Extra-specific variables.
#
parent::setup();
-
+
$this->footnotes = array();
$this->footnotes_ordered = array();
$this->abbr_desciptions = array();
$this->abbr_word_re = '';
$this->footnote_counter = 1;
-
+
foreach ($this->predef_abbr as $abbr_word => $abbr_desc) {
if ($this->abbr_word_re)
$this->abbr_word_re .= '|';
$this->abbr_desciptions[$abbr_word] = trim($abbr_desc);
}
}
-
+
function teardown() {
#
# Clearing Extra-specific variables.
$this->footnotes_ordered = array();
$this->abbr_desciptions = array();
$this->abbr_word_re = '';
-
+
parent::teardown();
}
-
-
+
+
### HTML Block Parser ###
-
+
# Tags that are always treated as block tags:
var $block_tags_re = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|form|fieldset|iframe|hr|legend';
-
+
# Tags treated as block tags only if the opening tag is alone on its line:
var $context_block_tags_re = 'script|noscript|math|ins|del';
-
+
# Tags where markdown="1" default to span mode:
var $contain_span_tags_re = 'p|h[1-6]|li|dd|dt|td|th|legend|address';
-
- # Tags which must not have their contents modified, no matter where
+
+ # Tags which must not have their contents modified, no matter where
# they appear:
var $clean_tags_re = 'script|math';
-
+
# Tags that do not need to be closed.
var $auto_close_tags_re = 'hr|img';
-
+
function hashHTMLBlocks($text) {
#
# Entry point of the HTML block hasher.
#
# The work is shared by two functions that call each other recursively:
# _hashHTMLBlocks_inMarkdown walks Markdown text and hands every block tag
# it encounters to _hashHTMLBlocks_inHTML; when the markdown="1" attribute
# is found within a tag, _hashHTMLBlocks_inHTML calls back
# _hashHTMLBlocks_inMarkdown to handle the Markdown syntax inside that tag.
#
# The HTML-in-Markdown hasher returns array(processed text, remaining
# text); only the processed text is returned from here.
#
	$result = $this->_hashHTMLBlocks_inMarkdown($text);
	$processed = $result[0];

	return $processed;
}
- function _hashHTMLBlocks_inMarkdown($text, $indent = 0,
+ function _hashHTMLBlocks_inMarkdown($text, $indent = 0,
$enclosing_tag_re = '', $span = false)
{
#
# Parse markdown text, calling _HashHTMLBlocks_InHTML for block tags.
#
- # * $indent is the number of space to be ignored when checking for code
- # blocks. This is important because if we don't take the indent into
+ # * $indent is the number of space to be ignored when checking for code
+ # blocks. This is important because if we don't take the indent into
# account, something like this (which looks right) won't work as expected:
#
# <div>
# If you don't like this, just don't indent the tag on which
# you apply the markdown="1" attribute.
#
- # * If $enclosing_tag_re is not empty, stops at the first unmatched closing
+ # * If $enclosing_tag_re is not empty, stops at the first unmatched closing
# tag with that name. Nested tags supported.
#
- # * If $span is true, text inside must treated as span. So any double
- # newline will be replaced by a single newline so that it does not create
+ # * If $span is true, text inside must treated as span. So any double
+ # newline will be replaced by a single newline so that it does not create
# paragraphs.
#
# Returns an array of that form: ( processed text , remaining text )
# Regex to check for the presence of newlines around a block tag.
$newline_before_re = '/(?:^\n?|\n\n)*$/';
- $newline_after_re =
+ $newline_after_re =
'{
^ # Start of text following the tag.
(?>[ ]*<!--.*?-->)? # Optional comment.
[ ]*\n # Must be followed by newline.
}xs';
-
+
# Regex to match any tag.
$block_tag_re =
'{
'. ( !$span ? ' # If not in span.
|
# Indented code block
- (?> ^[ ]*\n? | \n[ ]*\n )
+ (?: ^[ ]*\n | ^ | \n[ ]*\n )
[ ]{'.($indent+4).'}[^\n]* \n
(?>
(?: [ ]{'.($indent+4).'}[^\n]* | [ ]* ) \n
|
# Fenced code block marker
(?> ^ | \n )
- [ ]{'.($indent).'}~~~+[ ]*\n
+ [ ]{0,'.($indent).'}~~~+[ ]*\n
' : '' ). ' # End (if not is span).
)
}xs';
-
+
$depth = 0; # Current depth inside the tag tree.
$parsed = ""; # Parsed text that will be returned.
#
# Split the text using the first $tag_match pattern found.
# Text before pattern will be first in the array, text after
- # pattern will be at the end, and between will be any catches made
+ # pattern will be at the end, and between will be any catches made
# by the pattern.
#
- $parts = preg_split($block_tag_re, $text, 2,
+ $parts = preg_split($block_tag_re, $text, 2,
PREG_SPLIT_DELIM_CAPTURE);
-
- # If in Markdown span mode, add a empty-string span-level hash
+
+ # If in Markdown span mode, add a empty-string span-level hash
# after each newline to prevent triggering any block element.
if ($span) {
$void = $this->hashPart("", ':');
$newline = "$void\n";
$parts[0] = $void . str_replace("\n", $newline, $parts[0]) . $void;
}
-
+
$parsed .= $parts[0]; # Text before current tag.
-
+
# If end of $text has been reached. Stop loop.
if (count($parts) < 3) {
$text = "";
break;
}
-
+
$tag = $parts[1]; # Tag to handle.
$text = $parts[2]; # Remaining text after current tag.
$tag_re = preg_quote($tag); # For use in a regular expression.
-
+
#
# Check for: Code span marker
#
}
}
#
- # Check for: Indented code block or fenced code block marker.
+ # Check for: Fenced code block marker.
#
- else if ($tag{0} == "\n" || $tag{0} == "~") {
- if ($tag{1} == "\n" || $tag{1} == " ") {
- # Indented code block: pass it unchanged, will be handled
- # later.
- $parsed .= $tag;
+ else if (preg_match('{^\n?[ ]{0,'.($indent+3).'}~}', $tag)) {
+ # Fenced code block marker: find matching end marker.
+ $tag_re = preg_quote(trim($tag));
+ if (preg_match('{^(?>.*\n)+?[ ]{0,'.($indent).'}'.$tag_re.'[ ]*\n}', $text,
+ $matches))
+ {
+ # End marker found: pass text unchanged until marker.
+ $parsed .= $tag . $matches[0];
+ $text = substr($text, strlen($matches[0]));
}
else {
- # Fenced code block marker: find matching end marker.
- $tag_re = preg_quote(trim($tag));
- if (preg_match('{^(?>.*\n)+?'.$tag_re.' *\n}', $text,
- $matches))
- {
- # End marker found: pass text unchanged until marker.
- $parsed .= $tag . $matches[0];
- $text = substr($text, strlen($matches[0]));
- }
- else {
- # No end marker: just skip it.
- $parsed .= $tag;
- }
+ # No end marker: just skip it.
+ $parsed .= $tag;
}
}
#
+ # Check for: Indented code block.
+ #
+ else if ($tag{0} == "\n" || $tag{0} == " ") {
+ # Indented code block: pass it unchanged, will be handled
+ # later.
+ $parsed .= $tag;
+ }
+ #
# Check for: Opening Block level tag or
- # Opening Context Block tag (like ins and del)
+ # Opening Context Block tag (like ins and del)
# used as a block tag (tag is alone on it's line).
#
else if (preg_match('{^<(?:'.$this->block_tags_re.')\b}', $tag) ||
)
{
# Need to parse tag and following text using the HTML parser.
- list($block_text, $text) =
+ list($block_text, $text) =
$this->_hashHTMLBlocks_inHTML($tag . $text, "hashBlock", true);
-
+
# Make sure it stays outside of any paragraph by adding newlines.
$parsed .= "\n\n$block_text\n\n";
}
{
# Need to parse tag and following text using the HTML parser.
# (don't check for markdown attribute)
- list($block_text, $text) =
+ list($block_text, $text) =
$this->_hashHTMLBlocks_inHTML($tag . $text, "hashClean", false);
-
+
$parsed .= $block_text;
}
#
$text = $tag . $text;
break;
}
-
+
$parsed .= $tag;
}
else {
$parsed .= $tag;
}
} while ($depth >= 0);
-
+
return array($parsed, $text);
}
function _hashHTMLBlocks_inHTML($text, $hash_method, $md_attr) {
# Returns an array of that form: ( processed text , remaining text )
#
if ($text === '') return array('', '');
-
+
# Regex to match `markdown` attribute inside of a tag.
$markdown_attr_re = '
{
markdown
\s*=\s*
(?>
- (["\']) # $1: quote delimiter
+ (["\']) # $1: quote delimiter
(.*?) # $2: attribute value
- \1 # matching delimiter
+ \1 # matching delimiter
|
([^\s>]*) # $3: unquoted attribute value
)
() # $4: make $3 always defined (avoid warnings)
}xs';
-
+
# Regex to match any tag.
$tag_re = '{
( # $2: Capture hole tag.
<!\[CDATA\[.*?\]\]> # CData Block
)
}xs';
-
+
$original_text = $text; # Save original text in case of faliure.
-
+
$depth = 0; # Current depth inside the tag tree.
$block_text = ""; # Temporary text holder for current text.
$parsed = ""; # Parsed text that will be returned.
#
# Split the text using the first $tag_match pattern found.
# Text before pattern will be first in the array, text after
- # pattern will be at the end, and between will be any catches made
+ # pattern will be at the end, and between will be any catches made
# by the pattern.
#
$parts = preg_split($tag_re, $text, 2, PREG_SPLIT_DELIM_CAPTURE);
-
+
if (count($parts) < 3) {
#
# End of $text reached with unbalanced tag(s).
# In that case, we return original text unchanged and pass the
- # first character as filtered to prevent an infinite loop in the
+ # first character as filtered to prevent an infinite loop in the
# parent function.
#
return array($original_text{0}, substr($original_text, 1));
}
-
+
$block_text .= $parts[0]; # Text before current tag.
$tag = $parts[1]; # Tag to handle.
$text = $parts[2]; # Remaining text after current tag.
-
+
#
# Check for: Auto-close tag (like <hr/>)
# Comments and Processing Instructions.
if ($tag{1} == '/') $depth--;
else if ($tag{strlen($tag)-2} != '/') $depth++;
}
-
+
#
# Check for `markdown="1"` attribute and handle it.
#
- if ($md_attr &&
+ if ($md_attr &&
preg_match($markdown_attr_re, $tag, $attr_m) &&
preg_match('/^1|block|span$/', $attr_m[2] . $attr_m[3]))
{
# Remove `markdown` attribute from opening tag.
$tag = preg_replace($markdown_attr_re, '', $tag);
-
+
# Check if text inside this tag must be parsed in span mode.
$this->mode = $attr_m[2] . $attr_m[3];
$span_mode = $this->mode == 'span' || $this->mode != 'block' &&
preg_match('{^<(?:'.$this->contain_span_tags_re.')\b}', $tag);
-
+
# Calculate indent before tag.
if (preg_match('/(?:^|\n)( *?)(?! ).*?$/', $block_text, $matches)) {
$strlen = $this->utf8_strlen;
} else {
$indent = 0;
}
-
+
# End preceding block with this tag.
$block_text .= $tag;
$parsed .= $this->$hash_method($block_text);
-
+
# Get enclosing tag name for the ParseMarkdown function.
# (This pattern makes $tag_name_re safe without quoting.)
preg_match('/^<([\w:$]*)\b/', $tag, $matches);
$tag_name_re = $matches[1];
-
+
# Parse the content using the HTML-in-Markdown parser.
list ($block_text, $text)
- = $this->_hashHTMLBlocks_inMarkdown($text, $indent,
+ = $this->_hashHTMLBlocks_inMarkdown($text, $indent,
$tag_name_re, $span_mode);
-
+
# Outdent markdown text.
if ($indent > 0) {
- $block_text = preg_replace("/^[ ]{1,$indent}/m", "",
+ $block_text = preg_replace("/^[ ]{1,$indent}/m", "",
$block_text);
}
-
+
# Append tag content to parsed text.
if (!$span_mode) $parsed .= "\n\n$block_text\n\n";
else $parsed .= "$block_text";
-
+
# Start over a new block.
$block_text = "";
}
else $block_text .= $tag;
}
-
+
} while ($depth > 0);
-
+
#
# Hash last block text that wasn't processed inside the loop.
#
$parsed .= $this->$hash_method($block_text);
-
+
return array($parsed, $text);
}
function hashClean($text) {
#
# Hash a "clean" tag.
#
# Whenever a function inserts a "clean" tag into $text, the tag passes
# through this function and is escaped automatically, which blocks invalid
# nested overlap. The hashing itself is delegated to hashPart(), called
# with 'C' as its second argument.
#
	$hashed = $this->hashPart($text, 'C');
	return $hashed;
}
- function _doAnchors_inline_callback($matches) {
- // $whole_match = $matches[1];
- $link_text = $this->runSpanGamut($matches[2]);
- $url = $matches[3] == '' ? $matches[4] : $matches[3];
- $title =& $matches[7];
-
- $url = $this->encodeAttribute($url);
-
- $result = "<a href=\"$url\"";
- if (isset($title)) {
- $title = $this->encodeAttribute($title);
- $result .= " title=\"$title\"";
- }
-
- if ($this->el_enable && preg_match('/^https?\:\/\//', $url) && !preg_match('/^https?\:\/\/'.$this->el_local_domain.'/', $url)) {
- if ($this->el_new_window) {
- $result .= ' target="_blank"';
- }
-
- if ($this->el_css_class) {
- $result .= ' class="'.$this->el_css_class.'"';
- }
- }
-
- $link_text = $this->runSpanGamut($link_text);
- $result .= ">$link_text</a>";
-
- return $this->hashPart($result);
- }
-
- function _doAnchors_reference_callback($matches) {
- $whole_match = $matches[1];
- $link_text = $matches[2];
- $link_id =& $matches[3];
- $result = '';
-
- if ($link_id == "") {
- # for shortcut links like [this][] or [this].
- $link_id = $link_text;
- }
-
- # lower-case and turn embedded newlines into spaces
- $link_id = strtolower($link_id);
- $link_id = preg_replace('{[ ]?\n}', ' ', $link_id);
-
- if (isset($this->urls[$link_id])) {
- $url = $this->urls[$link_id];
- $url = $this->encodeAttribute($url);
-
- $result = "<a href=\"$url\"";
- if ( isset( $this->titles[$link_id] ) ) {
- $title = $this->titles[$link_id];
- $title = $this->encodeAttribute($title);
- $result .= " title=\"$title\"";
- }
-
- if ($this->el_enable && preg_match('/^https?\:\/\//', $url) && !preg_match('/^https?\:\/\/'.$this->el_local_domain.'/', $url)) {
- if ($this->el_new_window) {
- $result .= ' target="_blank"';
- }
-
- if ($this->el_css_class) {
- $result .= ' class="'.$this->el_css_class.'"';
- }
- }
-
- $link_text = $this->runSpanGamut($link_text);
- $result .= ">$link_text</a>";
- $result = $this->hashPart($result);
- }
- else {
- $result = $whole_match;
- }
- return $result;
- }
function doHeaders($text) {
#
# Setext-style headers:
# Header 1 {#header1}
# ========
- #
+ #
# Header 2 {#header2}
# --------
#
return $matches[0];
$level = $matches[3]{0} == '=' ? 1 : 2;
$attr = $this->_doHeaders_attr($id =& $matches[2]);
- $body = $this->runSpanGamut($matches[1]);
- $body = $this->_doHeaders_selflink($id, $body);
-
- $block = "<h$level$attr>$body</h$level>";
+ $block = "<h$level$attr>".$this->runSpanGamut($matches[1])."</h$level>";
return "\n" . $this->hashBlock($block) . "\n\n";
}
function _doHeaders_callback_atx($matches) {
# Builds the <hN> element for one ATX-style header match.
#  - the header level N is the length of $matches[1];
#  - $matches[3] is passed by reference to _doHeaders_attr(), whose result
#    is inserted verbatim into the opening tag;
#  - $matches[2] is the header text, run through the span gamut.
# The element is hashed as a block and returned between newlines so that it
# stays separate from the surrounding text.
$level = strlen($matches[1]);
$attr = $this->_doHeaders_attr($id =& $matches[3]);
- $body = $this->runSpanGamut($matches[2]);
- $body = $this->_doHeaders_selflink($id, $body);
-
- $block = "<h$level$attr>$body</h$level>";
+ $block = "<h$level$attr>".$this->runSpanGamut($matches[2])."</h$level>";
return "\n" . $this->hashBlock($block) . "\n\n";
}
- function _doHeaders_selflink($id, $body) {
- if (!empty($id)) {
- $link = '<a href="#'.$id.'"';
-
- if ($this->ha_class) {
- $link .= ' class="'.$this->ha_class.'"';
- }
-
- $link .= '>'.$this->ha_text.'</a>';
-
- $body .= $link;
- }
-
- return $body;
- }
function doTables($text) {
[ ]{0,'.$less_than_tab.'} # Allowed whitespace.
[|] # Optional leading pipe (present)
(.+) \n # $1: Header row (at least one pipe)
-
+
[ ]{0,'.$less_than_tab.'} # Allowed whitespace.
[|] ([ ]*[-:]+[-| :]*) \n # $2: Header underline
-
+
( # $3: Cells
(?>
[ ]* # Allowed whitespace.
(?=\n|\Z) # Stop at final double newline.
}xm',
array(&$this, '_doTable_leadingPipe_callback'), $text);
-
+
#
# Find tables without leading pipe.
#
^ # Start of a line
[ ]{0,'.$less_than_tab.'} # Allowed whitespace.
(\S.*[|].*) \n # $1: Header row (at least one pipe)
-
+
[ ]{0,'.$less_than_tab.'} # Allowed whitespace.
([-:]+[ ]*[|][-| :]*) \n # $2: Header underline
-
+
( # $3: Cells
(?>
.* [|] .* \n # Row content
$head = $matches[1];
$underline = $matches[2];
$content = $matches[3];
-
+
# Remove leading pipe for each row.
$content = preg_replace('/^ *[|]/m', '', $content);
-
+
return $this->_doTable_callback(array($matches[0], $head, $underline, $content));
}
function _doTable_callback($matches) {
$head = preg_replace('/[|] *$/m', '', $head);
$underline = preg_replace('/[|] *$/m', '', $underline);
$content = preg_replace('/[|] *$/m', '', $content);
-
+
# Reading alignment from header underline.
$separators = preg_split('/ *[|] */', $underline);
foreach ($separators as $n => $s) {
else if (preg_match('/^ *:-+ *$/', $s)) $attr[$n] = ' align="left"';
else $attr[$n] = '';
}
-
- # Parsing span elements, including code spans, character escapes,
+
+ # Parsing span elements, including code spans, character escapes,
# and inline HTML tags, so that pipes inside those gets ignored.
$head = $this->parseSpan($head);
$headers = preg_split('/ *[|] */', $head);
$col_count = count($headers);
-
+
# Write column headers.
$text = "<table>\n";
$text .= "<thead>\n";
$text .= " <th$attr[$n]>".$this->runSpanGamut(trim($header))."</th>\n";
$text .= "</tr>\n";
$text .= "</thead>\n";
-
+
# Split content by row.
$rows = explode("\n", trim($content, "\n"));
-
+
$text .= "<tbody>\n";
foreach ($rows as $row) {
- # Parsing span elements, including code spans, character escapes,
+ # Parsing span elements, including code spans, character escapes,
# and inline HTML tags, so that pipes inside those gets ignored.
$row = $this->parseSpan($row);
-
+
# Split row by cell.
$row_cells = preg_split('/ *[|] */', $row, $col_count);
$row_cells = array_pad($row_cells, $col_count, '');
-
+
$text .= "<tr>\n";
foreach ($row_cells as $n => $cell)
$text .= " <td$attr[$n]>".$this->runSpanGamut(trim($cell))."</td>\n";
}
$text .= "</tbody>\n";
$text .= "</table>";
-
+
return $this->hashBlock($text) . "\n";
}
-
+
function doDefLists($text) {
#
# Form HTML definition lists.
function _doDefLists_callback($matches) {
# Re-usable patterns to match list item bullets and number markers:
$list = $matches[1];
-
+
# Turn double returns into triple returns, so that we can make a
# paragraph for the last item in a list, if necessary:
$result = trim($this->processDefListItems($list));
# into individual term and definition list items.
#
$less_than_tab = $this->tab_width - 1;
-
+
# trim trailing blank lines:
$list_str = preg_replace("/\n{2,}\\z/", "\n", $list_str);
(?>\A\n?|\n\n+) # leading line
( # definition terms = $1
[ ]{0,'.$less_than_tab.'} # leading whitespace
- (?![:][ ]|[ ]) # negative lookahead for a definition
+ (?![:][ ]|[ ]) # negative lookahead for a definition
# mark (colon) or more whitespace.
- (?> \S.* \n)+? # actual term (not whitespace).
- )
- (?=\n?[ ]{0,3}:[ ]) # lookahead for following line feed
+ (?> \S.* \n)+? # actual term (not whitespace).
+ )
+ (?=\n?[ ]{0,3}:[ ]) # lookahead for following line feed
# with a definition mark.
}xm',
array(&$this, '_processDefListItems_callback_dt'), $list_str);
(?= \n+ # stop at next definition mark,
(?: # next term or end of text
[ ]{0,'.$less_than_tab.'} [:][ ] |
- <dt | \z
- )
- )
+ <dt> | \z
+ )
+ )
}xm',
array(&$this, '_processDefListItems_callback_dd'), $list_str);
return $list_str;
}
function _processDefListItems_callback_dt($matches) {
# Callback for the definition-term pattern of processDefListItems.
# $matches[1] holds one or more terms, one per line. Each term is trimmed,
# run through the span gamut and wrapped in its own <dt> element preceded by
# a newline; the returned string ends with a newline.
- $anchor_regexp = '/\{\#([-_:a-zA-Z0-9]+)\}/';
$terms = explode("\n", trim($matches[1]));
$text = '';
- $id = array();
-
foreach ($terms as $term) {
- $id = '';
- if (preg_match($anchor_regexp, $term, $id) > 0) {
- $term = preg_replace($anchor_regexp, '', $term);
- $id = ' id="'.trim($id[1]).'"';
- }
-
- if (count($id) === 0) {
- $id = '';
- }
-
$term = $this->runSpanGamut(trim($term));
- $text .= "\n<dt$id>" . $term . "</dt>";
+ $text .= "\n<dt>" . $term . "</dt>";
}
return $text . "\n";
}
# ~~~
#
$less_than_tab = $this->tab_width;
-
+
$text = preg_replace_callback('{
(?:\n|\A)
# 1: Opening marker
~{3,} # Marker: three tilde or more.
)
[ ]* \n # Whitespace and newline following marker.
-
+
# 2: Content
(
(?>
.*\n+
)+
)
-
+
# Closing marker.
\1 [ ]* \n
}xm',
return "\n\n".$this->hashBlock($codeblock)."\n\n";
}
function _doFencedCodeBlocks_newlines($matches) {
#
# Replace the matched text with one <br> tag per character matched, each
# tag closed with the parser's empty-element suffix.
#
	$br = "<br$this->empty_element_suffix";
	$count = strlen($matches[0]);

	return str_repeat($br, $count);
}
# work in the middle of a word.
#
var $em_relist = array(
- '' => '(?:(?<!\*)\*(?!\*)|(?<![a-zA-Z0-9_])_(?!_))(?=\S)(?![.,:;]\s)',
- '*' => '(?<=\S)(?<!\*)\*(?!\*)',
- '_' => '(?<=\S)(?<!_)_(?![a-zA-Z0-9_])',
+ '' => '(?:(?<!\*)\*(?!\*)|(?<![a-zA-Z0-9_])_(?!_))(?=\S|$)(?![\.,:;]\s)',
+ '*' => '(?<=\S|^)(?<!\*)\*(?!\*)',
+ '_' => '(?<=\S|^)(?<!_)_(?![a-zA-Z0-9_])',
);
var $strong_relist = array(
- '' => '(?:(?<!\*)\*\*(?!\*)|(?<![a-zA-Z0-9_])__(?!_))(?=\S)(?![.,:;]\s)',
- '**' => '(?<=\S)(?<!\*)\*\*(?!\*)',
- '__' => '(?<=\S)(?<!_)__(?![a-zA-Z0-9_])',
+ '' => '(?:(?<!\*)\*\*(?!\*)|(?<![a-zA-Z0-9_])__(?!_))(?=\S|$)(?![\.,:;]\s)',
+ '**' => '(?<=\S|^)(?<!\*)\*\*(?!\*)',
+ '__' => '(?<=\S|^)(?<!_)__(?![a-zA-Z0-9_])',
);
var $em_strong_relist = array(
- '' => '(?:(?<!\*)\*\*\*(?!\*)|(?<![a-zA-Z0-9_])___(?!_))(?=\S)(?![.,:;]\s)',
- '***' => '(?<=\S)(?<!\*)\*\*\*(?!\*)',
- '___' => '(?<=\S)(?<!_)___(?![a-zA-Z0-9_])',
+ '' => '(?:(?<!\*)\*\*\*(?!\*)|(?<![a-zA-Z0-9_])___(?!_))(?=\S|$)(?![\.,:;]\s)',
+ '***' => '(?<=\S|^)(?<!\*)\*\*\*(?!\*)',
+ '___' => '(?<=\S|^)(?<!_)___(?![a-zA-Z0-9_])',
);
#
# Strip leading and trailing lines:
$text = preg_replace('/\A\n+|\n+\z/', '', $text);
-
+
$grafs = preg_split('/\n{2,}/', $text, -1, PREG_SPLIT_NO_EMPTY);
#
#
foreach ($grafs as $key => $value) {
$value = trim($this->runSpanGamut($value));
-
+
# Check if this should be enclosed in a paragraph.
# Clean tag hashes & block tag hashes are left alone.
$is_p = !preg_match('/^B\x1A[0-9]+B|^C\x1A[0-9]+C$/', $value);
-
+
if ($is_p) {
$value = "<p>$value</p>";
}
$grafs[$key] = $value;
}
-
- # Join grafs in one text, then unhash HTML tags.
+
+ # Join grafs in one text, then unhash HTML tags.
$text = implode("\n\n", $grafs);
-
+
# Finish by removing any tag hashes still present in $text.
$text = $this->unhash($text);
-
+
return $text;
}
-
-
+
+
### Footnotes
-
+
function stripFootnotes($text) {
#
# Strips link definitions from text, stores the URLs and titles in
[ ]*
\n? # maybe *one* newline
( # text = $2 (no blank lines allowed)
- (?:
+ (?:
.+ # actual text
|
- \n # newlines but
+ \n # newlines but
(?!\[\^.+?\]:\s)# negative lookahead for footnote marker.
- (?!\n+[ ]{0,3}\S)# ensure line is not blank and followed
+ (?!\n+[ ]{0,3}\S)# ensure line is not blank and followed
# by non-indented content
)*
- )
+ )
}xm',
array(&$this, '_stripFootnotes_callback'),
$text);
function doFootnotes($text) {
#
- # Replace footnote references in $text [^id] with a special text-token
+ # Replace footnote references in $text [^id] with a special text-token
# which will be replaced by the actual footnote marker in appendFootnotes.
#
if (!$this->in_anchor) {
return $text;
}
-
+
function appendFootnotes($text) {
#
# Append footnote list to text.
#
- $text = preg_replace_callback('{F\x1Afn:(.*?)\x1A:}',
+ $text = preg_replace_callback('{F\x1Afn:(.*?)\x1A:}',
array(&$this, '_appendFootnotes_callback'), $text);
-
+
if (!empty($this->footnotes_ordered)) {
$text .= "\n\n";
$text .= "<div class=\"footnotes\">\n";
- $text .= "<hr". MARKDOWN_EMPTY_ELEMENT_SUFFIX ."\n";
+ $text .= "<hr". $this->empty_element_suffix ."\n";
$text .= "<ol>\n\n";
-
+
$attr = " rev=\"footnote\"";
if ($this->fn_backlink_class != "") {
$class = $this->fn_backlink_class;
$attr .= " title=\"$title\"";
}
$num = 0;
-
+
while (!empty($this->footnotes_ordered)) {
$footnote = reset($this->footnotes_ordered);
$note_id = key($this->footnotes_ordered);
unset($this->footnotes_ordered[$note_id]);
-
+
$footnote .= "\n"; # Need to append newline before parsing.
- $footnote = $this->runBlockGamut("$footnote\n");
- $footnote = preg_replace_callback('{F\x1Afn:(.*?)\x1A:}',
+ $footnote = $this->runBlockGamut("$footnote\n");
+ $footnote = preg_replace_callback('{F\x1Afn:(.*?)\x1A:}',
array(&$this, '_appendFootnotes_callback'), $footnote);
-
+
$attr = str_replace("%%", ++$num, $attr);
$note_id = $this->encodeAttribute($note_id);
-
+
# Add backlink to last paragraph; create new paragraph if needed.
$backlink = "<a href=\"#fnref:$note_id\"$attr>↩</a>";
if (preg_match('{</p>$}', $footnote)) {
} else {
$footnote .= "\n\n<p>$backlink</p>";
}
-
+
$text .= "<li id=\"fn:$note_id\">\n";
$text .= $footnote . "\n";
$text .= "</li>\n\n";
}
-
+
$text .= "</ol>\n";
$text .= "</div>";
}
}
function _appendFootnotes_callback($matches) {
$node_id = $this->fn_id_prefix . $matches[1];
-
+
# Create footnote marker only if it has a corresponding footnote *and*
# the footnote hasn't been used by another marker.
if (isset($this->footnotes[$node_id])) {
# Transfer footnote content to the ordered list.
$this->footnotes_ordered[$node_id] = $this->footnotes[$node_id];
unset($this->footnotes[$node_id]);
-
+
$num = $this->footnote_counter++;
$attr = " rel=\"footnote\"";
if ($this->fn_link_class != "") {
$title = $this->encodeAttribute($title);
$attr .= " title=\"$title\"";
}
-
+
$attr = str_replace("%%", $num, $attr);
$node_id = $this->encodeAttribute($node_id);
-
+
return
"<sup id=\"fnref:$node_id\">".
"<a href=\"#fn:$node_id\"$attr>$num</a>".
"</sup>";
}
-
+
return "[^".$matches[1]."]";
}
-
-
+
+
### Abbreviations ###
-
+
function stripAbbreviations($text) {
#
# Strips abbreviations from text, stores titles in hash references.
# Link defs are in the form: [id]*: url "optional title"
$text = preg_replace_callback('{
^[ ]{0,'.$less_than_tab.'}\*\[(.+?)\][ ]?: # abbr_id = $1
- (.*) # text = $2 (no blank lines allowed)
+ (.*) # text = $2 (no blank lines allowed)
}xm',
array(&$this, '_stripAbbreviations_callback'),
$text);
$this->abbr_desciptions[$abbr_word] = trim($abbr_desc);
return ''; # String that will replace the block
}
-
-
+
+
function doAbbreviations($text) {
#
# Find defined abbreviations in text and wrap them in <abbr> elements.
#
if ($this->abbr_word_re) {
- // cannot use the /x modifier because abbr_word_re may
+ // cannot use the /x modifier because abbr_word_re may
// contain significant spaces:
$text = preg_replace_callback('{'.
'(?<![\w\x1A])'.
'(?:'.$this->abbr_word_re.')'.
'(?![\w\x1A])'.
- '}',
+ '}',
array(&$this, '_doAbbreviations_callback'), $text);
}
return $text;
Description
-----------
-This is a PHP port of the original Markdown formatter written in Perl
-by John Gruber. This special "Extra" version of PHP Markdown features
-further enhancements to the syntax for making additional constructs
+This is a PHP port of the original Markdown formatter written in Perl
+by John Gruber. This special "Extra" version of PHP Markdown features
+further enhancements to the syntax for making additional constructs
such as tables and definition lists.
Markdown is a text-to-HTML filter; it translates an easy-to-read /
Version History
----------------
+---------------
See the readme file for detailed release notes for this version.
Copyright and License
---------------------
-PHP Markdown & Extra
-Copyright (c) 2004-2008 Michel Fortin
-<http://www.michelf.com/>
+PHP Markdown & Extra
+Copyright (c) 2004-2009 Michel Fortin
+<http://michelf.com/>
All rights reserved.
-Based on Markdown
-Copyright (c) 2003-2006 John Gruber
-<http://daringfireball.net/>
+Based on Markdown
+Copyright (c) 2003-2006 John Gruber
+<http://daringfireball.net/>
All rights reserved.
Redistribution and use in source and binary forms, with or without
software, even if advised of the possibility of such damage.
*/
-?>
+?>
\ No newline at end of file