1
0
mirror of https://github.com/erusev/parsedown.git synced 2023-08-10 21:13:06 +03:00

Compare commits

..

12 Commits
0.4.8 ... 0.5.0

4 changed files with 408 additions and 292 deletions

View File

@ -46,17 +46,17 @@ class Parsedown
function parse($text) function parse($text)
{ {
# Removes UTF-8 BOM and marker characters. # removes UTF-8 BOM and marker characters
$text = preg_replace('{^\xEF\xBB\xBF|\x1A}', '', $text); $text = preg_replace('{^\xEF\xBB\xBF|\x1A}', '', $text);
# Removes \r characters. # removes \r characters
$text = str_replace("\r\n", "\n", $text); $text = str_replace("\r\n", "\n", $text);
$text = str_replace("\r", "\n", $text); $text = str_replace("\r", "\n", $text);
# Replaces tabs with spaces. # replaces tabs with spaces
$text = str_replace("\t", ' ', $text); $text = str_replace("\t", ' ', $text);
# Encodes escape sequences. # encodes escape sequences
if (strpos($text, '\\') !== FALSE) if (strpos($text, '\\') !== FALSE)
{ {
@ -84,7 +84,7 @@ class Parsedown
$text = $this->parse_block_elements($lines); $text = $this->parse_block_elements($lines);
# Decodes escape sequences (leaves out backslashes). # decodes escape sequences
foreach ($this->escape_sequence_map as $code => $escape_sequence) foreach ($this->escape_sequence_map as $code => $escape_sequence)
{ {
@ -110,28 +110,56 @@ class Parsedown
foreach ($lines as $line) foreach ($lines as $line)
{ {
# Block-Level HTML #
# fenced elements
if ($element['type'] === 'block' and ! isset($element['closed'])) switch ($element['type'])
{ {
if (preg_match('{<'.$element['subtype'].'>$}', $line)) # <open> case 'fenced_code_block':
{
$element['depth']++;
}
if (preg_match('{</'.$element['subtype'].'>$}', $line)) # </close> if ( ! isset($element['closed']))
{ {
$element['depth'] > 0 if (preg_match('/^[ ]*'.$element['fence'][0].'{3,}[ ]*$/', $line))
? $element['depth']-- {
: $element['closed'] = true; $element['closed'] = true;
} }
else
{
$element['text'] !== '' and $element['text'] .= "\n";
$element['text'] .= "\n".$line; $element['text'] .= $line;
}
continue; continue 2;
}
break;
case 'markup':
if ( ! isset($element['closed']))
{
if (preg_match('{<'.$element['subtype'].'>$}', $line)) # opening tag
{
$element['depth']++;
}
if (preg_match('{</'.$element['subtype'].'>$}', $line)) # closing tag
{
$element['depth'] > 0
? $element['depth']--
: $element['closed'] = true;
}
$element['text'] .= "\n".$line;
continue 2;
}
break;
} }
# Empty # *
if ($line === '') if ($line === '')
{ {
@ -140,260 +168,332 @@ class Parsedown
continue; continue;
} }
# Lazy Blockquote #
# composite elements
if ($element['type'] === 'blockquote' and ! isset($element['interrupted'])) switch ($element['type'])
{ {
$line = preg_replace('/^[ ]*>[ ]?/', '', $line); case 'blockquote':
$element['lines'] []= $line; if ( ! isset($element['interrupted']))
continue;
}
# Lazy List Item
if ($element['type'] === 'li')
{
if (preg_match('/^([ ]{0,3})(\d+[.]|[*+-])[ ](.*)/', $line, $matches))
{
if ($element['indentation'] !== $matches[1])
{ {
$line = preg_replace('/^[ ]*>[ ]?/', '', $line);
$element['lines'] []= $line; $element['lines'] []= $line;
continue 2;
}
break;
case 'li':
if (preg_match('/^([ ]{0,3})(\d+[.]|[*+-])[ ](.*)/', $line, $matches))
{
if ($element['indentation'] !== $matches[1])
{
$element['lines'] []= $line;
}
else
{
unset($element['last']);
$elements []= $element;
$element = array(
'type' => 'li',
'indentation' => $matches[1],
'last' => true,
'lines' => array(
preg_replace('/^[ ]{0,4}/', '', $matches[3]),
),
);
}
continue 2;
}
if (isset($element['interrupted']))
{
if ($line[0] === ' ')
{
$element['lines'] []= '';
$line = preg_replace('/^[ ]{0,4}/', '', $line);
$element['lines'] []= $line;
continue 2;
}
} }
else else
{ {
unset($element['last']); $line = preg_replace('/^[ ]{0,4}/', '', $line);
$elements []= $element;
$element = array(
'type' => 'li',
'indentation' => $matches[1],
'last' => true,
'lines' => array(
preg_replace('/^[ ]{0,4}/', '', $matches[3]),
),
);
}
continue;
}
if (isset($element['interrupted']))
{
if ($line[0] === ' ')
{
$element['lines'] []= '';
$line = preg_replace('/^[ ]{0,4}/', '', $line);;
$element['lines'] []= $line; $element['lines'] []= $line;
continue; continue 2;
}
}
else
{
$line = preg_replace('/^[ ]{0,4}/', '', $line);;
$element['lines'] []= $line;
continue;
}
}
# Quick Paragraph
if ($line[0] >= 'a' or $line[0] >= 'A' and $line[0] <= 'Z')
{
goto paragraph;
}
# Code Block
if ($line[0] === ' ' and preg_match('/^[ ]{4}(.*)/', $line, $matches))
{
if (trim($line) === '')
{
continue;
}
if ($element['type'] === 'code')
{
if (isset($element['interrupted']))
{
$element['text'] .= "\n";
unset ($element['interrupted']);
} }
$element['text'] .= "\n".$matches[1]; break;
}
else
{
$elements []= $element;
$element = array(
'type' => 'code',
'text' => $matches[1],
);
}
continue;
}
# Setext Header (---)
if ($line[0] === '-' and $element['type'] === 'p' and ! isset($element['interrupted']) and preg_match('/^[-]+[ ]*$/', $line))
{
$element['type'] = 'h.';
$element['level'] = 2;
continue;
}
# Atx Header (#)
if ($line[0] === '#' and preg_match('/^(#{1,6})[ ]*(.+?)[ ]*#*$/', $line, $matches))
{
$elements []= $element;
$level = strlen($matches[1]);
$element = array(
'type' => 'h.',
'text' => $matches[2],
'level' => $level,
);
continue;
}
# Setext Header (===)
if ($line[0] === '=' and $element['type'] === 'p' and ! isset($element['interrupted']) and preg_match('/^[=]+[ ]*$/', $line))
{
$element['type'] = 'h.';
$element['level'] = 1;
continue;
} }
# ~ # ~
$pure_line = $line[0] !== ' ' ? $line : ltrim($line); if ($line[0] >= 'a' and $line[0] !== '~' or $line[0] >= 'A' and $line[0] <= 'Z')
if ($pure_line === '')
{ {
continue; goto paragraph;
} }
# Link Reference # ~
if ($pure_line[0] === '[' and preg_match('/^\[(.+?)\]:[ ]*([^ ]+)/', $pure_line, $matches)) $deindented_line = $line;
#
# indentation sensitive types
switch ($line[0])
{ {
$label = strtolower($matches[1]); case ' ':
$url = trim($matches[2], '<>');
$this->reference_map[$label] = $url; # ~
continue; $deindented_line = ltrim($line);
}
# Blockquote if ($deindented_line === '')
if ($pure_line[0] === '>' and preg_match('/^>[ ]?(.*)/', $pure_line, $matches))
{
if ($element['type'] === 'blockquote')
{
if (isset($element['interrupted']))
{ {
$element['lines'] []= ''; continue 2;
unset($element['interrupted']);
} }
$element['lines'] []= $matches[1]; # code block
}
else
{
$elements []= $element;
$element = array( if (preg_match('/^[ ]{4}(.*)/', $line, $matches))
'type' => 'blockquote', {
'lines' => array( if ($element['type'] === 'code_block')
$matches[1], {
), if (isset($element['interrupted']))
); {
} $element['text'] .= "\n";
continue; unset ($element['interrupted']);
}
$element['text'] .= "\n".$matches[1];
}
else
{
$elements []= $element;
$element = array(
'type' => 'code_block',
'text' => $matches[1],
);
}
continue 2;
}
break;
case '#':
# atx heading (#)
if (preg_match('/^(#{1,6})[ ]*(.+?)[ ]*#*$/', $line, $matches))
{
$elements []= $element;
$level = strlen($matches[1]);
$element = array(
'type' => 'h.',
'text' => $matches[2],
'level' => $level,
);
continue 2;
}
break;
case '-':
# setext heading (---)
if ($line[0] === '-' and $element['type'] === 'p' and ! isset($element['interrupted']) and preg_match('/^[-]+[ ]*$/', $line))
{
$element['type'] = 'h.';
$element['level'] = 2;
continue 2;
}
break;
case '=':
# setext heading (===)
if ($line[0] === '=' and $element['type'] === 'p' and ! isset($element['interrupted']) and preg_match('/^[=]+[ ]*$/', $line))
{
$element['type'] = 'h.';
$element['level'] = 1;
continue 2;
}
break;
} }
# HTML #
# indentation insensitive types
if ($pure_line[0] === '<') switch ($deindented_line[0])
{ {
# Block-Level HTML <self-closing/> case '<':
if (preg_match('{^<.+?/>$}', $pure_line)) # self-closing tag
{
$elements []= $element;
$element = array( if (preg_match('{^<.+?/>$}', $deindented_line))
'type' => '', {
'text' => $pure_line, $elements []= $element;
);
continue; $element = array(
} 'type' => '',
'text' => $deindented_line,
);
# Block-Level HTML <open> continue 2;
}
if (preg_match('{^<(\w+)(?:[ ].*?)?>}', $pure_line, $matches)) # opening tag
{
$elements []= $element;
$element = array( if (preg_match('{^<(\w+)(?:[ ].*?)?>}', $deindented_line, $matches))
'type' => 'block', {
'subtype' => strtolower($matches[1]), $elements []= $element;
'text' => $pure_line,
'depth' => 0,
);
preg_match('{</'.$matches[1].'>\s*$}', $pure_line) and $element['closed'] = true; $element = array(
'type' => 'markup',
'subtype' => strtolower($matches[1]),
'text' => $deindented_line,
'depth' => 0,
);
continue; preg_match('{</'.$matches[1].'>\s*$}', $deindented_line) and $element['closed'] = true;
}
continue 2;
}
break;
case '>':
# quote
if (preg_match('/^>[ ]?(.*)/', $deindented_line, $matches))
{
$elements []= $element;
$element = array(
'type' => 'blockquote',
'lines' => array(
$matches[1],
),
);
continue 2;
}
break;
case '[':
# reference
if (preg_match('/^\[(.+?)\]:[ ]*([^ ]+)/', $deindented_line, $matches))
{
$label = strtolower($matches[1]);
$this->reference_map[$label] = trim($matches[2], '<>');;
continue 2;
}
break;
case '`':
case '~':
# fenced code block
if (preg_match('/^([`]{3,}|[~]{3,})[ ]*(\S+)?[ ]*$/', $deindented_line, $matches))
{
$elements []= $element;
$element = array(
'type' => 'fenced_code_block',
'text' => '',
'fence' => $matches[1],
);
isset($matches[2]) and $element['language'] = $matches[2];
continue 2;
}
break;
case '*':
case '+':
case '-':
case '_':
# hr
if (preg_match('/^([-*_])([ ]{0,2}\1){2,}[ ]*$/', $deindented_line))
{
$elements []= $element;
$element = array(
'type' => 'hr',
);
continue 2;
}
# li
if (preg_match('/^([ ]*)[*+-][ ](.*)/', $line, $matches))
{
$elements []= $element;
$element = array(
'type' => 'li',
'ordered' => false,
'indentation' => $matches[1],
'last' => true,
'lines' => array(
preg_replace('/^[ ]{0,4}/', '', $matches[2]),
),
);
continue 2;
}
} }
# Horizontal Rule # li
if (preg_match('/^([-*_])([ ]{0,2}\1){2,}[ ]*$/', $pure_line)) if ($deindented_line[0] <= '9' and $deindented_line >= '0' and preg_match('/^([ ]*)\d+[.][ ](.*)/', $line, $matches))
{
$elements []= $element;
$element = array(
'type' => 'hr',
);
continue;
}
# List Item
if (preg_match('/^([ ]*)(\d+[.]|[*+-])[ ](.*)/', $line, $matches))
{ {
$elements []= $element; $elements []= $element;
$element = array( $element = array(
'type' => 'li', 'type' => 'li',
'ordered' => isset($matches[2][1]), 'ordered' => true,
'indentation' => $matches[1], 'indentation' => $matches[1],
'last' => true, 'last' => true,
'lines' => array( 'lines' => array(
preg_replace('/^[ ]{0,4}/', '', $matches[3]), preg_replace('/^[ ]{0,4}/', '', $matches[2]),
), ),
); );
@ -432,7 +532,7 @@ class Parsedown
$elements []= $element; $elements []= $element;
array_shift($elements); unset($elements[0]);
# #
# ~ # ~
@ -440,10 +540,67 @@ class Parsedown
$markup = ''; $markup = '';
foreach ($elements as $index => $element) foreach ($elements as $element)
{ {
switch ($element['type']) switch ($element['type'])
{ {
case 'p':
$text = $this->parse_span_elements($element['text']);
$text = preg_replace('/[ ]{2}\n/', '<br />'."\n", $text);
if ($context === 'li' and $markup === '')
{
if (isset($element['interrupted']))
{
$markup .= "\n".'<p>'.$text.'</p>'."\n";
}
else
{
$markup .= $text;
}
}
else
{
$markup .= '<p>'.$text.'</p>'."\n";
}
break;
case 'blockquote':
$text = $this->parse_block_elements($element['lines']);
$markup .= '<blockquote>'."\n".$text.'</blockquote>'."\n";
break;
case 'code_block':
case 'fenced_code_block':
$text = htmlentities($element['text'], ENT_NOQUOTES);
strpos($text, "\x1A\\") !== FALSE and $text = strtr($text, $this->escape_sequence_map);
$markup .= '<pre><code>'.$text.'</code></pre>'."\n";
break;
case 'h.':
$text = $this->parse_span_elements($element['text']);
$markup .= '<h'.$element['level'].'>'.$text.'</h'.$element['level'].'>'."\n";
break;
case 'hr':
$markup .= '<hr />'."\n";
break;
case 'li': case 'li':
if (isset($element['ordered'])) # first if (isset($element['ordered'])) # first
@ -466,62 +623,6 @@ class Parsedown
break; break;
case 'p':
$text = $this->parse_inline_elements($element['text']);
$text = preg_replace('/[ ]{2}\n/', '<br />'."\n", $text);
if ($context === 'li' and $index === 0)
{
if (isset($element['interrupted']))
{
$markup .= "\n".'<p>'.$text.'</p>'."\n";
}
else
{
$markup .= $text;
}
}
else
{
$markup .= '<p>'.$text.'</p>'."\n";
}
break;
case 'code':
$text = htmlentities($element['text'], ENT_NOQUOTES);
strpos($text, "\x1A\\") !== FALSE and $text = strtr($text, $this->escape_sequence_map);
$markup .= '<pre><code>'.$text.'</code></pre>'."\n";
break;
case 'blockquote':
$text = $this->parse_block_elements($element['lines']);
$markup .= '<blockquote>'."\n".$text.'</blockquote>'."\n";
break;
case 'h.':
$text = $this->parse_inline_elements($element['text']);
$markup .= '<h'.$element['level'].'>'.$text.'</h'.$element['level'].'>'."\n";
break;
case 'hr':
$markup .= '<hr />'."\n";
break;
default: default:
$markup .= $element['text']."\n"; $markup .= $element['text']."\n";
@ -531,13 +632,13 @@ class Parsedown
return $markup; return $markup;
} }
private function parse_inline_elements($text) private function parse_span_elements($text)
{ {
$map = array(); $map = array();
$index = 0; $index = 0;
# Code Span # code span
if (strpos($text, '`') !== FALSE and preg_match_all('/`(.+?)`/', $text, $matches, PREG_SET_ORDER)) if (strpos($text, '`') !== FALSE and preg_match_all('/`(.+?)`/', $text, $matches, PREG_SET_ORDER))
{ {
@ -546,17 +647,17 @@ class Parsedown
$element_text = $matches[1]; $element_text = $matches[1];
$element_text = htmlentities($element_text, ENT_NOQUOTES); $element_text = htmlentities($element_text, ENT_NOQUOTES);
# Decodes escape sequences. # decodes escape sequences
$this->escape_sequence_map $this->escape_sequence_map
and strpos($element_text, "\x1A") !== FALSE and strpos($element_text, "\x1A") !== FALSE
and $element_text = strtr($element_text, $this->escape_sequence_map); and $element_text = strtr($element_text, $this->escape_sequence_map);
# Composes element. # composes element
$element = '<code>'.$element_text.'</code>'; $element = '<code>'.$element_text.'</code>';
# Encodes element. # encodes element
$code = "\x1A".'$'.$index; $code = "\x1A".'$'.$index;
@ -568,7 +669,7 @@ class Parsedown
} }
} }
# Inline Link / Image # inline link or image
if (strpos($text, '](') !== FALSE and preg_match_all('/(!?)(\[((?:[^\[\]]|(?2))*)\])\((.*?)\)/', $text, $matches, PREG_SET_ORDER)) # inline if (strpos($text, '](') !== FALSE and preg_match_all('/(!?)(\[((?:[^\[\]]|(?2))*)\])\((.*?)\)/', $text, $matches, PREG_SET_ORDER)) # inline
{ {
@ -584,7 +685,7 @@ class Parsedown
} }
else else
{ {
$element_text = $this->parse_inline_elements($matches[3]); $element_text = $this->parse_span_elements($matches[3]);
$element = '<a href="'.$url.'">'.$element_text.'</a>'; $element = '<a href="'.$url.'">'.$element_text.'</a>';
} }
@ -601,7 +702,7 @@ class Parsedown
} }
} }
# Reference(d) Link / Image # reference link or image
if ($this->reference_map and strpos($text, '[') !== FALSE and preg_match_all('/(!?)\[(.+?)\](?:\n?[ ]?\[(.*?)\])?/ms', $text, $matches, PREG_SET_ORDER)) if ($this->reference_map and strpos($text, '[') !== FALSE and preg_match_all('/(!?)\[(.+?)\](?:\n?[ ]?\[(.*?)\])?/ms', $text, $matches, PREG_SET_ORDER))
{ {
@ -625,7 +726,7 @@ class Parsedown
} }
else # anchor else # anchor
{ {
$element_text = $this->parse_inline_elements($matches[2]); $element_text = $this->parse_span_elements($matches[2]);
$element = '<a href="'.$url.'">'.$element_text.'</a>'; $element = '<a href="'.$url.'">'.$element_text.'</a>';
} }
@ -643,7 +744,7 @@ class Parsedown
} }
} }
# Automatic Links # automatic link
if (strpos($text, '<') !== FALSE and preg_match_all('/<((https?|ftp|dict):[^\^\s]+?)>/i', $text, $matches, PREG_SET_ORDER)) if (strpos($text, '<') !== FALSE and preg_match_all('/<((https?|ftp|dict):[^\^\s]+?)>/i', $text, $matches, PREG_SET_ORDER))
{ {

View File

@ -1,8 +1,8 @@
## Parsedown PHP ## Parsedown
Parsedown PHP is a parser for Markdown. It reads Markdown the way people do. First, it breaks texts into lines. Then, it looks at how these lines start and relate to each other. Finally, it looks for special characters to identify inline elements. As a result, Parsedown PHP is (very) fast and consistent. Parsedown is a Markdown parser for PHP. It is fast, consistent and easy to use.
[Home](http://parsedown.org) &middot; [Demo](http://parsedown.org/explorer/) &middot; [Tests](http://parsedown.org/tests/) [Home](http://parsedown.org) &middot; [Demo](http://parsedown.org/explorer/) &middot; [Tests](http://parsedown.org/tests/)
### Installation ### Installation

View File

@ -0,0 +1,5 @@
<pre><code>&lt;?php
$message = 'fenced code block';
echo $message;</code></pre>
<pre><code>tilde</code></pre>

View File

@ -0,0 +1,10 @@
```
<?php
$message = 'fenced code block';
echo $message;
```
~~~
tilde
~~~