1
0
mirror of https://github.com/erusev/parsedown.git synced 2023-08-10 21:13:06 +03:00

Compare commits

...

15 Commits
0.4.8 ... 0.6.0

6 changed files with 447 additions and 338 deletions

View File

@ -5,7 +5,3 @@ php:
- 5.4
- 5.3
- 5.2
matrix:
allow_failures:
- php: 5.2

View File

@ -46,17 +46,17 @@ class Parsedown
function parse($text)
{
# Removes UTF-8 BOM and marker characters.
# removes UTF-8 BOM and marker characters
$text = preg_replace('{^\xEF\xBB\xBF|\x1A}', '', $text);
# Removes \r characters.
# removes \r characters
$text = str_replace("\r\n", "\n", $text);
$text = str_replace("\r", "\n", $text);
# Replaces tabs with spaces.
# replaces tabs with spaces
$text = str_replace("\t", ' ', $text);
# Encodes escape sequences.
# encodes escape sequences
if (strpos($text, '\\') !== FALSE)
{
@ -84,7 +84,7 @@ class Parsedown
$text = $this->parse_block_elements($lines);
# Decodes escape sequences (leaves out backslashes).
# decodes escape sequences
foreach ($this->escape_sequence_map as $code => $escape_sequence)
{
@ -110,16 +110,41 @@ class Parsedown
foreach ($lines as $line)
{
# Block-Level HTML
#
# fenced elements
if ($element['type'] === 'block' and ! isset($element['closed']))
switch ($element['type'])
{
if (preg_match('{<'.$element['subtype'].'>$}', $line)) # <open>
case 'fenced_code_block':
if ( ! isset($element['closed']))
{
if (preg_match('/^[ ]*'.$element['fence'][0].'{3,}[ ]*$/', $line))
{
$element['closed'] = true;
}
else
{
$element['text'] !== '' and $element['text'] .= "\n";
$element['text'] .= $line;
}
continue 2;
}
break;
case 'markup':
if ( ! isset($element['closed']))
{
if (preg_match('{<'.$element['subtype'].'>$}', $line)) # opening tag
{
$element['depth']++;
}
if (preg_match('{</'.$element['subtype'].'>$}', $line)) # </close>
if (preg_match('{</'.$element['subtype'].'>$}', $line)) # closing tag
{
$element['depth'] > 0
? $element['depth']--
@ -128,10 +153,13 @@ class Parsedown
$element['text'] .= "\n".$line;
continue;
continue 2;
}
# Empty
break;
}
# *
if ($line === '')
{
@ -140,21 +168,26 @@ class Parsedown
continue;
}
# Lazy Blockquote
#
# composite elements
if ($element['type'] === 'blockquote' and ! isset($element['interrupted']))
switch ($element['type'])
{
case 'blockquote':
if ( ! isset($element['interrupted']))
{
$line = preg_replace('/^[ ]*>[ ]?/', '', $line);
$element['lines'] []= $line;
continue;
continue 2;
}
# Lazy List Item
break;
case 'li':
if ($element['type'] === 'li')
{
if (preg_match('/^([ ]{0,3})(\d+[.]|[*+-])[ ](.*)/', $line, $matches))
{
if ($element['indentation'] !== $matches[1])
@ -177,7 +210,7 @@ class Parsedown
);
}
continue;
continue 2;
}
if (isset($element['interrupted']))
@ -186,40 +219,48 @@ class Parsedown
{
$element['lines'] []= '';
$line = preg_replace('/^[ ]{0,4}/', '', $line);;
$line = preg_replace('/^[ ]{0,4}/', '', $line);
$element['lines'] []= $line;
continue;
continue 2;
}
}
else
{
$line = preg_replace('/^[ ]{0,4}/', '', $line);;
$line = preg_replace('/^[ ]{0,4}/', '', $line);
$element['lines'] []= $line;
continue;
}
continue 2;
}
# Quick Paragraph
break;
}
if ($line[0] >= 'a' or $line[0] >= 'A' and $line[0] <= 'Z')
#
# indentation sensitive types
$deindented_line = $line;
switch ($line[0])
{
goto paragraph;
case ' ':
# ~
$deindented_line = ltrim($line);
if ($deindented_line === '')
{
continue 2;
}
# Code Block
# code block
if ($line[0] === ' ' and preg_match('/^[ ]{4}(.*)/', $line, $matches))
if (preg_match('/^[ ]{4}(.*)/', $line, $matches))
{
if (trim($line) === '')
{
continue;
}
if ($element['type'] === 'code')
if ($element['type'] === 'code_block')
{
if (isset($element['interrupted']))
{
@ -235,27 +276,21 @@ class Parsedown
$elements []= $element;
$element = array(
'type' => 'code',
'type' => 'code_block',
'text' => $matches[1],
);
}
continue;
continue 2;
}
# Setext Header (---)
break;
if ($line[0] === '-' and $element['type'] === 'p' and ! isset($element['interrupted']) and preg_match('/^[-]+[ ]*$/', $line))
{
$element['type'] = 'h.';
$element['level'] = 2;
case '#':
continue;
}
# atx heading (#)
# Atx Header (#)
if ($line[0] === '#' and preg_match('/^(#{1,6})[ ]*(.+?)[ ]*#*$/', $line, $matches))
if (preg_match('/^(#{1,6})[ ]*(.+?)[ ]*#*$/', $line, $matches))
{
$elements []= $element;
@ -267,56 +302,86 @@ class Parsedown
'level' => $level,
);
continue;
continue 2;
}
# Setext Header (===)
break;
case '-':
# setext heading (---)
if ($line[0] === '-' and $element['type'] === 'p' and ! isset($element['interrupted']) and preg_match('/^[-]+[ ]*$/', $line))
{
$element['type'] = 'h.';
$element['level'] = 2;
continue 2;
}
break;
case '=':
# setext heading (===)
if ($line[0] === '=' and $element['type'] === 'p' and ! isset($element['interrupted']) and preg_match('/^[=]+[ ]*$/', $line))
{
$element['type'] = 'h.';
$element['level'] = 1;
continue;
continue 2;
}
# ~
$pure_line = $line[0] !== ' ' ? $line : ltrim($line);
if ($pure_line === '')
{
continue;
break;
}
# Link Reference
#
# indentation insensitive types
if ($pure_line[0] === '[' and preg_match('/^\[(.+?)\]:[ ]*([^ ]+)/', $pure_line, $matches))
switch ($deindented_line[0])
{
$label = strtolower($matches[1]);
$url = trim($matches[2], '<>');
case '<':
$this->reference_map[$label] = $url;
# self-closing tag
continue;
if (preg_match('{^<.+?/>$}', $deindented_line))
{
$elements []= $element;
$element = array(
'type' => '',
'text' => $deindented_line,
);
continue 2;
}
# Blockquote
# opening tag
if ($pure_line[0] === '>' and preg_match('/^>[ ]?(.*)/', $pure_line, $matches))
if (preg_match('{^<(\w+)(?:[ ].*?)?>}', $deindented_line, $matches))
{
if ($element['type'] === 'blockquote')
{
if (isset($element['interrupted']))
{
$element['lines'] []= '';
$elements []= $element;
unset($element['interrupted']);
$element = array(
'type' => 'markup',
'subtype' => strtolower($matches[1]),
'text' => $deindented_line,
'depth' => 0,
);
preg_match('{</'.$matches[1].'>\s*$}', $deindented_line) and $element['closed'] = true;
continue 2;
}
$element['lines'] []= $matches[1];
}
else
break;
case '>':
# quote
if (preg_match('/^>[ ]?(.*)/', $deindented_line, $matches))
{
$elements []= $element;
@ -326,51 +391,57 @@ class Parsedown
$matches[1],
),
);
continue 2;
}
continue;
}
break;
# HTML
case '[':
if ($pure_line[0] === '<')
# reference
if (preg_match('/^\[(.+?)\]:[ ]*([^ ]+)/', $deindented_line, $matches))
{
# Block-Level HTML <self-closing/>
$label = strtolower($matches[1]);
if (preg_match('{^<.+?/>$}', $pure_line))
$this->reference_map[$label] = trim($matches[2], '<>');;
continue 2;
}
break;
case '`':
case '~':
# fenced code block
if (preg_match('/^([`]{3,}|[~]{3,})[ ]*(\S+)?[ ]*$/', $deindented_line, $matches))
{
$elements []= $element;
$element = array(
'type' => '',
'text' => $pure_line,
'type' => 'fenced_code_block',
'text' => '',
'fence' => $matches[1],
);
continue;
isset($matches[2]) and $element['language'] = $matches[2];
continue 2;
}
# Block-Level HTML <open>
break;
if (preg_match('{^<(\w+)(?:[ ].*?)?>}', $pure_line, $matches))
{
$elements []= $element;
case '*':
case '+':
case '-':
case '_':
$element = array(
'type' => 'block',
'subtype' => strtolower($matches[1]),
'text' => $pure_line,
'depth' => 0,
);
# hr
preg_match('{</'.$matches[1].'>\s*$}', $pure_line) and $element['closed'] = true;
continue;
}
}
# Horizontal Rule
if (preg_match('/^([-*_])([ ]{0,2}\1){2,}[ ]*$/', $pure_line))
if (preg_match('/^([-*_])([ ]{0,2}\1){2,}[ ]*$/', $deindented_line))
{
$elements []= $element;
@ -378,31 +449,49 @@ class Parsedown
'type' => 'hr',
);
continue;
continue 2;
}
# List Item
# li
if (preg_match('/^([ ]*)(\d+[.]|[*+-])[ ](.*)/', $line, $matches))
if (preg_match('/^([ ]*)[*+-][ ](.*)/', $line, $matches))
{
$elements []= $element;
$element = array(
'type' => 'li',
'ordered' => isset($matches[2][1]),
'ordered' => false,
'indentation' => $matches[1],
'last' => true,
'lines' => array(
preg_replace('/^[ ]{0,4}/', '', $matches[3]),
preg_replace('/^[ ]{0,4}/', '', $matches[2]),
),
);
continue 2;
}
}
# li
if ($deindented_line[0] <= '9' and $deindented_line >= '0' and preg_match('/^([ ]*)\d+[.][ ](.*)/', $line, $matches))
{
$elements []= $element;
$element = array(
'type' => 'li',
'ordered' => true,
'indentation' => $matches[1],
'last' => true,
'lines' => array(
preg_replace('/^[ ]{0,4}/', '', $matches[2]),
),
);
continue;
}
# ~
paragraph:
# paragraph
if ($element['type'] === 'p')
{
@ -432,7 +521,7 @@ class Parsedown
$elements []= $element;
array_shift($elements);
unset($elements[0]);
#
# ~
@ -440,10 +529,67 @@ class Parsedown
$markup = '';
foreach ($elements as $index => $element)
foreach ($elements as $element)
{
switch ($element['type'])
{
case 'p':
$text = $this->parse_span_elements($element['text']);
$text = preg_replace('/[ ]{2}\n/', '<br />'."\n", $text);
if ($context === 'li' and $markup === '')
{
if (isset($element['interrupted']))
{
$markup .= "\n".'<p>'.$text.'</p>'."\n";
}
else
{
$markup .= $text;
}
}
else
{
$markup .= '<p>'.$text.'</p>'."\n";
}
break;
case 'blockquote':
$text = $this->parse_block_elements($element['lines']);
$markup .= '<blockquote>'."\n".$text.'</blockquote>'."\n";
break;
case 'code_block':
case 'fenced_code_block':
$text = htmlentities($element['text'], ENT_NOQUOTES);
strpos($text, "\x1A\\") !== FALSE and $text = strtr($text, $this->escape_sequence_map);
$markup .= '<pre><code>'.$text.'</code></pre>'."\n";
break;
case 'h.':
$text = $this->parse_span_elements($element['text']);
$markup .= '<h'.$element['level'].'>'.$text.'</h'.$element['level'].'>'."\n";
break;
case 'hr':
$markup .= '<hr />'."\n";
break;
case 'li':
if (isset($element['ordered'])) # first
@ -466,62 +612,6 @@ class Parsedown
break;
case 'p':
$text = $this->parse_inline_elements($element['text']);
$text = preg_replace('/[ ]{2}\n/', '<br />'."\n", $text);
if ($context === 'li' and $index === 0)
{
if (isset($element['interrupted']))
{
$markup .= "\n".'<p>'.$text.'</p>'."\n";
}
else
{
$markup .= $text;
}
}
else
{
$markup .= '<p>'.$text.'</p>'."\n";
}
break;
case 'code':
$text = htmlentities($element['text'], ENT_NOQUOTES);
strpos($text, "\x1A\\") !== FALSE and $text = strtr($text, $this->escape_sequence_map);
$markup .= '<pre><code>'.$text.'</code></pre>'."\n";
break;
case 'blockquote':
$text = $this->parse_block_elements($element['lines']);
$markup .= '<blockquote>'."\n".$text.'</blockquote>'."\n";
break;
case 'h.':
$text = $this->parse_inline_elements($element['text']);
$markup .= '<h'.$element['level'].'>'.$text.'</h'.$element['level'].'>'."\n";
break;
case 'hr':
$markup .= '<hr />'."\n";
break;
default:
$markup .= $element['text']."\n";
@ -531,13 +621,13 @@ class Parsedown
return $markup;
}
private function parse_inline_elements($text)
private function parse_span_elements($text)
{
$map = array();
$index = 0;
# Code Span
# code span
if (strpos($text, '`') !== FALSE and preg_match_all('/`(.+?)`/', $text, $matches, PREG_SET_ORDER))
{
@ -546,17 +636,17 @@ class Parsedown
$element_text = $matches[1];
$element_text = htmlentities($element_text, ENT_NOQUOTES);
# Decodes escape sequences.
# decodes escape sequences
$this->escape_sequence_map
and strpos($element_text, "\x1A") !== FALSE
and $element_text = strtr($element_text, $this->escape_sequence_map);
# Composes element.
# composes element
$element = '<code>'.$element_text.'</code>';
# Encodes element.
# encodes element
$code = "\x1A".'$'.$index;
@ -568,7 +658,7 @@ class Parsedown
}
}
# Inline Link / Image
# inline link or image
if (strpos($text, '](') !== FALSE and preg_match_all('/(!?)(\[((?:[^\[\]]|(?2))*)\])\((.*?)\)/', $text, $matches, PREG_SET_ORDER)) # inline
{
@ -584,7 +674,7 @@ class Parsedown
}
else
{
$element_text = $this->parse_inline_elements($matches[3]);
$element_text = $this->parse_span_elements($matches[3]);
$element = '<a href="'.$url.'">'.$element_text.'</a>';
}
@ -601,7 +691,7 @@ class Parsedown
}
}
# Reference(d) Link / Image
# reference link or image
if ($this->reference_map and strpos($text, '[') !== FALSE and preg_match_all('/(!?)\[(.+?)\](?:\n?[ ]?\[(.*?)\])?/ms', $text, $matches, PREG_SET_ORDER))
{
@ -625,7 +715,7 @@ class Parsedown
}
else # anchor
{
$element_text = $this->parse_inline_elements($matches[2]);
$element_text = $this->parse_span_elements($matches[2]);
$element = '<a href="'.$url.'">'.$element_text.'</a>';
}
@ -643,7 +733,7 @@ class Parsedown
}
}
# Automatic Links
# automatic link
if (strpos($text, '<') !== FALSE and preg_match_all('/<((https?|ftp|dict):[^\^\s]+?)>/i', $text, $matches, PREG_SET_ORDER))
{

View File

@ -1,6 +1,6 @@
## Parsedown PHP
## Parsedown
Parsedown PHP is a parser for Markdown. It reads Markdown the way people do. First, it breaks texts into lines. Then, it looks at how these lines start and relate to each other. Finally, it looks for special characters to identify inline elements. As a result, Parsedown PHP is (very) fast and consistent.
Parsedown is a Markdown parser for PHP. It is fast, consistent and easy to use.
[Home](http://parsedown.org) &middot; [Demo](http://parsedown.org/explorer/) &middot; [Tests](http://parsedown.org/tests/)

View File

@ -20,20 +20,29 @@ class Test extends PHPUnit_Framework_TestCase
{
$provider = array();
$DirectoryIterator = new DirectoryIterator(__DIR__ . '/' . self::provider_dir);
$path = dirname(__FILE__).'/';
$DirectoryIterator = new DirectoryIterator($path . '/' . self::provider_dir);
foreach ($DirectoryIterator as $Item)
{
if ($Item->isFile() and $Item->getExtension() === 'md')
if ($Item->isFile())
{
$filename = $Item->getFilename();
$extension = pathinfo($filename, PATHINFO_EXTENSION);
if ($extension !== 'md')
continue;
$basename = $Item->getBasename('.md');
$markdown = file_get_contents(__DIR__ . '/' . self::provider_dir . $basename . '.md');
$markdown = file_get_contents($path . '/' . self::provider_dir . $basename . '.md');
if (!$markdown)
continue;
$expected_markup = file_get_contents(__DIR__ . '/' . self::provider_dir . $basename . '.html');
$expected_markup = file_get_contents($path . '/' . self::provider_dir . $basename . '.html');
$expected_markup = str_replace("\r\n", "\n", $expected_markup);
$expected_markup = str_replace("\r", "\n", $expected_markup);
@ -44,4 +53,3 @@ class Test extends PHPUnit_Framework_TestCase
return $provider;
}
}

View File

@ -0,0 +1,5 @@
<pre><code>&lt;?php
$message = 'fenced code block';
echo $message;</code></pre>
<pre><code>tilde</code></pre>

View File

@ -0,0 +1,10 @@
```
<?php
$message = 'fenced code block';
echo $message;
```
~~~
tilde
~~~