1
0
mirror of https://github.com/erusev/parsedown.git synced 2023-08-10 21:13:06 +03:00

Compare commits

...

15 Commits
0.4.8 ... 0.6.0

6 changed files with 447 additions and 338 deletions

View File

@ -4,8 +4,4 @@ php:
- 5.5
- 5.4
- 5.3
- 5.2
matrix:
allow_failures:
- php: 5.2
- 5.2

View File

@ -46,17 +46,17 @@ class Parsedown
function parse($text)
{
# Removes UTF-8 BOM and marker characters.
# removes UTF-8 BOM and marker characters
$text = preg_replace('{^\xEF\xBB\xBF|\x1A}', '', $text);
# Removes \r characters.
# removes \r characters
$text = str_replace("\r\n", "\n", $text);
$text = str_replace("\r", "\n", $text);
# Replaces tabs with spaces.
# replaces tabs with spaces
$text = str_replace("\t", ' ', $text);
# Encodes escape sequences.
# encodes escape sequences
if (strpos($text, '\\') !== FALSE)
{
@ -84,7 +84,7 @@ class Parsedown
$text = $this->parse_block_elements($lines);
# Decodes escape sequences (leaves out backslashes).
# decodes escape sequences
foreach ($this->escape_sequence_map as $code => $escape_sequence)
{
@ -110,28 +110,56 @@ class Parsedown
foreach ($lines as $line)
{
# Block-Level HTML
#
# fenced elements
if ($element['type'] === 'block' and ! isset($element['closed']))
switch ($element['type'])
{
if (preg_match('{<'.$element['subtype'].'>$}', $line)) # <open>
{
$element['depth']++;
}
case 'fenced_code_block':
if (preg_match('{</'.$element['subtype'].'>$}', $line)) # </close>
{
$element['depth'] > 0
? $element['depth']--
: $element['closed'] = true;
}
if ( ! isset($element['closed']))
{
if (preg_match('/^[ ]*'.$element['fence'][0].'{3,}[ ]*$/', $line))
{
$element['closed'] = true;
}
else
{
$element['text'] !== '' and $element['text'] .= "\n";
$element['text'] .= "\n".$line;
$element['text'] .= $line;
}
continue;
continue 2;
}
break;
case 'markup':
if ( ! isset($element['closed']))
{
if (preg_match('{<'.$element['subtype'].'>$}', $line)) # opening tag
{
$element['depth']++;
}
if (preg_match('{</'.$element['subtype'].'>$}', $line)) # closing tag
{
$element['depth'] > 0
? $element['depth']--
: $element['closed'] = true;
}
$element['text'] .= "\n".$line;
continue 2;
}
break;
}
# Empty
# *
if ($line === '')
{
@ -140,269 +168,330 @@ class Parsedown
continue;
}
# Lazy Blockquote
#
# composite elements
if ($element['type'] === 'blockquote' and ! isset($element['interrupted']))
switch ($element['type'])
{
$line = preg_replace('/^[ ]*>[ ]?/', '', $line);
case 'blockquote':
$element['lines'] []= $line;
continue;
}
# Lazy List Item
if ($element['type'] === 'li')
{
if (preg_match('/^([ ]{0,3})(\d+[.]|[*+-])[ ](.*)/', $line, $matches))
{
if ($element['indentation'] !== $matches[1])
if ( ! isset($element['interrupted']))
{
$line = preg_replace('/^[ ]*>[ ]?/', '', $line);
$element['lines'] []= $line;
continue 2;
}
break;
case 'li':
if (preg_match('/^([ ]{0,3})(\d+[.]|[*+-])[ ](.*)/', $line, $matches))
{
if ($element['indentation'] !== $matches[1])
{
$element['lines'] []= $line;
}
else
{
unset($element['last']);
$elements []= $element;
$element = array(
'type' => 'li',
'indentation' => $matches[1],
'last' => true,
'lines' => array(
preg_replace('/^[ ]{0,4}/', '', $matches[3]),
),
);
}
continue 2;
}
if (isset($element['interrupted']))
{
if ($line[0] === ' ')
{
$element['lines'] []= '';
$line = preg_replace('/^[ ]{0,4}/', '', $line);
$element['lines'] []= $line;
continue 2;
}
}
else
{
unset($element['last']);
$line = preg_replace('/^[ ]{0,4}/', '', $line);
$element['lines'] []= $line;
continue 2;
}
break;
}
#
# indentation sensitive types
$deindented_line = $line;
switch ($line[0])
{
case ' ':
# ~
$deindented_line = ltrim($line);
if ($deindented_line === '')
{
continue 2;
}
# code block
if (preg_match('/^[ ]{4}(.*)/', $line, $matches))
{
if ($element['type'] === 'code_block')
{
if (isset($element['interrupted']))
{
$element['text'] .= "\n";
unset ($element['interrupted']);
}
$element['text'] .= "\n".$matches[1];
}
else
{
$elements []= $element;
$element = array(
'type' => 'code_block',
'text' => $matches[1],
);
}
continue 2;
}
break;
case '#':
# atx heading (#)
if (preg_match('/^(#{1,6})[ ]*(.+?)[ ]*#*$/', $line, $matches))
{
$elements []= $element;
$level = strlen($matches[1]);
$element = array(
'type' => 'h.',
'text' => $matches[2],
'level' => $level,
);
continue 2;
}
break;
case '-':
# setext heading (---)
if ($line[0] === '-' and $element['type'] === 'p' and ! isset($element['interrupted']) and preg_match('/^[-]+[ ]*$/', $line))
{
$element['type'] = 'h.';
$element['level'] = 2;
continue 2;
}
break;
case '=':
# setext heading (===)
if ($line[0] === '=' and $element['type'] === 'p' and ! isset($element['interrupted']) and preg_match('/^[=]+[ ]*$/', $line))
{
$element['type'] = 'h.';
$element['level'] = 1;
continue 2;
}
break;
}
#
# indentation insensitive types
switch ($deindented_line[0])
{
case '<':
# self-closing tag
if (preg_match('{^<.+?/>$}', $deindented_line))
{
$elements []= $element;
$element = array(
'type' => '',
'text' => $deindented_line,
);
continue 2;
}
# opening tag
if (preg_match('{^<(\w+)(?:[ ].*?)?>}', $deindented_line, $matches))
{
$elements []= $element;
$element = array(
'type' => 'markup',
'subtype' => strtolower($matches[1]),
'text' => $deindented_line,
'depth' => 0,
);
preg_match('{</'.$matches[1].'>\s*$}', $deindented_line) and $element['closed'] = true;
continue 2;
}
break;
case '>':
# quote
if (preg_match('/^>[ ]?(.*)/', $deindented_line, $matches))
{
$elements []= $element;
$element = array(
'type' => 'blockquote',
'lines' => array(
$matches[1],
),
);
continue 2;
}
break;
case '[':
# reference
if (preg_match('/^\[(.+?)\]:[ ]*([^ ]+)/', $deindented_line, $matches))
{
$label = strtolower($matches[1]);
$this->reference_map[$label] = trim($matches[2], '<>');;
continue 2;
}
break;
case '`':
case '~':
# fenced code block
if (preg_match('/^([`]{3,}|[~]{3,})[ ]*(\S+)?[ ]*$/', $deindented_line, $matches))
{
$elements []= $element;
$element = array(
'type' => 'fenced_code_block',
'text' => '',
'fence' => $matches[1],
);
isset($matches[2]) and $element['language'] = $matches[2];
continue 2;
}
break;
case '*':
case '+':
case '-':
case '_':
# hr
if (preg_match('/^([-*_])([ ]{0,2}\1){2,}[ ]*$/', $deindented_line))
{
$elements []= $element;
$element = array(
'type' => 'hr',
);
continue 2;
}
# li
if (preg_match('/^([ ]*)[*+-][ ](.*)/', $line, $matches))
{
$elements []= $element;
$element = array(
'type' => 'li',
'ordered' => false,
'indentation' => $matches[1],
'last' => true,
'lines' => array(
preg_replace('/^[ ]{0,4}/', '', $matches[3]),
preg_replace('/^[ ]{0,4}/', '', $matches[2]),
),
);
continue 2;
}
continue;
}
if (isset($element['interrupted']))
{
if ($line[0] === ' ')
{
$element['lines'] []= '';
$line = preg_replace('/^[ ]{0,4}/', '', $line);;
$element['lines'] []= $line;
continue;
}
}
else
{
$line = preg_replace('/^[ ]{0,4}/', '', $line);;
$element['lines'] []= $line;
continue;
}
}
# Quick Paragraph
# li
if ($line[0] >= 'a' or $line[0] >= 'A' and $line[0] <= 'Z')
{
goto paragraph;
}
# Code Block
if ($line[0] === ' ' and preg_match('/^[ ]{4}(.*)/', $line, $matches))
{
if (trim($line) === '')
{
continue;
}
if ($element['type'] === 'code')
{
if (isset($element['interrupted']))
{
$element['text'] .= "\n";
unset ($element['interrupted']);
}
$element['text'] .= "\n".$matches[1];
}
else
{
$elements []= $element;
$element = array(
'type' => 'code',
'text' => $matches[1],
);
}
continue;
}
# Setext Header (---)
if ($line[0] === '-' and $element['type'] === 'p' and ! isset($element['interrupted']) and preg_match('/^[-]+[ ]*$/', $line))
{
$element['type'] = 'h.';
$element['level'] = 2;
continue;
}
# Atx Header (#)
if ($line[0] === '#' and preg_match('/^(#{1,6})[ ]*(.+?)[ ]*#*$/', $line, $matches))
{
$elements []= $element;
$level = strlen($matches[1]);
$element = array(
'type' => 'h.',
'text' => $matches[2],
'level' => $level,
);
continue;
}
# Setext Header (===)
if ($line[0] === '=' and $element['type'] === 'p' and ! isset($element['interrupted']) and preg_match('/^[=]+[ ]*$/', $line))
{
$element['type'] = 'h.';
$element['level'] = 1;
continue;
}
# ~
$pure_line = $line[0] !== ' ' ? $line : ltrim($line);
if ($pure_line === '')
{
continue;
}
# Link Reference
if ($pure_line[0] === '[' and preg_match('/^\[(.+?)\]:[ ]*([^ ]+)/', $pure_line, $matches))
{
$label = strtolower($matches[1]);
$url = trim($matches[2], '<>');
$this->reference_map[$label] = $url;
continue;
}
# Blockquote
if ($pure_line[0] === '>' and preg_match('/^>[ ]?(.*)/', $pure_line, $matches))
{
if ($element['type'] === 'blockquote')
{
if (isset($element['interrupted']))
{
$element['lines'] []= '';
unset($element['interrupted']);
}
$element['lines'] []= $matches[1];
}
else
{
$elements []= $element;
$element = array(
'type' => 'blockquote',
'lines' => array(
$matches[1],
),
);
}
continue;
}
# HTML
if ($pure_line[0] === '<')
{
# Block-Level HTML <self-closing/>
if (preg_match('{^<.+?/>$}', $pure_line))
{
$elements []= $element;
$element = array(
'type' => '',
'text' => $pure_line,
);
continue;
}
# Block-Level HTML <open>
if (preg_match('{^<(\w+)(?:[ ].*?)?>}', $pure_line, $matches))
{
$elements []= $element;
$element = array(
'type' => 'block',
'subtype' => strtolower($matches[1]),
'text' => $pure_line,
'depth' => 0,
);
preg_match('{</'.$matches[1].'>\s*$}', $pure_line) and $element['closed'] = true;
continue;
}
}
# Horizontal Rule
if (preg_match('/^([-*_])([ ]{0,2}\1){2,}[ ]*$/', $pure_line))
{
$elements []= $element;
$element = array(
'type' => 'hr',
);
continue;
}
# List Item
if (preg_match('/^([ ]*)(\d+[.]|[*+-])[ ](.*)/', $line, $matches))
if ($deindented_line[0] <= '9' and $deindented_line >= '0' and preg_match('/^([ ]*)\d+[.][ ](.*)/', $line, $matches))
{
$elements []= $element;
$element = array(
'type' => 'li',
'ordered' => isset($matches[2][1]),
'ordered' => true,
'indentation' => $matches[1],
'last' => true,
'lines' => array(
preg_replace('/^[ ]{0,4}/', '', $matches[3]),
preg_replace('/^[ ]{0,4}/', '', $matches[2]),
),
);
continue;
}
# ~
paragraph:
# paragraph
if ($element['type'] === 'p')
{
@ -432,7 +521,7 @@ class Parsedown
$elements []= $element;
array_shift($elements);
unset($elements[0]);
#
# ~
@ -440,10 +529,67 @@ class Parsedown
$markup = '';
foreach ($elements as $index => $element)
foreach ($elements as $element)
{
switch ($element['type'])
{
case 'p':
$text = $this->parse_span_elements($element['text']);
$text = preg_replace('/[ ]{2}\n/', '<br />'."\n", $text);
if ($context === 'li' and $markup === '')
{
if (isset($element['interrupted']))
{
$markup .= "\n".'<p>'.$text.'</p>'."\n";
}
else
{
$markup .= $text;
}
}
else
{
$markup .= '<p>'.$text.'</p>'."\n";
}
break;
case 'blockquote':
$text = $this->parse_block_elements($element['lines']);
$markup .= '<blockquote>'."\n".$text.'</blockquote>'."\n";
break;
case 'code_block':
case 'fenced_code_block':
$text = htmlentities($element['text'], ENT_NOQUOTES);
strpos($text, "\x1A\\") !== FALSE and $text = strtr($text, $this->escape_sequence_map);
$markup .= '<pre><code>'.$text.'</code></pre>'."\n";
break;
case 'h.':
$text = $this->parse_span_elements($element['text']);
$markup .= '<h'.$element['level'].'>'.$text.'</h'.$element['level'].'>'."\n";
break;
case 'hr':
$markup .= '<hr />'."\n";
break;
case 'li':
if (isset($element['ordered'])) # first
@ -466,62 +612,6 @@ class Parsedown
break;
case 'p':
$text = $this->parse_inline_elements($element['text']);
$text = preg_replace('/[ ]{2}\n/', '<br />'."\n", $text);
if ($context === 'li' and $index === 0)
{
if (isset($element['interrupted']))
{
$markup .= "\n".'<p>'.$text.'</p>'."\n";
}
else
{
$markup .= $text;
}
}
else
{
$markup .= '<p>'.$text.'</p>'."\n";
}
break;
case 'code':
$text = htmlentities($element['text'], ENT_NOQUOTES);
strpos($text, "\x1A\\") !== FALSE and $text = strtr($text, $this->escape_sequence_map);
$markup .= '<pre><code>'.$text.'</code></pre>'."\n";
break;
case 'blockquote':
$text = $this->parse_block_elements($element['lines']);
$markup .= '<blockquote>'."\n".$text.'</blockquote>'."\n";
break;
case 'h.':
$text = $this->parse_inline_elements($element['text']);
$markup .= '<h'.$element['level'].'>'.$text.'</h'.$element['level'].'>'."\n";
break;
case 'hr':
$markup .= '<hr />'."\n";
break;
default:
$markup .= $element['text']."\n";
@ -531,13 +621,13 @@ class Parsedown
return $markup;
}
private function parse_inline_elements($text)
private function parse_span_elements($text)
{
$map = array();
$index = 0;
# Code Span
# code span
if (strpos($text, '`') !== FALSE and preg_match_all('/`(.+?)`/', $text, $matches, PREG_SET_ORDER))
{
@ -546,17 +636,17 @@ class Parsedown
$element_text = $matches[1];
$element_text = htmlentities($element_text, ENT_NOQUOTES);
# Decodes escape sequences.
# decodes escape sequences
$this->escape_sequence_map
and strpos($element_text, "\x1A") !== FALSE
and $element_text = strtr($element_text, $this->escape_sequence_map);
# Composes element.
# composes element
$element = '<code>'.$element_text.'</code>';
# Encodes element.
# encodes element
$code = "\x1A".'$'.$index;
@ -568,7 +658,7 @@ class Parsedown
}
}
# Inline Link / Image
# inline link or image
if (strpos($text, '](') !== FALSE and preg_match_all('/(!?)(\[((?:[^\[\]]|(?2))*)\])\((.*?)\)/', $text, $matches, PREG_SET_ORDER)) # inline
{
@ -584,7 +674,7 @@ class Parsedown
}
else
{
$element_text = $this->parse_inline_elements($matches[3]);
$element_text = $this->parse_span_elements($matches[3]);
$element = '<a href="'.$url.'">'.$element_text.'</a>';
}
@ -601,7 +691,7 @@ class Parsedown
}
}
# Reference(d) Link / Image
# reference link or image
if ($this->reference_map and strpos($text, '[') !== FALSE and preg_match_all('/(!?)\[(.+?)\](?:\n?[ ]?\[(.*?)\])?/ms', $text, $matches, PREG_SET_ORDER))
{
@ -625,7 +715,7 @@ class Parsedown
}
else # anchor
{
$element_text = $this->parse_inline_elements($matches[2]);
$element_text = $this->parse_span_elements($matches[2]);
$element = '<a href="'.$url.'">'.$element_text.'</a>';
}
@ -643,7 +733,7 @@ class Parsedown
}
}
# Automatic Links
# automatic link
if (strpos($text, '<') !== FALSE and preg_match_all('/<((https?|ftp|dict):[^\^\s]+?)>/i', $text, $matches, PREG_SET_ORDER))
{

View File

@ -1,8 +1,8 @@
## Parsedown PHP
## Parsedown
Parsedown PHP is a parser for Markdown. It reads Markdown the way people do. First, it breaks texts into lines. Then, it looks at how these lines start and relate to each other. Finally, it looks for special characters to identify inline elements. As a result, Parsedown PHP is (very) fast and consistent.
Parsedown is a Markdown parser for PHP. It is fast, consistent and easy to use.
[Home](http://parsedown.org) &middot; [Demo](http://parsedown.org/explorer/) &middot; [Tests](http://parsedown.org/tests/)
[Home](http://parsedown.org) &middot; [Demo](http://parsedown.org/explorer/) &middot; [Tests](http://parsedown.org/tests/)
### Installation

View File

@ -5,7 +5,7 @@ include 'Parsedown.php';
class Test extends PHPUnit_Framework_TestCase
{
const provider_dir = 'data/';
/**
* @dataProvider provider
*/
@ -15,33 +15,41 @@ class Test extends PHPUnit_Framework_TestCase
$this->assertEquals($expected_markup, $actual_markup);
}
function provider()
{
$provider = array();
$DirectoryIterator = new DirectoryIterator(__DIR__ . '/' . self::provider_dir);
$path = dirname(__FILE__).'/';
$DirectoryIterator = new DirectoryIterator($path . '/' . self::provider_dir);
foreach ($DirectoryIterator as $Item)
{
if ($Item->isFile() and $Item->getExtension() === 'md')
if ($Item->isFile())
{
$filename = $Item->getFilename();
$extension = pathinfo($filename, PATHINFO_EXTENSION);
if ($extension !== 'md')
continue;
$basename = $Item->getBasename('.md');
$markdown = file_get_contents(__DIR__ . '/' . self::provider_dir . $basename . '.md');
$markdown = file_get_contents($path . '/' . self::provider_dir . $basename . '.md');
if (!$markdown)
continue;
$expected_markup = file_get_contents(__DIR__ . '/' . self::provider_dir . $basename . '.html');
$expected_markup = file_get_contents($path . '/' . self::provider_dir . $basename . '.html');
$expected_markup = str_replace("\r\n", "\n", $expected_markup);
$expected_markup = str_replace("\r", "\n", $expected_markup);
$provider [] = array($markdown, $expected_markup);
}
}
return $provider;
}
}
}

View File

@ -0,0 +1,5 @@
<pre><code>&lt;?php
$message = 'fenced code block';
echo $message;</code></pre>
<pre><code>tilde</code></pre>

View File

@ -0,0 +1,10 @@
```
<?php
$message = 'fenced code block';
echo $message;
```
~~~
tilde
~~~