1
0
mirror of https://github.com/erusev/parsedown.git synced 2023-08-10 21:13:06 +03:00

Compare commits

..

50 Commits
0.3.0 ... 0.7.2

Author SHA1 Message Date
3225c66863 ***strong em** inside of em* should produce valid markup 2013-11-23 13:19:06 +02:00
d6dc5ba25b update introduction text to match website 2013-11-23 09:26:44 +02:00
f5451a9eff Merge pull request #37 from hkdobrev/htmlspecialshars-utf8 2013-11-22 13:23:21 -08:00
849a89b121 Use UTF-8 encoding for htmlspecialchars. See #36.
Prior to PHP 5.4.0, the default encoding for `htmlentities()`
and `htmlspecialchars()` was "ISO-8859-1". From PHP 5.4 onward it is "UTF-8".

This ensures that the right encoding is always used, regardless of the PHP version
and the locale settings.
2013-11-22 23:06:20 +02:00
28064a63b3 simplify encoding of special characters 2013-11-22 21:57:21 +02:00
800aac5b56 Merge pull request #36 from josephok/patch-1 2013-11-22 11:21:38 -08:00
b15d40e8a3 Update Parsedown.php
Changes htmlentities() to htmlspecialchars(). htmlentities() has problems encoding non-English text (such as Chinese).
2013-11-22 23:05:26 +08:00
ddc5b7e2dd implement URL auto-linking 2013-11-22 00:20:45 +02:00
5a563008aa implement GFM strikethrough 2013-11-21 13:39:00 +02:00
b6f795962f resolve #21 2013-11-21 00:59:30 +02:00
cdb2646063 update readme to match website 2013-11-20 23:10:03 +02:00
e3b8026e39 build should no longer allow failures 2013-11-18 22:39:44 +02:00
d96f668c42 update test case to make it run on PHP 5.2 2013-11-18 22:29:15 +02:00
96bf75bd91 remove goto to provide support for PHP 5.2 2013-11-18 21:42:00 +02:00
67b51794d8 implement fenced code block to resolve #2 2013-11-17 16:52:31 +02:00
a9d6232705 array_shift » unset to simplify code base and improve performance 2013-11-17 13:21:49 +02:00
b91629ad94 organize evaluation blocks into switch statements to improve code readability 2013-11-17 12:48:01 +02:00
24d300ea5d $pure_line » $deindented_line 2013-11-17 01:52:40 +02:00
d54712b989 simplify comments 2013-11-17 01:52:40 +02:00
6ef043ba7d arrange compile cases 2013-11-17 01:52:40 +02:00
fe27b70bdb block » markup 2013-11-17 01:52:40 +02:00
18d3dbf4f6 simplify comments 2013-11-17 01:52:40 +02:00
4758f58f73 remove double semicolons 2013-11-17 01:52:40 +02:00
5fa3eb1b2f parse_inline_elements » parse_span_elements to match the specs 2013-11-17 01:52:40 +02:00
38300323a6 simplify readme 2013-11-16 18:45:13 +02:00
96609329b9 improve readme 2013-11-16 09:51:01 +02:00
e497acb6dc escape sequences with double digit codes do not get decoded properly 2013-11-16 02:05:31 +02:00
30e436ec7d simplify tests 2013-11-16 02:05:31 +02:00
3972f18881 improve readme 2013-11-14 00:50:00 +02:00
4fb12be60a improve introduction 2013-11-13 01:52:59 +02:00
f8b07611d3 homepage » home 2013-11-13 01:47:38 +02:00
21d7f75f5b improve readme 2013-11-13 01:38:29 +02:00
a4fb0651d5 resolve #27 2013-11-13 01:07:39 +02:00
50a58eab16 Merge pull request #29 from hkdobrev/multi-line-emphasis
Allow multi-line emphasis. Fix #28.
2013-11-12 12:46:05 -08:00
1f347e17eb Allow multi-line emphasis. Fix #28. 2013-11-12 19:22:17 +02:00
df3db71698 add 5.2 to PHP versions to test against 2013-11-10 11:02:18 +02:00
a37f5ff31e improve tests 2013-11-10 10:44:52 +02:00
8e6f4cf7b8 leading spaces should not get trimmed 2013-11-09 22:23:56 +02:00
ee9a1e92c0 remove goto comment 2013-11-09 00:40:13 +02:00
689ef24cc5 strip trailing spaces 2013-11-08 23:40:00 +02:00
4403fe4d96 labels of reference links should be case insensitive 2013-11-08 21:59:26 +02:00
400c8f7d46 simplify regex for inline link in attempt to resolve #23 2013-11-08 00:24:40 +02:00
379cbf34b3 parse_block_elements doesn't have to use ltrim on lines with no indentation 2013-11-07 22:48:15 +02:00
b6c8cac512 optimize quick paragraph 2013-11-07 22:46:01 +02:00
0e9202689e escaping of "<" breaks span-level html 2013-11-05 21:40:33 +02:00
7249d02cff code blocks get unwanted empty lines 2013-11-05 10:21:48 +02:00
ecf86b073e error when last line consists of 1-3 spaces 2013-11-05 10:17:19 +02:00
b12973415f parse link references as blocks to improve performance 2013-11-05 00:57:16 +02:00
6d113f47fb rearrange block types to optimize performance 2013-11-04 09:28:50 +02:00
d4d3612710 escaping for special characters 2013-11-03 17:32:45 +02:00
78 changed files with 931 additions and 962 deletions

View File

@ -4,3 +4,4 @@ php:
- 5.5
- 5.4
- 5.3
- 5.2

View File

@ -46,17 +46,17 @@ class Parsedown
function parse($text)
{
# Removes UTF-8 BOM and marker characters.
# removes UTF-8 BOM and marker characters
$text = preg_replace('{^\xEF\xBB\xBF|\x1A}', '', $text);
# Removes \r characters.
# removes \r characters
$text = str_replace("\r\n", "\n", $text);
$text = str_replace("\r", "\n", $text);
# Replaces tabs with spaces.
# replaces tabs with spaces
$text = str_replace("\t", ' ', $text);
# Encodes escape sequences.
# encodes escape sequences
if (strpos($text, '\\') !== FALSE)
{
@ -66,7 +66,7 @@ class Parsedown
{
if (strpos($text, $escape_sequence) !== FALSE)
{
$code = "\x1A".'\\'.$index;
$code = "\x1A".'\\'.$index.';';
$text = str_replace($escape_sequence, $code, $text);
@ -75,18 +75,6 @@ class Parsedown
}
}
# Extracts link references.
if (preg_match_all('/^[ ]{0,3}\[(.+)\][ ]?:[ ]*\n?[ ]*(.+)$/m', $text, $matches, PREG_SET_ORDER))
{
foreach ($matches as $matches)
{
$this->reference_map[strtolower($matches[1])] = $matches[2];
$text = str_replace($matches[0], '', $text);
}
}
# ~
$text = preg_replace('/\n\s*\n/', "\n\n", $text);
@ -96,7 +84,7 @@ class Parsedown
$text = $this->parse_block_elements($lines);
# Decodes escape sequences (leaves out backslashes).
# decodes escape sequences
foreach ($this->escape_sequence_map as $code => $escape_sequence)
{
@ -122,16 +110,41 @@ class Parsedown
foreach ($lines as $line)
{
# Block-Level HTML
#
# fenced elements
if ($element['type'] === 'block' and ! isset($element['closed']))
switch ($element['type'])
{
if (preg_match('{<'.$element['subtype'].'>$}', $line)) # <open>
case 'fenced_code_block':
if ( ! isset($element['closed']))
{
if (preg_match('/^[ ]*'.$element['fence'][0].'{3,}[ ]*$/', $line))
{
$element['closed'] = true;
}
else
{
$element['text'] !== '' and $element['text'] .= "\n";
$element['text'] .= $line;
}
continue 2;
}
break;
case 'markup':
if ( ! isset($element['closed']))
{
if (preg_match('{<'.$element['subtype'].'>$}', $line)) # opening tag
{
$element['depth']++;
}
if (preg_match('{</'.$element['subtype'].'>$}', $line)) # </close>
if (preg_match('{</'.$element['subtype'].'>$}', $line)) # closing tag
{
$element['depth'] > 0
? $element['depth']--
@ -140,10 +153,13 @@ class Parsedown
$element['text'] .= "\n".$line;
continue;
continue 2;
}
# Empty
break;
}
# *
if ($line === '')
{
@ -152,21 +168,26 @@ class Parsedown
continue;
}
# Lazy Blockquote
#
# composite elements
if ($element['type'] === 'blockquote' and ! isset($element['interrupted']))
switch ($element['type'])
{
case 'blockquote':
if ( ! isset($element['interrupted']))
{
$line = preg_replace('/^[ ]*>[ ]?/', '', $line);
$element['lines'] []= $line;
continue;
continue 2;
}
# Lazy List Item
break;
case 'li':
if ($element['type'] === 'li')
{
if (preg_match('/^([ ]{0,3})(\d+[.]|[*+-])[ ](.*)/', $line, $matches))
{
if ($element['indentation'] !== $matches[1])
@ -189,7 +210,7 @@ class Parsedown
);
}
continue;
continue 2;
}
if (isset($element['interrupted']))
@ -198,79 +219,55 @@ class Parsedown
{
$element['lines'] []= '';
$line = preg_replace('/^[ ]{0,4}/', '', $line);;
$line = preg_replace('/^[ ]{0,4}/', '', $line);
$element['lines'] []= $line;
continue;
continue 2;
}
}
else
{
$line = preg_replace('/^[ ]{0,4}/', '', $line);;
$line = preg_replace('/^[ ]{0,4}/', '', $line);
$element['lines'] []= $line;
continue;
}
continue 2;
}
# Quick Paragraph
break;
}
if ($line[0] >= 'A' and $line['0'] !== '_')
#
# indentation sensitive types
$deindented_line = $line;
switch ($line[0])
{
goto paragraph; # trust me
}
case ' ':
# Setext Header (---)
# ~
if ($element['type'] === 'p' and ! isset($element['interrupted']) and preg_match('/^[-]+[ ]*$/', $line))
$deindented_line = ltrim($line);
if ($deindented_line === '')
{
$element['type'] = 'h.';
$element['level'] = 2;
continue;
continue 2;
}
# Horizontal Rule
if (preg_match('/^[ ]{0,3}([-*_])([ ]{0,2}\1){2,}[ ]*$/', $line))
{
$elements []= $element;
$element = array(
'type' => 'hr',
);
continue;
}
# List Item
if (preg_match('/^([ ]{0,3})(\d+[.]|[*+-])[ ](.*)/', $line, $matches))
{
$elements []= $element;
$element = array(
'type' => 'li',
'ordered' => isset($matches[2][1]),
'indentation' => $matches[1],
'last' => true,
'lines' => array(
preg_replace('/^[ ]{0,4}/', '', $matches[3]),
),
);
continue;
}
# Code
# code block
if (preg_match('/^[ ]{4}(.*)/', $line, $matches))
{
if ($element['type'] === 'code')
if ($element['type'] === 'code_block')
{
isset($element['interrupted']) and $element['text'] .= "\n";
if (isset($element['interrupted']))
{
$element['text'] .= "\n";
unset ($element['interrupted']);
}
$element['text'] .= "\n".$matches[1];
}
@ -279,17 +276,21 @@ class Parsedown
$elements []= $element;
$element = array(
'type' => 'code',
'type' => 'code_block',
'text' => $matches[1],
);
}
continue;
continue 2;
}
# Atx Header (#)
break;
if ($line[0] === '#' and preg_match('/^(#{1,6})[ ]*(.+?)[ ]*#*$/', $line, $matches))
case '#':
# atx heading (#)
if (preg_match('/^(#{1,6})[ ]*(.+?)[ ]*#*$/', $line, $matches))
{
$elements []= $element;
@ -301,25 +302,86 @@ class Parsedown
'level' => $level,
);
continue;
continue 2;
}
# Blockquote
break;
if (preg_match('/^[ ]*>[ ]?(.*)/', $line, $matches))
{
if ($element['type'] === 'blockquote')
{
if (isset($element['interrupted']))
{
$element['lines'] []= '';
case '-':
unset($element['interrupted']);
# setext heading (---)
if ($line[0] === '-' and $element['type'] === 'p' and ! isset($element['interrupted']) and preg_match('/^[-]+[ ]*$/', $line))
{
$element['type'] = 'h.';
$element['level'] = 2;
continue 2;
}
$element['lines'] []= $matches[1];
break;
case '=':
# setext heading (===)
if ($line[0] === '=' and $element['type'] === 'p' and ! isset($element['interrupted']) and preg_match('/^[=]+[ ]*$/', $line))
{
$element['type'] = 'h.';
$element['level'] = 1;
continue 2;
}
else
break;
}
#
# indentation insensitive types
switch ($deindented_line[0])
{
case '<':
# self-closing tag
if (preg_match('{^<.+?/>$}', $deindented_line))
{
$elements []= $element;
$element = array(
'type' => '',
'text' => $deindented_line,
);
continue 2;
}
# opening tag
if (preg_match('{^<(\w+)(?:[ ].*?)?>}', $deindented_line, $matches))
{
$elements []= $element;
$element = array(
'type' => 'markup',
'subtype' => strtolower($matches[1]),
'text' => $deindented_line,
'depth' => 0,
);
preg_match('{</'.$matches[1].'>\s*$}', $deindented_line) and $element['closed'] = true;
continue 2;
}
break;
case '>':
# quote
if (preg_match('/^>[ ]?(.*)/', $deindented_line, $matches))
{
$elements []= $element;
@ -329,56 +391,107 @@ class Parsedown
$matches[1],
),
);
continue 2;
}
continue;
}
break;
# Setext Header (===)
case '[':
if ($element['type'] === 'p' and ! isset($element['interrupted']) and preg_match('/^[=]+[ ]*$/', $line))
# reference
if (preg_match('/^\[(.+?)\]:[ ]*([^ ]+)/', $deindented_line, $matches))
{
$element['type'] = 'h.';
$element['level'] = 1;
$label = strtolower($matches[1]);
continue;
$this->reference_map[$label] = trim($matches[2], '<>');;
continue 2;
}
# Block-Level HTML <self-closing/>
break;
if (preg_match('{^<.+?/>$}', $line))
case '`':
case '~':
# fenced code block
if (preg_match('/^([`]{3,}|[~]{3,})[ ]*(\S+)?[ ]*$/', $deindented_line, $matches))
{
$elements []= $element;
$element = array(
'type' => '',
'text' => $line,
'type' => 'fenced_code_block',
'text' => '',
'fence' => $matches[1],
);
isset($matches[2]) and $element['language'] = $matches[2];
continue 2;
}
break;
case '*':
case '+':
case '-':
case '_':
# hr
if (preg_match('/^([-*_])([ ]{0,2}\1){2,}[ ]*$/', $deindented_line))
{
$elements []= $element;
$element = array(
'type' => 'hr',
);
continue 2;
}
# li
if (preg_match('/^([ ]*)[*+-][ ](.*)/', $line, $matches))
{
$elements []= $element;
$element = array(
'type' => 'li',
'ordered' => false,
'indentation' => $matches[1],
'last' => true,
'lines' => array(
preg_replace('/^[ ]{0,4}/', '', $matches[2]),
),
);
continue 2;
}
}
# li
if ($deindented_line[0] <= '9' and $deindented_line >= '0' and preg_match('/^([ ]*)\d+[.][ ](.*)/', $line, $matches))
{
$elements []= $element;
$element = array(
'type' => 'li',
'ordered' => true,
'indentation' => $matches[1],
'last' => true,
'lines' => array(
preg_replace('/^[ ]{0,4}/', '', $matches[2]),
),
);
continue;
}
# Block-Level HTML <open>
if (preg_match('{^<(\w+)(?:[ ].*?)?>}', $line, $matches))
{
$elements []= $element;
$element = array(
'type' => 'block',
'subtype' => strtolower($matches[1]),
'text' => $line,
'depth' => 0,
);
preg_match('{</'.$matches[1].'>\s*$}', $line) and $element['closed'] = true;
continue;
}
# ~
paragraph:
# paragraph
if ($element['type'] === 'p')
{
@ -408,7 +521,7 @@ class Parsedown
$elements []= $element;
array_shift($elements);
unset($elements[0]);
#
# ~
@ -416,10 +529,67 @@ class Parsedown
$markup = '';
foreach ($elements as $index => $element)
foreach ($elements as $element)
{
switch ($element['type'])
{
case 'p':
$text = $this->parse_span_elements($element['text']);
$text = preg_replace('/[ ]{2}\n/', '<br />'."\n", $text);
if ($context === 'li' and $markup === '')
{
if (isset($element['interrupted']))
{
$markup .= "\n".'<p>'.$text.'</p>'."\n";
}
else
{
$markup .= $text;
}
}
else
{
$markup .= '<p>'.$text.'</p>'."\n";
}
break;
case 'blockquote':
$text = $this->parse_block_elements($element['lines']);
$markup .= '<blockquote>'."\n".$text.'</blockquote>'."\n";
break;
case 'code_block':
case 'fenced_code_block':
$text = htmlspecialchars($element['text'], ENT_NOQUOTES, 'UTF-8');
strpos($text, "\x1A\\") !== FALSE and $text = strtr($text, $this->escape_sequence_map);
$markup .= '<pre><code>'.$text.'</code></pre>'."\n";
break;
case 'h.':
$text = $this->parse_span_elements($element['text']);
$markup .= '<h'.$element['level'].'>'.$text.'</h'.$element['level'].'>'."\n";
break;
case 'hr':
$markup .= '<hr />'."\n";
break;
case 'li':
if (isset($element['ordered'])) # first
@ -442,62 +612,6 @@ class Parsedown
break;
case 'p':
$text = $this->parse_inline_elements($element['text']);
$text = preg_replace('/[ ]{2}\n/', '<br />'."\n", $text);
if ($context === 'li' and $index === 0)
{
if (isset($element['interrupted']))
{
$markup .= "\n".'<p>'.$text.'</p>'."\n";
}
else
{
$markup .= $text;
}
}
else
{
$markup .= '<p>'.$text.'</p>'."\n";
}
break;
case 'code':
$text = htmlentities($element['text'], ENT_NOQUOTES);
strpos($text, "\x1A\\") !== FALSE and $text = strtr($text, $this->escape_sequence_map);
$markup .= '<pre><code>'.$text.'</code></pre>'."\n";
break;
case 'blockquote':
$text = $this->parse_block_elements($element['lines']);
$markup .= '<blockquote>'."\n".$text.'</blockquote>'."\n";
break;
case 'h.':
$text = $this->parse_inline_elements($element['text']);
$markup .= '<h'.$element['level'].'>'.$text.'</h'.$element['level'].'>'."\n";
break;
case 'hr':
$markup .= '<hr />'."\n";
break;
default:
$markup .= $element['text']."\n";
@ -507,32 +621,32 @@ class Parsedown
return $markup;
}
private function parse_inline_elements($text)
private function parse_span_elements($text)
{
$map = array();
$index = 0;
# Code Span
# code span
if (strpos($text, '`') !== FALSE and preg_match_all('/`(.+?)`/', $text, $matches, PREG_SET_ORDER))
{
foreach ($matches as $matches)
{
$element_text = $matches[1];
$element_text = htmlentities($element_text, ENT_NOQUOTES);
$element_text = htmlspecialchars($element_text, ENT_NOQUOTES, 'UTF-8');
# Decodes escape sequences.
# decodes escape sequences
$this->escape_sequence_map
and strpos($element_text, "\x1A") !== FALSE
and $element_text = strtr($element_text, $this->escape_sequence_map);
# Composes element.
# composes element
$element = '<code>'.$element_text.'</code>';
# Encodes element.
# encodes element
$code = "\x1A".'$'.$index;
@ -544,21 +658,25 @@ class Parsedown
}
}
# Inline Link / Image
# inline link or image
if (strpos($text, '](') !== FALSE and preg_match_all('/(!?)(\[((?:[^][]+|(?2))*)\])\((.*?)\)/', $text, $matches, PREG_SET_ORDER)) # inline
if (strpos($text, '](') !== FALSE and preg_match_all('/(!?)(\[((?:[^\[\]]|(?2))*)\])\((.*?)\)/', $text, $matches, PREG_SET_ORDER)) # inline
{
foreach ($matches as $matches)
{
$url = $matches[4];
strpos($url, '&') !== FALSE and $url = preg_replace('/&(?!#?\w+;)/', '&amp;', $url);
if ($matches[1]) # image
{
$element = '<img alt="'.$matches[3].'" src="'.$matches[4].'">';
$element = '<img alt="'.$matches[3].'" src="'.$url.'">';
}
else
{
$element_text = $this->parse_inline_elements($matches[3]);
$element_text = $this->parse_span_elements($matches[3]);
$element = '<a href="'.$matches[4].'">'.$element_text.'</a>';
$element = '<a href="'.$url.'">'.$element_text.'</a>';
}
# ~
@ -573,7 +691,7 @@ class Parsedown
}
}
# Reference(d) Link / Image
# reference link or image
if ($this->reference_map and strpos($text, '[') !== FALSE and preg_match_all('/(!?)\[(.+?)\](?:\n?[ ]?\[(.*?)\])?/ms', $text, $matches, PREG_SET_ORDER))
{
@ -589,13 +707,15 @@ class Parsedown
{
$url = $this->reference_map[$link_definition];
strpos($url, '&') !== FALSE and $url = preg_replace('/&(?!#?\w+;)/', '&amp;', $url);
if ($matches[1]) # image
{
$element = '<img alt="'.$matches[2].'" src="'.$url.'">';
}
else # anchor
{
$element_text = $this->parse_inline_elements($matches[2]);
$element_text = $this->parse_span_elements($matches[2]);
$element = '<a href="'.$url.'">'.$element_text.'</a>';
}
@ -613,13 +733,22 @@ class Parsedown
}
}
if (strpos($text, '<') !== FALSE and preg_match_all('/<((https?|ftp|dict):[^\^\s]+?)>/i', $text, $matches, PREG_SET_ORDER))
if (strpos($text, '://') !== FALSE)
{
switch (TRUE)
{
case preg_match_all('{<(https?:[/]{2}[^\s]+)>}i', $text, $matches, PREG_SET_ORDER):
case preg_match_all('{\b(https?:[/]{2}[^\s]+)\b}i', $text, $matches, PREG_SET_ORDER):
foreach ($matches as $matches)
{
$url = $matches[1];
strpos($url, '&') !== FALSE and $url = preg_replace('/&(?!#?\w+;)/', '&amp;', $url);
$element = '<a href=":href">:text</a>';
$element = str_replace(':text', $matches[1], $element);
$element = str_replace(':href', $matches[1], $element);
$element = str_replace(':text', $url, $element);
$element = str_replace(':href', $url, $element);
# ~
@ -631,18 +760,38 @@ class Parsedown
$index ++;
}
break;
}
}
# ~
strpos($text, '&') !== FALSE and $text = preg_replace('/&(?!#?\w+;)/', '&amp;', $text);
strpos($text, '<') !== FALSE and $text = preg_replace('/<(?!\/?\w.*?>)/', '&lt;', $text);
# ~
if (strpos($text, '~~') !== FALSE)
{
$text = preg_replace('/~~(?=\S)(.+?)(?<=\S)~~/s', '<del>$1</del>', $text);
}
if (strpos($text, '_') !== FALSE)
{
$text = preg_replace('/__(?=\S)(.+?)(?<=\S)__/', '<strong>$1</strong>', $text);
$text = preg_replace('/_(?=\S)(.+?)(?<=\S)_/', '<em>$1</em>', $text);
$text = preg_replace('/__(?=\S)([^_]+?)(?<=\S)__/s', '<strong>$1</strong>', $text, -1, $count);
$count or $text = preg_replace('/__(?=\S)(.+?)(?<=\S)__(?!_)/s', '<strong>$1</strong>', $text);
$text = preg_replace('/\b_(?=\S)(.+?)(?<=\S)_\b/s', '<em>$1</em>', $text);
}
if (strpos($text, '*') !== FALSE)
{
$text = preg_replace('/\*\*(?=\S)(.+?)(?<=\S)\*\*/', '<strong>$1</strong>', $text);
$text = preg_replace('/\*(?=\S)(.+?)(?<=\S)\*/', '<em>$1</em>', $text);
$text = preg_replace('/\*\*(?=\S)([^*]+?)(?<=\S)\*\*/s', '<strong>$1</strong>', $text, -1, $count);
$count or $text = preg_replace('/\*\*(?=\S)(.+?)(?<=\S)\*\*(?!\*)/s', '<strong>$1</strong>', $text);
$text = preg_replace('/\*(?=\S)([^*]+?)(?<=\S)\*/s', '<em>$1</em>', $text, -1, $count);
$count or $text = preg_replace('/\*(?=\S)(.+?)(?<=\S)\*(?!\*)/s', '<em>$1</em>', $text);
}
$text = strtr($text, $map);
@ -650,4 +799,3 @@ class Parsedown
return $text;
}
}

View File

@ -1,9 +1,8 @@
## Parsedown PHP
## Parsedown
Parsedown is a parser for Markdown. It parses Markdown text the way people do. First, it divides texts into blocks. Then it looks at how these blocks start and how they relate to each other. Finally, it looks for special characters to identify inline elements. As a result, Parsedown is (super) fast, consistent and clean.
Fast and consistent [Markdown][1] parser for PHP.
[Explorer (demo)](http://parsedown.org/explorer/)
[Tests](http://parsedown.org/tests/)
[Home](http://parsedown.org) &middot; [Demo](http://parsedown.org/explorer/) &middot; [Tests](http://parsedown.org/tests/)
### Installation
@ -18,3 +17,5 @@ $result = Parsedown::instance()->parse($text);
echo $result; # prints: <p>Hello <strong>Parsedown</strong>!</p>
```
[1]: http://daringfireball.net/projects/markdown/

View File

@ -20,20 +20,29 @@ class Test extends PHPUnit_Framework_TestCase
{
$provider = array();
$DirectoryIterator = new DirectoryIterator(__DIR__ . '/' . self::provider_dir);
$path = dirname(__FILE__).'/';
$DirectoryIterator = new DirectoryIterator($path . '/' . self::provider_dir);
foreach ($DirectoryIterator as $Item)
{
if ($Item->isFile() and $Item->getExtension() === 'md')
if ($Item->isFile())
{
$filename = $Item->getFilename();
$extension = pathinfo($filename, PATHINFO_EXTENSION);
if ($extension !== 'md')
continue;
$basename = $Item->getBasename('.md');
$markdown = file_get_contents(__DIR__ . '/' . self::provider_dir . $basename . '.md');
$markdown = file_get_contents($path . '/' . self::provider_dir . $basename . '.md');
if (!$markdown)
continue;
$expected_markup = file_get_contents(__DIR__ . '/' . self::provider_dir . $basename . '.html');
$expected_markup = file_get_contents($path . '/' . self::provider_dir . $basename . '.html');
$expected_markup = str_replace("\r\n", "\n", $expected_markup);
$expected_markup = str_replace("\r", "\n", $expected_markup);
@ -44,4 +53,3 @@ class Test extends PHPUnit_Framework_TestCase
return $provider;
}
}

View File

@ -1,6 +1,7 @@
<h1>This is an h1</h1>
<h2>This is an h2</h2>
<h3>This is an h3</h3>
<h4>This is an h4</h4>
<h5>This is an h5</h5>
<h6>This is an h6</h6>
<h1>h1</h1>
<h2>h2</h2>
<h3>h3</h3>
<h4>h4</h4>
<h5>h5</h5>
<h6>h6</h6>
<h1>closed h1</h1>

View File

@ -1,11 +1,13 @@
# This is an h1
# h1
## This is an h2
## h2
### This is an h3
### h3
#### This is an h4
#### h4
##### This is an h5
##### h5
###### This is an h6
###### h6
# closed h1 #

View File

@ -0,0 +1,9 @@
<div>content</div>
<hr style="background: #eaa;" />
<p>nested elements:</p>
<div>
parent
<div>
child
</div>
</div>

View File

@ -0,0 +1,12 @@
<div>content</div>
<hr style="background: #eaa;" />
nested elements:
<div>
parent
<div>
child
</div>
</div>

View File

@ -1,13 +0,0 @@
<p>Here's a regular blockquote:</p>
<blockquote>
<p>blockquote</p>
</blockquote>
<p>Here's one with no space after the ">":</p>
<blockquote>
<p>blockquote</p>
</blockquote>
<p>Here's one on multiple lines:</p>
<blockquote>
<p>line 1
line 2</p>
</blockquote>

View File

@ -1,12 +0,0 @@
Here's a regular blockquote:
> blockquote
Here's one with no space after the ">":
>blockquote
Here's one on multiple lines:
> line 1
> line 2

View File

@ -1,6 +0,0 @@
<h1>h1</h1>
<h2>h2</h2>
<h3>h3</h3>
<h4>h4</h4>
<h5>h5</h5>
<h6>h6</h6>

View File

@ -1,11 +0,0 @@
# h1 #
## h2 ##
### h3 ###
#### h4 ####
##### h5 #####
###### h6 ######

View File

@ -1,9 +1,8 @@
<p>Here's a regular code block:</p>
<pre><code>&lt;?php
echo 'Hello World!';
?&gt;</code></pre>
<p>Here's one that holds a list:</p>
<pre><code>- list item
- another list item</code></pre>
$message = 'Hello World!';
echo $message;</code></pre>
<hr />
<pre><code>&gt; not a quote
- not a list item
[not a reference]: http://foo.com</code></pre>

View File

@ -1,13 +1,11 @@
Here's a regular code block:
<?php
echo 'Hello World!';
$message = 'Hello World!';
echo $message;
?>
---
Here's one that holds a list:
- list item
- another list item
> not a quote
- not a list item
[not a reference]: http://foo.com

View File

@ -1 +1 @@
<p>This is a <code>code span</code>.</p>
<p>a <code>code span</code></p>

View File

@ -1 +1 @@
This is a `code span`.
a `code span`

View File

@ -1,16 +1,9 @@
<p>Here's one with multiple paragraphs:</p>
<blockquote>
<p>This is line one.</p>
<p>This is line two.</p>
</blockquote>
<p>Here's one with multiple types of blocks:</p>
<blockquote>
<p>This is a quoted paragraph.</p>
<h2>header</h2>
<p>paragraph</p>
<ul>
<li>This is a list item of a quoted list.</li>
<li>This is another list item.</li>
<li>li</li>
</ul>
<blockquote>
<p>This is a nested quote block.</p>
</blockquote>
<hr />
<p>paragraph</p>
</blockquote>

View File

@ -1,14 +1,10 @@
Here's one with multiple paragraphs:
> This is line one.
> header
> ------
>
> This is line two.
Here's one with multiple types of blocks:
> This is a quoted paragraph.
> paragraph
>
> - This is a list item of a quoted list.
> - This is another list item.
> - li
>
> > This is a nested quote block.
> ---
>
> paragraph

View File

@ -1,13 +1,12 @@
<p>Here's a compound list:</p>
<ul>
<li>
<p>This is the first paragraph of the list item.</p>
<p>This is the second one.</p>
<p>paragraph</p>
<p>paragraph</p>
</li>
<li>
<p>This is another list item.</p>
<p>paragraph</p>
<blockquote>
<p>This is a quote block that belongs to it.</p>
<p>quote</p>
</blockquote>
</li>
</ul>

View File

@ -1,10 +1,7 @@
Here's a compound list:
- paragraph
- This is the first paragraph of the list item.
paragraph
This is the second one.
- This is another list item.
> This is a quote block that belongs to it.
- paragraph
> quote

View File

@ -1,24 +0,0 @@
<p>Here's a regular list:</p>
<ul>
<li>list item</li>
<li>another list item</li>
<li>3rd list item</li>
</ul>
<p>Here's one with white space around items:</p>
<ul>
<li>list item </li>
<li>another list item </li>
</ul>
<p>Here's one with too much space before items:</p>
<pre><code>- list item
- another list item</code></pre>
<p>Here's one with no space after markers:</p>
<p>-list item
-another list item</p>
<p>Here's one where items contain line breaks:</p>
<ul>
<li>list
item</li>
<li>another
list item</li>
</ul>

View File

@ -1,27 +0,0 @@
Here's a regular list:
- list item
- another list item
- 3rd list item
Here's one with white space around items:
- list item
- another list item
Here's one with too much space before items:
- list item
- another list item
Here's one with no space after markers:
-list item
-another list item
Here's one where items contain line breaks:
- list
item
- another
list item

View File

@ -0,0 +1,5 @@
<p><strong><em>em strong</em></strong></p>
<p><strong><em>one</em> at the start</strong></p>
<p><strong>one at the <em>end</em></strong></p>
<p><strong>one <em>in the</em> middle</strong></p>
<p><strong>one with <em>asterisks</em></strong></p>

9
tests/data/em_strong.md Normal file
View File

@ -0,0 +1,9 @@
___em strong___
___one_ at the start__
__one at the _end___
__one _in the_ middle__
**one with *asterisks***

View File

@ -1,7 +1,6 @@
<p>Here's <em>an emphasis</em>.</p>
<p>A short emphasis <em>a</em> <em>b</em> .</p>
<p>Here's <strong>a strong one</strong>. </p>
<p>Here's <em>an emphasis that uses underscores</em>. </p>
<p>Here's <strong>a strong emphasis that uses underscores</strong>.</p>
<p>This is not _ an emphasis _ neither is * this * neither is _ this_ neither is _this _.</p>
<p>Empty emphasis ** is not __ an emphasis.</p>
<p><em>underscore</em>, <em>asterisk</em>, <em>one two</em>, <em>three four</em>, <em>a</em>, <em>b</em></p>
<p><em>multiline
emphasis</em></p>
<p>_ this _ is not an emphasis, neither is _ this_, _this _, or _this*</p>
<p>this_is_not_an_emphasis</p>
<p>an empty emphasis __ ** is not an emphasis</p>

View File

@ -1,13 +1,10 @@
Here's *an emphasis*.
_underscore_, *asterisk*, _one two_, *three four*, _a_, *b*
A short emphasis _a_ *b* .
_multiline
emphasis_
Here's **a strong one**.
_ this _ is not an emphasis, neither is _ this_, _this _, or _this*
Here's _an emphasis that uses underscores_.
this_is_not_an_emphasis
Here's __a strong emphasis that uses underscores__.
This is not _ an emphasis _ neither is * this * neither is _ this_ neither is _this _.
Empty emphasis ** is not __ an emphasis.
an empty emphasis __ ** is not an emphasis

View File

@ -1,6 +1,4 @@
<p>Here's an <em>emphasis</em> and here's an escaped *emphasis*. Here are also an escaped `code span`, escaped [inline link](http://example.com).</p>
<p>Here's <code>an escaped \*emphasis\* inside of a code span</code>.</p>
<p>Here's one inside of a code block:</p>
<pre><code>An escaped \*emphasis\*.</code></pre>
<p>Finally, an escaped reference:</p>
<p>[1]: http://example.com</p>
<p>escaped *emphasis*.</p>
<p><code>escaped \*emphasis\* in a code span</code></p>
<pre><code>escaped \*emphasis\* in a code block</code></pre>
<p>\ ` * _ { } [ ] ( ) > # + - . !</p>

View File

@ -1,11 +1,7 @@
Here's an *emphasis* and here's an escaped \*emphasis\*. Here are also an escaped \`code span\`, escaped \[inline link](http://example.com).
escaped \*emphasis\*.
Here's `an escaped \*emphasis\* inside of a code span`.
`escaped \*emphasis\* in a code span`
Here's one inside of a code block:
escaped \*emphasis\* in a code block
An escaped \*emphasis\*.
Finally, an escaped reference:
\[1]: http://example.com
\\ \` \* \_ \{ \} \[ \] \( \) \> \# \+ \- \. \!

View File

@ -0,0 +1,5 @@
<pre><code>&lt;?php
$message = 'fenced code block';
echo $message;</code></pre>
<pre><code>tilde</code></pre>

View File

@ -0,0 +1,10 @@
```
<?php
$message = 'fenced code block';
echo $message;
```
~~~
tilde
~~~

View File

@ -1,16 +1,5 @@
<p>Dashes:</p>
<hr />
<hr />
<hr />
<hr />
<pre><code>---</code></pre>
<hr />
<hr />
<hr />
<hr />
<pre><code>- - -</code></pre>
<p>Asterisks:</p>
<hr />
<p>Underscores:</p>
<hr />
<p>Based on <a href="http://daringfireball.net/projects/downloads/MarkdownTest_1.0.zip">the original</a> test suite.</p>

View File

@ -1,31 +1,9 @@
Dashes:
---
- - -
- - -
---
---
- - -
- - -
- - -
- - -
- - -
Asterisks:
***
Underscores:
___
Based on [the original](http://daringfireball.net/projects/downloads/MarkdownTest_1.0.zip) test suite.

View File

@ -1,15 +0,0 @@
<p>Self-closing tag:</p>
<hr/>
<p>Self-closing tag with attributes:</p>
<hr style="background: #eaa" />
<p>Bare element:</p>
<div>content</div>
<p>Element with attributes:</p>
<a href="http://parsedown.org">link</a>
<p>Nested elements:</p>
<div>
parent
<div>
child
</div>
</div>

View File

@ -1,24 +0,0 @@
Self-closing tag:
<hr/>
Self-closing tag with attributes:
<hr style="background: #eaa" />
Bare element:
<div>content</div>
Element with attributes:
<a href="http://parsedown.org">link</a>
Nested elements:
<div>
parent
<div>
child
</div>
</div>

View File

@ -0,0 +1 @@
<p><img alt="Markdown Logo" src="/md.png"></p>

View File

@ -0,0 +1,3 @@
![Markdown Logo][image]
[image]: /md.png

View File

@ -0,0 +1,2 @@
<p>an <a href="http://example.com">implicit</a> reference link</p>
<p>an <a href="http://example.com">implicit</a> reference link with an empty link definition</p>

View File

@ -0,0 +1,5 @@
an [implicit] reference link
[implicit]: http://example.com
an [implicit][] reference link with an empty link definition

View File

@ -1,2 +1,2 @@
<p>Here's a <a href="http://parsedown.org">link</a>.</p>
<p>Here's an image link: <a href="http://daringfireball.net/projects/markdown/"><img alt="MD Logo" src="http://parsedown.org/md.png"></a>.</p>
<p><a href="http://example.com">link</a></p>
<p><a href="http://example.com"><img alt="MD Logo" src="http://parsedown.org/md.png"></a></p>

View File

@ -1,3 +1,3 @@
Here's a [link](http://parsedown.org).
[link](http://example.com)
Here's an image link: [![MD Logo](http://parsedown.org/md.png)](http://daringfireball.net/projects/markdown/).
[![MD Logo](http://parsedown.org/md.png)](http://example.com)

View File

@ -1,4 +1,4 @@
<blockquote>
<p>line 1
line 2</p>
<p>quote
the rest of it</p>
</blockquote>

View File

@ -1,2 +1,2 @@
> line 1
line 2
> quote
the rest of it

View File

@ -0,0 +1,4 @@
<ul>
<li>li
the rest of it</li>
</ul>

2
tests/data/lazy_list.md Normal file
View File

@ -0,0 +1,2 @@
- li
the rest of it

View File

@ -1,4 +0,0 @@
<ul>
<li>li
more text</li>
</ul>

View File

@ -1,2 +0,0 @@
- li
more text

View File

@ -1,16 +1,13 @@
<p>Here's a regular ordered list:</p>
<ol>
<li>one</li>
<li>two</li>
<li>three</li>
</ol>
<p>Here's one with repeating numbers:</p>
<ol>
<li>one</li>
<li>two</li>
</ol>
<p>Here's one with large numbers:</p>
<p>repeating numbers:</p>
<ol>
<li>one</li>
<li>two</li>
</ol>
<p>large numbers:</p>
<ol>
<li>one</li>
</ol>

View File

@ -1,16 +1,11 @@
Here's a regular ordered list:
1. one
2. two
3. three
Here's one with repeating numbers:
repeating numbers:
1. one
1. two
Here's one with large numbers:
large numbers:
123. one
123. two

View File

@ -1,4 +0,0 @@
<p>Here's a paragraph.</p>
<blockquote>
<p>a block quote that belongs to it.</p>
</blockquote>

View File

@ -1,2 +0,0 @@
Here's a paragraph.
> a block quote that belongs to it.

View File

@ -1,5 +0,0 @@
<p>Here's a list that's "inside" a paragraph:</p>
<ul>
<li>list item</li>
<li>another list item</li>
</ul>

View File

@ -1,4 +0,0 @@
Here's a list that's "inside" a paragraph:
- list item
- another list item

View File

@ -1,20 +0,0 @@
<p>Here's a regular quote block:</p>
<blockquote>
<p>Some quoted text.
Here goes some more.</p>
</blockquote>
<p>Here's one with space before lines:</p>
<blockquote>
<p>Some quoted text.
Here goes some more.</p>
</blockquote>
<p>Here's one with no space after >:</p>
<blockquote>
<p>Some quoted text.
Here goes some more.</p>
</blockquote>
<p>Here's one with no > on the second line:</p>
<blockquote>
<p>Some quoted text.
Here goes some more.</p>
</blockquote>

View File

@ -1,19 +0,0 @@
Here's a regular quote block:
> Some quoted text.
> Here goes some more.
Here's one with space before lines:
> Some quoted text.
> Here goes some more.
Here's one with no space after >:
>Some quoted text.
>Here goes some more.
Here's one with no > on the second line:
> Some quoted text.
Here goes some more.

View File

@ -1,14 +0,0 @@
<p>Here's a <a href="http://parsedown.org">reference link</a>.</p>
<p>Here's <a href="http://parsedown.org">one</a> with an alternative syntax.</p>
<p>Here's <a href="http://parsedown.org">one</a> on the next line.</p>
<p>Here's <a href="http://parsedown.org">one</a> on 2 lines.</p>
<p>Here's <a href="http://parsedown.org/tests/">one</a> with a different URL.</p>
<p>Here's <a href="http://parsedown.org">one</a> with a semantic name.</p>
<p>Here's <a href="http://parsedown.org">one</a> with definition name on the next line.</p>
<p>Here's [one][404] with no definition.</p>
<p>Here's an image: <img alt="Markdown Logo" src="/md.png"></p>
<p>Here's an <a href="http://google.com">implicit one</a>.</p>
<p>Here's an <a href="http://google.com">implicit one</a>.</p>
<p>Here's an <a href="http://google.com">implicit one</a> with an empty link definition.</p>
<p>Here's a <a href="http://parsedown.org">multiline
one</a> defined on 2 lines.</p>

View File

@ -1,43 +0,0 @@
Here's a [reference link][1].
[1]: http://parsedown.org
Here's [one] [2] with an alternative syntax.
[2] :http://parsedown.org
Here's [one][3] on the next line.
[3]: http://parsedown.org
Here's [one][4] on 2 lines.
[4]:
http://parsedown.org
Here's [one][5] with a different URL.
[5]: http://parsedown.org/tests/
Here's [one][website] with a semantic name.
[website]: http://parsedown.org
Here's [one]
[website] with definition name on the next line.
Here's [one][404] with no definition.
Here's an image: ![Markdown Logo][image]
[image]: /md.png
Here's an [implicit one].
Here's an [implicit one].
[implicit one]: http://google.com
Here's an [implicit one][] with an empty link definition.
Here's a [multiline
one][website] defined on 2 lines.

View File

@ -1,6 +0,0 @@
<h1>Heading 1</h1>
<h2>Heading 2</h2>
<h2>Block Heading</h2>
<p>This is the rest of the block.</p>
<h1>Single "="</h1>
<h2>Single "-"</h2>

View File

@ -1,16 +0,0 @@
Heading 1
=========
Heading 2
---------
Block Heading
-------------
This is the rest of the block.
Single "="
=
Single "-"
-

View File

@ -0,0 +1,11 @@
<blockquote>
<p>quote</p>
</blockquote>
<p>indented:</p>
<blockquote>
<p>quote</p>
</blockquote>
<p>no space after <code>&gt;</code>:</p>
<blockquote>
<p>quote</p>
</blockquote>

View File

@ -0,0 +1,7 @@
> quote
indented:
> quote
no space after `>`:
>quote

View File

@ -0,0 +1,3 @@
<p>an <b>important</b> <a href=''>link</a></p>
<p>broken<br/>
line</p>

View File

@ -0,0 +1,4 @@
an <b>important</b> <a href=''>link</a>
broken<br/>
line

View File

@ -1,14 +1,15 @@
<p>Here's a list where items are separated by empty lines:</p>
<ul>
<li>
<p>list item</p>
<p>li</p>
</li>
<li>another list item</li>
<li>li</li>
</ul>
<p>Here's an ordered one:</p>
<ol>
<hr />
<ul>
<li>
<p>item one</p>
<p>li</p>
<ul>
<li>indented li</li>
</ul>
</li>
<li>item two</li>
</ol>
</ul>

View File

@ -1,11 +1,9 @@
Here's a list where items are separated by empty lines:
- li
- list item
- li
- another list item
---
Here's an ordered one:
- li
1. item one
2. item two
- indented li

View File

@ -0,0 +1,8 @@
<p>AT&amp;T has an ampersand in their name</p>
<pre><code>Let's play some cards ♠ ♣ ♥ ♦</code></pre>
<p>AT&amp;T is another way to write it</p>
<p>this &amp; that</p>
<p>4 &lt; 5 and 6 > 5</p>
<p><a href="http://example.com/autolink?a=1&amp;b=2">http://example.com/autolink?a=1&amp;b=2</a> </p>
<p><a href="/script?a=1&amp;b=2">inline link</a></p>
<p><a href="http://example.com/?a=1&amp;b=2">reference link</a></p>

View File

@ -0,0 +1,17 @@
AT&T has an ampersand in their name
Let's play some cards ♠ ♣ ♥ ♦
AT&T is another way to write it
this & that
4 < 5 and 6 > 5
<http://example.com/autolink?a=1&b=2>
[inline link](/script?a=1&b=2)
[reference link][1]
[1]: http://example.com/?a=1&b=2

View File

@ -0,0 +1,3 @@
<p><del>strikethrough</del></p>
<p>in the <del>middle</del> of a sentence</p>
<p>in the middle of a w<del>or</del>d</p>

View File

@ -0,0 +1,5 @@
~~strikethrough~~
in the ~~middle~~ of a sentence
in the middle of a w~~or~~d

View File

@ -0,0 +1,6 @@
<p><em><strong>strong em</strong></em> </p>
<p><em>em <strong>strong em</strong></em></p>
<p><em><strong>strong em</strong> em</em></p>
<p><em><strong>strong em</strong></em></p>
<p><em>em <strong>strong em</strong></em></p>
<p><em><strong>strong em</strong> em</em></p>

11
tests/data/strong_em.md Normal file
View File

@ -0,0 +1,11 @@
***strong em***
*em **strong em***
***strong em** em*
___strong em___
_em __strong em___
___strong em__ em_

View File

@ -0,0 +1,6 @@
<p><a href="http://example.com">reference link</a></p>
<p><a href="http://example.com">one</a> with a semantic name</p>
<p>[one][404] with no definition</p>
<p><a href="http://example.com">multiline
one</a> defined on 2 lines</p>
<p><a href="http://example.com">one</a> with an upper case label</p>

View File

@ -0,0 +1,16 @@
[reference link][1]
[1]: http://example.com
[one][website] with a semantic name
[website]: http://example.com
[one][404] with no definition
[multiline
one][website] defined on 2 lines
[one][label] with an upper case label
[LABEL]: http://example.com

View File

@ -1,20 +1,10 @@
<p>Here's a regular unordered list:</p>
<ul>
<li>list item</li>
<li>another list item</li>
<li>3rd list item</li>
<li>li</li>
<li>li</li>
</ul>
<p>Here's one with a variety of markers:</p>
<p>mixed markers:</p>
<ul>
<li>hyphen</li>
<li>plus</li>
<li>asterisk</li>
<li>li</li>
<li>li</li>
<li>li</li>
</ul>
<p>Here's one with white space around items:</p>
<ul>
<li>list item </li>
<li>another list item </li>
</ul>
<p>Here's one with no space after markers:</p>
<p>-list item
-another list item</p>

View File

@ -1,21 +1,8 @@
Here's a regular unordered list:
- li
- li
- list item
- another list item
- 3rd list item
mixed markers:
Here's one with a variety of markers:
- hyphen
+ plus
* asterisk
Here's one with white space around items:
- list item
- another list item
Here's one with no space after markers:
-list item
-another list item
* li
+ li
- li

View File

@ -0,0 +1 @@
<p>Here's an autolink <a href="http://example.com">http://example.com</a>.</p>

View File

@ -0,0 +1 @@
Here's an autolink http://example.com.

View File

@ -0,0 +1 @@
<pre><code>code</code></pre>

5
tests/data/whitespace.md Normal file
View File

@ -0,0 +1,5 @@
code