1
0
mirror of https://github.com/erusev/parsedown.git synced 2023-08-10 21:13:06 +03:00

Compare commits

..

25 Commits
0.8.0 ... 0.9.0

Author SHA1 Message Date
f0fbdaa6ca backtick within code span 2014-01-22 21:28:29 +02:00
e20c0a29bd nested elements should render on a new line 2014-01-22 21:28:29 +02:00
712dd23d30 simplify parsing of list 2014-01-22 21:28:29 +02:00
68f2871996 resolve #3 2014-01-22 21:28:29 +02:00
17e7e33847 name image title test 2014-01-22 21:28:29 +02:00
7cb9646d98 simplify compiling of links 2014-01-22 21:28:29 +02:00
325bdd9ff6 improve readme 2014-01-21 23:15:02 +02:00
2a0700abda resolve #61 2014-01-20 22:19:23 +02:00
4e83d79d76 setters should know nothing 2014-01-20 09:26:25 +02:00
354842fd6e simplify compiling 2014-01-19 23:34:20 +02:00
2b73e94c6c simplify parsing of escaped characters 2014-01-19 22:49:43 +02:00
0182812d6c remove unnecessary blank lines 2014-01-19 15:37:05 +02:00
f5dd3455f9 resolve #58 2014-01-19 00:52:07 +02:00
1017f22cdd fix paragraph list 2014-01-18 16:45:39 +02:00
88854955d6 take $inline_tags out of the method 2014-01-18 16:45:39 +02:00
654dd74074 lines that start with inline html should not get parsed as block-level markup, should resolve #54 and #57 2014-01-18 16:45:39 +02:00
fee5b71998 improve readme 2014-01-17 01:33:20 +02:00
149b687ee7 improve tests 2014-01-17 01:25:41 +02:00
98b17e3354 setext heading doesn't have to use regex 2014-01-17 01:23:25 +02:00
da966b83f1 atx heading doesn't have to use regex 2014-01-17 00:36:11 +02:00
b9ab495cb4 parse method doesn't have to use regex 2014-01-16 23:43:34 +02:00
408cb5c21f code block doesn't have to use regex 2014-01-16 23:43:12 +02:00
5dd0e8cb7b $deindented_line >= ... doesn't make sense 2014-01-16 23:39:56 +02:00
5521afde31 refactor $element 2014-01-13 23:45:31 +02:00
4317add3a2 add hhvm to PHP versions to test against 2013-12-28 14:57:25 +02:00
25 changed files with 317 additions and 187 deletions

View File

@ -4,4 +4,5 @@ php:
- 5.5
- 5.4
- 5.3
- 5.2
- 5.2
- hhvm

View File

@ -37,11 +37,11 @@ class Parsedown
# Setters
#
private $break_marker = " \n";
private $breaks_enabled = false;
function set_breaks_enabled($breaks_enabled)
{
$this->break_marker = $breaks_enabled ? "\n" : " \n";
$this->breaks_enabled = $breaks_enabled;
return $this;
}
@ -51,7 +51,6 @@ class Parsedown
#
private $reference_map = array();
private $escape_sequence_map = array();
#
# Public Methods
@ -59,9 +58,6 @@ class Parsedown
function parse($text)
{
# removes UTF-8 BOM and marker characters
$text = preg_replace('{^\xEF\xBB\xBF|\x1A}', '', $text);
# removes \r characters
$text = str_replace("\r\n", "\n", $text);
$text = str_replace("\r", "\n", $text);
@ -69,43 +65,14 @@ class Parsedown
# replaces tabs with spaces
$text = str_replace("\t", ' ', $text);
# encodes escape sequences
if (strpos($text, '\\') !== FALSE)
{
$escape_sequences = array('\\\\', '\`', '\*', '\_', '\{', '\}', '\[', '\]', '\(', '\)', '\>', '\#', '\+', '\-', '\.', '\!');
foreach ($escape_sequences as $index => $escape_sequence)
{
if (strpos($text, $escape_sequence) !== FALSE)
{
$code = "\x1A".'\\'.$index.';';
$text = str_replace($escape_sequence, $code, $text);
$this->escape_sequence_map[$code] = $escape_sequence;
}
}
}
# ~
$text = preg_replace('/\n\s*\n/', "\n\n", $text);
$text = trim($text, "\n");
$lines = explode("\n", $text);
$text = $this->parse_block_elements($lines);
# decodes escape sequences
foreach ($this->escape_sequence_map as $code => $escape_sequence)
{
$text = str_replace($code, $escape_sequence[1], $text);
}
# ~
$text = rtrim($text, "\n");
return $text;
@ -129,7 +96,7 @@ class Parsedown
switch ($element['type'])
{
case 'fenced_code_block':
case 'fenced block':
if ( ! isset($element['closed']))
{
@ -149,16 +116,16 @@ class Parsedown
break;
case 'markup':
case 'block-level markup':
if ( ! isset($element['closed']))
{
if (preg_match('{<'.$element['subtype'].'>$}', $line)) # opening tag
if (strpos($line, $element['start']) !== false) # opening tag
{
$element['depth']++;
}
if (preg_match('{</'.$element['subtype'].'>$}', $line)) # closing tag
if (strpos($line, $element['end']) !== false) # closing tag
{
$element['depth'] > 0
? $element['depth']--
@ -175,7 +142,9 @@ class Parsedown
# *
if ($line === '')
$deindented_line = ltrim($line);
if ($deindented_line === '')
{
$element['interrupted'] = true;
@ -213,13 +182,11 @@ class Parsedown
$elements []= $element;
$element = array(
'type' => 'li',
'indentation' => $matches[1],
'last' => true,
'lines' => array(
preg_replace('/^[ ]{0,4}/', '', $matches[3]),
),
unset($element['first']);
$element['last'] = true;
$element['lines'] = array(
preg_replace('/^[ ]{0,4}/', '', $matches[3]),
);
}
@ -255,26 +222,17 @@ class Parsedown
# indentation sensitive types
$deindented_line = $line;
switch ($line[0])
{
case ' ':
# ~
$deindented_line = ltrim($line);
if ($deindented_line === '')
{
continue 2;
}
# code block
if (preg_match('/^[ ]{4}(.*)/', $line, $matches))
if (isset($line[3]) and $line[3] === ' ' and $line[2] === ' ' and $line[1] === ' ')
{
if ($element['type'] === 'code_block')
$code_line = substr($line, 4);
if ($element['type'] === 'code block')
{
if (isset($element['interrupted']))
{
@ -283,15 +241,15 @@ class Parsedown
unset ($element['interrupted']);
}
$element['text'] .= "\n".$matches[1];
$element['text'] .= "\n".$code_line;
}
else
{
$elements []= $element;
$element = array(
'type' => 'code_block',
'text' => $matches[1],
'type' => 'code block',
'text' => $code_line,
);
}
@ -304,15 +262,20 @@ class Parsedown
# atx heading (#)
if (preg_match('/^(#{1,6})[ ]*(.+?)[ ]*#*$/', $line, $matches))
if (isset($line[1]))
{
$elements []= $element;
$level = strlen($matches[1]);
$level = 1;
while (isset($line[$level]) and $line[$level] === '#')
{
$level++;
}
$element = array(
'type' => 'h.',
'text' => $matches[2],
'type' => 'heading',
'text' => trim($line, '# '),
'level' => $level,
);
@ -322,27 +285,28 @@ class Parsedown
break;
case '-':
# setext heading (---)
if ($line[0] === '-' and $element['type'] === 'p' and ! isset($element['interrupted']) and preg_match('/^[-]+[ ]*$/', $line))
{
$element['type'] = 'h.';
$element['level'] = 2;
continue 2;
}
break;
case '=':
# setext heading (===)
# setext heading
if ($line[0] === '=' and $element['type'] === 'p' and ! isset($element['interrupted']) and preg_match('/^[=]+[ ]*$/', $line))
if ($element['type'] === 'paragraph' and isset($element['interrupted']) === false)
{
$element['type'] = 'h.';
$element['level'] = 1;
$chopped_line = rtrim($line);
$i = 1;
while (isset($chopped_line[$i]))
{
if ($chopped_line[$i] !== $line[0])
{
break 2;
}
$i++;
}
$element['type'] = 'heading';
$element['level'] = $line[0] === '-' ? 2 : 1;
continue 2;
}
@ -356,34 +320,63 @@ class Parsedown
{
case '<':
# self-closing tag
$position = strpos($deindented_line, '>');
if (preg_match('{^<.+?/>$}', $deindented_line))
if ($position > 1) # tag
{
$name = substr($deindented_line, 1, $position - 1);
$name = rtrim($name);
if (substr($name, -1) === '/')
{
$self_closing = true;
$name = substr($name, 0, -1);
}
$position = strpos($name, ' ');
if ($position)
{
$name = substr($name, 0, $position);
}
if ( ! ctype_alpha($name))
{
break;
}
if (in_array($name, $this->inline_tags))
{
break;
}
$elements []= $element;
$element = array(
'type' => '',
'text' => $deindented_line,
);
if (isset($self_closing))
{
$element = array(
'type' => 'self-closing tag',
'text' => $deindented_line,
);
continue 2;
}
unset($self_closing);
# opening tag
if (preg_match('{^<(\w+)(?:[ ].*?)?>}', $deindented_line, $matches))
{
$elements []= $element;
continue 2;
}
$element = array(
'type' => 'markup',
'subtype' => strtolower($matches[1]),
'type' => 'block-level markup',
'text' => $deindented_line,
'start' => '<'.$name.'>',
'end' => '</'.$name.'>',
'depth' => 0,
);
preg_match('{</'.$matches[1].'>\s*$}', $deindented_line) and $element['closed'] = true;
if (strpos($deindented_line, $element['end']))
{
$element['closed'] = true;
}
continue 2;
}
@ -442,7 +435,7 @@ class Parsedown
$elements []= $element;
$element = array(
'type' => 'fenced_code_block',
'type' => 'fenced block',
'text' => '',
'fence' => $matches[1],
);
@ -482,6 +475,7 @@ class Parsedown
'type' => 'li',
'ordered' => false,
'indentation' => $matches[1],
'first' => true,
'last' => true,
'lines' => array(
preg_replace('/^[ ]{0,4}/', '', $matches[2]),
@ -494,7 +488,7 @@ class Parsedown
# li
if ($deindented_line[0] <= '9' and $deindented_line >= '0' and preg_match('/^([ ]*)\d+[.][ ](.*)/', $line, $matches))
if ($deindented_line[0] <= '9' and $deindented_line[0] >= '0' and preg_match('/^([ ]*)\d+[.][ ](.*)/', $line, $matches))
{
$elements []= $element;
@ -502,6 +496,7 @@ class Parsedown
'type' => 'li',
'ordered' => true,
'indentation' => $matches[1],
'first' => true,
'last' => true,
'lines' => array(
preg_replace('/^[ ]{0,4}/', '', $matches[2]),
@ -513,7 +508,7 @@ class Parsedown
# paragraph
if ($element['type'] === 'p')
if ($element['type'] === 'paragraph')
{
if (isset($element['interrupted']))
{
@ -525,6 +520,8 @@ class Parsedown
}
else
{
$this->breaks_enabled and $element['text'] .= ' ';
$element['text'] .= "\n".$line;
}
}
@ -533,7 +530,7 @@ class Parsedown
$elements []= $element;
$element = array(
'type' => 'p',
'type' => 'paragraph',
'text' => $line,
);
}
@ -553,7 +550,7 @@ class Parsedown
{
switch ($element['type'])
{
case 'p':
case 'paragraph':
$text = $this->parse_span_elements($element['text']);
@ -566,6 +563,11 @@ class Parsedown
else
{
$markup .= $text;
if (isset($elements[2]))
{
$markup .= "\n";
}
}
}
else
@ -583,22 +585,27 @@ class Parsedown
break;
case 'code_block':
case 'fenced_code_block':
case 'code block':
$text = htmlspecialchars($element['text'], ENT_NOQUOTES, 'UTF-8');
strpos($text, "\x1A\\") !== FALSE and $text = strtr($text, $this->escape_sequence_map);
$markup .= isset($element['language'])
? '<pre><code class="language-'.$element['language'].'">'.$text.'</code></pre>'
: '<pre><code>'.$text.'</code></pre>';
$markup .= "\n";
$markup .= '<pre><code>'.$text.'</code></pre>'."\n";
break;
case 'h.':
case 'fenced block':
$text = htmlspecialchars($element['text'], ENT_NOQUOTES, 'UTF-8');
$markup .= '<pre><code';
isset($element['language']) and $markup .= ' class="language-'.$element['language'].'"';
$markup .= '>'.$text.'</code></pre>'."\n";
break;
case 'heading':
$text = $this->parse_span_elements($element['text']);
@ -614,11 +621,11 @@ class Parsedown
case 'li':
if (isset($element['ordered'])) # first
if (isset($element['first']))
{
$list_type = $element['ordered'] ? 'ol' : 'ul';
$type = $element['ordered'] ? 'ol' : 'ul';
$markup .= '<'.$list_type.'>'."\n";
$markup .= '<'.$type.'>'."\n";
}
if (isset($element['interrupted']) and ! isset($element['last']))
@ -630,13 +637,18 @@ class Parsedown
$markup .= '<li>'.$text.'</li>'."\n";
isset($element['last']) and $markup .= '</'.$list_type.'>'."\n";
if (isset($element['last']))
{
$type = $element['ordered'] ? 'ol' : 'ul';
$markup .= '</'.$type.'>'."\n";
}
break;
case 'markup':
case 'block-level markup':
$markup .= $this->parse_span_elements($element['text'])."\n";
$markup .= $element['text']."\n";
break;
@ -649,31 +661,9 @@ class Parsedown
return $markup;
}
# ~
private $strong_regex = array(
'*' => '/^[*]{2}([^*]+?)[*]{2}(?![*])/s',
'_' => '/^__([^_]+?)__(?!_)/s',
);
private $em_regex = array(
'*' => '/^[*]([^*]+?)[*](?![*])/s',
'_' => '/^_([^_]+?)[_](?![_])\b/s',
);
private $strong_em_regex = array(
'*' => '/^[*]{2}(.*?)[*](.+?)[*](.*?)[*]{2}/s',
'_' => '/^__(.*?)_(.+?)_(.*?)__/s',
);
private $em_strong_regex = array(
'*' => '/^[*](.*?)[*]{2}(.+?)[*]{2}(.*?)[*]/s',
'_' => '/^_(.*?)__(.+?)__(.*?)_/s',
);
private function parse_span_elements($text, $markers = array('![', '&', '*', '<', '[', '_', '`', 'http', '~~'))
private function parse_span_elements($text, $markers = array(" \n", '![', '&', '*', '<', '[', '\\', '_', '`', 'http', '~~'))
{
if (isset($text[2]) === false or $markers === array())
if (isset($text[1]) === false or $markers === array())
{
return $text;
}
@ -709,7 +699,7 @@ class Parsedown
# ~
if ($closest_marker === null or isset($text[$closest_marker_position + 2]) === false)
if ($closest_marker === null or isset($text[$closest_marker_position + 1]) === false)
{
$markup .= $text;
@ -730,6 +720,14 @@ class Parsedown
switch ($closest_marker)
{
case " \n":
$markup .= '<br />'."\n";
$offset = 3;
break;
case '![':
case '[':
@ -797,15 +795,21 @@ class Parsedown
if ($element['!'])
{
$markup .= '<img alt="'.$element['a'].'" src="'.$element['»'].'" />';
$markup .= '<img alt="'.$element['a'].'" src="'.$element['»'].'"';
isset($element['#']) and $markup .= ' title="'.$element['#'].'"';
$markup .= ' />';
}
else
{
$element['a'] = $this->parse_span_elements($element['a'], $markers);
$markup .= isset($element['#'])
? '<a href="'.$element['»'].'" title="'.$element['#'].'">'.$element['a'].'</a>'
: '<a href="'.$element['»'].'">'.$element['a'].'</a>';
$markup .= '<a href="'.$element['»'].'"';
isset($element['#']) and $markup .= ' title="'.$element['#'].'"';
$markup .= '>'.$element['a'].'</a>';
}
unset($element);
@ -821,9 +825,18 @@ class Parsedown
case '&':
$markup .= '&amp;';
if (preg_match('/^&#?\w+;/', $text, $matches))
{
$markup .= $matches[0];
$offset = substr($text, 0, 5) === '&amp;' ? 5 : 1;
$offset = strlen($matches[0]);
}
else
{
$markup .= '&amp;';
$offset = 1;
}
break;
@ -888,6 +901,12 @@ class Parsedown
$offset = strlen($matches[0]);
}
elseif (strpos($text, '@') > 1 and preg_match('/<(\S+?@\S+?)>/', $text, $matches))
{
$markup .= '<a href="mailto:'.$matches[1].'">'.$matches[1].'</a>';
$offset = strlen($matches[0]);
}
elseif (preg_match('/^<\/?\w.*?>/', $text, $matches))
{
$markup .= $matches[0];
@ -910,18 +929,30 @@ class Parsedown
break;
case '\\':
if (in_array($text[1], $this->special_characters))
{
$markup .= $text[1];
$offset = 2;
}
else
{
$markup .= '\\';
$offset = 1;
}
break;
case '`':
if (preg_match('/^`(.+?)`/', $text, $matches))
if (preg_match('/^(`+)(.+?)\1(?!`)/', $text, $matches))
{
$element_text = $matches[1];
$element_text = $matches[2];
$element_text = htmlspecialchars($element_text, ENT_NOQUOTES, 'UTF-8');
if ($this->escape_sequence_map and strpos($element_text, "\x1A") !== false)
{
$element_text = strtr($element_text, $this->escape_sequence_map);
}
$markup .= '<code>'.$element_text.'</code>';
$offset = strlen($matches[0]);
@ -984,8 +1015,41 @@ class Parsedown
$markers[$closest_marker_index] = $closest_marker;
}
$markup = str_replace($this->break_marker, '<br />'."\n", $markup);
return $markup;
}
#
# Read-only
#
private $inline_tags = array(
'a', 'abbr', 'acronym', 'b', 'bdo', 'big', 'br', 'button',
'cite', 'code', 'dfn', 'em', 'i', 'img', 'input', 'kbd',
'label', 'map', 'object', 'q', 'samp', 'script', 'select', 'small',
'span', 'strong', 'sub', 'sup', 'textarea', 'tt', 'var',
);
private $special_characters = array('\\', '`', '*', '_', '{', '}', '[', ']', '(', ')', '>', '#', '+', '-', '.', '!');
# ~
private $strong_regex = array(
'*' => '/^[*]{2}([^*]+?)[*]{2}(?![*])/s',
'_' => '/^__([^_]+?)__(?!_)/s',
);
private $em_regex = array(
'*' => '/^[*]([^*]+?)[*](?![*])/s',
'_' => '/^_([^_]+?)[_](?![_])\b/s',
);
private $strong_em_regex = array(
'*' => '/^[*]{2}(.*?)[*](.+?)[*](.*?)[*]{2}/s',
'_' => '/^__(.*?)_(.+?)_(.*?)__/s',
);
private $em_strong_regex = array(
'*' => '/^[*](.*?)[*]{2}(.+?)[*]{2}(.*?)[*]/s',
'_' => '/^_(.*?)__(.+?)__(.*?)_/s',
);
}

View File

@ -13,7 +13,7 @@ Better [Markdown](http://en.wikipedia.org/wiki/Markdown) parser for PHP.
* [fast](http://parsedown.org/speed)
* [consistent](http://parsedown.org/consistency)
* [GitHub Flavored](https://help.github.com/articles/github-flavored-markdown)
* [tested](https://travis-ci.org/erusev/parsedown) in PHP 5.2, 5.3, 5.4 and 5.5
* [tested](https://travis-ci.org/erusev/parsedown) in PHP 5.2, 5.3, 5.4, 5.5 and [hhvm](http://www.hhvm.com/)
* friendly to international input
### Installation
@ -23,9 +23,9 @@ Include `Parsedown.php` or install [the composer package](https://packagist.org/
### Example
```php
$text = 'Hello **Parsedown**!';
$text = 'Hello *Parsedown*!';
$result = Parsedown::instance()->parse($text);
echo $result; # prints: <p>Hello <strong>Parsedown</strong>!</p>
echo $result; # prints: <p>Hello <em>Parsedown</em>!</p>
```

View File

@ -4,4 +4,5 @@
<h4>h4</h4>
<h5>h5</h5>
<h6>h6</h6>
<h1>closed h1</h1>
<h1>closed h1</h1>
<p>#</p>

View File

@ -10,4 +10,6 @@
###### h6
# closed h1 #
# closed h1 #
#

View File

@ -1,9 +1,5 @@
<div>content</div>
<hr style="background: #eaa;" />
<p>nested elements:</p>
<div>_content_</div>
<p>sparse:</p>
<div>
parent
<div>
child
</div>
_content_
</div>

View File

@ -1,12 +1,7 @@
<div>content</div>
<div>_content_</div>
<hr style="background: #eaa;" />
nested elements:
sparse:
<div>
parent
<div>
child
</div>
_content_
</div>

1
tests/data/email.html Normal file
View File

@ -0,0 +1 @@
<p>my email is <a href="mailto:me@example.com">me@example.com</a></p>

1
tests/data/email.md Normal file
View File

@ -0,0 +1 @@
my email is <me@example.com>

View File

@ -0,0 +1 @@
<p>&amp; &copy; &#123;</p>

View File

@ -0,0 +1 @@
&amp; &copy; &#123;

View File

@ -0,0 +1 @@
<p><img alt="alt" src="/md.png" title="title" /></p>

View File

@ -0,0 +1 @@
![alt](/md.png "title")

View File

@ -0,0 +1,10 @@
<div>
_parent_
<div>
_child_
</div>
<pre>
_adopted child_
</pre>
</div>
<p><em>outside</em></p>

View File

@ -0,0 +1,11 @@
<div>
_parent_
<div>
_child_
</div>
<pre>
_adopted child_
</pre>
</div>
_outside_

View File

@ -0,0 +1,12 @@
<p>paragraph</p>
<ul>
<li>li</li>
<li>li</li>
</ul>
<p>paragraph</p>
<ul>
<li>
<p>li</p>
</li>
<li>li</li>
</ul>

View File

@ -0,0 +1,9 @@
paragraph
- li
- li
paragraph
* li
* li

View File

@ -0,0 +1,4 @@
<hr />
<p>attributes:</p>
<hr style="background: #9bd;" />
<p>...</p>

View File

@ -0,0 +1,7 @@
<hr />
attributes:
<hr style="background: #9bd;" />
...

View File

@ -1,3 +1,4 @@
<p>an <b>important</b> <a href=''>link</a></p>
<p>broken<br/>
line</p>
line</p>
<p><b>inline tag</b> at the beginning</p>

View File

@ -1,4 +1,6 @@
an <b>important</b> <a href=''>link</a>
broken<br/>
line
line
<b>inline tag</b> at the beginning

View File

@ -1,5 +1,4 @@
<p>AT&amp;T has an ampersand in their name</p>
<p>AT&amp;T is another way to write it</p>
<p>this &amp; that</p>
<p>4 &lt; 5 and 6 > 5</p>
<p><a href="http://example.com/autolink?a=1&amp;b=2">http://example.com/autolink?a=1&amp;b=2</a></p>

View File

@ -1,7 +1,5 @@
AT&T has an ampersand in their name
AT&T is another way to write it
this & that
4 < 5 and 6 > 5

View File

@ -0,0 +1,6 @@
<pre><code>&lt;?php
$message = 'Hello World!';
echo $message;
echo "following a blank line";</code></pre>

View File

@ -0,0 +1,6 @@
<?php
$message = 'Hello World!';
echo $message;
echo "following a blank line";