1
0
mirror of https://github.com/erusev/parsedown.git synced 2023-08-10 21:13:06 +03:00

Compare commits

...

48 Commits

Author SHA1 Message Date
da5d75e97e resolve #209 2014-10-29 22:29:46 +02:00
2adb87ef41 Merge pull request #236 from naNuke/escapeComment
Ignore html comments as well with markupEscape option.
2014-10-10 21:11:23 +03:00
74926c9831 Add test of escaped html comment. 2014-10-10 20:07:41 +02:00
68f3aea036 Ignore html comments as well with markupEscape option. 2014-10-10 19:07:25 +02:00
f91e4dece3 improve consistency 2014-09-26 13:06:40 +03:00
c62365adc4 improve extensibility of test case 2014-09-26 02:06:16 +03:00
bb7a3f41e3 improve readme 2014-09-22 12:01:39 +03:00
f64c1387f8 fix indents 2014-09-22 02:52:45 +03:00
59c77e706b improve consistency 2014-09-22 02:36:42 +03:00
e0965ce09b Merge pull request #224 from hkdobrev/no-markup-option
Add `noMarkup` option to escape user HTML
2014-09-21 23:19:23 +03:00
0a3fde3774 Add noMarkup option to escape user HTML
Resolves #106.

This change introduces a new option - `noMarkup`. You can set it with the
`setNoMarkup()` method, similar to the `setBreaksEnabled()` one.

Example usage:

``` php
<?php

$parsedown = new Parsedown();
$parsedown->setNoMarkup(true);
$parsedown->text('<div><strong>*Some text*</strong></div>');

// Outputs:
// <p>&lt;div>&lt;strong><em>Some text</em>&lt;/strong>&lt;/div></p>
```
2014-09-20 14:53:19 +03:00
93f7b26427 imp 2014-09-14 01:14:40 +03:00
e1cb3b7b23 improve readme 2014-09-13 00:11:56 +03:00
5bf56ea041 improve readme 2014-09-12 23:52:06 +03:00
9e98ed04de improve readme 2014-09-11 15:47:59 +03:00
1c89e6f771 improve readme 2014-09-09 14:30:17 +03:00
0220a93010 Merge pull request #208 from apfelbox/max-h6
Limit generated headlines to h6
2014-08-26 09:31:52 +03:00
512cc1f065 Limit generated headlines to h6 2014-08-25 21:06:03 +02:00
9437766539 Merge pull request #200 from hkdobrev/html-attributes-slashes
Allow slashes in HTML attributes
2014-08-14 01:27:48 +03:00
1127681d56 Allow slashes in HTML attributes 2014-08-13 23:54:52 +03:00
e33ac1c56e improve readme 2014-06-18 12:27:25 +03:00
d24439ada0 improve test suite 2014-05-21 23:20:46 +03:00
1ae100beab improve comment 2014-05-17 17:37:17 +03:00
82a5a78a36 improve readme 2014-05-17 17:13:00 +03:00
4ede4340ab improve readme 2014-05-16 03:34:43 +03:00
170a6bf770 improve readme 2014-05-16 01:27:54 +03:00
21db821324 improve readme 2014-05-16 01:15:21 +03:00
b384839d15 update readme 2014-05-14 20:07:52 +03:00
2da10d277b resolve #105 2014-05-14 13:14:49 +03:00
532b5ede35 resolve #129 2014-05-14 01:11:05 +03:00
2bd2f81f4f methods should not have more than one optional parameters 2014-05-12 16:18:00 +03:00
e318e66de5 improve consistency 2014-05-12 00:41:00 +03:00
0820d0a607 paragraph doesn't have to use a type 2014-05-12 00:34:47 +03:00
b8d1cfe91a improve extensibility 2014-05-11 22:31:02 +03:00
d85a233611 Merge pull request #171 from scarwu/master
identifyEscapeSequence() needs Array check
2014-05-11 20:57:05 +03:00
973d4a866d add array check 2014-05-11 23:36:01 +08:00
d19c2b6942 improve names 2014-05-10 16:28:00 +03:00
4dde57451d fix consecutive reference links 2014-05-06 17:05:49 +03:00
44686c4f1e improve extensibility 2014-05-06 01:12:27 +03:00
db02ecf259 "reference" is a definition 2014-05-05 14:43:31 +03:00
aa004d4595 improve code organisation 2014-05-05 14:39:40 +03:00
1bb65457ed remove unnecessary comments 2014-05-05 13:46:26 +03:00
0c9a4af8ab improve naming consistency 2014-05-03 18:02:06 +03:00
cc94c1b584 resolve #167 2014-05-02 18:21:10 +03:00
e8d8801db4 resolve #135 2014-05-01 02:44:35 +03:00
521803cdcd resolve #136 2014-05-01 02:42:01 +03:00
0eb480324c resolve #145 2014-05-01 02:02:14 +03:00
7c78aff578 resolve #163 2014-05-01 01:47:14 +03:00
23 changed files with 519 additions and 349 deletions

View File

@ -18,17 +18,19 @@ class Parsedown
# #
# Philosophy # Philosophy
# Markdown is intended to be easy-to-read by humans - those of us who read # Parsedown recognises that the Markdown syntax is optimised for humans so
# line by line, left to right, top to bottom. In order to take advantage of # it tries to read like one. It goes through text line by line. It looks at
# this, Parsedown tries to read in a similar way. It breaks texts into # how lines start to identify blocks. It looks for special characters to
# lines, it iterates through them and it looks at how they start and relate # identify inline elements.
# to each other.
# #
# ~ # ~
function text($text) function text($text)
{ {
# make sure no definitions are set
$this->Definitions = array();
# standardize line breaks # standardize line breaks
$text = str_replace("\r\n", "\n", $text); $text = str_replace("\r\n", "\n", $text);
$text = str_replace("\r", "\n", $text); $text = str_replace("\r", "\n", $text);
@ -48,9 +50,6 @@ class Parsedown
# trim line breaks # trim line breaks
$markup = trim($markup, "\n"); $markup = trim($markup, "\n");
# clean up
$this->definitions = array();
return $markup; return $markup;
} }
@ -58,6 +57,8 @@ class Parsedown
# Setters # Setters
# #
private $breaksEnabled;
function setBreaksEnabled($breaksEnabled) function setBreaksEnabled($breaksEnabled)
{ {
$this->breaksEnabled = $breaksEnabled; $this->breaksEnabled = $breaksEnabled;
@ -65,13 +66,20 @@ class Parsedown
return $this; return $this;
} }
private $breaksEnabled; private $markupEscaped;
function setMarkupEscaped($markupEscaped)
{
$this->markupEscaped = $markupEscaped;
return $this;
}
# #
# Blocks # Lines
# #
protected $blockMarkers = array( protected $BlockTypes = array(
'#' => array('Atx'), '#' => array('Atx'),
'*' => array('Rule', 'List'), '*' => array('Rule', 'List'),
'+' => array('List'), '+' => array('List'),
@ -87,24 +95,31 @@ class Parsedown
'8' => array('List'), '8' => array('List'),
'9' => array('List'), '9' => array('List'),
':' => array('Table'), ':' => array('Table'),
'<' => array('Markup'), '<' => array('Comment', 'Markup'),
'=' => array('Setext'), '=' => array('Setext'),
'>' => array('Quote'), '>' => array('Quote'),
'[' => array('Reference'),
'_' => array('Rule'), '_' => array('Rule'),
'`' => array('FencedCode'), '`' => array('FencedCode'),
'|' => array('Table'), '|' => array('Table'),
'~' => array('FencedCode'), '~' => array('FencedCode'),
); );
protected $definitionMarkers = array( # ~
protected $DefinitionTypes = array(
'[' => array('Reference'), '[' => array('Reference'),
); );
# ~
protected $unmarkedBlockTypes = array( protected $unmarkedBlockTypes = array(
'CodeBlock', 'CodeBlock',
); );
#
# Blocks
#
private function lines(array $lines) private function lines(array $lines)
{ {
$CurrentBlock = null; $CurrentBlock = null;
@ -134,7 +149,7 @@ class Parsedown
$Line = array('body' => $line, 'indent' => $indent, 'text' => $text); $Line = array('body' => $line, 'indent' => $indent, 'text' => $text);
# Multiline block types define "addTo" methods. # ~
if (isset($CurrentBlock['incomplete'])) if (isset($CurrentBlock['incomplete']))
{ {
@ -161,17 +176,15 @@ class Parsedown
$marker = $text[0]; $marker = $text[0];
# Definitions if (isset($this->DefinitionTypes[$marker]))
if (isset($this->definitionMarkers[$marker]))
{ {
foreach ($this->definitionMarkers[$marker] as $definitionType) foreach ($this->DefinitionTypes[$marker] as $definitionType)
{ {
$Definition = $this->{'identify'.$definitionType}($Line, $CurrentBlock); $Definition = $this->{'identify'.$definitionType}($Line, $CurrentBlock);
if (isset($Definition)) if (isset($Definition))
{ {
$this->definitions[$definitionType][$Definition['id']] = $Definition['data']; $this->Definitions[$definitionType][$Definition['id']] = $Definition['data'];
continue 2; continue 2;
} }
@ -182,9 +195,9 @@ class Parsedown
$blockTypes = $this->unmarkedBlockTypes; $blockTypes = $this->unmarkedBlockTypes;
if (isset($this->blockMarkers[$marker])) if (isset($this->BlockTypes[$marker]))
{ {
foreach ($this->blockMarkers[$marker] as $blockType) foreach ($this->BlockTypes[$marker] as $blockType)
{ {
$blockTypes []= $blockType; $blockTypes []= $blockType;
} }
@ -195,23 +208,19 @@ class Parsedown
foreach ($blockTypes as $blockType) foreach ($blockTypes as $blockType)
{ {
# Block types define "identify" methods.
$Block = $this->{'identify'.$blockType}($Line, $CurrentBlock); $Block = $this->{'identify'.$blockType}($Line, $CurrentBlock);
if (isset($Block)) if (isset($Block))
{ {
$Block['type'] = $blockType; $Block['type'] = $blockType;
if ( ! isset($Block['identified'])) # » if ( ! isset($Block['identified']))
{ {
$Elements []= $CurrentBlock['element']; $Elements []= $CurrentBlock['element'];
$Block['identified'] = true; $Block['identified'] = true;
} }
# Multiline block types define "addTo" methods.
if (method_exists($this, 'addTo'.$blockType)) if (method_exists($this, 'addTo'.$blockType))
{ {
$Block['incomplete'] = true; $Block['incomplete'] = true;
@ -225,7 +234,7 @@ class Parsedown
# ~ # ~
if ($CurrentBlock['type'] === 'Paragraph' and ! isset($CurrentBlock['interrupted'])) if (isset($CurrentBlock) and ! isset($CurrentBlock['type']) and ! isset($CurrentBlock['interrupted']))
{ {
$CurrentBlock['element']['text'] .= "\n".$text; $CurrentBlock['element']['text'] .= "\n".$text;
} }
@ -233,15 +242,9 @@ class Parsedown
{ {
$Elements []= $CurrentBlock['element']; $Elements []= $CurrentBlock['element'];
$CurrentBlock = array( $CurrentBlock = $this->buildParagraph($Line);
'type' => 'Paragraph',
'identified' => true, $CurrentBlock['identified'] = true;
'element' => array(
'name' => 'p',
'text' => $text,
'handler' => 'line',
),
);
} }
} }
@ -285,7 +288,7 @@ class Parsedown
$Block = array( $Block = array(
'element' => array( 'element' => array(
'name' => 'h'.$level, 'name' => 'h' . min(6, $level),
'text' => $text, 'text' => $text,
'handler' => 'line', 'handler' => 'line',
), ),
@ -296,15 +299,22 @@ class Parsedown
} }
# #
# Rule # Code
protected function identifyRule($Line) protected function identifyCodeBlock($Line)
{ {
if (preg_match('/^(['.$Line['text'][0].'])([ ]{0,2}\1){2,}[ ]*$/', $Line['text'])) if ($Line['indent'] >= 4)
{ {
$text = substr($Line['body'], 4);
$Block = array( $Block = array(
'element' => array( 'element' => array(
'name' => 'hr' 'name' => 'pre',
'handler' => 'element',
'text' => array(
'name' => 'code',
'text' => $text,
),
), ),
); );
@ -312,104 +322,77 @@ class Parsedown
} }
} }
# protected function addToCodeBlock($Line, $Block)
# Reference
protected function identifyReference($Line)
{ {
if (preg_match('/^\[(.+?)\]:[ ]*<?(\S+?)>?(?:[ ]+["\'(](.+)["\')])?[ ]*$/', $Line['text'], $matches)) if ($Line['indent'] >= 4)
{ {
$Definition = array( if (isset($Block['interrupted']))
'id' => strtolower($matches[1]),
'data' => array(
'url' => $matches[2],
),
);
if (isset($matches[3]))
{ {
$Definition['data']['title'] = $matches[3]; $Block['element']['text']['text'] .= "\n";
unset($Block['interrupted']);
} }
return $Definition; $Block['element']['text']['text'] .= "\n";
$text = substr($Line['body'], 4);
$Block['element']['text']['text'] .= $text;
return $Block;
} }
} }
# protected function completeCodeBlock($Block)
# Setext
protected function identifySetext($Line, array $Block = null)
{ {
if ( ! isset($Block) or $Block['type'] !== 'Paragraph' or isset($Block['interrupted'])) $text = $Block['element']['text']['text'];
$text = htmlspecialchars($text, ENT_NOQUOTES, 'UTF-8');
$Block['element']['text']['text'] = $text;
return $Block;
}
#
# Comment
protected function identifyComment($Line)
{
if ($this->markupEscaped)
{ {
return; return;
} }
if (chop($Line['text'], $Line['text'][0]) === '') if (isset($Line['text'][3]) and $Line['text'][3] === '-' and $Line['text'][2] === '-' and $Line['text'][1] === '!')
{ {
$Block['element']['name'] = $Line['text'][0] === '=' ? 'h1' : 'h2';
return $Block;
}
}
#
# Markup
protected function identifyMarkup($Line)
{
if (preg_match('/^<(\w[\w\d]*)(?:[ ][^>\/]*)?(\/?)[ ]*>/', $Line['text'], $matches))
{
if (in_array($matches[1], $this->textLevelElements))
{
return;
}
$Block = array( $Block = array(
'element' => $Line['body'], 'element' => $Line['body'],
); );
if ($matches[2] or $matches[1] === 'hr' or preg_match('/<\/'.$matches[1].'>[ ]*$/', $Line['text'])) if (preg_match('/-->$/', $Line['text']))
{ {
$Block['closed'] = true; $Block['closed'] = true;
} }
else
{
$Block['depth'] = 0;
$Block['start'] = '<'.$matches[1].'>';
$Block['end'] = '</'.$matches[1].'>';
}
return $Block; return $Block;
} }
} }
protected function addToMarkup($Line, array $Block) protected function addToComment($Line, array $Block)
{ {
if (isset($Block['closed'])) if (isset($Block['closed']))
{ {
return; return;
} }
if (stripos($Line['text'], $Block['start']) !== false) # opening tag $Block['element'] .= "\n" . $Line['body'];
{
$Block['depth'] ++;
}
if (stripos($Line['text'], $Block['end']) !== false) # closing tag if (preg_match('/-->$/', $Line['text']))
{ {
if ($Block['depth'] > 0) $Block['closed'] = true;
{
$Block['depth'] --;
}
else
{
$Block['closed'] = true;
}
} }
$Block['element'] .= "\n".$Line['body'];
return $Block; return $Block;
} }
@ -418,7 +401,7 @@ class Parsedown
protected function identifyFencedCode($Line) protected function identifyFencedCode($Line)
{ {
if (preg_match('/^(['.$Line['text'][0].']{3,})[ ]*(\w+)?[ ]*$/', $Line['text'], $matches)) if (preg_match('/^(['.$Line['text'][0].']{3,})[ ]*([\w-]+)?[ ]*$/', $Line['text'], $matches))
{ {
$Element = array( $Element = array(
'name' => 'code', 'name' => 'code',
@ -593,6 +576,8 @@ class Parsedown
if (isset($Block['interrupted'])) if (isset($Block['interrupted']))
{ {
$Block['element']['text'] []= ''; $Block['element']['text'] []= '';
unset($Block['interrupted']);
} }
$Block['element']['text'] []= $matches[1]; $Block['element']['text'] []= $matches[1];
@ -608,12 +593,111 @@ class Parsedown
} }
} }
#
# Rule
protected function identifyRule($Line)
{
if (preg_match('/^(['.$Line['text'][0].'])([ ]{0,2}\1){2,}[ ]*$/', $Line['text']))
{
$Block = array(
'element' => array(
'name' => 'hr'
),
);
return $Block;
}
}
#
# Setext
protected function identifySetext($Line, array $Block = null)
{
if ( ! isset($Block) or isset($Block['type']) or isset($Block['interrupted']))
{
return;
}
if (chop($Line['text'], $Line['text'][0]) === '')
{
$Block['element']['name'] = $Line['text'][0] === '=' ? 'h1' : 'h2';
return $Block;
}
}
#
# Markup
protected function identifyMarkup($Line)
{
if ($this->markupEscaped)
{
return;
}
if (preg_match('/^<(\w[\w\d]*)(?:[ ][^>]*)?(\/?)[ ]*>/', $Line['text'], $matches))
{
if (in_array($matches[1], $this->textLevelElements))
{
return;
}
$Block = array(
'element' => $Line['body'],
);
if ($matches[2] or in_array($matches[1], $this->voidElements) or preg_match('/<\/'.$matches[1].'>[ ]*$/', $Line['text']))
{
$Block['closed'] = true;
}
else
{
$Block['depth'] = 0;
$Block['name'] = $matches[1];
}
return $Block;
}
}
protected function addToMarkup($Line, array $Block)
{
if (isset($Block['closed']))
{
return;
}
if (preg_match('/<'.$Block['name'].'([ ][^\/]+)?>/', $Line['text'])) # opening tag
{
$Block['depth'] ++;
}
if (stripos($Line['text'], '</'.$Block['name'].'>') !== false) # closing tag
{
if ($Block['depth'] > 0)
{
$Block['depth'] --;
}
else
{
$Block['closed'] = true;
}
}
$Block['element'] .= "\n".$Line['body'];
return $Block;
}
# #
# Table # Table
protected function identifyTable($Line, array $Block = null) protected function identifyTable($Line, array $Block = null)
{ {
if ( ! isset($Block) or $Block['type'] !== 'Paragraph' or isset($Block['interrupted'])) if ( ! isset($Block) or isset($Block['type']) or isset($Block['interrupted']))
{ {
return; return;
} }
@ -764,57 +848,42 @@ class Parsedown
} }
# #
# Code # Definitions
#
protected function identifyCodeBlock($Line) protected function identifyReference($Line)
{ {
if ($Line['indent'] >= 4) if (preg_match('/^\[(.+?)\]:[ ]*<?(\S+?)>?(?:[ ]+["\'(](.+)["\')])?[ ]*$/', $Line['text'], $matches))
{ {
$text = substr($Line['body'], 4); $Definition = array(
'id' => strtolower($matches[1]),
$Block = array( 'data' => array(
'element' => array( 'url' => $matches[2],
'name' => 'pre',
'handler' => 'element',
'text' => array(
'name' => 'code',
'text' => $text,
),
), ),
); );
return $Block; if (isset($matches[3]))
}
}
protected function addToCodeBlock($Line, $Block)
{
if ($Line['indent'] >= 4)
{
if (isset($Block['interrupted']))
{ {
$Block['element']['text']['text'] .= "\n"; $Definition['data']['title'] = $matches[3];
unset($Block['interrupted']);
} }
$Block['element']['text']['text'] .= "\n"; return $Definition;
$text = substr($Line['body'], 4);
$Block['element']['text']['text'] .= $text;
return $Block;
} }
} }
protected function completeCodeBlock($Block) #
# ~
#
protected function buildParagraph($Line)
{ {
$text = $Block['element']['text']['text']; $Block = array(
'element' => array(
$text = htmlspecialchars($text, ENT_NOQUOTES, 'UTF-8'); 'name' => 'p',
'text' => $Line['text'],
$Block['element']['text']['text'] = $text; 'handler' => 'line',
),
);
return $Block; return $Block;
} }
@ -823,7 +892,7 @@ class Parsedown
# ~ # ~
# #
private function element(array $Element) protected function element(array $Element)
{ {
$markup = '<'.$Element['name']; $markup = '<'.$Element['name'];
@ -858,7 +927,7 @@ class Parsedown
return $markup; return $markup;
} }
private function elements(array $Elements) protected function elements(array $Elements)
{ {
$markup = ''; $markup = '';
@ -871,7 +940,7 @@ class Parsedown
$markup .= "\n"; $markup .= "\n";
if (is_string($Element)) # because of markup if (is_string($Element)) # because of Markup
{ {
$markup .= $Element; $markup .= $Element;
@ -890,7 +959,7 @@ class Parsedown
# Spans # Spans
# #
protected $spanMarkers = array( protected $SpanTypes = array(
'!' => array('Link'), # ? '!' => array('Link'), # ?
'&' => array('Ampersand'), '&' => array('Ampersand'),
'*' => array('Emphasis'), '*' => array('Emphasis'),
@ -903,8 +972,14 @@ class Parsedown
'\\' => array('EscapeSequence'), '\\' => array('EscapeSequence'),
); );
# ~
protected $spanMarkerList = '*_!&[</`~\\'; protected $spanMarkerList = '*_!&[</`~\\';
#
# ~
#
public function line($text) public function line($text)
{ {
$markup = ''; $markup = '';
@ -913,17 +988,19 @@ class Parsedown
$markerPosition = 0; $markerPosition = 0;
while ($markedExcerpt = strpbrk($remainder, $this->spanMarkerList)) while ($excerpt = strpbrk($remainder, $this->spanMarkerList))
{ {
$marker = $markedExcerpt[0]; $marker = $excerpt[0];
$markerPosition += strpos($remainder, $marker); $markerPosition += strpos($remainder, $marker);
foreach ($this->spanMarkers[$marker] as $spanType) $Excerpt = array('text' => $excerpt, 'context' => $text);
foreach ($this->SpanTypes[$marker] as $spanType)
{ {
$handler = 'identify'.$spanType; $handler = 'identify'.$spanType;
$Span = $this->$handler($markedExcerpt, $text); $Span = $this->$handler($Excerpt);
if ( ! isset($Span)) if ( ! isset($Span))
{ {
@ -948,7 +1025,7 @@ class Parsedown
$markup .= $this->readPlainText($plainText); $markup .= $this->readPlainText($plainText);
$markup .= isset($Span['element']) ? $this->element($Span['element']) : $Span['markup']; $markup .= isset($Span['markup']) ? $Span['markup'] : $this->element($Span['element']);
$text = substr($text, $Span['position'] + $Span['extent']); $text = substr($text, $Span['position'] + $Span['extent']);
@ -959,7 +1036,7 @@ class Parsedown
continue 2; continue 2;
} }
$remainder = substr($markedExcerpt, 1); $remainder = substr($excerpt, 1);
$markerPosition ++; $markerPosition ++;
} }
@ -973,14 +1050,14 @@ class Parsedown
# ~ # ~
# #
protected function identifyUrl($excerpt, $text) protected function identifyUrl($Excerpt)
{ {
if ( ! isset($excerpt[1]) or $excerpt[1] !== '/') if ( ! isset($Excerpt['text'][1]) or $Excerpt['text'][1] !== '/')
{ {
return; return;
} }
if (preg_match('/\bhttps?:[\/]{2}[^\s]+\b\/*/ui', $text, $matches, PREG_OFFSET_CAPTURE)) if (preg_match('/\bhttps?:[\/]{2}[^\s<]+\b\/*/ui', $Excerpt['context'], $matches, PREG_OFFSET_CAPTURE))
{ {
$url = str_replace(array('&', '<'), array('&amp;', '&lt;'), $matches[0][0]); $url = str_replace(array('&', '<'), array('&amp;', '&lt;'), $matches[0][0]);
@ -998,9 +1075,9 @@ class Parsedown
} }
} }
protected function identifyAmpersand($excerpt) protected function identifyAmpersand($Excerpt)
{ {
if ( ! preg_match('/^&#?\w+;/', $excerpt)) if ( ! preg_match('/^&#?\w+;/', $Excerpt['text']))
{ {
return array( return array(
'markup' => '&amp;', 'markup' => '&amp;',
@ -1009,14 +1086,14 @@ class Parsedown
} }
} }
protected function identifyStrikethrough($excerpt) protected function identifyStrikethrough($Excerpt)
{ {
if ( ! isset($excerpt[1])) if ( ! isset($Excerpt['text'][1]))
{ {
return; return;
} }
if ($excerpt[1] === $excerpt[0] and preg_match('/^~~(?=\S)(.+?)(?<=\S)~~/', $excerpt, $matches)) if ($Excerpt['text'][1] === '~' and preg_match('/^~~(?=\S)(.+?)(?<=\S)~~/', $Excerpt['text'], $matches))
{ {
return array( return array(
'extent' => strlen($matches[0]), 'extent' => strlen($matches[0]),
@ -1029,12 +1106,12 @@ class Parsedown
} }
} }
protected function identifyEscapeSequence($excerpt) protected function identifyEscapeSequence($Excerpt)
{ {
if (in_array($excerpt[1], $this->specialCharacters)) if (isset($Excerpt['text'][1]) and in_array($Excerpt['text'][1], $this->specialCharacters))
{ {
return array( return array(
'markup' => $excerpt[1], 'markup' => $Excerpt['text'][1],
'extent' => 2, 'extent' => 2,
); );
} }
@ -1048,9 +1125,9 @@ class Parsedown
); );
} }
protected function identifyUrlTag($excerpt) protected function identifyUrlTag($Excerpt)
{ {
if (strpos($excerpt, '>') !== false and preg_match('/^<(https?:[\/]{2}[^\s]+?)>/i', $excerpt, $matches)) if (strpos($Excerpt['text'], '>') !== false and preg_match('/^<(https?:[\/]{2}[^\s]+?)>/i', $Excerpt['text'], $matches))
{ {
$url = str_replace(array('&', '<'), array('&amp;', '&lt;'), $matches[1]); $url = str_replace(array('&', '<'), array('&amp;', '&lt;'), $matches[1]);
@ -1067,9 +1144,9 @@ class Parsedown
} }
} }
protected function identifyEmailTag($excerpt) protected function identifyEmailTag($Excerpt)
{ {
if (strpos($excerpt, '>') !== false and preg_match('/<(\S+?@\S+?)>/', $excerpt, $matches)) if (strpos($Excerpt['text'], '>') !== false and preg_match('/^<(\S+?@\S+?)>/', $Excerpt['text'], $matches))
{ {
return array( return array(
'extent' => strlen($matches[0]), 'extent' => strlen($matches[0]),
@ -1084,9 +1161,14 @@ class Parsedown
} }
} }
protected function identifyTag($excerpt) protected function identifyTag($Excerpt)
{ {
if (strpos($excerpt, '>') !== false and preg_match('/^<\/?\w.*?>/', $excerpt, $matches)) if ($this->markupEscaped)
{
return;
}
if (strpos($Excerpt['text'], '>') !== false and preg_match('/^<\/?\w.*?>/', $Excerpt['text'], $matches))
{ {
return array( return array(
'markup' => $matches[0], 'markup' => $matches[0],
@ -1095,11 +1177,11 @@ class Parsedown
} }
} }
protected function identifyInlineCode($excerpt) protected function identifyInlineCode($Excerpt)
{ {
$marker = $excerpt[0]; $marker = $Excerpt['text'][0];
if (preg_match('/^('.$marker.'+)[ ]*(.+?)[ ]*(?<!'.$marker.')\1(?!'.$marker.')/', $excerpt, $matches)) if (preg_match('/^('.$marker.'+)[ ]*(.+?)[ ]*(?<!'.$marker.')\1(?!'.$marker.')/', $Excerpt['text'], $matches))
{ {
$text = $matches[2]; $text = $matches[2];
$text = htmlspecialchars($text, ENT_NOQUOTES, 'UTF-8'); $text = htmlspecialchars($text, ENT_NOQUOTES, 'UTF-8');
@ -1114,25 +1196,25 @@ class Parsedown
} }
} }
protected function identifyLink($excerpt) protected function identifyLink($Excerpt)
{ {
$extent = $excerpt[0] === '!' ? 1 : 0; $extent = $Excerpt['text'][0] === '!' ? 1 : 0;
if (strpos($excerpt, ']') and preg_match('/\[((?:[^][]|(?R))*)\]/', $excerpt, $matches)) if (strpos($Excerpt['text'], ']') and preg_match('/\[((?:[^][]|(?R))*)\]/', $Excerpt['text'], $matches))
{ {
$Link = array('text' => $matches[1], 'label' => strtolower($matches[1])); $Link = array('text' => $matches[1], 'label' => strtolower($matches[1]));
$extent += strlen($matches[0]); $extent += strlen($matches[0]);
$substring = substr($excerpt, $extent); $substring = substr($Excerpt['text'], $extent);
if (preg_match('/^\s*\[(.+?)\]/', $substring, $matches)) if (preg_match('/^\s*\[([^][]+)\]/', $substring, $matches))
{ {
$Link['label'] = strtolower($matches[1]); $Link['label'] = strtolower($matches[1]);
if (isset($this->definitions['Reference'][$Link['label']])) if (isset($this->Definitions['Reference'][$Link['label']]))
{ {
$Link += $this->definitions['Reference'][$Link['label']]; $Link += $this->Definitions['Reference'][$Link['label']];
$extent += strlen($matches[0]); $extent += strlen($matches[0]);
} }
@ -1141,9 +1223,9 @@ class Parsedown
return; return;
} }
} }
elseif (isset($this->definitions['Reference'][$Link['label']])) elseif (isset($this->Definitions['Reference'][$Link['label']]))
{ {
$Link += $this->definitions['Reference'][$Link['label']]; $Link += $this->Definitions['Reference'][$Link['label']];
if (preg_match('/^[ ]*\[\]/', $substring, $matches)) if (preg_match('/^[ ]*\[\]/', $substring, $matches))
{ {
@ -1173,7 +1255,7 @@ class Parsedown
$url = str_replace(array('&', '<'), array('&amp;', '&lt;'), $Link['url']); $url = str_replace(array('&', '<'), array('&amp;', '&lt;'), $Link['url']);
if ($excerpt[0] === '!') if ($Excerpt['text'][0] === '!')
{ {
$Element = array( $Element = array(
'name' => 'img', 'name' => 'img',
@ -1206,20 +1288,20 @@ class Parsedown
); );
} }
protected function identifyEmphasis($excerpt) protected function identifyEmphasis($Excerpt)
{ {
if ( ! isset($excerpt[1])) if ( ! isset($Excerpt['text'][1]))
{ {
return; return;
} }
$marker = $excerpt[0]; $marker = $Excerpt['text'][0];
if ($excerpt[1] === $marker and preg_match($this->strongRegex[$marker], $excerpt, $matches)) if ($Excerpt['text'][1] === $marker and preg_match($this->StrongRegex[$marker], $Excerpt['text'], $matches))
{ {
$emphasis = 'strong'; $emphasis = 'strong';
} }
elseif (preg_match($this->emRegex[$marker], $excerpt, $matches)) elseif (preg_match($this->EmRegex[$marker], $Excerpt['text'], $matches))
{ {
$emphasis = 'em'; $emphasis = 'em';
} }
@ -1311,7 +1393,7 @@ class Parsedown
# Fields # Fields
# #
protected $definitions; protected $Definitions;
# #
# Read-only # Read-only
@ -1320,25 +1402,29 @@ class Parsedown
'\\', '`', '*', '_', '{', '}', '[', ']', '(', ')', '>', '#', '+', '-', '.', '!', '\\', '`', '*', '_', '{', '}', '[', ']', '(', ')', '>', '#', '+', '-', '.', '!',
); );
protected $strongRegex = array( protected $StrongRegex = array(
'*' => '/^[*]{2}((?:[^*]|[*][^*]*[*])+?)[*]{2}(?![*])/s', '*' => '/^[*]{2}((?:[^*]|[*][^*]*[*])+?)[*]{2}(?![*])/s',
'_' => '/^__((?:[^_]|_[^_]*_)+?)__(?!_)/us', '_' => '/^__((?:[^_]|_[^_]*_)+?)__(?!_)/us',
); );
protected $emRegex = array( protected $EmRegex = array(
'*' => '/^[*]((?:[^*]|[*][*][^*]+?[*][*])+?)[*](?![*])/s', '*' => '/^[*]((?:[^*]|[*][*][^*]+?[*][*])+?)[*](?![*])/s',
'_' => '/^_((?:[^_]|__[^_]*__)+?)_(?!_)\b/us', '_' => '/^_((?:[^_]|__[^_]*__)+?)_(?!_)\b/us',
); );
protected $voidElements = array(
'area', 'base', 'br', 'col', 'command', 'embed', 'hr', 'img', 'input', 'link', 'meta', 'param', 'source',
);
protected $textLevelElements = array( protected $textLevelElements = array(
'a', 'br', 'bdo', 'abbr', 'blink', 'nextid', 'acronym', 'basefont', 'a', 'br', 'bdo', 'abbr', 'blink', 'nextid', 'acronym', 'basefont',
'b', 'em', 'big', 'cite', 'small', 'spacer', 'listing', 'b', 'em', 'big', 'cite', 'small', 'spacer', 'listing',
'i', 'rp', 'sub', 'code', 'strike', 'marquee', 'i', 'rp', 'del', 'code', 'strike', 'marquee',
'q', 'rt', 'sup', 'font', 'strong', 'q', 'rt', 'ins', 'font', 'strong',
's', 'tt', 'var', 'mark', 's', 'tt', 'sub', 'mark',
'u', 'xm', 'wbr', 'nobr', 'u', 'xm', 'sup', 'nobr',
'ruby', 'var', 'ruby',
'span', 'wbr', 'span',
'time', 'time',
); );
} }

View File

@ -2,16 +2,17 @@
Better [Markdown](http://en.wikipedia.org/wiki/Markdown) parser for PHP. Better [Markdown](http://en.wikipedia.org/wiki/Markdown) parser for PHP.
- [Demo](http://parsedown.org/demo) [[ demo ]](http://parsedown.org/demo)
- [Tests](http://parsedown.org/tests/)
### Features ### Features
* [Fast](http://parsedown.org/speed) * [Fast](http://parsedown.org/speed)
* [Consistent](http://parsedown.org/consistency) * [Consistent](http://parsedown.org/consistency)
* [GitHub Flavored](https://help.github.com/articles/github-flavored-markdown) * [GitHub Flavored](https://help.github.com/articles/github-flavored-markdown)
* [Tested](https://travis-ci.org/erusev/parsedown) in PHP 5.2, 5.3, 5.4, 5.5, 5.6 and [hhvm](http://www.hhvm.com/) * [Tested](http://parsedown.org/tests/) in PHP 5.2, 5.3, 5.4, 5.5, 5.6 and [hhvm](http://www.hhvm.com/)
* Extensible * Extensible
* [Markdown Extra extension](https://github.com/erusev/parsedown-extra) <sup>new</sup>
* [JavaScript port](https://github.com/hkdobrev/parsedown.js) under development <sup>new</sup>
### Installation ### Installation
@ -24,3 +25,19 @@ $Parsedown = new Parsedown();
echo $Parsedown->text('Hello _Parsedown_!'); # prints: <p>Hello <em>Parsedown</em>!</p> echo $Parsedown->text('Hello _Parsedown_!'); # prints: <p>Hello <em>Parsedown</em>!</p>
``` ```
More examples in [the wiki](https://github.com/erusev/parsedown/wiki/Usage) and in [this video tutorial](http://youtu.be/wYZBY8DEikI).
### Questions
**How does Parsedown work?**<br/>
Parsedown recognises that the Markdown syntax is optimised for humans so it tries to read like one. It goes through text line by line. It looks at how lines start to identify blocks. It looks for special characters to identify inline elements.
**Why doesn't Parsedown use namespaces?**<br/>
Using namespaces would mean dropping support for PHP 5.2. We believe that since Parsedown is a single class with an uncommon name, making this trade wouldn't be worth it.
**Is Parsedown compliant with CommonMark?**<br/>
We are [working on it](https://github.com/erusev/parsedown/tree/commonmark).
**Who uses Parsedown?**<br/>
[phpDocumentor](http://www.phpdoc.org/), [October CMS](http://octobercms.com/), [Bolt CMS](http://bolt.cm/), [RaspberryPi.org](http://www.raspberrypi.org/) and [more](https://www.versioneye.com/php/erusev:parsedown/references).

View File

@ -2,7 +2,7 @@
<phpunit bootstrap="test/bootstrap.php" colors="true"> <phpunit bootstrap="test/bootstrap.php" colors="true">
<testsuites> <testsuites>
<testsuite> <testsuite>
<file>test/Test.php</file> <file>test/ParsedownTest.php</file>
</testsuite> </testsuite>
</testsuites> </testsuites>
</phpunit> </phpunit>

139
test/ParsedownTest.php Normal file
View File

@ -0,0 +1,139 @@
<?php
/**
* Fixture-driven test case for Parsedown.
*
* Each "<name>.md" file found in the directories returned by initDirs() is
* rendered with the instance returned by initParsedown() and compared with
* the sibling "<name>.html" file. The constructor is final while both init
* methods are protected - presumably so that a subclass can point the same
* suite at other fixture directories or at another parser instance.
*/
class ParsedownTest extends PHPUnit_Framework_TestCase
{
/**
* Stores the fixture directories and the parser under test, then hands
* the arguments over to the PHPUnit constructor unchanged.
*
* @param string|null $name
* @param array $data
* @param string $dataName
*/
final function __construct($name = null, array $data = array(), $dataName = '')
{
$this->dirs = $this->initDirs();
$this->Parsedown = $this->initParsedown();
parent::__construct($name, $data, $dataName);
}
# $dirs      - list of fixture directories, each ending with a slash
# $Parsedown - parser instance used by test_()
private $dirs, $Parsedown;
/**
* Lists the directories that data() scans for fixtures. By default this is
* only the "data/" directory next to this file.
*
* @return array
*/
protected function initDirs()
{
$dirs []= dirname(__FILE__).'/data/';
return $dirs;
}
/**
* Creates the parser instance that test_() runs the fixtures through.
*
* @return Parsedown
*/
protected function initParsedown()
{
$Parsedown = new Parsedown();
return $Parsedown;
}
/**
* Renders one Markdown fixture and compares it with its expected markup.
*
* @dataProvider data
* @param $test fixture name without extension
* @param $dir directory that holds the fixture (with trailing slash)
*/
function test_($test, $dir)
{
$markdown = file_get_contents($dir . $test . '.md');
$expectedMarkup = file_get_contents($dir . $test . '.html');
# normalise line breaks of the expected markup to "\n" before comparing
$expectedMarkup = str_replace("\r\n", "\n", $expectedMarkup);
$expectedMarkup = str_replace("\r", "\n", $expectedMarkup);
$actualMarkup = $this->Parsedown->text($markdown);
$this->assertEquals($expectedMarkup, $actualMarkup);
}
/**
* Data provider for test_().
*
* Walks every directory in $this->dirs and returns one array($basename, $dir)
* entry per ".md" file that has a matching ".html" file next to it.
*
* @return array
*/
function data()
{
$data = array();
foreach ($this->dirs as $dir)
{
$Folder = new DirectoryIterator($dir);
foreach ($Folder as $File)
{
/** @var $File DirectoryIterator */
# skip directories and dot entries
if ( ! $File->isFile())
{
continue;
}
$filename = $File->getFilename();
$extension = pathinfo($filename, PATHINFO_EXTENSION);
# only Markdown sources start a fixture pair
if ($extension !== 'md')
{
continue;
}
$basename = $File->getBasename('.md');
# a Markdown file without expected markup is silently ignored
if (file_exists($dir . $basename . '.html'))
{
$data []= array($basename, $dir);
}
}
}
return $data;
}
/**
* Checks the output of text() when setMarkupEscaped(true) is set: the
* expected markup below has the "<" of every tag and of the HTML comment
* replaced by "&lt;", while Markdown emphasis is still rendered.
*/
public function test_no_markup()
{
$markdownWithHtml = <<<MARKDOWN_WITH_MARKUP
<div>_content_</div>
sparse:
<div>
<div class="inner">
_content_
</div>
</div>
paragraph
<style type="text/css">
p {
color: red;
}
</style>
comment
<!-- html comment -->
MARKDOWN_WITH_MARKUP;
$expectedHtml = <<<EXPECTED_HTML
<p>&lt;div><em>content</em>&lt;/div></p>
<p>sparse:</p>
<p>&lt;div>
&lt;div class="inner">
<em>content</em>
&lt;/div>
&lt;/div></p>
<p>paragraph</p>
<p>&lt;style type="text/css"></p>
<pre><code>p {
color: red;
}</code></pre>
<p>&lt;/style></p>
<p>comment</p>
<p>&lt;!-- html comment --></p>
EXPECTED_HTML;
# uses a dedicated instance rather than $this->Parsedown
$parsedownWithNoMarkup = new Parsedown();
$parsedownWithNoMarkup->setMarkupEscaped(true);
$this->assertEquals($expectedHtml, $parsedownWithNoMarkup->text($markdownWithHtml));
}
}

View File

@ -1,65 +0,0 @@
<?php
/**
 * Fixture-driven test case: every `<name>.md` file in the data directory
 * that has a sibling `<name>.html` file is rendered through the shared
 * Parsedown instance and compared with the stored HTML.
 */
class Test extends PHPUnit_Framework_TestCase
{
    /** Directory (ending in a slash) that holds the fixture pairs. */
    private $dataDir;

    /**
     * Records the fixture directory, then hands the arguments over to the
     * PHPUnit base constructor unchanged.
     */
    public function __construct($name = null, array $data = array(), $dataName = '')
    {
        $this->dataDir = dirname(__FILE__).'/data/';

        parent::__construct($name, $data, $dataName);
    }

    /**
     * Renders one Markdown fixture and compares it with its stored HTML.
     *
     * @dataProvider data
     */
    function test_($filename)
    {
        $fixture = $this->dataDir . $filename;

        $source = file_get_contents($fixture . '.md');

        // Line endings of the stored HTML are normalised to "\n"; the pairs
        // are replaced in order, so "\r\n" is handled before a lone "\r".
        $expected = str_replace(
            array("\r\n", "\r"),
            "\n",
            file_get_contents($fixture . '.html')
        );

        $this->assertEquals($expected, Parsedown::instance()->text($source));
    }

    /**
     * Data provider: one array($basename) entry per `.md` file that has a
     * matching `.html` file next to it.
     */
    function data()
    {
        $cases = array();

        foreach (new DirectoryIterator($this->dataDir) as $entry)
        {
            /** @var $entry DirectoryIterator */
            $isMarkdownFile = $entry->isFile()
                && pathinfo($entry->getFilename(), PATHINFO_EXTENSION) === 'md';

            if ($isMarkdownFile)
            {
                $name = $entry->getBasename('.md');

                // Markdown files without stored expected output are skipped.
                if (file_exists($this->dataDir . $name . '.html'))
                {
                    $cases []= array($name);
                }
            }
        }

        return $cases;
    }
}

View File

@ -4,5 +4,6 @@
<h4>h4</h4> <h4>h4</h4>
<h5>h5</h5> <h5>h5</h5>
<h6>h6</h6> <h6>h6</h6>
<h6>h6</h6>
<h1>closed h1</h1> <h1>closed h1</h1>
<p>#</p> <p>#</p>

View File

@ -10,6 +10,8 @@
###### h6 ###### h6
####### h6
# closed h1 # # closed h1 #
# #

View File

@ -1,5 +1,13 @@
<div>_content_</div> <div>_content_</div>
<p>sparse:</p> <p>sparse:</p>
<div> <div>
<div class="inner">
_content_ _content_
</div> </div>
</div>
<p>paragraph</p>
<style type="text/css">
p {
color: red;
}
</style>

View File

@ -3,5 +3,15 @@
sparse: sparse:
<div> <div>
<div class="inner">
_content_ _content_
</div> </div>
</div>
paragraph
<style type="text/css">
p {
color: red;
}
</style>

View File

@ -0,0 +1,5 @@
<!-- single line -->
<p>paragraph</p>
<!--
multiline -->
<p>paragraph</p>

View File

@ -0,0 +1,8 @@
<!-- single line -->
paragraph
<!--
multiline -->
paragraph

View File

@ -1,28 +0,0 @@
<p>Headings:</p>
<h2 id="overview">Overview</h2>
<p>blah</p>
<H2 id="block">Block Elements</H2>
<p>blah</p>
<h3 id="span">
Span Elements
</h3>
<p>blah</p>
<p>Hr's:</p>
<hr>
<p>blah</p>
<hr/>
<p>blah</p>
<hr />
<p>blah</p>
<hr>
<p>blah</p>
<hr/>
<p>blah</p>
<hr />
<p>blah</p>
<hr class="foo" id="bar" />
<p>blah</p>
<hr class="foo" id="bar"/>
<p>blah</p>
<hr class="foo" id="bar" >
<p>blah</p>

View File

@ -1,39 +0,0 @@
Headings:
<h2 id="overview">Overview</h2>
blah
<H2 id="block">Block Elements</H2>
blah
<h3 id="span">
Span Elements
</h3>
blah
Hr's:
<hr>
blah
<hr/>
blah
<hr />
blah
<hr>
blah
<hr/>
blah
<hr />
blah
<hr class="foo" id="bar" />
blah
<hr class="foo" id="bar"/>
blah
<hr class="foo" id="bar" >
blah

View File

@ -1,3 +1,4 @@
<p>an <a href="http://example.com">implicit</a> reference link</p> <p>an <a href="http://example.com">implicit</a> reference link</p>
<p>an <a href="http://example.com">implicit</a> reference link with an empty link definition</p> <p>an <a href="http://example.com">implicit</a> reference link with an empty link definition</p>
<p>an <a href="http://example.com">implicit</a> reference link followed by <a href="http://cnn.com">another</a></p>
<p>an <a href="http://example.com" title="Example">explicit</a> reference link with a title</p> <p>an <a href="http://example.com" title="Example">explicit</a> reference link with a title</p>

View File

@ -4,6 +4,10 @@ an [implicit] reference link
an [implicit][] reference link with an empty link definition an [implicit][] reference link with an empty link definition
an [implicit][] reference link followed by [another][]
[another]: http://cnn.com
an [explicit][example] reference link with a title an [explicit][example] reference link with a title
[example]: http://example.com "Example" [example]: http://example.com "Example"

View File

@ -1,4 +1,6 @@
<blockquote> <blockquote>
<p>quote <p>quote
the rest of it</p> the rest of it</p>
<p>another paragraph
the rest of it</p>
</blockquote> </blockquote>

View File

@ -1,2 +1,5 @@
> quote > quote
the rest of it
> another paragraph
the rest of it the rest of it

View File

@ -1,4 +0,0 @@
<hr />
<p>attributes:</p>
<hr style="background: #9bd;" />
<p>...</p>

View File

@ -1,7 +0,0 @@
<hr />
attributes:
<hr style="background: #9bd;" />
...

View File

@ -0,0 +1,12 @@
<hr>
<p>paragraph</p>
<hr/>
<p>paragraph</p>
<hr />
<p>paragraph</p>
<hr class="foo" id="bar" />
<p>paragraph</p>
<hr class="foo" id="bar"/>
<p>paragraph</p>
<hr class="foo" id="bar" >
<p>paragraph</p>

View File

@ -0,0 +1,12 @@
<hr>
paragraph
<hr/>
paragraph
<hr />
paragraph
<hr class="foo" id="bar" />
paragraph
<hr class="foo" id="bar"/>
paragraph
<hr class="foo" id="bar" >
paragraph

View File

@ -1,4 +1,5 @@
<p>an <b>important</b> <a href=''>link</a></p> <p>an <b>important</b> <a href=''>link</a></p>
<p>broken<br/> <p>broken<br/>
line</p> line</p>
<p><b>inline tag</b> at the beginning</p> <p><b>inline tag</b> at the beginning</p>
<p><span><a href="http://example.com">http://example.com</a></span></p>

View File

@ -3,4 +3,6 @@ an <b>important</b> <a href=''>link</a>
broken<br/> broken<br/>
line line
<b>inline tag</b> at the beginning <b>inline tag</b> at the beginning
<span>http://example.com</span>