diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..c1a5d9a --- /dev/null +++ b/.gitattributes @@ -0,0 +1,6 @@ +# Ignore all tests for archive +/test export-ignore +/.gitattributes export-ignore +/.gitignore export-ignore +/.travis.yml export-ignore +/phpunit.xml.dist export-ignore diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..d8a7996 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +composer.lock +vendor/ diff --git a/.travis.yml b/.travis.yml index 256dcf1..a5382fe 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,15 +1,30 @@ language: php -php: - - 7.0 - - 5.6 - - 5.5 - - 5.4 - - 5.3 - - hhvm - - hhvm-nightly +dist: trusty +sudo: false matrix: + include: + - php: 5.3 + dist: precise + - php: 5.4 + - php: 5.5 + - php: 5.6 + - php: 7.0 + - php: 7.1 + - php: 7.2 + - php: nightly + - php: hhvm + - php: hhvm-nightly fast_finish: true allow_failures: + - php: nightly - php: hhvm-nightly + +install: + - composer install --prefer-dist --no-interaction --no-progress + +script: + - vendor/bin/phpunit + - vendor/bin/phpunit test/CommonMarkTestWeak.php || true + - '[ -z "$TRAVIS_TAG" ] || [ "$TRAVIS_TAG" == "$(php -r "require(\"Parsedown.php\"); echo Parsedown::version;")" ]' diff --git a/LICENSE.txt b/LICENSE.txt index baca86f..8e7c764 100644 --- a/LICENSE.txt +++ b/LICENSE.txt @@ -1,6 +1,6 @@ The MIT License (MIT) -Copyright (c) 2013 Emanuil Rusev, erusev.com +Copyright (c) 2013-2018 Emanuil Rusev, erusev.com Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in @@ -17,4 +17,4 @@ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN -CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. \ No newline at end of file +CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/Parsedown.php b/Parsedown.php index d53b65f..a62c38f 100644 --- a/Parsedown.php +++ b/Parsedown.php @@ -17,11 +17,24 @@ class Parsedown { # ~ - const version = '1.6.0'; + const version = '1.8.0-beta-5'; # ~ function text($text) + { + $Elements = $this->textElements($text); + + # convert to markup + $markup = $this->elements($Elements); + + # trim line breaks + $markup = trim($markup, "\n"); + + return $markup; + } + + protected function textElements($text) { # make sure no definitions are set $this->DefinitionData = array(); @@ -36,12 +49,7 @@ class Parsedown $lines = explode("\n", $text); # iterate through lines to identify blocks - $markup = $this->lines($lines); - - # trim line breaks - $markup = trim($markup, "\n"); - - return $markup; + return $this->linesElements($lines); } # @@ -74,12 +82,49 @@ class Parsedown } protected $urlsLinked = true; - private $relativePath; - function setRelativePath($relativePath) - { - $this->relativePath = $relativePath; - return $this; - } + + private $relativePath; + function setRelativePath($relativePath) + { + $this->relativePath = $relativePath; + return $this; + } + + function setSafeMode($safeMode) + { + $this->safeMode = (bool) $safeMode; + + return $this; + } + + protected $safeMode; + + function setStrictMode($strictMode) + { + $this->strictMode = (bool) $strictMode; + + return $this; + } + + protected $strictMode; + + protected $safeLinksWhitelist = array( + 'http://', + 'https://', + 'ftp://', + 'ftps://', + 'mailto:', + 'data:image/png;base64,', + 'data:image/gif;base64,', + 'data:image/jpeg;base64,', + 'irc:', + 'ircs:', + 'git:', + 'ssh:', + 'news:', + 'steam:', + ); + # # Lines # @@ -122,6 +167,12 @@ class Parsedown protected function lines(array $lines) { + return $this->elements($this->linesElements($lines)); + } + + protected function linesElements(array $lines) + { + $Elements = array(); $CurrentBlock = null; foreach ($lines as $line) @@ -130,35 +181,25 @@ class Parsedown { if (isset($CurrentBlock)) { - $CurrentBlock['interrupted'] = true; + $CurrentBlock['interrupted'] = (isset($CurrentBlock['interrupted']) + ? $CurrentBlock['interrupted'] + 1 : 1 + ); } continue; } - if (strpos($line, "\t") !== false) + while (($beforeTab = strstr($line, "\t", true)) !== false) { - $parts = explode("\t", $line); + $shortage = 4 - mb_strlen($beforeTab, 'utf-8') % 4; - $line = $parts[0]; - - unset($parts[0]); - - foreach ($parts as $part) - { - $shortage = 4 - mb_strlen($line, 'utf-8') % 4; - - $line .= str_repeat(' ', $shortage); - $line .= $part; - } + $line = $beforeTab + . str_repeat(' ', $shortage) + . substr($line, strlen($beforeTab) + 1) + ; } - $indent = 0; - - while (isset($line[$indent]) and $line[$indent] === ' ') - { - $indent ++; - } + $indent = strspn($line, ' '); $text = $indent > 0 ? substr($line, $indent) : $line; @@ -170,7 +211,8 @@ class Parsedown if (isset($CurrentBlock['continuable'])) { - $Block = $this->{'block'.$CurrentBlock['type'].'Continue'}($Line, $CurrentBlock); + $methodName = 'block' . $CurrentBlock['type'] . 'Continue'; + $Block = $this->$methodName($Line, $CurrentBlock); if (isset($Block)) { @@ -182,7 +224,8 @@ class Parsedown { if ($this->isBlockCompletable($CurrentBlock['type'])) { - $CurrentBlock = $this->{'block'.$CurrentBlock['type'].'Complete'}($CurrentBlock); + $methodName = 'block' . $CurrentBlock['type'] . 'Complete'; + $CurrentBlock = $this->$methodName($CurrentBlock); } } } @@ -208,7 +251,7 @@ class Parsedown foreach ($blockTypes as $blockType) { - $Block = $this->{'block'.$blockType}($Line, $CurrentBlock); + $Block = $this->{"block$blockType"}($Line, $CurrentBlock); if (isset($Block)) { @@ -216,7 +259,10 @@ class Parsedown if ( ! isset($Block['identified'])) { - $Blocks []= $CurrentBlock; + if (isset($CurrentBlock)) + { + $Elements[] = $this->extractElement($CurrentBlock); + } $Block['identified'] = true; } @@ -234,13 +280,21 @@ class Parsedown # ~ - if (isset($CurrentBlock) and ! isset($CurrentBlock['type']) and ! isset($CurrentBlock['interrupted'])) + if (isset($CurrentBlock) and $CurrentBlock['type'] === 'Paragraph') { - $CurrentBlock['element']['text'] .= "\n".$text; + $Block = $this->paragraphContinue($Line, $CurrentBlock); + } + + if (isset($Block)) + { + $CurrentBlock = $Block; } else { - $Blocks []= $CurrentBlock; + if (isset($CurrentBlock)) + { + $Elements[] = $this->extractElement($CurrentBlock); + } $CurrentBlock = $this->paragraph($Line); @@ -252,48 +306,47 @@ class Parsedown if (isset($CurrentBlock['continuable']) and $this->isBlockCompletable($CurrentBlock['type'])) { - $CurrentBlock = $this->{'block'.$CurrentBlock['type'].'Complete'}($CurrentBlock); + $methodName = 'block' . $CurrentBlock['type'] . 'Complete'; + $CurrentBlock = $this->$methodName($CurrentBlock); } # ~ - $Blocks []= $CurrentBlock; - - unset($Blocks[0]); - - # ~ - - $markup = ''; - - foreach ($Blocks as $Block) + if (isset($CurrentBlock)) { - if (isset($Block['hidden'])) - { - continue; - } - - $markup .= "\n"; - $markup .= isset($Block['markup']) ? $Block['markup'] : $this->element($Block['element']); + $Elements[] = $this->extractElement($CurrentBlock); } - $markup .= "\n"; - # ~ - return $markup; + return $Elements; + } + + protected function extractElement(array $Component) + { + if ( ! isset($Component['element'])) + { + if (isset($Component['markup'])) + { + $Component['element'] = array('rawHtml' => $Component['markup']); + } + elseif (isset($Component['hidden'])) + { + $Component['element'] = array(); + } + } + + return $Component['element']; } - # - # Allow for plugin extensibility - # protected function isBlockContinuable($Type) { - return method_exists($this, 'block'.$Type.'Continue'); + return method_exists($this, 'block' . $Type . 'Continue'); } protected function isBlockCompletable($Type) { - return method_exists($this, 'block'.$Type.'Complete'); + return method_exists($this, 'block' . $Type . 'Complete'); } # @@ -301,7 +354,7 @@ class Parsedown protected function blockCode($Line, $Block = null) { - if (isset($Block) and ! isset($Block['type']) and ! isset($Block['interrupted'])) + if (isset($Block) and $Block['type'] === 'Paragraph' and ! isset($Block['interrupted'])) { return; } @@ -313,8 +366,7 @@ class Parsedown $Block = array( 'element' => array( 'name' => 'pre', - 'handler' => 'element', - 'text' => array( + 'element' => array( 'name' => 'code', 'text' => $text, ), @@ -331,16 +383,16 @@ class Parsedown { if (isset($Block['interrupted'])) { - $Block['element']['text']['text'] .= "\n"; + $Block['element']['element']['text'] .= str_repeat("\n", $Block['interrupted']); unset($Block['interrupted']); } - $Block['element']['text']['text'] .= "\n"; + $Block['element']['element']['text'] .= "\n"; $text = substr($Line['body'], 4); - $Block['element']['text']['text'] .= $text; + $Block['element']['element']['text'] .= $text; return $Block; } @@ -348,12 +400,6 @@ class Parsedown protected function blockCodeComplete($Block) { - $text = $Block['element']['text']['text']; - - $text = htmlspecialchars($text, ENT_NOQUOTES, 'UTF-8'); - - $Block['element']['text']['text'] = $text; - return $Block; } @@ -362,18 +408,21 @@ class Parsedown protected function blockComment($Line) { - if ($this->markupEscaped) + if ($this->markupEscaped or $this->safeMode) { return; } - if (isset($Line['text'][3]) and $Line['text'][3] === '-' and $Line['text'][2] === '-' and $Line['text'][1] === '!') + if (strpos($Line['text'], '$/', $Line['text'])) + if (strpos($Line['text'], '-->') !== false) { $Block['closed'] = true; } @@ -389,9 +438,9 @@ class Parsedown return; } - $Block['markup'] .= "\n" . $Line['body']; + $Block['element']['rawHtml'] .= "\n" . $Line['body']; - if (preg_match('/-->$/', $Line['text'])) + if (strpos($Line['text'], '-->') !== false) { $Block['closed'] = true; } @@ -404,33 +453,42 @@ class Parsedown protected function blockFencedCode($Line) { - if (preg_match('/^['.$Line['text'][0].']{3,}[ ]*([\w-]+)?[ ]*$/', $Line['text'], $matches)) + $marker = $Line['text'][0]; + + $openerLength = strspn($Line['text'], $marker); + + if ($openerLength < 3) { - $Element = array( - 'name' => 'code', - 'text' => '', - ); - - if (isset($matches[1])) - { - $class = 'language-'.$matches[1]; - - $Element['attributes'] = array( - 'class' => $class, - ); - } - - $Block = array( - 'char' => $Line['text'][0], - 'element' => array( - 'name' => 'pre', - 'handler' => 'element', - 'text' => $Element, - ), - ); - - return $Block; + return; } + + $infostring = trim(substr($Line['text'], $openerLength), "\t "); + + if (strpos($infostring, '`') !== false) + { + return; + } + + $Element = array( + 'name' => 'code', + 'text' => '', + ); + + if ($infostring !== '') + { + $Element['attributes'] = array('class' => "language-$infostring"); + } + + $Block = array( + 'char' => $marker, + 'openerLength' => $openerLength, + 'element' => array( + 'name' => 'pre', + 'element' => $Element, + ), + ); + + return $Block; } protected function blockFencedCodeContinue($Line, $Block) @@ -442,33 +500,28 @@ class Parsedown if (isset($Block['interrupted'])) { - $Block['element']['text']['text'] .= "\n"; + $Block['element']['element']['text'] .= str_repeat("\n", $Block['interrupted']); unset($Block['interrupted']); } - if (preg_match('/^'.$Block['char'].'{3,}[ ]*$/', $Line['text'])) - { - $Block['element']['text']['text'] = substr($Block['element']['text']['text'], 1); + if (($len = strspn($Line['text'], $Block['char'])) >= $Block['openerLength'] + and chop(substr($Line['text'], $len), ' ') === '' + ) { + $Block['element']['element']['text'] = substr($Block['element']['element']['text'], 1); $Block['complete'] = true; return $Block; } - $Block['element']['text']['text'] .= "\n".$Line['body'];; + $Block['element']['element']['text'] .= "\n" . $Line['body']; return $Block; } protected function blockFencedCodeComplete($Block) { - $text = $Block['element']['text']['text']; - - $text = htmlspecialchars($text, ENT_NOQUOTES, 'UTF-8'); - - $Block['element']['text']['text'] = $text; - return $Block; } @@ -477,61 +530,103 @@ class Parsedown protected function blockHeader($Line) { - if (isset($Line['text'][1])) + $level = strspn($Line['text'], '#'); + + if ($level > 6) { - $level = 1; - - while (isset($Line['text'][$level]) and $Line['text'][$level] === '#') - { - $level ++; - } - - if ($level > 6) - { - return; - } - - $text = trim($Line['text'], '# '); - - $Block = array( - 'element' => array( - 'name' => 'h' . min(6, $level), - 'text' => $text, - 'handler' => 'line', - ), - ); - - return $Block; + return; } + + $text = trim($Line['text'], '#'); + + if ($this->strictMode and isset($text[0]) and $text[0] !== ' ') + { + return; + } + + $text = trim($text, ' '); + + $Block = array( + 'element' => array( + 'name' => 'h' . min(6, $level), + 'handler' => array( + 'function' => 'lineElements', + 'argument' => $text, + 'destination' => 'elements', + ) + ), + ); + + return $Block; } # # List - protected function blockList($Line) + protected function blockList($Line, array $CurrentBlock = null) { - list($name, $pattern) = $Line['text'][0] <= '-' ? array('ul', '[*+-]') : array('ol', '[0-9]+[.]'); + list($name, $pattern) = $Line['text'][0] <= '-' ? array('ul', '[*+-]') : array('ol', '[0-9]{1,9}+[.\)]'); - if (preg_match('/^('.$pattern.'[ ]+)(.*)/', $Line['text'], $matches)) + if (preg_match('/^('.$pattern.'([ ]++|$))(.*+)/', $Line['text'], $matches)) { + $contentIndent = strlen($matches[2]); + + if ($contentIndent >= 5) + { + $contentIndent -= 1; + $matches[1] = substr($matches[1], 0, -$contentIndent); + $matches[3] = str_repeat(' ', $contentIndent) . $matches[3]; + } + elseif ($contentIndent === 0) + { + $matches[1] .= ' '; + } + + $markerWithoutWhitespace = strstr($matches[1], ' ', true); + $Block = array( 'indent' => $Line['indent'], 'pattern' => $pattern, + 'data' => array( + 'type' => $name, + 'marker' => $matches[1], + 'markerType' => ($name === 'ul' ? $markerWithoutWhitespace : substr($markerWithoutWhitespace, -1)), + ), 'element' => array( 'name' => $name, - 'handler' => 'elements', + 'elements' => array(), ), ); + $Block['data']['markerTypeRegex'] = preg_quote($Block['data']['markerType'], '/'); + + if ($name === 'ol') + { + $listStart = ltrim(strstr($matches[1], $Block['data']['markerType'], true), '0') ?: '0'; + + if ($listStart !== '1') + { + if ( + isset($CurrentBlock) + and $CurrentBlock['type'] === 'Paragraph' + and ! isset($CurrentBlock['interrupted']) + ) { + return; + } + + $Block['element']['attributes'] = array('start' => $listStart); + } + } $Block['li'] = array( 'name' => 'li', - 'handler' => 'li', - 'text' => array( - $matches[2], - ), + 'handler' => array( + 'function' => 'li', + 'argument' => !empty($matches[3]) ? array($matches[3]) : array(), + 'destination' => 'elements' + ) ); - $Block['element']['text'] []= & $Block['li']; + $Block['element']['elements'] []= & $Block['li']; return $Block; } @@ -539,11 +634,29 @@ class Parsedown protected function blockListContinue($Line, array $Block) { - if ($Block['indent'] === $Line['indent'] and preg_match('/^'.$Block['pattern'].'(?:[ ]+(.*)|$)/', $Line['text'], $matches)) + if (isset($Block['interrupted']) and empty($Block['li']['handler']['argument'])) { + return null; + } + + $requiredIndent = ($Block['indent'] + strlen($Block['data']['marker'])); + + if ($Line['indent'] < $requiredIndent + and ( + ( + $Block['data']['type'] === 'ol' + and preg_match('/^[0-9]++'.$Block['data']['markerTypeRegex'].'(?:[ ]++(.*)|$)/', $Line['text'], $matches) + ) or ( + $Block['data']['type'] === 'ul' + and preg_match('/^'.$Block['data']['markerTypeRegex'].'(?:[ ]++(.*)|$)/', $Line['text'], $matches) + ) + ) + ) { if (isset($Block['interrupted'])) { - $Block['li']['text'] []= ''; + $Block['li']['handler']['argument'] []= ''; + + $Block['loose'] = true; unset($Block['interrupted']); } @@ -552,45 +665,73 @@ class Parsedown $text = isset($matches[1]) ? $matches[1] : ''; + $Block['indent'] = $Line['indent']; + $Block['li'] = array( 'name' => 'li', - 'handler' => 'li', - 'text' => array( - $text, - ), + 'handler' => array( + 'function' => 'li', + 'argument' => array($text), + 'destination' => 'elements' + ) ); - $Block['element']['text'] []= & $Block['li']; + $Block['element']['elements'] []= & $Block['li']; return $Block; } + elseif ($Line['indent'] < $requiredIndent and $this->blockList($Line)) + { + return null; + } if ($Line['text'][0] === '[' and $this->blockReference($Line)) { return $Block; } + if ($Line['indent'] >= $requiredIndent) + { + if (isset($Block['interrupted'])) + { + $Block['li']['handler']['argument'] []= ''; + + $Block['loose'] = true; + + unset($Block['interrupted']); + } + + $text = substr($Line['body'], $requiredIndent); + + $Block['li']['handler']['argument'] []= $text; + + return $Block; + } + if ( ! isset($Block['interrupted'])) { - $text = preg_replace('/^[ ]{0,4}/', '', $Line['body']); + $text = preg_replace('/^[ ]{0,'.$requiredIndent.'}+/', '', $Line['body']); - $Block['li']['text'] []= $text; + $Block['li']['handler']['argument'] []= $text; return $Block; } + } - if ($Line['indent'] > 0) + protected function blockListComplete(array $Block) + { + if (isset($Block['loose'])) { - $Block['li']['text'] []= ''; - - $text = preg_replace('/^[ ]{0,4}/', '', $Line['body']); - - $Block['li']['text'] []= $text; - - unset($Block['interrupted']); - - return $Block; + foreach ($Block['element']['elements'] as &$li) + { + if (end($li['handler']['argument']) !== '') + { + $li['handler']['argument'] []= ''; + } + } } + + return $Block; } # @@ -598,13 +739,16 @@ class Parsedown protected function blockQuote($Line) { - if (preg_match('/^>[ ]?(.*)/', $Line['text'], $matches)) + if (preg_match('/^>[ ]?+(.*+)/', $Line['text'], $matches)) { $Block = array( 'element' => array( 'name' => 'blockquote', - 'handler' => 'lines', - 'text' => (array) $matches[1], + 'handler' => array( + 'function' => 'linesElements', + 'argument' => (array) $matches[1], + 'destination' => 'elements', + ) ), ); @@ -614,23 +758,21 @@ class Parsedown protected function blockQuoteContinue($Line, array $Block) { - if ($Line['text'][0] === '>' and preg_match('/^>[ ]?(.*)/', $Line['text'], $matches)) + if (isset($Block['interrupted'])) { - if (isset($Block['interrupted'])) - { - $Block['element']['text'] []= ''; + return; + } - unset($Block['interrupted']); - } - - $Block['element']['text'] []= $matches[1]; + if ($Line['text'][0] === '>' and preg_match('/^>[ ]?+(.*+)/', $Line['text'], $matches)) + { + $Block['element']['handler']['argument'] []= $matches[1]; return $Block; } if ( ! isset($Block['interrupted'])) { - $Block['element']['text'] []= $Line['text']; + $Block['element']['handler']['argument'] []= $Line['text']; return $Block; } @@ -641,11 +783,13 @@ class Parsedown protected function blockRule($Line) { - if (preg_match('/^(['.$Line['text'][0].'])([ ]*\1){2,}[ ]*$/', $Line['text'])) + $marker = $Line['text'][0]; + + if (substr_count($Line['text'], $marker) >= 3 and chop($Line['text'], " $marker") === '') { $Block = array( 'element' => array( - 'name' => 'hr' + 'name' => 'hr', ), ); @@ -658,12 +802,12 @@ class Parsedown protected function blockSetextHeader($Line, array $Block = null) { - if ( ! isset($Block) or isset($Block['type']) or isset($Block['interrupted'])) + if ( ! isset($Block) or $Block['type'] !== 'Paragraph' or isset($Block['interrupted'])) { return; } - if (chop($Line['text'], $Line['text'][0]) === '') + if ($Line['indent'] < 4 and chop(chop($Line['text'], ' '), $Line['text'][0]) === '') { $Block['element']['name'] = $Line['text'][0] === '=' ? 'h1' : 'h2'; @@ -676,12 +820,12 @@ class Parsedown protected function blockMarkup($Line) { - if ($this->markupEscaped) + if ($this->markupEscaped or $this->safeMode) { return; } - if (preg_match('/^<(\w*)(?:[ ]*'.$this->regexHtmlAttribute.')*[ ]*(\/)?>/', $Line['text'], $matches)) + if (preg_match('/^<[\/]?+(\w*)(?:[ ]*+'.$this->regexHtmlAttribute.')*+[ ]*+(\/)?>/', $Line['text'], $matches)) { $element = strtolower($matches[1]); @@ -692,72 +836,24 @@ class Parsedown $Block = array( 'name' => $matches[1], - 'depth' => 0, - 'markup' => $Line['text'], + 'element' => array( + 'rawHtml' => $Line['text'], + 'autobreak' => true, + ), ); - $length = strlen($matches[0]); - - $remainder = substr($Line['text'], $length); - - if (trim($remainder) === '') - { - if (isset($matches[2]) or in_array($matches[1], $this->voidElements)) - { - $Block['closed'] = true; - - $Block['void'] = true; - } - } - else - { - if (isset($matches[2]) or in_array($matches[1], $this->voidElements)) - { - return; - } - - if (preg_match('/<\/'.$matches[1].'>[ ]*$/i', $remainder)) - { - $Block['closed'] = true; - } - } - return $Block; } } protected function blockMarkupContinue($Line, array $Block) { - if (isset($Block['closed'])) + if (isset($Block['closed']) or isset($Block['interrupted'])) { return; } - if (preg_match('/^<'.$Block['name'].'(?:[ ]*'.$this->regexHtmlAttribute.')*[ ]*>/i', $Line['text'])) # open - { - $Block['depth'] ++; - } - - if (preg_match('/(.*?)<\/'.$Block['name'].'>[ ]*$/i', $Line['text'], $matches)) # close - { - if ($Block['depth'] > 0) - { - $Block['depth'] --; - } - else - { - $Block['closed'] = true; - } - } - - if (isset($Block['interrupted'])) - { - $Block['markup'] .= "\n"; - - unset($Block['interrupted']); - } - - $Block['markup'] .= "\n".$Line['body']; + $Block['element']['rawHtml'] .= "\n" . $Line['body']; return $Block; } @@ -767,24 +863,20 @@ class Parsedown protected function blockReference($Line) { - if (preg_match('/^\[(.+?)\]:[ ]*?(?:[ ]+["\'(](.+)["\')])?[ ]*$/', $Line['text'], $matches)) - { + if (strpos($Line['text'], ']') !== false + and preg_match('/^\[(.+?)\]:[ ]*+?(?:[ ]+["\'(](.+)["\')])?[ ]*+$/', $Line['text'], $matches) + ) { $id = strtolower($matches[1]); $Data = array( 'url' => $matches[2], - 'title' => null, + 'title' => isset($matches[3]) ? $matches[3] : null, ); - if (isset($matches[3])) - { - $Data['title'] = $matches[3]; - } - $this->DefinitionData['Reference'][$id] = $Data; $Block = array( - 'hidden' => true, + 'element' => array(), ); return $Block; @@ -796,109 +888,125 @@ class Parsedown protected function blockTable($Line, array $Block = null) { - if ( ! isset($Block) or isset($Block['type']) or isset($Block['interrupted'])) + if ( ! isset($Block) or $Block['type'] !== 'Paragraph' or isset($Block['interrupted'])) { return; } - if (strpos($Block['element']['text'], '|') !== false and chop($Line['text'], ' -:|') === '') - { - $alignments = array(); - - $divider = $Line['text']; - - $divider = trim($divider); - $divider = trim($divider, '|'); - - $dividerCells = explode('|', $divider); - - foreach ($dividerCells as $dividerCell) - { - $dividerCell = trim($dividerCell); - - if ($dividerCell === '') - { - continue; - } - - $alignment = null; - - if ($dividerCell[0] === ':') - { - $alignment = 'left'; - } - - if (substr($dividerCell, - 1) === ':') - { - $alignment = $alignment === 'left' ? 'center' : 'right'; - } - - $alignments []= $alignment; - } - - # ~ - - $HeaderElements = array(); - - $header = $Block['element']['text']; - - $header = trim($header); - $header = trim($header, '|'); - - $headerCells = explode('|', $header); - - foreach ($headerCells as $index => $headerCell) - { - $headerCell = trim($headerCell); - - $HeaderElement = array( - 'name' => 'th', - 'text' => $headerCell, - 'handler' => 'line', - ); - - if (isset($alignments[$index])) - { - $alignment = $alignments[$index]; - - $HeaderElement['attributes'] = array( - 'style' => 'text-align: '.$alignment.';', - ); - } - - $HeaderElements []= $HeaderElement; - } - - # ~ - - $Block = array( - 'alignments' => $alignments, - 'identified' => true, - 'element' => array( - 'name' => 'table', - 'handler' => 'elements', - ), - ); - - $Block['element']['text'] []= array( - 'name' => 'thead', - 'handler' => 'elements', - ); - - $Block['element']['text'] []= array( - 'name' => 'tbody', - 'handler' => 'elements', - 'text' => array(), - ); - - $Block['element']['text'][0]['text'] []= array( - 'name' => 'tr', - 'handler' => 'elements', - 'text' => $HeaderElements, - ); - - return $Block; + if ( + strpos($Block['element']['handler']['argument'], '|') === false + and strpos($Line['text'], '|') === false + and strpos($Line['text'], ':') === false + or strpos($Block['element']['handler']['argument'], "\n") !== false + ) { + return; } + + if (chop($Line['text'], ' -:|') !== '') + { + return; + } + + $alignments = array(); + + $divider = $Line['text']; + + $divider = trim($divider); + $divider = trim($divider, '|'); + + $dividerCells = explode('|', $divider); + + foreach ($dividerCells as $dividerCell) + { + $dividerCell = trim($dividerCell); + + if ($dividerCell === '') + { + return; + } + + $alignment = null; + + if ($dividerCell[0] === ':') + { + $alignment = 'left'; + } + + if (substr($dividerCell, - 1) === ':') + { + $alignment = $alignment === 'left' ? 'center' : 'right'; + } + + $alignments []= $alignment; + } + + # ~ + + $HeaderElements = array(); + + $header = $Block['element']['handler']['argument']; + + $header = trim($header); + $header = trim($header, '|'); + + $headerCells = explode('|', $header); + + if (count($headerCells) !== count($alignments)) + { + return; + } + + foreach ($headerCells as $index => $headerCell) + { + $headerCell = trim($headerCell); + + $HeaderElement = array( + 'name' => 'th', + 'handler' => array( + 'function' => 'lineElements', + 'argument' => $headerCell, + 'destination' => 'elements', + ) + ); + + if (isset($alignments[$index])) + { + $alignment = $alignments[$index]; + + $HeaderElement['attributes'] = array( + 'style' => "text-align: $alignment;", + ); + } + + $HeaderElements []= $HeaderElement; + } + + # ~ + + $Block = array( + 'alignments' => $alignments, + 'identified' => true, + 'element' => array( + 'name' => 'table', + 'elements' => array(), + ), + ); + + $Block['element']['elements'] []= array( + 'name' => 'thead', + ); + + $Block['element']['elements'] []= array( + 'name' => 'tbody', + 'elements' => array(), + ); + + $Block['element']['elements'][0]['elements'] []= array( + 'name' => 'tr', + 'elements' => $HeaderElements, + ); + + return $Block; } protected function blockTableContinue($Line, array $Block) @@ -908,7 +1016,7 @@ class Parsedown return; } - if ($Line['text'][0] === '|' or strpos($Line['text'], '|')) + if (count($Block['alignments']) === 1 or $Line['text'][0] === '|' or strpos($Line['text'], '|')) { $Elements = array(); @@ -917,22 +1025,27 @@ class Parsedown $row = trim($row); $row = trim($row, '|'); - preg_match_all('/(?:(\\\\[|])|[^|`]|`[^`]+`|`)+/', $row, $matches); + preg_match_all('/(?:(\\\\[|])|[^|`]|`[^`]++`|`)++/', $row, $matches); - foreach ($matches[0] as $index => $cell) + $cells = array_slice($matches[0], 0, count($Block['alignments'])); + + foreach ($cells as $index => $cell) { $cell = trim($cell); $Element = array( 'name' => 'td', - 'handler' => 'line', - 'text' => $cell, + 'handler' => array( + 'function' => 'lineElements', + 'argument' => $cell, + 'destination' => 'elements', + ) ); if (isset($Block['alignments'][$index])) { $Element['attributes'] = array( - 'style' => 'text-align: '.$Block['alignments'][$index].';', + 'style' => 'text-align: ' . $Block['alignments'][$index] . ';', ); } @@ -941,11 +1054,10 @@ class Parsedown $Element = array( 'name' => 'tr', - 'handler' => 'elements', - 'text' => $Elements, + 'elements' => $Elements, ); - $Block['element']['text'][1]['text'] []= $Element; + $Block['element']['elements'][1]['elements'] []= $Element; return $Block; } @@ -957,13 +1069,27 @@ class Parsedown protected function paragraph($Line) { - $Block = array( + return array( + 'type' => 'Paragraph', 'element' => array( 'name' => 'p', - 'text' => $Line['text'], - 'handler' => 'line', + 'handler' => array( + 'function' => 'lineElements', + 'argument' => $Line['text'], + 'destination' => 'elements', + ), ), ); + } + + protected function paragraphContinue($Line, array $Block) + { + if (isset($Block['interrupted'])) + { + return; + } + + $Block['element']['handler']['argument'] .= "\n".$Line['text']; return $Block; } @@ -973,13 +1099,11 @@ class Parsedown # protected $InlineTypes = array( - '"' => array('SpecialCharacter'), '!' => array('Image'), '&' => array('SpecialCharacter'), '*' => array('Emphasis'), ':' => array('Url'), - '<' => array('UrlTag', 'EmailTag', 'Markup', 'SpecialCharacter'), - '>' => array('SpecialCharacter'), + '<' => array('UrlTag', 'EmailTag', 'Markup'), '[' => array('Link'), '_' => array('Emphasis'), '`' => array('Code'), @@ -989,15 +1113,25 @@ class Parsedown # ~ - protected $inlineMarkerList = '!"*_&[:<>`~\\'; + protected $inlineMarkerList = '!*_&[:<`~\\'; # # ~ # - public function line($text) + public function line($text, $nonNestables = array()) { - $markup = ''; + return $this->elements($this->lineElements($text, $nonNestables)); + } + + protected function lineElements($text, $nonNestables = array()) + { + $Elements = array(); + + $nonNestables = (empty($nonNestables) + ? array() + : array_combine($nonNestables, $nonNestables) + ); # $excerpt is based on the first occurrence of a marker @@ -1005,13 +1139,20 @@ class Parsedown { $marker = $excerpt[0]; - $markerPosition = strpos($text, $marker); + $markerPosition = strlen($text) - strlen($excerpt); $Excerpt = array('text' => $excerpt, 'context' => $text); foreach ($this->InlineTypes[$marker] as $inlineType) { - $Inline = $this->{'inline'.$inlineType}($Excerpt); + # check to see if the current inline type is nestable in the current context + + if (isset($nonNestables[$inlineType])) + { + continue; + } + + $Inline = $this->{"inline$inlineType"}($Excerpt); if ( ! isset($Inline)) { @@ -1032,14 +1173,23 @@ class Parsedown $Inline['position'] = $markerPosition; } + # cause the new element to 'inherit' our non nestables + + + $Inline['element']['nonNestables'] = isset($Inline['element']['nonNestables']) + ? array_merge($Inline['element']['nonNestables'], $nonNestables) + : $nonNestables + ; + # the text that comes before the inline $unmarkedText = substr($text, 0, $Inline['position']); # compile the unmarked text - $markup .= $this->unmarkedText($unmarkedText); + $InlineText = $this->inlineText($unmarkedText); + $Elements[] = $InlineText['element']; # compile the inline - $markup .= isset($Inline['markup']) ? $Inline['markup'] : $this->element($Inline['element']); + $Elements[] = $this->extractElement($Inline); # remove the examined text $text = substr($text, $Inline['position'] + $Inline['extent']); @@ -1051,29 +1201,57 @@ class Parsedown $unmarkedText = substr($text, 0, $markerPosition + 1); - $markup .= $this->unmarkedText($unmarkedText); + $InlineText = $this->inlineText($unmarkedText); + $Elements[] = $InlineText['element']; $text = substr($text, $markerPosition + 1); } - $markup .= $this->unmarkedText($text); + $InlineText = $this->inlineText($text); + $Elements[] = $InlineText['element']; - return $markup; + foreach ($Elements as &$Element) + { + if ( ! isset($Element['autobreak'])) + { + $Element['autobreak'] = false; + } + } + + return $Elements; } # # ~ # + protected function inlineText($text) + { + $Inline = array( + 'extent' => strlen($text), + 'element' => array(), + ); + + $Inline['element']['elements'] = self::pregReplaceElements( + $this->breaksEnabled ? '/[ ]*+\n/' : '/(?:[ ]*+\\\\|[ ]{2,}+)\n/', + array( + array('name' => 'br'), + array('text' => "\n"), + ), + $text + ); + + return $Inline; + } + protected function inlineCode($Excerpt) { $marker = $Excerpt['text'][0]; - if (preg_match('/^('.$marker.'+)[ ]*(.+?)[ ]*(? strlen($matches[0]), @@ -1087,13 +1265,19 @@ class Parsedown protected function inlineEmailTag($Excerpt) { - if (strpos($Excerpt['text'], '>') !== false and preg_match('/^<((mailto:)?\S+?@\S+?)>/i', $Excerpt['text'], $matches)) - { + $hostnameLabel = '[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?'; + + $commonMarkEmail = '[a-zA-Z0-9.!#$%&\'*+\/=?^_`{|}~-]++@' + . $hostnameLabel . '(?:\.' . $hostnameLabel . ')*'; + + if (strpos($Excerpt['text'], '>') !== false + and preg_match("/^<((mailto:)?$commonMarkEmail)>/i", $Excerpt['text'], $matches) + ){ $url = $matches[1]; if ( ! isset($matches[2])) { - $url = 'mailto:' . $url; + $url = "mailto:$url"; } return array( @@ -1135,8 +1319,11 @@ class Parsedown 'extent' => strlen($matches[0]), 'element' => array( 'name' => $emphasis, - 'handler' => 'line', - 'text' => $matches[1], + 'handler' => array( + 'function' => 'lineElements', + 'argument' => $matches[1], + 'destination' => 'elements', + ) ), ); } @@ -1146,7 +1333,7 @@ class Parsedown if (isset($Excerpt['text'][1]) and in_array($Excerpt['text'][1], $this->specialCharacters)) { return array( - 'markup' => $Excerpt['text'][1], + 'element' => array('rawHtml' => $Excerpt['text'][1]), 'extent' => 2, ); } @@ -1181,8 +1368,9 @@ class Parsedown 'name' => 'img', 'attributes' => array( 'src' => $url, - 'alt' => $Link['element']['text'], + 'alt' => $Link['element']['handler']['argument'], ), + 'autobreak' => true, ), ); @@ -1197,8 +1385,12 @@ class Parsedown { $Element = array( 'name' => 'a', - 'handler' => 'line', - 'text' => null, + 'handler' => array( + 'function' => 'lineElements', + 'argument' => null, + 'destination' => 'elements', + ), + 'nonNestables' => array('Url', 'Link'), 'attributes' => array( 'href' => null, 'title' => null, @@ -1209,9 +1401,9 @@ class Parsedown $remainder = $Excerpt['text']; - if (preg_match('/\[((?:[^][]|(?R))*)\]/', $remainder, $matches)) + if (preg_match('/\[((?:[^][]++|(?R))*+)\]/', $remainder, $matches)) { - $Element['text'] = $matches[1]; + $Element['handler']['argument'] = $matches[1]; $extent += strlen($matches[0]); @@ -1222,7 +1414,7 @@ class Parsedown return; } - if (preg_match('/^[(]((?:[^ ()]|[(][^ )]+[)])+)(?:[ ]+("[^"]*"|\'[^\']*\'))?[)]/', $remainder, $matches)) + if (preg_match('/^[(]\s*+((?:[^ ()]++|[(][^ )]+[)])++)(?:[ ]+("[^"]*+"|\'[^\']*+\'))?\s*+[)]/', $remainder, $matches)) { $Element['attributes']['href'] = $matches[1]; @@ -1237,14 +1429,14 @@ class Parsedown { if (preg_match('/^\s*\[(.*?)\]/', $remainder, $matches)) { - $definition = strlen($matches[1]) ? $matches[1] : $Element['text']; + $definition = strlen($matches[1]) ? $matches[1] : $Element['handler']['argument']; $definition = strtolower($definition); $extent += strlen($matches[0]); } else { - $definition = strtolower($Element['text']); + $definition = strtolower($Element['handler']['argument']); } if ( ! isset($this->DefinitionData['Reference'][$definition])) @@ -1258,8 +1450,6 @@ class Parsedown $Element['attributes']['title'] = $Definition['title']; } - $Element['attributes']['href'] = str_replace(array('&', '<'), array('&', '<'), $Element['attributes']['href']); - return array( 'extent' => $extent, 'element' => $Element, @@ -1268,31 +1458,31 @@ class Parsedown protected function inlineMarkup($Excerpt) { - if ($this->markupEscaped or strpos($Excerpt['text'], '>') === false) + if ($this->markupEscaped or $this->safeMode or strpos($Excerpt['text'], '>') === false) { return; } - if ($Excerpt['text'][1] === '/' and preg_match('/^<\/\w*[ ]*>/s', $Excerpt['text'], $matches)) + if ($Excerpt['text'][1] === '/' and preg_match('/^<\/\w[\w-]*+[ ]*+>/s', $Excerpt['text'], $matches)) { return array( - 'markup' => $matches[0], + 'element' => array('rawHtml' => $matches[0]), 'extent' => strlen($matches[0]), ); } - if ($Excerpt['text'][1] === '!' and preg_match('/^/s', $Excerpt['text'], $matches)) + if ($Excerpt['text'][1] === '!' and preg_match('/^/s', $Excerpt['text'], $matches)) { return array( - 'markup' => $matches[0], + 'element' => array('rawHtml' => $matches[0]), 'extent' => strlen($matches[0]), ); } - if ($Excerpt['text'][1] !== ' ' and preg_match('/^<\w*(?:[ ]*'.$this->regexHtmlAttribute.')*[ ]*\/?>/s', $Excerpt['text'], $matches)) + if ($Excerpt['text'][1] !== ' ' and preg_match('/^<\w[\w-]*+(?:[ ]*+'.$this->regexHtmlAttribute.')*+[ ]*+\/?>/s', $Excerpt['text'], $matches)) { return array( - 'markup' => $matches[0], + 'element' => array('rawHtml' => $matches[0]), 'extent' => strlen($matches[0]), ); } @@ -1300,23 +1490,16 @@ class Parsedown protected function inlineSpecialCharacter($Excerpt) { - if ($Excerpt['text'][0] === '&' and ! preg_match('/^&#?\w+;/', $Excerpt['text'])) - { + if ($Excerpt['text'][1] !== ' ' and strpos($Excerpt['text'], ';') !== false + and preg_match('/^&(#?+[0-9a-zA-Z]++);/', $Excerpt['text'], $matches) + ) { return array( - 'markup' => '&', - 'extent' => 1, + 'element' => array('rawHtml' => '&' . $matches[1] . ';'), + 'extent' => strlen($matches[0]), ); } - $SpecialCharacter = array('>' => 'gt', '<' => 'lt', '"' => 'quot'); - - if (isset($SpecialCharacter[$Excerpt['text'][0]])) - { - return array( - 'markup' => '&'.$SpecialCharacter[$Excerpt['text'][0]].';', - 'extent' => 1, - ); - } + return; } protected function inlineStrikethrough($Excerpt) @@ -1332,8 +1515,11 @@ class Parsedown 'extent' => strlen($matches[0]), 'element' => array( 'name' => 'del', - 'text' => $matches[1], - 'handler' => 'line', + 'handler' => array( + 'function' => 'lineElements', + 'argument' => $matches[1], + 'destination' => 'elements', + ) ), ); } @@ -1346,16 +1532,19 @@ class Parsedown return; } - if (preg_match('/\bhttps?:[\/]{2}[^\s<]+\b\/*/ui', $Excerpt['context'], $matches, PREG_OFFSET_CAPTURE)) - { + if (strpos($Excerpt['context'], 'http') !== false + and preg_match('/\bhttps?+:[\/]{2}[^\s<]+\b\/*+/ui', $Excerpt['context'], $matches, PREG_OFFSET_CAPTURE) + ) { + $url = $matches[0][0]; + $Inline = array( 'extent' => strlen($matches[0][0]), 'position' => $matches[0][1], 'element' => array( 'name' => 'a', - 'text' => $matches[0][0], + 'text' => $url, 'attributes' => array( - 'href' => $matches[0][0], + 'href' => $url, ), ), ); @@ -1366,10 +1555,10 @@ class Parsedown protected function inlineUrlTag($Excerpt) { - if (strpos($Excerpt['text'], '>') !== false and preg_match('/^<(\w+:\/{2}[^ >]+)>/i', $Excerpt['text'], $matches)) + if (strpos($Excerpt['text'], '>') !== false and preg_match('/^<(\w++:\/{2}[^ >]++)>/i', $Excerpt['text'], $matches)) { - $url = str_replace(array('&', '<'), array('&', '<'), $matches[1]); - + $url = $matches[1]; + return array( 'extent' => strlen($matches[0]), 'element' => array( @@ -1387,56 +1576,189 @@ class Parsedown protected function unmarkedText($text) { - if ($this->breaksEnabled) - { - $text = preg_replace('/[ ]*\n/', "
\n", $text); - } - else - { - $text = preg_replace('/(?:[ ][ ]+|[ ]*\\\\)\n/', "
\n", $text); - $text = str_replace(" \n", "\n", $text); - } - - return $text; + $Inline = $this->inlineText($text); + return $this->element($Inline['element']); } # # Handlers # - protected function element(array $Element) + protected function handle(array $Element) { - $markup = '<'.$Element['name']; - - if (isset($Element['attributes'])) + if (isset($Element['handler'])) { - foreach ($Element['attributes'] as $name => $value) + if (!isset($Element['nonNestables'])) { - if ($value === null) - { - continue; - } - - $markup .= ' '.$name.'="'.$value.'"'; + $Element['nonNestables'] = array(); } - } - if (isset($Element['text'])) - { - $markup .= '>'; - - if (isset($Element['handler'])) + if (is_string($Element['handler'])) { - $markup .= $this->{$Element['handler']}($Element['text']); + $function = $Element['handler']; + $argument = $Element['text']; + unset($Element['text']); + $destination = 'rawHtml'; } else { - $markup .= $Element['text']; + $function = $Element['handler']['function']; + $argument = $Element['handler']['argument']; + $destination = $Element['handler']['destination']; } - $markup .= ''; + $Element[$destination] = $this->{$function}($argument, $Element['nonNestables']); + + if ($destination === 'handler') + { + $Element = $this->handle($Element); + } + + unset($Element['handler']); } - else + + return $Element; + } + + protected function handleElementRecursive(array $Element) + { + return $this->elementApplyRecursive(array($this, 'handle'), $Element); + } + + protected function handleElementsRecursive(array $Elements) + { + return $this->elementsApplyRecursive(array($this, 'handle'), $Elements); + } + + protected function elementApplyRecursive($closure, array $Element) + { + $Element = call_user_func($closure, $Element); + + if (isset($Element['elements'])) + { + $Element['elements'] = $this->elementsApplyRecursive($closure, $Element['elements']); + } + elseif (isset($Element['element'])) + { + $Element['element'] = $this->elementApplyRecursive($closure, $Element['element']); + } + + return $Element; + } + + protected function elementApplyRecursiveDepthFirst($closure, array $Element) + { + if (isset($Element['elements'])) + { + $Element['elements'] = $this->elementsApplyRecursiveDepthFirst($closure, $Element['elements']); + } + elseif (isset($Element['element'])) + { + $Element['element'] = $this->elementsApplyRecursiveDepthFirst($closure, $Element['element']); + } + + $Element = call_user_func($closure, $Element); + + return $Element; + } + + protected function elementsApplyRecursive($closure, array $Elements) + { + foreach ($Elements as &$Element) + { + $Element = $this->elementApplyRecursive($closure, $Element); + } + + return $Elements; + } + + protected function elementsApplyRecursiveDepthFirst($closure, array $Elements) + { + foreach ($Elements as &$Element) + { + $Element = $this->elementApplyRecursiveDepthFirst($closure, $Element); + } + + return $Elements; + } + + protected function element(array $Element) + { + if ($this->safeMode) + { + $Element = $this->sanitiseElement($Element); + } + + # identity map if element has no handler + $Element = $this->handle($Element); + + $hasName = isset($Element['name']); + + $markup = ''; + + if ($hasName) + { + $markup .= '<' . $Element['name']; + + if (isset($Element['attributes'])) + { + foreach ($Element['attributes'] as $name => $value) + { + if ($value === null) + { + continue; + } + + $markup .= " $name=\"".self::escape($value).'"'; + } + } + } + + $permitRawHtml = false; + + if (isset($Element['text'])) + { + $text = $Element['text']; + } + // very strongly consider an alternative if you're writing an + // extension + elseif (isset($Element['rawHtml'])) + { + $text = $Element['rawHtml']; + + $allowRawHtmlInSafeMode = isset($Element['allowRawHtmlInSafeMode']) && $Element['allowRawHtmlInSafeMode']; + $permitRawHtml = !$this->safeMode || $allowRawHtmlInSafeMode; + } + + $hasContent = isset($text) || isset($Element['element']) || isset($Element['elements']); + + if ($hasContent) + { + $markup .= $hasName ? '>' : ''; + + if (isset($Element['elements'])) + { + $markup .= $this->elements($Element['elements']); + } + elseif (isset($Element['element'])) + { + $markup .= $this->element($Element['element']); + } + else + { + if (!$permitRawHtml) + { + $markup .= self::escape($text, true); + } + else + { + $markup .= $text; + } + } + + $markup .= $hasName ? '' : ''; + } + elseif ($hasName) { $markup .= ' />'; } @@ -1448,12 +1770,26 @@ class Parsedown { $markup = ''; + $autoBreak = true; + foreach ($Elements as $Element) { - $markup .= "\n" . $this->element($Element); + if (empty($Element)) + { + continue; + } + + $autoBreakNext = (isset($Element['autobreak']) + ? $Element['autobreak'] : isset($Element['name']) + ); + // (autobreak === false) covers both sides of an element + $autoBreak = !$autoBreak ? $autoBreak : $autoBreakNext; + + $markup .= ($autoBreak ? "\n" : '') . $this->element($Element); + $autoBreak = $autoBreakNext; } - $markup .= "\n"; + $markup .= $autoBreak ? "\n" : ''; return $markup; } @@ -1462,21 +1798,49 @@ class Parsedown protected function li($lines) { - $markup = $this->lines($lines); + $Elements = $this->linesElements($lines); - $trimmedMarkup = trim($markup); - - if ( ! in_array('', $lines) and substr($trimmedMarkup, 0, 3) === '

') - { - $markup = $trimmedMarkup; - $markup = substr($markup, 3); - - $position = strpos($markup, "

"); - - $markup = substr_replace($markup, '', $position, 4); + if ( ! in_array('', $lines) + and isset($Elements[0]) and isset($Elements[0]['name']) + and $Elements[0]['name'] === 'p' + ) { + unset($Elements[0]['name']); } - return $markup; + return $Elements; + } + + # + # AST Convenience + # + + /** + * Replace occurrences $regexp with $Elements in $text. Return an array of + * elements representing the replacement. + */ + protected static function pregReplaceElements($regexp, $Elements, $text) + { + $newElements = array(); + + while (preg_match($regexp, $text, $matches, PREG_OFFSET_CAPTURE)) + { + $offset = $matches[0][1]; + $before = substr($text, 0, $offset); + $after = substr($text, $offset + strlen($matches[0][0])); + + $newElements[] = array('text' => $before); + + foreach ($Elements as $Element) + { + $newElements[] = $Element; + } + + $text = $after; + } + + $newElements[] = array('text' => $text); + + return $newElements; } # @@ -1490,10 +1854,83 @@ class Parsedown return $markup; } + protected function sanitiseElement(array $Element) + { + static $goodAttribute = '/^[a-zA-Z0-9][a-zA-Z0-9-_]*+$/'; + static $safeUrlNameToAtt = array( + 'a' => 'href', + 'img' => 'src', + ); + + if ( ! isset($Element['name'])) + { + unset($Element['attributes']); + return $Element; + } + + if (isset($safeUrlNameToAtt[$Element['name']])) + { + $Element = $this->filterUnsafeUrlInAttribute($Element, $safeUrlNameToAtt[$Element['name']]); + } + + if ( ! empty($Element['attributes'])) + { + foreach ($Element['attributes'] as $att => $val) + { + # filter out badly parsed attribute + if ( ! preg_match($goodAttribute, $att)) + { + unset($Element['attributes'][$att]); + } + # dump onevent attribute + elseif (self::striAtStart($att, 'on')) + { + unset($Element['attributes'][$att]); + } + } + } + + return $Element; + } + + protected function filterUnsafeUrlInAttribute(array $Element, $attribute) + { + foreach ($this->safeLinksWhitelist as $scheme) + { + if (self::striAtStart($Element['attributes'][$attribute], $scheme)) + { + return $Element; + } + } + + $Element['attributes'][$attribute] = str_replace(':', '%3A', $Element['attributes'][$attribute]); + + return $Element; + } + # # Static Methods # + protected static function escape($text, $allowQuotes = false) + { + return htmlspecialchars($text, $allowQuotes ? ENT_NOQUOTES : ENT_QUOTES, 'UTF-8'); + } + + protected static function striAtStart($string, $needle) + { + $len = strlen($needle); + + if ($len > strlen($string)) + { + return false; + } + else + { + return strtolower(substr($string, 0, $len)) === strtolower($needle); + } + } + static function instance($name = 'default') { if (isset(self::$instances[$name])) @@ -1520,12 +1957,12 @@ class Parsedown # Read-Only protected $specialCharacters = array( - '\\', '`', '*', '_', '{', '}', '[', ']', '(', ')', '>', '#', '+', '-', '.', '!', '|', + '\\', '`', '*', '_', '{', '}', '[', ']', '(', ')', '>', '#', '+', '-', '.', '!', '|', '~' ); protected $StrongRegex = array( - '*' => '/^[*]{2}((?:\\\\\*|[^*]|[*][^*]*[*])+?)[*]{2}(?![*])/s', - '_' => '/^__((?:\\\\_|[^_]|_[^_]*_)+?)__(?!_)/us', + '*' => '/^[*]{2}((?:\\\\\*|[^*]|[*][^*]*+[*])+?)[*]{2}(?![*])/s', + '_' => '/^__((?:\\\\_|[^_]|_[^_]*+_)+?)__(?!_)/us', ); protected $EmRegex = array( @@ -1533,7 +1970,7 @@ class Parsedown '_' => '/^_((?:\\\\_|[^_]|__[^_]*__)+?)_(?!_)\b/us', ); - protected $regexHtmlAttribute = '[a-zA-Z_:][\w:.-]*(?:\s*=\s*(?:[^"\'=<>`\s]+|"[^"]*"|\'[^\']*\'))?'; + protected $regexHtmlAttribute = '[a-zA-Z_:][\w:.-]*+(?:\s*+=\s*+(?:[^"\'=<>`\s]+|"[^"]*+"|\'[^\']*+\'))?+'; protected $voidElements = array( 'area', 'base', 'br', 'col', 'command', 'embed', 'hr', 'img', 'input', 'link', 'meta', 'param', 'source', @@ -1544,10 +1981,10 @@ class Parsedown 'b', 'em', 'big', 'cite', 'small', 'spacer', 'listing', 'i', 'rp', 'del', 'code', 'strike', 'marquee', 'q', 'rt', 'ins', 'font', 'strong', - 's', 'tt', 'sub', 'mark', - 'u', 'xm', 'sup', 'nobr', - 'var', 'ruby', - 'wbr', 'span', - 'time', + 's', 'tt', 'kbd', 'mark', + 'u', 'xm', 'sub', 'nobr', + 'sup', 'ruby', + 'var', 'span', + 'wbr', 'time', ); } diff --git a/README.md b/README.md index 7e1f10e..cdf0167 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -> You might also like [Caret](http://caret.io?ref=parsedown) - our Markdown editor for Mac / Windows / Linux. +> I also make [Caret](https://caret.io?ref=parsedown) - a Markdown editor for Mac and PC. ## Parsedown @@ -14,15 +14,26 @@ Better Markdown Parser in PHP ### Features +* One File +* No Dependencies * Super Fast -* [GitHub flavored](https://help.github.com/articles/github-flavored-markdown) * Extensible -* Tested in 5.3 to 7.0 and in HHVM +* [GitHub flavored](https://help.github.com/articles/github-flavored-markdown) +* Tested in 5.3 to 7.2 and in HHVM * [Markdown Extra extension](https://github.com/erusev/parsedown-extra) ### Installation +#### Composer +Install the [composer package] by running the following command: -Include `Parsedown.php` or install [the composer package](https://packagist.org/packages/erusev/parsedown). + composer require erusev/parsedown + +#### Manual +1. Download the "Source code" from the [latest release] +2. Include `Parsedown.php` + +[composer package]: https://packagist.org/packages/erusev/parsedown "The Parsedown package on packagist.org" +[latest release]: https://github.com/erusev/parsedown/releases/latest "The latest release of Parsedown" ### Example @@ -30,15 +41,46 @@ Include `Parsedown.php` or install [the composer package](https://packagist.org/ $Parsedown = new Parsedown(); echo $Parsedown->text('Hello _Parsedown_!'); # prints:

Hello Parsedown!

+// you can also parse inline markdown only +echo $Parsedown->line('Hello _Parsedown_!'); # prints: Hello Parsedown! ``` More examples in [the wiki](https://github.com/erusev/parsedown/wiki/) and in [this video tutorial](http://youtu.be/wYZBY8DEikI). +### Security + +Parsedown is capable of escaping user-input within the HTML that it generates. Additionally Parsedown will apply sanitisation to additional scripting vectors (such as scripting link destinations) that are introduced by the markdown syntax itself. + +To tell Parsedown that it is processing untrusted user-input, use the following: +```php +$parsedown = new Parsedown; +$parsedown->setSafeMode(true); +``` + +If instead, you wish to allow HTML within untrusted user-input, but still want output to be free from XSS it is recommended that you make use of a HTML sanitiser that allows HTML tags to be whitelisted, like [HTML Purifier](http://htmlpurifier.org/). + +In both cases you should strongly consider employing defence-in-depth measures, like [deploying a Content-Security-Policy](https://scotthelme.co.uk/content-security-policy-an-introduction/) (a browser security feature) so that your page is likely to be safe even if an attacker finds a vulnerability in one of the first lines of defence above. + +#### Security of Parsedown Extensions + +Safe mode does not necessarily yield safe results when using extensions to Parsedown. Extensions should be evaluated on their own to determine their specific safety against XSS. + +### Escaping HTML +> ⚠️  **WARNING:** This method isn't safe from XSS! + +If you wish to escape HTML **in trusted input**, you can use the following: +```php +$parsedown = new Parsedown; +$parsedown->setMarkupEscaped(true); +``` + +Beware that this still allows users to insert unsafe scripting vectors, such as links like `[xss](javascript:alert%281%29)`. + ### Questions **How does Parsedown work?** -It tries to read Markdown like a human. First, it looks at the lines. It’s interested in how the lines start. This helps it recognise blocks. It knows, for example, that if a line start with a `-` then it perhaps belong to a list. Once it recognises the blocks, it continues to the content. As it reads, it watches out for special characters. This helps it recognise inline elements (or inlines). +It tries to read Markdown like a human. First, it looks at the lines. It’s interested in how the lines start. This helps it recognise blocks. It knows, for example, that if a line starts with a `-` then perhaps it belongs to a list. Once it recognises the blocks, it continues to the content. As it reads, it watches out for special characters. This helps it recognise inline elements (or inlines). We call this approach "line based". We believe that Parsedown is the first Markdown parser to use it. Since the release of Parsedown, other developers have used the same approach to develop other Markdown parsers in PHP and in other languages. @@ -48,7 +90,7 @@ It passes most of the CommonMark tests. Most of the tests that don't pass deal w **Who uses it?** -[phpDocumentor](http://www.phpdoc.org/), [October CMS](http://octobercms.com/), [Bolt CMS](http://bolt.cm/), [Kirby CMS](http://getkirby.com/), [Grav CMS](http://getgrav.org/), [Statamic CMS](http://www.statamic.com/), [Herbie CMS](http://www.getherbie.org/), [RaspberryPi.org](http://www.raspberrypi.org/) and [more](https://www.versioneye.com/php/erusev:parsedown/references). +[Laravel Framework](https://laravel.com/), [Bolt CMS](http://bolt.cm/), [Grav CMS](http://getgrav.org/), [Herbie CMS](http://www.getherbie.org/), [Kirby CMS](http://getkirby.com/), [October CMS](http://octobercms.com/), [Pico CMS](http://picocms.org), [Statamic CMS](http://www.statamic.com/), [phpDocumentor](http://www.phpdoc.org/), [RaspberryPi.org](http://www.raspberrypi.org/), [Symfony demo](https://github.com/symfony/symfony-demo) and [more](https://packagist.org/packages/erusev/parsedown/dependents). **How can I help?** diff --git a/composer.json b/composer.json index 28145af..f8b40f8 100644 --- a/composer.json +++ b/composer.json @@ -13,9 +13,21 @@ } ], "require": { - "php": ">=5.3.0" + "php": ">=5.3.0", + "ext-mbstring": "*" + }, + "require-dev": { + "phpunit/phpunit": "^4.8.35" }, "autoload": { "psr-0": {"Parsedown": ""} + }, + "autoload-dev": { + "psr-0": { + "TestParsedown": "test/", + "ParsedownTest": "test/", + "CommonMarkTest": "test/", + "CommonMarkTestWeak": "test/" + } } } diff --git a/phpunit.xml.dist b/phpunit.xml.dist index b2d5e9d..4fe3177 100644 --- a/phpunit.xml.dist +++ b/phpunit.xml.dist @@ -1,8 +1,8 @@ - + test/ParsedownTest.php - \ No newline at end of file + diff --git a/test/CommonMarkTest.php b/test/CommonMarkTest.php deleted file mode 100644 index 9b8d116..0000000 --- a/test/CommonMarkTest.php +++ /dev/null @@ -1,74 +0,0 @@ -setUrlsLinked(false); - - $actualHtml = $Parsedown->text($markdown); - $actualHtml = $this->normalizeMarkup($actualHtml); - - $this->assertEquals($expectedHtml, $actualHtml); - } - - function data() - { - $spec = file_get_contents(self::SPEC_URL); - $spec = strstr($spec, '', true); - - $tests = array(); - $currentSection = ''; - - preg_replace_callback( - '/^\.\n([\s\S]*?)^\.\n([\s\S]*?)^\.$|^#{1,6} *(.*)$/m', - function($matches) use ( & $tests, & $currentSection, & $testCount) { - if (isset($matches[3]) and $matches[3]) { - $currentSection = $matches[3]; - } else { - $testCount++; - $markdown = $matches[1]; - $markdown = preg_replace('/→/', "\t", $markdown); - $expectedHtml = $matches[2]; - $expectedHtml = $this->normalizeMarkup($expectedHtml); - $tests []= array( - $currentSection, # section - $markdown, # markdown - $expectedHtml, # html - ); - } - }, - $spec - ); - - return $tests; - } - - private function normalizeMarkup($markup) - { - $markup = preg_replace("/\n+/", "\n", $markup); - $markup = preg_replace('/^\s+/m', '', $markup); - $markup = preg_replace('/^((?:<[\w]+>)+)\n/m', '$1', $markup); - $markup = preg_replace('/\n((?:<\/[\w]+>)+)$/m', '$1', $markup); - $markup = trim($markup); - - return $markup; - } -} diff --git a/test/CommonMarkTestStrict.php b/test/CommonMarkTestStrict.php new file mode 100644 index 0000000..3837738 --- /dev/null +++ b/test/CommonMarkTestStrict.php @@ -0,0 +1,71 @@ +parsedown = new TestParsedown(); + $this->parsedown->setUrlsLinked(false); + } + + /** + * @dataProvider data + * @param $id + * @param $section + * @param $markdown + * @param $expectedHtml + */ + public function testExample($id, $section, $markdown, $expectedHtml) + { + $actualHtml = $this->parsedown->text($markdown); + $this->assertEquals($expectedHtml, $actualHtml); + } + + /** + * @return array + */ + public function data() + { + $spec = file_get_contents(self::SPEC_URL); + if ($spec === false) { + $this->fail('Unable to load CommonMark spec from ' . self::SPEC_URL); + } + + $spec = str_replace("\r\n", "\n", $spec); + $spec = strstr($spec, '', true); + + $matches = array(); + preg_match_all('/^`{32} example\n((?s).*?)\n\.\n(?:|((?s).*?)\n)`{32}$|^#{1,6} *(.*?)$/m', $spec, $matches, PREG_SET_ORDER); + + $data = array(); + $currentId = 0; + $currentSection = ''; + foreach ($matches as $match) { + if (isset($match[3])) { + $currentSection = $match[3]; + } else { + $currentId++; + $markdown = str_replace('→', "\t", $match[1]); + $expectedHtml = isset($match[2]) ? str_replace('→', "\t", $match[2]) : ''; + + $data[$currentId] = array( + 'id' => $currentId, + 'section' => $currentSection, + 'markdown' => $markdown, + 'expectedHtml' => $expectedHtml + ); + } + } + + return $data; + } +} diff --git a/test/CommonMarkTestWeak.php b/test/CommonMarkTestWeak.php new file mode 100644 index 0000000..ef4081a --- /dev/null +++ b/test/CommonMarkTestWeak.php @@ -0,0 +1,63 @@ +parsedown->getTextLevelElements(); + array_walk($textLevelElements, function (&$element) { + $element = preg_quote($element, '/'); + }); + $this->textLevelElementRegex = '\b(?:' . implode('|', $textLevelElements) . ')\b'; + } + + /** + * @dataProvider data + * @param $id + * @param $section + * @param $markdown + * @param $expectedHtml + */ + public function testExample($id, $section, $markdown, $expectedHtml) + { + $expectedHtml = $this->cleanupHtml($expectedHtml); + + $actualHtml = $this->parsedown->text($markdown); + $actualHtml = $this->cleanupHtml($actualHtml); + + $this->assertEquals($expectedHtml, $actualHtml); + } + + protected function cleanupHtml($markup) + { + // invisible whitespaces at the beginning and end of block elements + // however, whitespaces at the beginning of
 elements do matter
+        $markup = preg_replace(
+            array(
+                '/(<(?!(?:' . $this->textLevelElementRegex . '|\bpre\b))\w+\b[^>]*>(?:<' . $this->textLevelElementRegex . '[^>]*>)*)\s+/s',
+                '/\s+((?:<\/' . $this->textLevelElementRegex . '>)*<\/(?!' . $this->textLevelElementRegex . ')\w+\b>)/s'
+            ),
+            '$1',
+            $markup
+        );
+
+        return $markup;
+    }
+}
diff --git a/test/ParsedownTest.php b/test/ParsedownTest.php
old mode 100644
new mode 100755
index c922ab1..bf40317
--- a/test/ParsedownTest.php
+++ b/test/ParsedownTest.php
@@ -1,6 +1,9 @@
 Parsedown->setSafeMode(substr($test, 0, 3) === 'xss');
+        $this->Parsedown->setStrictMode(substr($test, 0, 6) === 'strict');
+
         $actualMarkup = $this->Parsedown->text($markdown);
 
         $this->assertEquals($expectedMarkup, $actualMarkup);
     }
 
+    function testRawHtml()
+    {
+        $markdown = "```php\nfoobar\n```";
+        $expectedMarkup = '

foobar

'; + $expectedSafeMarkup = '
<p>foobar</p>
'; + + $unsafeExtension = new UnsafeExtension; + $actualMarkup = $unsafeExtension->text($markdown); + + $this->assertEquals($expectedMarkup, $actualMarkup); + + $unsafeExtension->setSafeMode(true); + $actualSafeMarkup = $unsafeExtension->text($markdown); + + $this->assertEquals($expectedSafeMarkup, $actualSafeMarkup); + } + + function testTrustDelegatedRawHtml() + { + $markdown = "```php\nfoobar\n```"; + $expectedMarkup = '

foobar

'; + $expectedSafeMarkup = $expectedMarkup; + + $unsafeExtension = new TrustDelegatedExtension; + $actualMarkup = $unsafeExtension->text($markdown); + + $this->assertEquals($expectedMarkup, $actualMarkup); + + $unsafeExtension->setSafeMode(true); + $actualSafeMarkup = $unsafeExtension->text($markdown); + + $this->assertEquals($expectedSafeMarkup, $actualSafeMarkup); + } + function data() { $data = array(); @@ -119,12 +160,12 @@ MARKDOWN_WITH_MARKUP;

<div>content</div>

sparse:

<div> -<div class="inner"> +<div class="inner"> content </div> </div>

paragraph

-

<style type="text/css"> +

<style type="text/css"> p { color: red; } @@ -132,15 +173,14 @@ color: red;

comment

<!-- html comment -->

EXPECTED_HTML; - $parsedownWithNoMarkup = new Parsedown(); + + $parsedownWithNoMarkup = new TestParsedown(); $parsedownWithNoMarkup->setMarkupEscaped(true); $this->assertEquals($expectedHtml, $parsedownWithNoMarkup->text($markdownWithHtml)); } public function testLateStaticBinding() { - include 'test/TestParsedown.php'; - $parsedown = Parsedown::instance(); $this->assertInstanceOf('Parsedown', $parsedown); diff --git a/test/SampleExtensions.php b/test/SampleExtensions.php new file mode 100644 index 0000000..e855c71 --- /dev/null +++ b/test/SampleExtensions.php @@ -0,0 +1,40 @@ +$text

"; + + return $Block; + } +} + + +class TrustDelegatedExtension extends Parsedown +{ + protected function blockFencedCodeComplete($Block) + { + $text = $Block['element']['element']['text']; + unset($Block['element']['element']['text']); + + // WARNING: There is almost always a better way of doing things! + // + // This behaviour is NOT needed in the demonstrated case. + // Only use this if you are sure that the result being added into + // rawHtml is safe. + // (e.g. using an external parser with escaping capabilities). + $Block['element']['element']['rawHtml'] = "

$text

"; + $Block['element']['element']['allowRawHtmlInSafeMode'] = true; + + return $Block; + } +} diff --git a/test/TestParsedown.php b/test/TestParsedown.php index 7024dfb..2faa0ab 100644 --- a/test/TestParsedown.php +++ b/test/TestParsedown.php @@ -2,4 +2,8 @@ class TestParsedown extends Parsedown { + public function getTextLevelElements() + { + return $this->textLevelElements; + } } diff --git a/test/bootstrap.php b/test/bootstrap.php deleted file mode 100644 index 5f264d2..0000000 --- a/test/bootstrap.php +++ /dev/null @@ -1,3 +0,0 @@ -h6

####### not a heading

closed h1

-

#

\ No newline at end of file +

+

+

# of levels

+

# of levels #

+

heading

\ No newline at end of file diff --git a/test/data/atx_heading.md b/test/data/atx_heading.md index ad97b44..50f991c 100644 --- a/test/data/atx_heading.md +++ b/test/data/atx_heading.md @@ -14,4 +14,12 @@ # closed h1 # -# \ No newline at end of file +# + +## + +# # of levels + +# # of levels # # + +#heading \ No newline at end of file diff --git a/test/data/code_block.html b/test/data/code_block.html index 889b02d..a186e9a 100644 --- a/test/data/code_block.html +++ b/test/data/code_block.html @@ -5,4 +5,9 @@ echo $message;

> not a quote
 - not a list item
-[not a reference]: http://foo.com
\ No newline at end of file +[not a reference]: http://foo.com +
+
foo
+
+
+bar
\ No newline at end of file diff --git a/test/data/code_block.md b/test/data/code_block.md index 2cfc953..badf873 100644 --- a/test/data/code_block.md +++ b/test/data/code_block.md @@ -7,4 +7,11 @@ > not a quote - not a list item - [not a reference]: http://foo.com \ No newline at end of file + [not a reference]: http://foo.com + +--- + + foo + + + bar \ No newline at end of file diff --git a/test/data/deeply_nested_list.html b/test/data/deeply_nested_list.html index d2c7e5a..96efd22 100644 --- a/test/data/deeply_nested_list.html +++ b/test/data/deeply_nested_list.html @@ -1,12 +1,40 @@ +
+
-
  • li
  • - -
  • li
  • +
  • a
  • +
  • b
  • +
  • c
  • +
  • d
  • +
  • e
  • +
  • f
  • +
  • g
  • +
  • h
  • +
  • i
  • \ No newline at end of file diff --git a/test/data/deeply_nested_list.md b/test/data/deeply_nested_list.md index 76b7552..82f73b8 100644 --- a/test/data/deeply_nested_list.md +++ b/test/data/deeply_nested_list.md @@ -3,4 +3,24 @@ - li - li - li -- li \ No newline at end of file +- li + +--- + +- level 1 + - level 2 + - level 3 + - level 4 + - level 5 + +--- + +- a + - b + - c + - d + - e + - f + - g + - h +- i \ No newline at end of file diff --git a/test/data/email.html b/test/data/email.html index c40759c..93e0705 100644 --- a/test/data/email.html +++ b/test/data/email.html @@ -1 +1,2 @@ -

    my email is me@example.com

    \ No newline at end of file +

    my email is me@example.com

    +

    html tags shouldn't start an email autolink first.last@example.com

    \ No newline at end of file diff --git a/test/data/email.md b/test/data/email.md index 26b7b6c..00b6969 100644 --- a/test/data/email.md +++ b/test/data/email.md @@ -1 +1,3 @@ -my email is \ No newline at end of file +my email is + +html tags shouldn't start an email autolink first.last@example.com \ No newline at end of file diff --git a/test/data/fenced_code_block.html b/test/data/fenced_code_block.html index 8bdabba..50d39df 100644 --- a/test/data/fenced_code_block.html +++ b/test/data/fenced_code_block.html @@ -3,4 +3,16 @@ $message = 'fenced code block'; echo $message;
    tilde
    -
    echo 'language identifier';
    \ No newline at end of file +
    echo 'language identifier';
    +
    echo 'language identifier with non words';
    +
    <?php
    +echo "Hello World";
    +?>
    +<a href="http://auraphp.com" >Aura Project</a>
    +
    the following isn't quite enough to close
    +```
    +still a fenced code block
    +
    foo
    +
    +
    +bar
    \ No newline at end of file diff --git a/test/data/fenced_code_block.md b/test/data/fenced_code_block.md index cbed8eb..3e4155a 100644 --- a/test/data/fenced_code_block.md +++ b/test/data/fenced_code_block.md @@ -11,4 +11,28 @@ tilde ```php echo 'language identifier'; +``` + +```c# +echo 'language identifier with non words'; +``` + +```html+php + +Aura Project +``` + +```` +the following isn't quite enough to close +``` +still a fenced code block +```` + +``` +foo + + +bar ``` \ No newline at end of file diff --git a/test/data/html_comment.html b/test/data/html_comment.html index 566dc3a..009c309 100644 --- a/test/data/html_comment.html +++ b/test/data/html_comment.html @@ -2,4 +2,10 @@

    paragraph

    -

    paragraph

    \ No newline at end of file +

    paragraph

    +abc + \ No newline at end of file diff --git a/test/data/html_comment.md b/test/data/html_comment.md index 6ddfdb4..27aeb29 100644 --- a/test/data/html_comment.md +++ b/test/data/html_comment.md @@ -5,4 +5,10 @@ paragraph -paragraph \ No newline at end of file +paragraph + +abc + +* abcd +* bbbb +* cccc \ No newline at end of file diff --git a/test/data/inline_link.html b/test/data/inline_link.html index 5ad564a..cef29cf 100644 --- a/test/data/inline_link.html +++ b/test/data/inline_link.html @@ -3,4 +3,5 @@

    (link) in parentheses

    link

    MD Logo

    -

    MD Logo and text

    \ No newline at end of file +

    MD Logo and text

    +

    MD Logo and text

    \ No newline at end of file diff --git a/test/data/inline_link.md b/test/data/inline_link.md index 6bac0b3..1ba24b7 100644 --- a/test/data/inline_link.md +++ b/test/data/inline_link.md @@ -8,4 +8,6 @@ [![MD Logo](http://parsedown.org/md.png)](http://example.com) -[![MD Logo](http://parsedown.org/md.png) and text](http://example.com) \ No newline at end of file +[![MD Logo](http://parsedown.org/md.png) and text](http://example.com) + +[![MD Logo]() and text](http://example.com) \ No newline at end of file diff --git a/test/data/lazy_blockquote.html b/test/data/lazy_blockquote.html index 0a2a2aa..dea3dca 100644 --- a/test/data/lazy_blockquote.html +++ b/test/data/lazy_blockquote.html @@ -1,6 +1,8 @@

    quote the rest of it

    +
    +

    another paragraph the rest of it

    \ No newline at end of file diff --git a/test/data/markup_consecutive_one.html b/test/data/markup_consecutive_one.html new file mode 100644 index 0000000..d4c5005 --- /dev/null +++ b/test/data/markup_consecutive_one.html @@ -0,0 +1,3 @@ +
    Markup
    +_No markdown_ without blank line for **strict** compliance with CommonMark. +

    Markdown

    \ No newline at end of file diff --git a/test/data/markup_consecutive_one.md b/test/data/markup_consecutive_one.md new file mode 100644 index 0000000..18b4dcb --- /dev/null +++ b/test/data/markup_consecutive_one.md @@ -0,0 +1,4 @@ +
    Markup
    +_No markdown_ without blank line for **strict** compliance with CommonMark. + +**Markdown** \ No newline at end of file diff --git a/test/data/markup_consecutive_one_line.html b/test/data/markup_consecutive_one_line.html new file mode 100644 index 0000000..a89b4fd --- /dev/null +++ b/test/data/markup_consecutive_one_line.html @@ -0,0 +1,4 @@ +
    One markup on +two lines
    +_No markdown_ +

    Markdown

    \ No newline at end of file diff --git a/test/data/markup_consecutive_one_line.md b/test/data/markup_consecutive_one_line.md new file mode 100644 index 0000000..daf945a --- /dev/null +++ b/test/data/markup_consecutive_one_line.md @@ -0,0 +1,5 @@ +
    One markup on +two lines
    +_No markdown_ + +**Markdown** \ No newline at end of file diff --git a/test/data/markup_consecutive_one_stripped.html b/test/data/markup_consecutive_one_stripped.html new file mode 100644 index 0000000..ccc01f1 --- /dev/null +++ b/test/data/markup_consecutive_one_stripped.html @@ -0,0 +1,3 @@ +

    Stripped markup

    +_No markdown_ +

    Markdown

    \ No newline at end of file diff --git a/test/data/markup_consecutive_one_stripped.md b/test/data/markup_consecutive_one_stripped.md new file mode 100644 index 0000000..7f8df0c --- /dev/null +++ b/test/data/markup_consecutive_one_stripped.md @@ -0,0 +1,4 @@ +

    Stripped markup

    +_No markdown_ + +**Markdown** \ No newline at end of file diff --git a/test/data/markup_consecutive_two.html b/test/data/markup_consecutive_two.html new file mode 100644 index 0000000..f7e71c7 --- /dev/null +++ b/test/data/markup_consecutive_two.html @@ -0,0 +1,3 @@ +
    First markup

    and second markup on the same line.

    +_No markdown_ +

    Markdown

    \ No newline at end of file diff --git a/test/data/markup_consecutive_two.md b/test/data/markup_consecutive_two.md new file mode 100644 index 0000000..83f3af7 --- /dev/null +++ b/test/data/markup_consecutive_two.md @@ -0,0 +1,4 @@ +
    First markup

    and second markup on the same line.

    +_No markdown_ + +**Markdown** \ No newline at end of file diff --git a/test/data/markup_consecutive_two_lines.html b/test/data/markup_consecutive_two_lines.html new file mode 100644 index 0000000..ffa4728 --- /dev/null +++ b/test/data/markup_consecutive_two_lines.html @@ -0,0 +1,4 @@ +
    First markup

    and partial markup +on two lines.

    +_No markdown_ +

    Markdown

    \ No newline at end of file diff --git a/test/data/markup_consecutive_two_lines.md b/test/data/markup_consecutive_two_lines.md new file mode 100644 index 0000000..dc70bb7 --- /dev/null +++ b/test/data/markup_consecutive_two_lines.md @@ -0,0 +1,5 @@ +
    First markup

    and partial markup +on two lines.

    +_No markdown_ + +**Markdown** \ No newline at end of file diff --git a/test/data/markup_consecutive_two_stripped.html b/test/data/markup_consecutive_two_stripped.html new file mode 100644 index 0000000..707d6be --- /dev/null +++ b/test/data/markup_consecutive_two_stripped.html @@ -0,0 +1,4 @@ +

    Stripped markup +on two lines

    +_No markdown_ +

    Markdown

    \ No newline at end of file diff --git a/test/data/markup_consecutive_two_stripped.md b/test/data/markup_consecutive_two_stripped.md new file mode 100644 index 0000000..af5b781 --- /dev/null +++ b/test/data/markup_consecutive_two_stripped.md @@ -0,0 +1,5 @@ +

    Stripped markup +on two lines

    +_No markdown_ + +**Markdown** \ No newline at end of file diff --git a/test/data/multiline_lists.html b/test/data/multiline_lists.html new file mode 100644 index 0000000..a223792 --- /dev/null +++ b/test/data/multiline_lists.html @@ -0,0 +1,10 @@ +
      +
    1. +

      One +First body copy

      +
    2. +
    3. +

      Two +Last body copy

      +
    4. +
    \ No newline at end of file diff --git a/test/data/multiline_lists.md b/test/data/multiline_lists.md new file mode 100644 index 0000000..6251115 --- /dev/null +++ b/test/data/multiline_lists.md @@ -0,0 +1,5 @@ +1. One + First body copy + +2. Two + Last body copy diff --git a/test/data/ordered_list.html b/test/data/ordered_list.html index b6c5216..b748fc1 100644 --- a/test/data/ordered_list.html +++ b/test/data/ordered_list.html @@ -8,6 +8,9 @@
  • two
  • large numbers:

    -
      +
      1. one
      2. -
      \ No newline at end of file +
    +

    foo 1. the following should not start a list +100.
    +200.

    \ No newline at end of file diff --git a/test/data/ordered_list.md b/test/data/ordered_list.md index b307032..d7e7fc1 100644 --- a/test/data/ordered_list.md +++ b/test/data/ordered_list.md @@ -8,4 +8,8 @@ repeating numbers: large numbers: -123. one \ No newline at end of file +123. one + +foo 1. the following should not start a list +100. +200. \ No newline at end of file diff --git a/test/data/paragraph_list.html b/test/data/paragraph_list.html index ced1c43..00a612c 100644 --- a/test/data/paragraph_list.html +++ b/test/data/paragraph_list.html @@ -8,5 +8,7 @@
  • li

  • -
  • li
  • +
  • +

    li

    +
  • \ No newline at end of file diff --git a/test/data/self-closing_html.md b/test/data/self-closing_html.md index acb2032..61d16a3 100644 --- a/test/data/self-closing_html.md +++ b/test/data/self-closing_html.md @@ -1,12 +1,18 @@
    + paragraph
    + paragraph
    + paragraph
    + paragraph
    + paragraph
    + paragraph \ No newline at end of file diff --git a/test/data/setext_header_spaces.html b/test/data/setext_header_spaces.html new file mode 100644 index 0000000..daf97c0 --- /dev/null +++ b/test/data/setext_header_spaces.html @@ -0,0 +1,12 @@ +

    trailing space

    +

    trailing space

    +

    leading and trailing space

    +

    leading and trailing space

    +

    1 leading space

    +

    1 leading space

    +

    3 leading spaces

    +

    3 leading spaces

    +

    too many leading spaces +==

    +

    too many leading spaces +--

    \ No newline at end of file diff --git a/test/data/setext_header_spaces.md b/test/data/setext_header_spaces.md new file mode 100644 index 0000000..4ac35bb --- /dev/null +++ b/test/data/setext_header_spaces.md @@ -0,0 +1,29 @@ +trailing space +== + +trailing space +-- + +leading and trailing space + == + +leading and trailing space + -- + +1 leading space + == + +1 leading space + -- + +3 leading spaces + == + +3 leading spaces + -- + +too many leading spaces + == + +too many leading spaces + -- \ No newline at end of file diff --git a/test/data/simple_blockquote.html b/test/data/simple_blockquote.html index 8225d57..9f9bfab 100644 --- a/test/data/simple_blockquote.html +++ b/test/data/simple_blockquote.html @@ -8,4 +8,19 @@

    no space after >:

    quote

    +
    +
    +
    +
    +
    +

    Info 1 text

    +
    +
    +
    +
    +
    +
    +

    Info 2 text

    +
    +
    \ No newline at end of file diff --git a/test/data/simple_blockquote.md b/test/data/simple_blockquote.md index 22b6b11..d7c3e12 100644 --- a/test/data/simple_blockquote.md +++ b/test/data/simple_blockquote.md @@ -4,4 +4,10 @@ indented: > quote no space after `>`: ->quote \ No newline at end of file +>quote + +--- + +>>> Info 1 text + +>>> Info 2 text \ No newline at end of file diff --git a/test/data/simple_table.html b/test/data/simple_table.html index 237d7ef..b74a2ec 100644 --- a/test/data/simple_table.html +++ b/test/data/simple_table.html @@ -34,4 +34,42 @@ cell 2.2 - \ No newline at end of file + +
    + + + + + + + + + + + + + + +
    header 1
    cell 1.1
    cell 2.1
    +
    + + + + + + + + + + + + + + +
    header 1
    cell 1.1
    cell 2.1
    +
    +

    Not a table, we haven't ended the paragraph: +header 1 | header 2 +-------- | -------- +cell 1.1 | cell 1.2 +cell 2.1 | cell 2.2

    \ No newline at end of file diff --git a/test/data/simple_table.md b/test/data/simple_table.md index 466d140..42eff5a 100644 --- a/test/data/simple_table.md +++ b/test/data/simple_table.md @@ -8,4 +8,26 @@ cell 2.1 | cell 2.2 header 1 | header 2 :------- | -------- cell 1.1 | cell 1.2 +cell 2.1 | cell 2.2 + +--- + +header 1 +:------- +cell 1.1 +cell 2.1 + +--- + +header 1 +-------| +cell 1.1 +cell 2.1 + +--- + +Not a table, we haven't ended the paragraph: +header 1 | header 2 +-------- | -------- +cell 1.1 | cell 1.2 cell 2.1 | cell 2.2 \ No newline at end of file diff --git a/test/data/sparse_dense_list.html b/test/data/sparse_dense_list.html index 095bc73..58923f8 100644 --- a/test/data/sparse_dense_list.html +++ b/test/data/sparse_dense_list.html @@ -2,6 +2,10 @@
  • li

  • -
  • li
  • -
  • li
  • +
  • +

    li

    +
  • +
  • +

    li

    +
  • \ No newline at end of file diff --git a/test/data/sparse_html.html b/test/data/sparse_html.html index 9e89627..6e0213f 100644 --- a/test/data/sparse_html.html +++ b/test/data/sparse_html.html @@ -1,8 +1,6 @@
    line 1 - -line 2 -line 3 - -line 4 +

    line 2 +line 3

    +

    line 4

    \ No newline at end of file diff --git a/test/data/sparse_list.html b/test/data/sparse_list.html index 452b2b8..9803d27 100644 --- a/test/data/sparse_list.html +++ b/test/data/sparse_list.html @@ -2,7 +2,9 @@
  • li

  • -
  • li
  • +
  • +

    li

    +

  • -

    mixed markers:

    +

    mixed unordered markers:

    + + \ No newline at end of file + +

    mixed ordered markers:

    +
      +
    1. starting at 1, list one
    2. +
    3. number 2, list one
    4. +
    +
      +
    1. starting at 3, list two
    2. +
    \ No newline at end of file diff --git a/test/data/unordered_list.md b/test/data/unordered_list.md index cf62c99..c1b8640 100644 --- a/test/data/unordered_list.md +++ b/test/data/unordered_list.md @@ -1,8 +1,14 @@ - li - li -mixed markers: +mixed unordered markers: * li + li -- li \ No newline at end of file +- li + +mixed ordered markers: + +1. starting at 1, list one +2. number 2, list one +3) starting at 3, list two \ No newline at end of file diff --git a/test/data/xss_attribute_encoding.html b/test/data/xss_attribute_encoding.html new file mode 100644 index 0000000..287ff51 --- /dev/null +++ b/test/data/xss_attribute_encoding.html @@ -0,0 +1,6 @@ +

    xss

    +

    xss

    +

    xss

    +

    xss

    +

    xss"

    +

    xss'

    \ No newline at end of file diff --git a/test/data/xss_attribute_encoding.md b/test/data/xss_attribute_encoding.md new file mode 100644 index 0000000..3d8e0c8 --- /dev/null +++ b/test/data/xss_attribute_encoding.md @@ -0,0 +1,11 @@ +[xss](https://www.example.com") + +![xss](https://www.example.com") + +[xss](https://www.example.com') + +![xss](https://www.example.com') + +![xss"](https://www.example.com) + +![xss'](https://www.example.com) \ No newline at end of file diff --git a/test/data/xss_bad_url.html b/test/data/xss_bad_url.html new file mode 100644 index 0000000..0b216d1 --- /dev/null +++ b/test/data/xss_bad_url.html @@ -0,0 +1,16 @@ +

    xss

    +

    xss

    +

    xss

    +

    xss

    +

    xss

    +

    xss

    +

    xss

    +

    xss

    +

    xss

    +

    xss

    +

    xss

    +

    xss

    +

    xss

    +

    xss

    +

    xss

    +

    xss

    \ No newline at end of file diff --git a/test/data/xss_bad_url.md b/test/data/xss_bad_url.md new file mode 100644 index 0000000..a730952 --- /dev/null +++ b/test/data/xss_bad_url.md @@ -0,0 +1,31 @@ +[xss](javascript:alert(1)) + +[xss]( javascript:alert(1)) + +[xss](javascript://alert(1)) + +[xss](javascript:alert(1)) + +![xss](javascript:alert(1)) + +![xss]( javascript:alert(1)) + +![xss](javascript://alert(1)) + +![xss](javascript:alert(1)) + +[xss](data:text/html;base64,PHNjcmlwdD5hbGVydCgxKTwvc2NyaXB0Pg==) + +[xss]( data:text/html;base64,PHNjcmlwdD5hbGVydCgxKTwvc2NyaXB0Pg==) + +[xss](data://text/html;base64,PHNjcmlwdD5hbGVydCgxKTwvc2NyaXB0Pg==) + +[xss](data:text/html;base64,PHNjcmlwdD5hbGVydCgxKTwvc2NyaXB0Pg==) + +![xss](data:text/html;base64,PHNjcmlwdD5hbGVydCgxKTwvc2NyaXB0Pg==) + +![xss]( data:text/html;base64,PHNjcmlwdD5hbGVydCgxKTwvc2NyaXB0Pg==) + +![xss](data://text/html;base64,PHNjcmlwdD5hbGVydCgxKTwvc2NyaXB0Pg==) + +![xss](data:text/html;base64,PHNjcmlwdD5hbGVydCgxKTwvc2NyaXB0Pg==) \ No newline at end of file diff --git a/test/data/xss_text_encoding.html b/test/data/xss_text_encoding.html new file mode 100644 index 0000000..e6b3fc5 --- /dev/null +++ b/test/data/xss_text_encoding.html @@ -0,0 +1,7 @@ +

    <script>alert(1)</script>

    +

    <script>

    +

    alert(1)

    +

    </script>

    +

    <script> +alert(1) +</script>

    \ No newline at end of file diff --git a/test/data/xss_text_encoding.md b/test/data/xss_text_encoding.md new file mode 100644 index 0000000..b1051a2 --- /dev/null +++ b/test/data/xss_text_encoding.md @@ -0,0 +1,12 @@ + + + + + + \ No newline at end of file