From 5be449194308a66676cc1323dc9c2d1b770dbc9e Mon Sep 17 00:00:00 2001 From: Emanuil Rusev Date: Thu, 17 Apr 2014 10:59:35 +0300 Subject: [PATCH] make parser class more extensible --- Parsedown.php | 2295 +++++++++++------------ README.md | 4 +- tests/data/code_block.html | 12 +- tests/data/deeply_nested_list.html | 12 +- tests/data/escaping.html | 4 +- tests/data/fenced_code_block.html | 14 +- tests/data/tab-indented_code_block.html | 6 +- tests/data/text_reference.html | 3 +- tests/data/text_reference.md | 5 +- tests/data/whitespace.html | 4 +- 10 files changed, 1153 insertions(+), 1206 deletions(-) diff --git a/Parsedown.php b/Parsedown.php index 2ff7d3a..bf7bf9f 100755 --- a/Parsedown.php +++ b/Parsedown.php @@ -17,7 +17,6 @@ class Parsedown { # # Philosophy - # # Markdown is intended to be easy-to-read by humans - those of us who read # line by line, left to right, top to bottom. In order to take advantage of @@ -26,25 +25,9 @@ class Parsedown # to each other. # - # Setters - # + # ~ - # Enables GFM line breaks. - - function setBreaksEnabled($breaksEnabled) - { - $this->breaksEnabled = $breaksEnabled; - - return $this; - } - - private $breaksEnabled = false; - - # - # Methods - # - - function parse($text) + function text($text) { # standardize line breaks $text = str_replace("\r\n", "\n", $text); @@ -60,10 +43,7 @@ class Parsedown $lines = explode("\n", $text); # iterate through lines to identify blocks - $blocks = $this->findBlocks($lines); - - # iterate through blocks to build markup - $markup = $this->compile($blocks); + $markup = $this->lines($lines); # trim line breaks $markup = trim($markup, "\n"); @@ -72,851 +52,796 @@ class Parsedown } # - # Private + # Setters + # - private function findBlocks(array $lines, $blockContext = null) + function setBreaksEnabled($breaksEnabled) { - $block = null; + $this->breaksEnabled = $breaksEnabled; - $context = null; - $contextData = null; + return $this; + } + + private $breaksEnabled; + + # + # Blocks + # + + protected $blockMarkers = array( + '#' => array('Atx'), + '*' => array('Rule', 'List'), + '+' => array('List'), + '-' => array('Setext', 'Table', 'Rule', 'List'), + '0' => array('List'), + '1' => array('List'), + '2' => array('List'), + '3' => array('List'), + '4' => array('List'), + '5' => array('List'), + '6' => array('List'), + '7' => array('List'), + '8' => array('List'), + '9' => array('List'), + '<' => array('Markup'), + '=' => array('Setext'), + '>' => array('Quote'), + '[' => array('Reference'), + '_' => array('Rule'), + '`' => array('FencedCode'), + '|' => array('Table'), + '~' => array('FencedCode'), + ); + + # Draft + protected $definitionMarkers = array( + '[' => array('Reference'), + ); + + protected $unmarkedBlockTypes = array( + 'CodeBlock', + ); + + private function lines(array $lines) + { + $CurrentBlock = null; foreach ($lines as $line) { - $indentedLine = $line; + $indent = 0; - $indentation = 0; - - while(isset($line[$indentation]) and $line[$indentation] === ' ') + while (true) { - $indentation++; - } - - if ($indentation > 0) - { - $line = ltrim($line); - } - - # ~ - - switch ($context) - { - case null: - - $contextData = null; - - if ($line === '') + if (isset($line[$indent])) + { + if ($line[$indent] === ' ') { - continue 2; - } - - break; - - # ~~~ javascript - # var message = 'Hello!'; - - case 'fenced code': - - if ($line === '') - { - $block['content'][0]['content'] .= "\n"; - - continue 2; - } - - if (preg_match('/^[ ]*'.$contextData['marker'].'{3,}[ ]*$/', $line)) - { - $context = null; + $indent ++; } else { - if ($block['content'][0]['content']) - { - $block['content'][0]['content'] .= "\n"; - } - - $string = htmlspecialchars($indentedLine, ENT_NOQUOTES, 'UTF-8'); - - $block['content'][0]['content'] .= $string; - } - - continue 2; - - case 'markup': - - if (stripos($line, $contextData['start']) !== false) # opening tag - { - $contextData['depth']++; - } - - if (stripos($line, $contextData['end']) !== false) # closing tag - { - if ($contextData['depth'] > 0) - { - $contextData['depth']--; - } - else - { - $context = null; - } - } - - $block['content'] .= "\n".$indentedLine; - - continue 2; - - case 'li': - - if ($line === '') - { - $contextData['interrupted'] = true; - - continue 2; - } - - if ($contextData['indentation'] === $indentation and preg_match('/^'.$contextData['marker'].'[ ]+(.*)/', $line, $matches)) - { - if (isset($contextData['interrupted'])) - { - $nestedBlock['content'] []= ''; - - unset($contextData['interrupted']); - } - - unset($nestedBlock); - - $nestedBlock = array( - 'name' => 'li', - 'content type' => 'lines', - 'content' => array( - $matches[1], - ), - ); - - $block['content'] []= & $nestedBlock; - - continue 2; - } - - if (empty($contextData['interrupted'])) - { - $value = $line; - - if ($indentation > $contextData['baseline']) - { - $value = str_repeat(' ', $indentation - $contextData['baseline']) . $value; - } - - $nestedBlock['content'] []= $value; - - continue 2; - } - - if ($indentation > 0) - { - $nestedBlock['content'] []= ''; - - $value = $line; - - if ($indentation > $contextData['baseline']) - { - $value = str_repeat(' ', $indentation - $contextData['baseline']) . $value; - } - - $nestedBlock['content'] []= $value; - - unset($contextData['interrupted']); - - continue 2; - } - - $context = null; - - break; - - case 'quote': - - if ($line === '') - { - $contextData['interrupted'] = true; - - continue 2; - } - - if (preg_match('/^>[ ]?(.*)/', $line, $matches)) - { - $block['content'] []= $matches[1]; - - continue 2; - } - - if (empty($contextData['interrupted'])) - { - $block['content'] []= $line; - - continue 2; - } - - $context = null; - - break; - - case 'code': - - if ($line === '') - { - $contextData['interrupted'] = true; - - continue 2; - } - - if ($indentation >= 4) - { - if (isset($contextData['interrupted'])) - { - $block['content'][0]['content'] .= "\n"; - - unset($contextData['interrupted']); - } - - $block['content'][0]['content'] .= "\n"; - - $string = htmlspecialchars($line, ENT_NOQUOTES, 'UTF-8'); - $string = str_repeat(' ', $indentation - 4) . $string; - - $block['content'][0]['content'] .= $string; - - continue 2; - } - - $context = null; - - break; - - case 'table': - - if ($line === '') - { - $context = null; - - continue 2; - } - - if (strpos($line, '|') !== false) - { - $nestedBlocks = array(); - - $substring = preg_replace('/^[|][ ]*/', '', $line); - $substring = preg_replace('/[|]?[ ]*$/', '', $substring); - - $parts = explode('|', $substring); - - foreach ($parts as $index => $part) - { - $substring = trim($part); - - $nestedBlock = array( - 'name' => 'td', - 'content type' => 'line', - 'content' => $substring, - ); - - if (isset($contextData['alignments'][$index])) - { - $nestedBlock['attributes'] = array( - 'align' => $contextData['alignments'][$index], - ); - } - - $nestedBlocks []= $nestedBlock; - } - - $nestedBlock = array( - 'name' => 'tr', - 'content type' => 'blocks', - 'content' => $nestedBlocks, - ); - - $block['content'][1]['content'] []= $nestedBlock; - - continue 2; - } - - $context = null; - - break; - - case 'paragraph': - - if ($line === '') - { - $block['name'] = 'p'; # dense li - - $context = null; - - continue 2; - } - - if ($line[0] === '=' and chop($line, '=') === '') - { - $block['name'] = 'h1'; - - $context = null; - - continue 2; - } - - if ($line[0] === '-' and chop($line, '-') === '') - { - $block['name'] = 'h2'; - - $context = null; - - continue 2; - } - - if (strpos($line, '|') !== false and strpos($block['content'], '|') !== false and chop($line, ' -:|') === '') - { - $values = array(); - - $substring = trim($line, ' |'); - - $parts = explode('|', $substring); - - foreach ($parts as $part) - { - $substring = trim($part); - - $value = null; - - if ($substring[0] === ':') - { - $value = 'left'; - } - - if (substr($substring, -1) === ':') - { - $value = $value === 'left' ? 'center' : 'right'; - } - - $values []= $value; - } - - # ~ - - $nestedBlocks = array(); - - $substring = preg_replace('/^[|][ ]*/', '', $block['content']); - $substring = preg_replace('/[|]?[ ]*$/', '', $substring); - - $parts = explode('|', $substring); - - foreach ($parts as $index => $part) - { - $substring = trim($part); - - $nestedBlock = array( - 'name' => 'th', - 'content type' => 'line', - 'content' => $substring, - ); - - if (isset($values[$index])) - { - $value = $values[$index]; - - $nestedBlock['attributes'] = array( - 'align' => $value, - ); - } - - $nestedBlocks []= $nestedBlock; - } - - # ~ - - $block = array( - 'name' => 'table', - 'content type' => 'blocks', - 'content' => array(), - ); - - $block['content'] []= array( - 'name' => 'thead', - 'content type' => 'blocks', - 'content' => array(), - ); - - $block['content'] []= array( - 'name' => 'tbody', - 'content type' => 'blocks', - 'content' => array(), - ); - - $block['content'][0]['content'] []= array( - 'name' => 'tr', - 'content type' => 'blocks', - 'content' => array(), - ); - - $block['content'][0]['content'][0]['content'] = $nestedBlocks; - - # ~ - - $context = 'table'; - - $contextData = array( - 'alignments' => $values, - ); - - # ~ - - continue 2; - } - - break; - - default: - - throw new Exception('Unrecognized context - '.$context); - } - - if ($indentation >= 4) - { - $blocks []= $block; - - $string = htmlspecialchars($line, ENT_NOQUOTES, 'UTF-8'); - $string = str_repeat(' ', $indentation - 4) . $string; - - $block = array( - 'name' => 'pre', - 'content type' => 'blocks', - 'content' => array( - array( - 'name' => 'code', - 'content type' => null, - 'content' => $string, - ), - ), - ); - - $context = 'code'; - - continue; - } - - switch ($line[0]) - { - case '#': - - if (isset($line[1])) - { - $blocks []= $block; - - $level = 1; - - while (isset($line[$level]) and $line[$level] === '#') - { - $level++; - } - - $string = trim($line, '# '); - $string = $this->parseLine($string); - - $block = array( - 'name' => 'h'.$level, - 'content type' => 'line', - 'content' => $string, - ); - - $context = null; - - continue 2; - } - - break; - - case '<': - - $position = strpos($line, '>'); - - if ($position > 1) - { - $substring = substr($line, 1, $position - 1); - - $substring = chop($substring); - - if (substr($substring, -1) === '/') - { - $isClosing = true; - - $substring = substr($substring, 0, -1); - } - - $position = strpos($substring, ' '); - - if ($position) - { - $name = substr($substring, 0, $position); - } - else - { - $name = $substring; - } - - $name = strtolower($name); - - if ($name[0] == 'h' and strpos('r123456', $name[1]) !== false) # hr, h1, h2, ... - { - if ($name == 'hr') - { - $isClosing = true; - } - } - elseif ( ! ctype_alpha($name)) - { - break; - } - - if (in_array($name, self::$textLevelElements)) - { - break; - } - - $blocks []= $block; - - $block = array( - 'name' => null, - 'content type' => null, - 'content' => $indentedLine, - ); - - if (isset($isClosing)) - { - unset($isClosing); - - continue 2; - } - - $context = 'markup'; - $contextData = array( - 'start' => '<'.$name.'>', - 'end' => '', - 'depth' => 0, - ); - - if (stripos($line, $contextData['end']) !== false) - { - $context = null; - } - - continue 2; - } - - break; - - case '>': - - if (preg_match('/^>[ ]?(.*)/', $line, $matches)) - { - $blocks []= $block; - - $block = array( - 'name' => 'blockquote', - 'content type' => 'lines', - 'content' => array( - $matches[1], - ), - ); - - $context = 'quote'; - $contextData = array(); - - continue 2; - } - - break; - - case '[': - - $position = strpos($line, ']:'); - - if ($position) - { - $reference = array(); - - $label = substr($line, 1, $position - 1); - $label = strtolower($label); - - $substring = substr($line, $position + 2); - $substring = trim($substring); - - if ($substring === '') - { - break; - } - - if ($substring[0] === '<') - { - $position = strpos($substring, '>'); - - if ($position === false) - { - break; - } - - $reference['link'] = substr($substring, 1, $position - 1); - - $substring = substr($substring, $position + 1); - } - else - { - $position = strpos($substring, ' '); - - if ($position === false) - { - $reference['link'] = $substring; - - $substring = false; - } - else - { - $reference['link'] = substr($substring, 0, $position); - - $substring = substr($substring, $position + 1); - } - } - - if ($substring !== false) - { - if ($substring[0] !== '"' and $substring[0] !== "'" and $substring[0] !== '(') - { - break; - } - - $lastChar = substr($substring, -1); - - if ($lastChar !== '"' and $lastChar !== "'" and $lastChar !== ')') - { - break; - } - - $reference['title'] = substr($substring, 1, -1); - } - - $this->referenceMap[$label] = $reference; - - continue 2; - } - - break; - - case '`': - case '~': - - if (preg_match('/^([`]{3,}|[~]{3,})[ ]*(\w+)?[ ]*$/', $line, $matches)) - { - $blocks []= $block; - - $block = array( - 'name' => 'pre', - 'content type' => 'blocks', - 'content' => array( - array( - 'name' => 'code', - 'content type' => null, - 'content' => '', - ), - ), - ); - - if (isset($matches[2])) - { - $block['content'][0]['attributes'] = array( - 'class' => 'language-'.$matches[2], - ); - } - - $context = 'fenced code'; - $contextData = array( - 'marker' => $matches[1][0], - ); - - continue 2; - } - - break; - - case '-': - case '*': - case '_': - - if (preg_match('/^([-*_])([ ]{0,2}\1){2,}[ ]*$/', $line)) - { - $blocks []= $block; - - $block = array( - 'name' => 'hr', - 'content' => null, - ); - - continue 2; - } - } - - switch (true) - { - case $line[0] <= '-' and preg_match('/^([*+-][ ]+)(.*)/', $line, $matches): - case $line[0] <= '9' and preg_match('/^([0-9]+[.][ ]+)(.*)/', $line, $matches): - - $blocks []= $block; - - $name = $line[0] >= '0' ? 'ol' : 'ul'; - - $block = array( - 'name' => $name, - 'content type' => 'blocks', - 'content' => array(), - ); - - unset($nestedBlock); - - $nestedBlock = array( - 'name' => 'li', - 'content type' => 'lines', - 'content' => array( - $matches[2], - ), - ); - - $block['content'] []= & $nestedBlock; - - $baseline = $indentation + strlen($matches[1]); - - $marker = $line[0] >= '0' ? '[0-9]+[.]' : '[*+-]'; - - $context = 'li'; - $contextData = array( - 'indentation' => $indentation, - 'baseline' => $baseline, - 'marker' => $marker, - 'lines' => array( - $matches[2], - ), - ); - - continue 2; - } - - if ($context === 'paragraph') - { - $block['content'] .= "\n".$line; - - continue; - } - else - { - $blocks []= $block; - - $block = array( - 'name' => 'p', - 'content type' => 'line', - 'content' => $line, - ); - - if ($blockContext === 'li' and empty($blocks[1])) - { - $block['name'] = null; - } - - $context = 'paragraph'; - } - } - - if ($blockContext === 'li' and $block['name'] === null) - { - return $block['content']; - } - - $blocks []= $block; - - unset($blocks[0]); - - return $blocks; - } - - private function compile(array $blocks) - { - $markup = ''; - - foreach ($blocks as $block) - { - $markup .= "\n"; - - if (isset($block['name'])) - { - $markup .= '<'.$block['name']; - - if (isset($block['attributes'])) - { - foreach ($block['attributes'] as $name => $value) - { - $markup .= ' '.$name.'="'.$value.'"'; + break; } } - - if ($block['content'] === null) + else # blank line { - $markup .= ' />'; + if (isset($CurrentBlock)) + { + $CurrentBlock['interrupted'] = true; + } + + continue 2; + } + } + + $text = $indent > 0 ? substr($line, $indent) : $line; + + # ~ + + $Line = array('body' => $line, 'indent' => $indent, 'text' => $text); + + # Multiline block types define "addTo" methods. + + if (isset($CurrentBlock['incomplete'])) + { + $Block = $this->{'addTo'.$CurrentBlock['type']}($Line, $CurrentBlock); + + if (isset($Block)) + { + $CurrentBlock = $Block; continue; } else { - $markup .= '>'; + unset($CurrentBlock['incomplete']); + + if (method_exists($this, 'complete'.$CurrentBlock['type'])) + { + $CurrentBlock = $this->{'complete'.$CurrentBlock['type']}($CurrentBlock); + } } } - switch ($block['content type']) + # ~ + + $blockTypes = $this->unmarkedBlockTypes; + + $marker = $text[0]; + + if (isset($this->blockMarkers[$marker])) { - case null: + foreach ($this->blockMarkers[$marker] as $blockType) + { + $blockTypes []= $blockType; + } + } - $markup .= $block['content']; + # + # ~ - break; + foreach ($blockTypes as $blockType) + { + # Block types define "identify" methods. - case 'line': + $Block = $this->{'identify'.$blockType}($Line, $CurrentBlock); - $markup .= $this->parseLine($block['content']); + if (isset($Block)) + { + $Block['type'] = $blockType; - break; - - case 'lines': - - $result = $this->findBlocks($block['content'], $block['name']); - - if (is_string($result)) # dense li + if ( ! isset($Block['identified'])) # » { - $markup .= $this->parseLine($result); + $elements []= $CurrentBlock['element']; - break; + $Block['identified'] = true; } - $markup .= $this->compile($result); + # Multiline block types define "addTo" methods. - break; + if (method_exists($this, 'addTo'.$blockType)) + { + $Block['incomplete'] = true; + } - case 'blocks': + $CurrentBlock = $Block; - $markup .= $this->compile($block['content']); - - break; + continue 2; + } } - if (isset($block['name'])) + # ~ + + if ($CurrentBlock['type'] === 'Paragraph' and ! isset($CurrentBlock['interrupted'])) { - $markup .= ''; + $CurrentBlock['element']['text'] .= "\n".$text; } + else + { + $elements []= $CurrentBlock['element']; + + $CurrentBlock = array( + 'type' => 'Paragraph', + 'identified' => true, + 'element' => array( + 'name' => 'p', + 'text' => $text, + 'handler' => 'line', + ), + ); + } + } + + $elements []= $CurrentBlock['element']; + + unset($elements[0]); + + # ~ + + $markup = $this->elements($elements); + + # ~ + + return $markup; + } + + # + # Atx + + protected function identifyAtx($Line) + { + if (isset($Line['text'][1])) + { + $level = 1; + + while (isset($Line['text'][$level]) and $Line['text'][$level] === '#') + { + $level ++; + } + + $text = trim($Line['text'], '# '); + + $Block = array( + 'element' => array( + 'name' => 'h'.$level, + 'text' => $text, + 'handler' => 'line', + ), + ); + + return $Block; + } + } + + # + # Rule + + protected function identifyRule($Line) + { + if (preg_match('/^(['.$Line['text'][0].'])([ ]{0,2}\1){2,}[ ]*$/', $Line['text'])) + { + $Block = array( + 'element' => array( + 'name' => 'hr' + ), + ); + + return $Block; + } + } + + # + # Reference + + protected function identifyReference($Line) + { + if (preg_match('/^\[(.+?)\]:[ ]*?(?:[ ]+["\'(](.+)["\')])?[ ]*$/', $Line['text'], $matches)) + { + $label = strtolower($matches[1]); + + $this->references[$label] = array( + 'url' => $matches[2], + ); + + if (isset($matches[3])) + { + $this->references[$label]['title'] = $matches[3]; + } + + $Block = array( + 'element' => null, + ); + + return $Block; + } + } + + # + # Setext + + protected function identifySetext($Line, array $Block = null) + { + if ( ! isset($Block) or $Block['type'] !== 'Paragraph' or isset($Block['interrupted'])) + { + return; + } + + if (chop($Line['text'], $Line['text'][0]) === '') + { + $Block['element']['name'] = $Line['text'][0] === '=' ? 'h1' : 'h2'; + + return $Block; + } + } + + # + # Markup + + protected function identifyMarkup($Line) + { + if (preg_match('/^<(\w[\w\d]*)(?:[ ][^>\/]*)?(\/?)[ ]*>/', $Line['text'], $matches)) + { + if (in_array($matches[1], $this->textLevelElements)) + { + return; + } + + $Block = array( + 'element' => $Line['body'], + ); + + if ($matches[2] or $matches[1] === 'hr' or preg_match('/<\/'.$matches[1].'>[ ]*$/', $Line['text'])) + { + $Block['closed'] = true; + } + else + { + $Block['depth'] = 0; + $Block['start'] = '<'.$matches[1].'>'; + $Block['end'] = ''; + } + + return $Block; + } + } + + protected function addToMarkup($Line, array $Block) + { + if (isset($Block['closed'])) + { + return; + } + + if (stripos($Line['text'], $Block['start']) !== false) # opening tag + { + $Block['depth'] ++; + } + + if (stripos($Line['text'], $Block['end']) !== false) # closing tag + { + if ($Block['depth'] > 0) + { + $Block['depth'] --; + } + else + { + $Block['closed'] = true; + } + } + + $Block['element'] .= "\n".$Line['body']; + + return $Block; + } + + # + # Fenced Code + + protected function identifyFencedCode($Line) + { + if (preg_match('/^(['.$Line['text'][0].']{3,})[ ]*(\w+)?[ ]*$/', $Line['text'], $matches)) + { + $Element = array( + 'name' => 'code', + 'text' => '', + ); + + if (isset($matches[2])) + { + $class = 'language-'.$matches[2]; + + $Element['attributes'] = array( + 'class' => $class, + ); + } + + $Block = array( + 'char' => $Line['text'][0], + 'element' => array( + 'name' => 'pre', + 'handler' => 'element', + 'text' => $Element, + ), + ); + + return $Block; + } + } + + protected function addToFencedCode($Line, $Block) + { + if (isset($Block['complete'])) + { + return; + } + + if (isset($Block['interrupted'])) + { + $Block['element']['text']['text'] .= "\n"; + + unset($Block['interrupted']); + } + + if (preg_match('/^'.$Block['char'].'{3,}[ ]*$/', $Line['text'])) + { + $Block['element']['text']['text'] = substr($Block['element']['text']['text'], 1); + + $Block['complete'] = true; + + return $Block; + } + + $string = htmlspecialchars($Line['text'], ENT_NOQUOTES, 'UTF-8'); + + $Block['element']['text']['text'] .= "\n".$string;; + + return $Block; + } + + # + # List + + protected function identifyList($Line) + { + list($name, $pattern) = $Line['text'][0] <= '-' ? array('ul', '[*+-]') : array('ol', '[0-9]+[.]'); + + if (preg_match('/^('.$pattern.'[ ]+)(.*)/', $Line['text'], $matches)) + { + $Block = array( + 'indent' => $Line['indent'], + 'pattern' => $pattern, + 'element' => array( + 'name' => $name, + 'handler' => 'elements', + ), + ); + + $Block['li'] = array( + 'name' => 'li', + 'handler' => 'li', + 'text' => array( + $matches[2], + ), + ); + + $Block['element']['text'] []= & $Block['li']; + + return $Block; + } + } + + protected function addToList($Line, array $Block) + { + if ($Block['indent'] === $Line['indent'] and preg_match('/^'.$Block['pattern'].'[ ]+(.*)/', $Line['text'], $matches)) + { + if (isset($Block['interrupted'])) + { + $Block['li']['text'] []= ''; + + unset($Block['interrupted']); + } + + unset($Block['li']); + + $Block['li'] = array( + 'name' => 'li', + 'handler' => 'li', + 'text' => array( + $matches[1], + ), + ); + + $Block['element']['text'] []= & $Block['li']; + + return $Block; + } + + if ( ! isset($Block['interrupted'])) + { + $text = preg_replace('/^[ ]{0,2}/', '', $Line['body']); + + $Block['li']['text'] []= $text; + + return $Block; + } + + if ($Line['indent'] > 0) + { + $Block['li']['text'] []= ''; + + $text = preg_replace('/^[ ]{0,2}/', '', $Line['body']); + + $Block['li']['text'] []= $text; + + unset($Block['interrupted']); + + return $Block; + } + } + + # + # Quote + + protected function identifyQuote($Line) + { + if (preg_match('/^>[ ]?(.*)/', $Line['text'], $matches)) + { + $Block = array( + 'element' => array( + 'name' => 'blockquote', + 'handler' => 'lines', + 'text' => (array) $matches[1], + ), + ); + + return $Block; + } + } + + protected function addToQuote($Line, array $Block) + { + if ($Line['text'][0] === '>' and preg_match('/^>[ ]?(.*)/', $Line['text'], $matches)) + { + if (isset($Block['interrupted'])) + { + $Block['element']['text'] []= ''; + } + + $Block['element']['text'] []= $matches[1]; + + return $Block; + } + + if ( ! isset($Block['interrupted'])) + { + $Block['element']['text'] []= $Line['text']; + + return $Block; + } + } + + # + # Table + + protected function identifyTable($Line, array $Block = null) + { + if ( ! isset($Block) or $Block['type'] !== 'Paragraph' or isset($Block['interrupted'])) + { + return; + } + + if (strpos($Block['element']['text'], '|') !== false and chop($Line['text'], ' -:|') === '') + { + $alignments = array(); + + $divider = $Line['text']; + + $divider = trim($divider); + $divider = trim($divider, '|'); + + $dividerCells = explode('|', $divider); + + foreach ($dividerCells as $dividerCell) + { + $dividerCell = trim($dividerCell); + + if ($dividerCell === '') + { + continue; + } + + $alignment = null; + + if ($dividerCell[0] === ':') + { + $alignment = 'left'; + } + + if (substr($dividerCell, -1) === ':') + { + $alignment = $alignment === 'left' ? 'center' : 'right'; + } + + $alignments []= $alignment; + } + + # ~ + + $HeaderElements = array(); + + $header = $Block['element']['text']; + + $header = trim($header); + $header = trim($header, '|'); + + $headerCells = explode('|', $header); + + foreach ($headerCells as $index => $headerCell) + { + $headerCell = trim($headerCell); + + $HeaderElement = array( + 'name' => 'th', + 'text' => $headerCell, + 'handler' => 'line', + ); + + if (isset($alignments[$index])) + { + $alignment = $alignments[$index]; + + $HeaderElement['attributes'] = array( + 'align' => $alignment, + ); + } + + $HeaderElements []= $HeaderElement; + } + + # ~ + + $Block = array( + 'alignments' => $alignments, + 'identified' => true, + 'element' => array( + 'name' => 'table', + 'handler' => 'elements', + ), + ); + + $Block['element']['text'] []= array( + 'name' => 'thead', + 'handler' => 'elements', + ); + + $Block['element']['text'] []= array( + 'name' => 'tbody', + 'handler' => 'elements', + 'text' => array(), + ); + + $Block['element']['text'][0]['text'] []= array( + 'name' => 'tr', + 'handler' => 'elements', + 'text' => $HeaderElements, + ); + + return $Block; + } + } + + protected function addToTable($Line, array $Block) + { + if ($Line['text'][0] === '|' or strpos($Line['text'], '|')) + { + $Elements = array(); + + $row = $Line['text']; + + $row = trim($row); + $row = trim($row, '|'); + + $cells = explode('|', $row); + + foreach ($cells as $index => $cell) + { + $cell = trim($cell); + + $Element = array( + 'name' => 'td', + 'handler' => 'line', + 'text' => $cell, + ); + + if (isset($Block['alignments'][$index])) + { + $Element['attributes'] = array( + 'align' => $Block['alignments'][$index], + ); + } + + $Elements []= $Element; + } + + $Element = array( + 'name' => 'tr', + 'handler' => 'elements', + 'text' => $Elements, + ); + + $Block['element']['text'][1]['text'] []= $Element; + + return $Block; + } + } + + # + # Code + + protected function identifyCodeBlock($Line) + { + if ($Line['indent'] >= 4) + { + $text = substr($Line['body'], 4); + $text = htmlspecialchars($text, ENT_NOQUOTES, 'UTF-8'); + + $Block = array( + 'element' => array( + 'name' => 'pre', + 'handler' => 'element', + 'text' => array( + 'name' => 'code', + 'text' => $text, + ), + ), + ); + + return $Block; + } + } + + protected function addToCodeBlock($Line, $Block) + { + if ($Line['indent'] >= 4) + { + if (isset($Block['interrupted'])) + { + $Block['element']['text']['text'] .= "\n"; + + unset($Block['interrupted']); + } + + $Block['element']['text']['text'] .= "\n"; + + $text = substr($Line['body'], 4); + $text = htmlspecialchars($text, ENT_NOQUOTES, 'UTF-8'); + + $Block['element']['text']['text'] .= $text; + + return $Block; + } + } + + # + # ~ + # + + private function element(array $Element) + { + $markup = '<'.$Element['name']; + + if (isset($Element['attributes'])) + { + foreach ($Element['attributes'] as $name => $value) + { + $markup .= ' '.$name.'="'.$value.'"'; + } + } + + if (isset($Element['text'])) + { + $markup .= '>'; + + if (isset($Element['handler'])) + { + $markup .= $this->$Element['handler']($Element['text']); + } + else + { + $markup .= $Element['text']; + } + + $markup .= ''; + } + else + { + $markup .= ' />'; + } + + return $markup; + } + + private function elements(array $Elements) + { + $markup = ''; + + foreach ($Elements as $Element) + { + if ($Element === null) + { + continue; + } + + $markup .= "\n"; + + if (is_string($Element)) # because of markup + { + $markup .= $Element; + + continue; + } + + $markup .= $this->element($Element); } $markup .= "\n"; @@ -924,358 +849,394 @@ class Parsedown return $markup; } - private function parseLine($text, $markers = array(" \n", '![', '&', '*', '<', '[', '\\', '_', '`', 'http', '~~')) + # + # Spans + # + + protected $spanMarkers = array( + '!' => array('Link'), # ? + '&' => array('Ampersand'), + '*' => array('Emphasis'), + '/' => array('Url'), + '<' => array('UrlTag', 'EmailTag', 'Tag', 'LessThan'), + '[' => array('Link'), + '_' => array('Emphasis'), + '`' => array('InlineCode'), + '~' => array('Strikethrough'), + '\\' => array('EscapeSequence'), + ); + + protected $spanMarkerList = '*_!&[spanMarkerList)) { - $closestMarker = null; - $closestMarkerIndex = 0; - $closestMarkerPosition = null; + $marker = $markedExcerpt[0]; - foreach ($markers as $index => $marker) + $markerPosition += strpos($remainder, $marker); + + foreach ($this->spanMarkers[$marker] as $spanType) { - $markerPosition = strpos($text, $marker); + $handler = 'identify'.$spanType; - if ($markerPosition === false) + $Span = $this->$handler($markedExcerpt, $text); + + if (isset($Span)) { - unset($markers[$index]); + # The identified span can be ahead of the marker. - continue; - } + if (isset($Span['position']) and $Span['position'] > $markerPosition) + { + continue; + } - if ($closestMarker === null or $markerPosition < $closestMarkerPosition) - { - $closestMarker = $marker; - $closestMarkerIndex = $index; - $closestMarkerPosition = $markerPosition; + # Spans that start at the position of their marker don't have to set a position. + + if ( ! isset($Span['position'])) + { + $Span['position'] = $markerPosition; + } + + $unmarkedText = substr($text, 0, $Span['position']); + + $markup .= $this->readPlainText($unmarkedText); + + $markup .= isset($Span['element']) ? $this->element($Span['element']) : $Span['markup']; + + $text = substr($text, $Span['position'] + $Span['extent']); + + $remainder = $text; + + $markerPosition = 0; + + continue 2; } } - # ~ + $remainder = substr($markedExcerpt, 1); - if ($closestMarker === null or isset($text[$closestMarkerPosition + 1]) === false) + $markerPosition ++; + } + + $markup .= $this->readPlainText($text); + + return $markup; + } + + # + # ~ + # + + protected function identifyUrl($excerpt, $text) + { + if ( ! isset($excerpt[1]) or $excerpt[1] !== '/') + { + return; + } + + if (preg_match('/\bhttps?:[\/]{2}[^\s]+\b\/*/ui', $text, $matches, PREG_OFFSET_CAPTURE)) + { + $url = str_replace(array('&', '<'), array('&', '<'), $matches[0][0]); + + return array( + 'extent' => strlen($matches[0][0]), + 'position' => $matches[0][1], + 'element' => array( + 'name' => 'a', + 'text' => $url, + 'attributes' => array( + 'href' => $url, + ), + ), + ); + } + } + + protected function identifyAmpersand($excerpt) + { + if ( ! preg_match('/^&#?\w+;/', $excerpt)) + { + return array( + 'markup' => '&', + 'extent' => 1, + ); + } + } + + protected function identifyStrikethrough($excerpt) + { + if ( ! isset($excerpt[1])) + { + return; + } + + if ($excerpt[1] === $excerpt[0] and preg_match('/^~~(?=\S)(.+?)(?<=\S)~~/', $excerpt, $matches)) + { + return array( + 'extent' => strlen($matches[0]), + 'element' => array( + 'name' => 'del', + 'text' => $matches[1], + 'handler' => 'line', + ), + ); + } + } + + protected function identifyEscapeSequence($excerpt) + { + if (in_array($excerpt[1], $this->specialCharacters)) + { + return array( + 'markup' => $excerpt[1], + 'extent' => 2, + ); + } + } + + protected function identifyLessThan() + { + return array( + 'markup' => '<', + 'extent' => 1, + ); + } + + protected function identifyUrlTag($excerpt) + { + if (strpos($excerpt, '>') !== false and preg_match('/^<(https?:[\/]{2}[^\s]+?)>/i', $excerpt, $matches)) + { + $url = str_replace(array('&', '<'), array('&', '<'), $matches[1]); + + return array( + 'extent' => strlen($matches[0]), + 'element' => array( + 'name' => 'a', + 'text' => $url, + 'attributes' => array( + 'href' => $url, + ), + ), + ); + } + } + + protected function identifyEmailTag($excerpt) + { + if (strpos($excerpt, '>') !== false and preg_match('/<(\S+?@\S+?)>/', $excerpt, $matches)) + { + return array( + 'extent' => strlen($matches[0]), + 'element' => array( + 'name' => 'a', + 'text' => $matches[1], + 'attributes' => array( + 'href' => 'mailto:'.$matches[1], + ), + ), + ); + } + } + + protected function identifyTag($excerpt) + { + if (strpos($excerpt, '>') !== false and preg_match('/^<\/?\w.*?>/', $excerpt, $matches)) + { + return array( + 'markup' => $matches[0], + 'extent' => strlen($matches[0]), + ); + } + } + + protected function identifyInlineCode($excerpt) + { + $marker = $excerpt[0]; + + if (preg_match('/^('.$marker.'+)[ ]*(.+?)[ ]*(? strlen($matches[0]), + 'element' => array( + 'name' => 'code', + 'text' => $text, + ), + ); + } + } + + protected function identifyLink($excerpt) + { + $extent = $excerpt[0] === '!' ? 1 : 0; + + if (strpos($excerpt, ']') and preg_match('/\[((?:[^][]|(?R))*)\]/', $excerpt, $matches)) + { + $Link = array('text' => $matches[1], 'label' => strtolower($matches[1])); + + $extent += strlen($matches[0]); + + $substring = substr($excerpt, $extent); + + if (preg_match('/^\s*\[(.+?)\]/', $substring, $matches)) { - $markup .= $text; + $Link['label'] = strtolower($matches[1]); - break; + if (isset($this->references[$Link['label']])) + { + $Link += $this->references[$Link['label']]; + + $extent += strlen($matches[0]); + } + else + { + return; + } + } + elseif ($this->references and isset($this->references[$Link['label']])) + { + $Link += $this->references[$Link['label']]; + + if (preg_match('/^[ ]*\[\]/', $substring, $matches)) + { + $extent += strlen($matches[0]); + } + } + elseif (preg_match('/^\([ ]*(.*?)(?:[ ]+[\'"](.+?)[\'"])?[ ]*\)/', $substring, $matches)) + { + $Link['url'] = $matches[1]; + + if (isset($matches[2])) + { + $Link['title'] = $matches[2]; + } + + $extent += strlen($matches[0]); } else { - $markup .= substr($text, 0, $closestMarkerPosition); + return; } - - $text = substr($text, $closestMarkerPosition); - - # ~ - - unset($markers[$closestMarkerIndex]); - - # ~ - - switch ($closestMarker) - { - case " \n": - - $markup .= '
'."\n"; - - $offset = 3; - - break; - - case '![': - case '[': - - if (strpos($text, ']') and preg_match('/\[((?:[^][]|(?R))*)\]/', $text, $matches)) - { - $element = array( - '!' => $text[0] === '!', - 'text' => $matches[1], - ); - - $offset = strlen($matches[0]); - - if ($element['!']) - { - $offset++; - } - - $remainingText = substr($text, $offset); - - if ($remainingText[0] === '(' and preg_match('/\([ ]*(.*?)(?:[ ]+[\'"](.+?)[\'"])?[ ]*\)/', $remainingText, $matches)) - { - $element['link'] = $matches[1]; - - if (isset($matches[2])) - { - $element['title'] = $matches[2]; - } - - $offset += strlen($matches[0]); - } - elseif ($this->referenceMap) - { - $reference = $element['text']; - - if (preg_match('/^\s*\[(.*?)\]/', $remainingText, $matches)) - { - $reference = $matches[1] === '' ? $element['text'] : $matches[1]; - - $offset += strlen($matches[0]); - } - - $reference = strtolower($reference); - - if (isset($this->referenceMap[$reference])) - { - $element['link'] = $this->referenceMap[$reference]['link']; - - if (isset($this->referenceMap[$reference]['title'])) - { - $element['title'] = $this->referenceMap[$reference]['title']; - } - } - else - { - unset($element); - } - } - else - { - unset($element); - } - } - - if (isset($element)) - { - $element['link'] = str_replace('&', '&', $element['link']); - $element['link'] = str_replace('<', '<', $element['link']); - - if ($element['!']) - { - $markup .= ''.$element['text'].'parseLine($element['text'], $markers); - - $markup .= ''; - } - - unset($element); - } - else - { - $markup .= $closestMarker; - - $offset = $closestMarker === '![' ? 2 : 1; - } - - break; - - case '&': - - if (preg_match('/^&#?\w+;/', $text, $matches)) - { - $markup .= $matches[0]; - - $offset = strlen($matches[0]); - } - else - { - $markup .= '&'; - - $offset = 1; - } - - break; - - case '*': - case '_': - - if ($text[1] === $closestMarker and preg_match(self::$strongRegex[$closestMarker], $text, $matches)) - { - $markers[$closestMarkerIndex] = $closestMarker; - $matches[1] = $this->parseLine($matches[1], $markers); - - $markup .= ''.$matches[1].''; - } - elseif (preg_match(self::$emRegex[$closestMarker], $text, $matches)) - { - $markers[$closestMarkerIndex] = $closestMarker; - $matches[1] = $this->parseLine($matches[1], $markers); - - $markup .= ''.$matches[1].''; - } - - if (isset($matches) and $matches) - { - $offset = strlen($matches[0]); - } - else - { - $markup .= $closestMarker; - - $offset = 1; - } - - break; - - case '<': - - if (strpos($text, '>') !== false) - { - if ($text[1] === 'h' and preg_match('/^<(https?:[\/]{2}[^\s]+?)>/i', $text, $matches)) - { - $elementUrl = $matches[1]; - $elementUrl = str_replace('&', '&', $elementUrl); - $elementUrl = str_replace('<', '<', $elementUrl); - - $markup .= ''.$elementUrl.''; - - $offset = strlen($matches[0]); - } - elseif (strpos($text, '@') > 1 and preg_match('/<(\S+?@\S+?)>/', $text, $matches)) - { - $markup .= ''.$matches[1].''; - - $offset = strlen($matches[0]); - } - elseif (preg_match('/^<\/?\w.*?>/', $text, $matches)) - { - $markup .= $matches[0]; - - $offset = strlen($matches[0]); - } - else - { - $markup .= '<'; - - $offset = 1; - } - } - else - { - $markup .= '<'; - - $offset = 1; - } - - break; - - case '\\': - - if (in_array($text[1], self::$specialCharacters)) - { - $markup .= $text[1]; - - $offset = 2; - } - else - { - $markup .= '\\'; - - $offset = 1; - } - - break; - - case '`': - - if (preg_match('/^(`+)[ ]*(.+?)[ ]*(?'.$elementText.''; - - $offset = strlen($matches[0]); - } - else - { - $markup .= '`'; - - $offset = 1; - } - - break; - - case 'http': - - if (preg_match('/^https?:[\/]{2}[^\s]+\b\/*/ui', $text, $matches)) - { - $elementUrl = $matches[0]; - $elementUrl = str_replace('&', '&', $elementUrl); - $elementUrl = str_replace('<', '<', $elementUrl); - - $markup .= ''.$elementUrl.''; - - $offset = strlen($matches[0]); - } - else - { - $markup .= 'http'; - - $offset = 4; - } - - break; - - case '~~': - - if (preg_match('/^~~(?=\S)(.+?)(?<=\S)~~/', $text, $matches)) - { - $matches[1] = $this->parseLine($matches[1], $markers); - - $markup .= ''.$matches[1].''; - - $offset = strlen($matches[0]); - } - else - { - $markup .= '~~'; - - $offset = 2; - } - - break; - } - - if (isset($offset)) - { - $text = substr($text, $offset); - } - - $markers[$closestMarkerIndex] = $closestMarker; + } + else + { + return; + } + + $url = str_replace(array('&', '<'), array('&', '<'), $Link['url']); + + if ($excerpt[0] === '!') + { + $Element = array( + 'name' => 'img', + 'attributes' => array( + 'alt' => $Link['text'], + 'src' => $url, + ), + ); + } + else + { + $Element = array( + 'name' => 'a', + 'handler' => 'line', + 'text' => $Link['text'], + 'attributes' => array( + 'href' => $url, + ), + ); + } + + if (isset($Link['title'])) + { + $Element['attributes']['title'] = $Link['title']; + } + + return array( + 'extent' => $extent, + 'element' => $Element, + ); + } + + protected function identifyEmphasis($excerpt) + { + if ( ! isset($excerpt[1])) + { + return; + } + + $marker = $excerpt[0]; + + if ($excerpt[1] === $marker and preg_match($this->strongRegex[$marker], $excerpt, $matches)) + { + $emphasis = 'strong'; + } + elseif (preg_match($this->emRegex[$marker], $excerpt, $matches)) + { + $emphasis = 'em'; + } + else + { + return; + } + + return array( + 'extent' => strlen($matches[0]), + 'element' => array( + 'name' => $emphasis, + 'handler' => 'line', + 'text' => $matches[1], + ), + ); + } + + # + # ~ + + protected function readPlainText($text) + { + $breakMarker = $this->breaksEnabled ? "\n" : " \n"; + + $text = str_replace($breakMarker, "
\n", $text); + + return $text; + } + + # + # ~ + # + + protected function li($lines) + { + $markup = $this->lines($lines); + + $trimmedMarkup = trim($markup); + + if ( ! in_array('', $lines) and substr($trimmedMarkup, 0, 3) === '

') + { + $markup = $trimmedMarkup; + $markup = substr($markup, 3); + + $position = strpos($markup, "

"); + + $markup = substr_replace($markup, '', $position, 4); } return $markup; } # - # Static + # Multiton + # static function instance($name = 'default') { @@ -1284,7 +1245,7 @@ class Parsedown return self::$instances[$name]; } - $instance = new Parsedown(); + $instance = new self(); self::$instances[$name] = $instance; @@ -1298,37 +1259,39 @@ class Parsedown # /** - * @deprecated in favor of "setBreaksEnabled" + * @deprecated in favor of "text" */ - function set_breaks_enabled($breaks_enabled) + function parse($text) { - return $this->setBreaksEnabled($breaks_enabled); + $markup = $this->text($text); + + return $markup; } # # Fields # - private $referenceMap = array(); + protected $references = array(); # » Definitions['reference'] # # Read-only - private static $strongRegex = array( + protected $specialCharacters = array( + '\\', '`', '*', '_', '{', '}', '[', ']', '(', ')', '>', '#', '+', '-', '.', '!', + ); + + protected $strongRegex = array( '*' => '/^[*]{2}((?:[^*]|[*][^*]*[*])+?)[*]{2}(?![*])/s', '_' => '/^__((?:[^_]|_[^_]*_)+?)__(?!_)/us', ); - private static $emRegex = array( + protected $emRegex = array( '*' => '/^[*]((?:[^*]|[*][*][^*]+?[*][*])+?)[*](?![*])/s', '_' => '/^_((?:[^_]|__[^_]*__)+?)_(?!_)\b/us', ); - private static $specialCharacters = array( - '\\', '`', '*', '_', '{', '}', '[', ']', '(', ')', '>', '#', '+', '-', '.', '!', - ); - - private static $textLevelElements = array( + protected $textLevelElements = array( 'a', 'br', 'bdo', 'abbr', 'blink', 'nextid', 'acronym', 'basefont', 'b', 'em', 'big', 'cite', 'small', 'spacer', 'listing', 'i', 'rp', 'sub', 'code', 'strike', 'marquee', diff --git a/README.md b/README.md index d020143..5d3070a 100644 --- a/README.md +++ b/README.md @@ -23,7 +23,7 @@ Include `Parsedown.php` or install [the composer package](https://packagist.org/ ### Example ``` php -$parsedown = new Parsedown(); +$Parsedown = new Parsedown(); -echo $parsedown->parse('Hello _Parsedown_!'); # prints:

Hello Parsedown!

+echo $Parsedown->text('Hello _Parsedown_!'); # prints:

Hello Parsedown!

``` diff --git a/tests/data/code_block.html b/tests/data/code_block.html index 27b3760..6964ccc 100644 --- a/tests/data/code_block.html +++ b/tests/data/code_block.html @@ -1,12 +1,8 @@ -
-<?php
+
<?php
 
 $message = 'Hello World!';
-echo $message;
-
+echo $message;

-
-> not a quote
+
> not a quote
 - not a list item
-[not a reference]: http://foo.com
-
\ No newline at end of file +[not a reference]: http://foo.com
\ No newline at end of file diff --git a/tests/data/deeply_nested_list.html b/tests/data/deeply_nested_list.html index 245f57b..d2c7e5a 100644 --- a/tests/data/deeply_nested_list.html +++ b/tests/data/deeply_nested_list.html @@ -1,16 +1,12 @@ - +
  • li
  • \ No newline at end of file diff --git a/tests/data/escaping.html b/tests/data/escaping.html index b0bdfec..64676cb 100644 --- a/tests/data/escaping.html +++ b/tests/data/escaping.html @@ -1,6 +1,4 @@

    escaped *emphasis*.

    escaped \*emphasis\* in a code span

    -
    -escaped \*emphasis\* in a code block
    -
    +
    escaped \*emphasis\* in a code block

    \ ` * _ { } [ ] ( ) > # + - . !

    \ No newline at end of file diff --git a/tests/data/fenced_code_block.html b/tests/data/fenced_code_block.html index 548f0df..8bdabba 100644 --- a/tests/data/fenced_code_block.html +++ b/tests/data/fenced_code_block.html @@ -1,12 +1,6 @@ -
    -<?php
    +
    <?php
     
     $message = 'fenced code block';
    -echo $message;
    -
    -
    -tilde
    -
    -
    -echo 'language identifier';
    -
    \ No newline at end of file +echo $message;
    +
    tilde
    +
    echo 'language identifier';
    \ No newline at end of file diff --git a/tests/data/tab-indented_code_block.html b/tests/data/tab-indented_code_block.html index d1b7a03..7c140de 100644 --- a/tests/data/tab-indented_code_block.html +++ b/tests/data/tab-indented_code_block.html @@ -1,8 +1,6 @@ -
    -<?php
    +
    <?php
     
     $message = 'Hello World!';
     echo $message;
     
    -echo "following a blank line";
    -
    \ No newline at end of file +echo "following a blank line";
    \ No newline at end of file diff --git a/tests/data/text_reference.html b/tests/data/text_reference.html index 26587f5..11e4d37 100644 --- a/tests/data/text_reference.html +++ b/tests/data/text_reference.html @@ -3,5 +3,6 @@

    [one][404] with no definition

    multiline one defined on 2 lines

    -

    one with an upper case label

    +

    one with a mixed case label and an upper case definition

    +

    one with the a label on the next line

    link

    \ No newline at end of file diff --git a/tests/data/text_reference.md b/tests/data/text_reference.md index 7768e30..1a66a5c 100644 --- a/tests/data/text_reference.md +++ b/tests/data/text_reference.md @@ -11,8 +11,11 @@ [multiline one][website] defined on 2 lines -[one][label] with an upper case label +[one][Label] with a mixed case label and an upper case definition [LABEL]: http://example.com +[one] +[1] with the a label on the next line + [`link`][website] \ No newline at end of file diff --git a/tests/data/whitespace.html b/tests/data/whitespace.html index db5428f..f2dd7a0 100644 --- a/tests/data/whitespace.html +++ b/tests/data/whitespace.html @@ -1,3 +1 @@ -
    -code
    -
    \ No newline at end of file +
    code
    \ No newline at end of file