diff --git a/Parsedown.php b/Parsedown.php index f3f3e41..c0b8c5f 100755 --- a/Parsedown.php +++ b/Parsedown.php @@ -77,85 +77,114 @@ class Parsedown # split text into lines $lines = explode("\n", $text); - # convert lines into html - $text = $this->parse_block_elements($lines); + # iterate through lines to identify blocks + $blocks = $this->find_blocks($lines); - # remove trailing line breaks - $text = chop($text, "\n"); + # iterate through blocks to build markup + $markup = $this->compile($blocks); - return $text; + # trim line breaks + $markup = trim($markup, "\n"); + + return $markup; } # # Private - private function parse_block_elements(array $lines, $context = '') + private function compile(array $blocks) { - $blocks = array(); + $markup = ''; - $block = array( - 'type' => '', - ); - - foreach ($lines as $line) + foreach ($blocks as $block) { - # context + $markup .= "\n"; - switch ($block['type']) + if (isset($block['name'])) { - case 'fenced': + $markup .= '<'.$block['name']; - if ( ! isset($block['closed'])) + if (isset($block['attributes'])) + { + foreach ($block['attributes'] as $name => $value) { - if (preg_match('/^[ ]*'.$block['fence'][0].'{3,}[ ]*$/', $line)) - { - $block['closed'] = true; - } - else - { - if ($block['text'] !== '') - { - $block['text'] .= "\n"; - } - - $block['text'] .= $line; - } - - continue 2; + $markup .= ' '.$name.'="'.$value.'"'; } + } + + if ($block['content'] === null) + { + $markup .= ' />'; + + continue; + } + else + { + $markup .= '>'; + } + } + + switch ($block['content type']) + { + case 'markup': + + $markup .= $block['content']; break; - case 'markup': + case 'markdown': - if ( ! isset($block['closed'])) + $markup .= $this->parse_span_elements($block['content']); + + break; + + case 'markdown lines': + + $result = $this->find_blocks($block['content'], $block['name']); + + if (is_string($result)) # dense li { - if (stripos($line, $block['start']) !== false) # opening tag - { - $block['depth']++; - } + $markup .= $this->parse_span_elements($result); - if (stripos($line, $block['end']) !== false) # closing tag - { - if ($block['depth'] > 0) - { - $block['depth']--; - } - else - { - $block['closed'] = true; - } - } - - $block['text'] .= "\n".$line; - - continue 2; + break; } + $markup .= $this->compile($result); + + break; + + case 'blocks': + + $markup .= $this->compile($block['content']); + break; } - # ~ + if (isset($block['name'])) + { + $markup .= ''; + } + } + + $markup .= "\n"; + + return $markup; + } + + private function find_blocks(array $lines, $block_context = null) + { + $block_tree = array(); + + $blocks = & $block_tree; + + $block = null; + + $context = null; + $context_data = null; + + foreach ($lines as $line) + { + $indented_line = $line; $indentation = 0; @@ -164,117 +193,271 @@ class Parsedown $indentation++; } - $outdented_line = $indentation > 0 ? ltrim($line) : $line; - - # blank - - if ($outdented_line === '') + if ($indentation > 0) { - $block['interrupted'] = true; + $line = ltrim($line); + } + + # ~ + + switch ($context) + { + case null: + + $context_data = null; + + if ($line === '') + { + continue 2; + } + + break; + + # ~~~ javascript + # var message = 'Hello!'; + + case 'fenced code': + + if ($line === '') + { + $block['content'][0]['content'] .= "\n"; + + continue 2; + } + + if (preg_match('/^[ ]*'.$context_data['marker'].'{3,}[ ]*$/', $line)) + { + $context = null; + } + else + { + if ($block['content'][0]['content']) + { + $block['content'][0]['content'] .= "\n"; + } + + $string = htmlspecialchars($line, ENT_NOQUOTES, 'UTF-8'); + + $block['content'][0]['content'] .= $string; + } + + continue 2; + + case 'markup': + + if (stripos($line, $context_data['start']) !== false) # opening tag + { + $context_data['depth']++; + } + + if (stripos($line, $context_data['end']) !== false) # closing tag + { + if ($context_data['depth'] > 0) + { + $context_data['depth']--; + } + else + { + $context = null; + } + } + + $block['content'] .= "\n".$indented_line; + + continue 2; + + case 'li': + + if ($line === '') + { + $context_data['interrupted'] = true; + + continue 2; + } + + if ($context_data['indentation'] === $indentation and preg_match('/^'.$context_data['marker'].'[ ]+(.*)/', $line, $matches)) + { + if (isset($context_data['interrupted'])) + { + $nested_block['content'] []= ''; + + unset($context_data['interrupted']); + } + + unset($nested_block); + + $nested_block = array( + 'name' => 'li', + 'content type' => 'markdown lines', + 'content' => array( + $matches[1], + ), + ); + + $block['content'] []= & $nested_block; + + continue 2; + } + + if (empty($context_data['interrupted'])) + { + $value = $line; + + if ($indentation > $context_data['baseline']) + { + $value = str_repeat(' ', $indentation - $context_data['baseline']) . $value; + } + + $nested_block['content'] []= $value; + + continue 2; + } + + if ($indentation > 0) + { + $nested_block['content'] []= ''; + + $value = $line; + + if ($indentation > $context_data['baseline']) + { + $value = str_repeat(' ', $indentation - $context_data['baseline']) . $value; + } + + $nested_block['content'] []= $value; + + unset($context_data['interrupted']); + + continue 2; + } + + $context = null; + + break; + + case 'quote': + + if ($line === '') + { + $context_data['interrupted'] = true; + + continue 2; + } + + if (preg_match('/^>[ ]?(.*)/', $line, $matches)) + { + $block['content'] []= $matches[1]; + + continue 2; + } + + if (empty($context_data['interrupted'])) + { + $block['content'] []= $line; + + continue 2; + } + + $context = null; + + break; + + case 'code': + + if ($line === '') + { + $context_data['interrupted'] = true; + + continue 2; + } + + if ($indentation >= 4) + { + if (isset($context_data['interrupted'])) + { + $block['content'][0]['content'] .= "\n"; + + unset($context_data['interrupted']); + } + + $block['content'][0]['content'] .= "\n"; + + $string = htmlspecialchars($line, ENT_NOQUOTES, 'UTF-8'); + $string = str_repeat(' ', $indentation - 4) . $string; + + $block['content'][0]['content'] .= $string; + + continue 2; + } + + $context = null; + + break; + + case 'paragraph': + + if ($line === '') + { + $block['name'] = 'p'; # dense li + + $context = null; + + continue 2; + } + + if ($line[0] === '=' and chop($line, '=') === '') + { + $block['name'] = 'h1'; + + $context = null; + + continue 2; + } + + if ($line[0] === '-' and chop($line, '-') === '') + { + $block['name'] = 'h2'; + + $context = null; + + continue 2; + } + + break; + + default: + + throw new Exception('Unrecognized context - '.$context); + } + + if ($indentation >= 4) + { + $blocks []= $block; + + $string = htmlspecialchars($line, ENT_NOQUOTES, 'UTF-8'); + $string = str_repeat(' ', $indentation - 4) . $string; + + $block = array( + 'name' => 'pre', + 'content type' => 'blocks', + 'content' => array( + array( + 'name' => 'code', + 'content type' => 'markup', + 'content' => $string, + ), + ), + ); + + $context = 'code'; continue; } - # context - - switch ($block['type']) - { - case 'quote': - - if ( ! isset($block['interrupted'])) - { - $line = preg_replace('/^[ ]*>[ ]?/', '', $line); - - $block['lines'] []= $line; - - continue 2; - } - - break; - - case 'li': - - if ($block['indentation'] === $indentation and preg_match('/^'.$block['marker'].'[ ]+(.*)/', $outdented_line, $matches)) - { - unset($block['last']); - - $blocks []= $block; - - $block['last'] = true; - $block['lines'] = array($matches[1]); - - unset($block['first']); - unset($block['interrupted']); - - continue 2; - } - - if ( ! isset($block['interrupted'])) - { - $line = preg_replace('/^[ ]{0,'.$block['baseline'].'}/', '', $line); - - $block['lines'] []= $line; - - continue 2; - } - elseif ($line[0] === ' ') - { - $block['lines'] []= ''; - - $line = preg_replace('/^[ ]{0,'.$block['baseline'].'}/', '', $line); - - $block['lines'] []= $line; - - unset($block['interrupted']); - - continue 2; - } - - break; - } - - # indentation sensitive types - switch ($line[0]) { - case ' ': - - # code - - if ($indentation >= 4) - { - $code_line = substr($line, 4); - - if ($block['type'] === 'code') - { - if (isset($block['interrupted'])) - { - $block['text'] .= "\n"; - - unset($block['interrupted']); - } - - $block['text'] .= "\n".$code_line; - } - else - { - $blocks []= $block; - - $block = array( - 'type' => 'code', - 'text' => $code_line, - ); - } - - continue 2; - } - - break; - case '#': - # atx heading (#) - if (isset($line[1])) { $blocks []= $block; @@ -286,59 +469,29 @@ class Parsedown $level++; } + $string = trim($line, '# '); + $string = $this->parse_span_elements($string); + $block = array( - 'type' => 'heading', - 'text' => trim($line, '# '), - 'level' => $level, + 'name' => 'h'.$level, + 'content type' => 'markdown', + 'content' => $string, ); - continue 2; - } - - break; - - case '-': - case '=': - - # setext heading (===) - - if ($block['type'] === 'paragraph' and isset($block['interrupted']) === false) - { - $chopped_line = chop($line); - - $i = 1; - - while (isset($chopped_line[$i])) - { - if ($chopped_line[$i] !== $line[0]) - { - break 2; - } - - $i++; - } - - $block['type'] = 'heading'; - - $block['level'] = $line[0] === '-' ? 2 : 1; + $context = null; continue 2; } break; - } - # indentation insensitive types - - switch ($outdented_line[0]) - { case '<': - $position = strpos($outdented_line, '>'); + $position = strpos($line, '>'); if ($position > 1) { - $substring = substr($outdented_line, 1, $position - 1); + $substring = substr($line, 1, $position - 1); $substring = chop($substring); @@ -359,13 +512,15 @@ class Parsedown { $name = $substring; } + $name = strtolower($name); - // hr,h1,h2,h3,h4,h5,h6 cases - if ($name[0] == 'h' and strpos('r123456', $name[1]) !== false) + if ($name[0] == 'h' and strpos('r123456', $name[1]) !== false) # hr, h1, h2, ... { if ($name == 'hr') - $is_self_closing = 1; + { + $is_self_closing = true; + } } elseif ( ! ctype_alpha($name)) { @@ -379,29 +534,29 @@ class Parsedown $blocks []= $block; + $block = array( + 'name' => null, + 'content type' => 'markup', + 'content' => $indented_line, + ); + if (isset($is_self_closing)) { - $block = array( - 'type' => 'self-closing tag', - 'text' => $outdented_line, - ); - unset($is_self_closing); continue 2; } - $block = array( - 'type' => 'markup', - 'text' => $outdented_line, + $context = 'markup'; + $context_data = array( 'start' => '<'.$name.'>', 'end' => '', 'depth' => 0, ); - if (stripos($outdented_line, $block['end'])) + if (stripos($line, $context_data['end']) !== false) { - $block['closed'] = true; + $context = null; } continue 2; @@ -411,19 +566,21 @@ class Parsedown case '>': - # quote - - if (preg_match('/^>[ ]?(.*)/', $outdented_line, $matches)) + if (preg_match('/^>[ ]?(.*)/', $line, $matches)) { $blocks []= $block; $block = array( - 'type' => 'quote', - 'lines' => array( + 'name' => 'blockquote', + 'content type' => 'markdown lines', + 'content' => array( $matches[1], ), ); + $context = 'quote'; + $context_data = array(); + continue 2; } @@ -431,18 +588,16 @@ class Parsedown case '[': - # reference - - $position = strpos($outdented_line, ']:'); + $position = strpos($line, ']:'); if ($position) { $reference = array(); - $label = substr($outdented_line, 1, $position - 1); + $label = substr($line, 1, $position - 1); $label = strtolower($label); - $substring = substr($outdented_line, $position + 2); + $substring = substr($line, $position + 2); $substring = trim($substring); if ($substring === '') @@ -508,251 +663,135 @@ class Parsedown case '`': case '~': - # fenced code block - - if (preg_match('/^([`]{3,}|[~]{3,})[ ]*(\S+)?[ ]*$/', $outdented_line, $matches)) + if (preg_match('/^([`]{3,}|[~]{3,})[ ]*(\w+)?[ ]*$/', $line, $matches)) { $blocks []= $block; $block = array( - 'type' => 'fenced', - 'text' => '', - 'fence' => $matches[1], + 'name' => 'pre', + 'content type' => 'blocks', + 'content' => array( + array( + 'name' => 'code', + 'content type' => 'markup', + 'content' => '', + ), + ), ); if (isset($matches[2])) { - $block['language'] = $matches[2]; + $block['content'][0]['attributes'] = array( + 'class' => 'language-'.$matches[2], + ); } + $context = 'fenced code'; + $context_data = array( + 'marker' => $matches[1][0], + ); + continue 2; } break; - case '*': - case '+': case '-': + case '*': case '_': - # hr - - if (preg_match('/^([-*_])([ ]{0,2}\1){2,}[ ]*$/', $outdented_line)) + if (preg_match('/^([-*_])([ ]{0,2}\1){2,}[ ]*$/', $line)) { $blocks []= $block; $block = array( - 'type' => 'rule', + 'name' => 'hr', + 'content' => null, ); continue 2; } - - # li - - if (preg_match('/^([*+-][ ]+)(.*)/', $outdented_line, $matches)) - { - $blocks []= $block; - - $baseline = $indentation + strlen($matches[1]); - - $block = array( - 'type' => 'li', - 'indentation' => $indentation, - 'baseline' => $baseline, - 'marker' => '[*+-]', - 'first' => true, - 'last' => true, - 'lines' => array(), - ); - - $block['lines'] []= preg_replace('/^[ ]{0,4}/', '', $matches[2]); - - continue 2; - } } - # li - - if ($outdented_line[0] <= '9' and preg_match('/^(\d+[.][ ]+)(.*)/', $outdented_line, $matches)) + switch (true) { - $blocks []= $block; + case $line[0] <= '-' and preg_match('/^([*+-][ ]+)(.*)/', $line, $matches): + case $line[0] <= '9' and preg_match('/^([0-9]+[.][ ]+)(.*)/', $line, $matches): - $baseline = $indentation + strlen($matches[1]); - - $block = array( - 'type' => 'li', - 'indentation' => $indentation, - 'baseline' => $baseline, - 'marker' => '\d+[.]', - 'first' => true, - 'last' => true, - 'ordered' => true, - 'lines' => array(), - ); - - $block['lines'] []= preg_replace('/^[ ]{0,4}/', '', $matches[2]); - - continue; - } - - # paragraph - - if ($block['type'] === 'paragraph') - { - if (isset($block['interrupted'])) - { $blocks []= $block; - $block['text'] = $line; + $name = $line[0] >= '0' ? 'ol' : 'ul'; - unset($block['interrupted']); - } - else - { - if ($this->breaks_enabled) - { - $block['text'] .= ' '; - } + $block = array( + 'name' => $name, + 'content type' => 'blocks', + 'content' => array(), + ); - $block['text'] .= "\n".$line; - } + unset($nested_block); + + $nested_block = array( + 'name' => 'li', + 'content type' => 'markdown lines', + 'content' => array( + $matches[2], + ), + ); + + $block['content'] []= & $nested_block; + + $baseline = $indentation + strlen($matches[1]); + + $marker = $line[0] >= '0' ? '[0-9]+[.]' : '[*+-]'; + + $context = 'li'; + $context_data = array( + 'indentation' => $indentation, + 'baseline' => $baseline, + 'marker' => $marker, + 'lines' => array( + $matches[2], + ), + ); + + continue 2; + } + + if ($context === 'paragraph') + { + $block['content'] .= "\n".$line; + + continue; } else { $blocks []= $block; $block = array( - 'type' => 'paragraph', - 'text' => $line, + 'name' => 'p', + 'content type' => 'markdown', + 'content' => $line, ); + + if ($block_context === 'li' and empty($blocks[1])) + { + $block['name'] = null; + } + + $context = 'paragraph'; } } + if ($block_context === 'li' and $block['name'] === null) + { + return $block['content']; + } + $blocks []= $block; unset($blocks[0]); - # $blocks ยป HTML - - $markup = ''; - - foreach ($blocks as $block) - { - switch ($block['type']) - { - case 'paragraph': - - $text = $this->parse_span_elements($block['text']); - - if ($context === 'li' and $markup === '') - { - if (isset($block['interrupted'])) - { - $markup .= "\n".'

'.$text.'

'."\n"; - } - else - { - $markup .= $text; - - if (isset($blocks[2])) - { - $markup .= "\n"; - } - } - } - else - { - $markup .= '

'.$text.'

'."\n"; - } - - break; - - case 'quote': - - $text = $this->parse_block_elements($block['lines']); - - $markup .= '
'."\n".$text.'
'."\n"; - - break; - - case 'code': - - $text = htmlspecialchars($block['text'], ENT_NOQUOTES, 'UTF-8'); - - $markup .= '
'.$text.'
'."\n"; - - break; - - case 'fenced': - - $text = htmlspecialchars($block['text'], ENT_NOQUOTES, 'UTF-8'); - - $markup .= '
'."\n"; - - break; - - case 'heading': - - $text = $this->parse_span_elements($block['text']); - - $markup .= ''.$text.''."\n"; - - break; - - case 'rule': - - $markup .= '
'."\n"; - - break; - - case 'li': - - if (isset($block['first'])) - { - $type = isset($block['ordered']) ? 'ol' : 'ul'; - - $markup .= '<'.$type.'>'."\n"; - } - - if (isset($block['interrupted']) and ! isset($block['last'])) - { - $block['lines'] []= ''; - } - - $text = $this->parse_block_elements($block['lines'], 'li'); - - $markup .= '
  • '.$text.'
  • '."\n"; - - if (isset($block['last'])) - { - $type = isset($block['ordered']) ? 'ol' : 'ul'; - - $markup .= ''."\n"; - } - - break; - - case 'markup': - - $markup .= $block['text']."\n"; - - break; - - default: - - $markup .= $block['text']."\n"; - } - } - - return $markup; + return $blocks; } private function parse_span_elements($text, $markers = array(" \n", '![', '&', '*', '<', '[', '\\', '_', '`', 'http', '~~')) diff --git a/tests/data/code_block.html b/tests/data/code_block.html index 20f8de4..27b3760 100644 --- a/tests/data/code_block.html +++ b/tests/data/code_block.html @@ -1,8 +1,12 @@ -
    <?php
    +
    +<?php
     
    -$message = 'Hello World!'; 
    -echo $message;
    +$message = 'Hello World!'; +echo $message;
    +

    -
    > not a quote 
    +
    +> not a quote
     - not a list item
    -[not a reference]: http://foo.com
    \ No newline at end of file +[not a reference]: http://foo.com
    +
    \ No newline at end of file diff --git a/tests/data/code_block.md b/tests/data/code_block.md index 59104cb..9eb0fd9 100644 --- a/tests/data/code_block.md +++ b/tests/data/code_block.md @@ -1,11 +1,10 @@ not a quote + > not a quote - not a list item - [not a reference]: http://foo.com - \ No newline at end of file + [not a reference]: http://foo.com \ No newline at end of file diff --git a/tests/data/deeply_nested_list.html b/tests/data/deeply_nested_list.html index 0163d65..245f57b 100644 --- a/tests/data/deeply_nested_list.html +++ b/tests/data/deeply_nested_list.html @@ -1,13 +1,11 @@