From f8119fa3cb81d70e8425783f22f0e29015526ea4 Mon Sep 17 00:00:00 2001
From: Emanuil <4thmail@gmail.com>
Date: Tue, 24 Sep 2013 01:19:17 +0300
Subject: [PATCH] separate compiling from parsing
---
Parsedown.php | 476 +++++++++++++++++++++++++++++---------------------
1 file changed, 274 insertions(+), 202 deletions(-)
diff --git a/Parsedown.php b/Parsedown.php
index 68c7495..376c01d 100755
--- a/Parsedown.php
+++ b/Parsedown.php
@@ -89,10 +89,12 @@ class Parsedown
# ~
- $text = trim($text, "\n");
$text = preg_replace('/\n\s*\n/', "\n\n", $text);
+ $text = trim($text, "\n");
- $text = $this->parse_lines($text);
+ $lines = explode("\n", $text);
+
+ $text = $this->parse_block_elements($lines);
# Decodes escape sequences (leaves out backslashes).
@@ -110,270 +112,340 @@ class Parsedown
# Private Methods
#
- private function parse_lines($text, $context = null)
+ private function parse_block_elements(array $lines, $context = '')
{
- $lines = explode("\n", $text);
- $lines []= null;
+ $elements = array();
- $line_count = count($lines);
+ $element = array(
+ 'type' => '',
+ );
- $markup = '';
-
- foreach ($lines as $index => $line)
+ foreach ($lines as $line)
{
- # ~
+ # Empty
- if (isset($line) and $line !== '' and $line[0] >= 'A')
+ if ($line === '')
{
- $simple_line = $line;
+ $element['interrupted'] = true;
- unset($line);
- }
-
- # Setext Heading (-)
-
- if (isset($line) and $line !== '' and isset($paragraph) and preg_match('/^[-]+[ ]*$/', $line))
- {
- $setext_heading_text = $this->parse_inline_elements($paragraph);
-
- $markup .= '
'.$setext_heading_text.'
'."\n";
-
- unset($paragraph, $line);
+ $element['type'] === 'code' and $element['text'] .= "\n";
continue;
}
- # Rule
+ # Lazy Blockquote
- if (isset($line) and $line !== '' and preg_match('/^[ ]{0,3}([-*_])([ ]{0,2}\1){2,}[ ]*$/', $line))
+ if ($element['type'] === 'blockquote' and ! isset($element['interrupted']))
{
- $rule = true;
+ $line = preg_replace('/^[ ]*>[ ]?/', '', $line);
- unset($line);
- }
- elseif (isset($rule))
- {
- $markup .= '
'."\n";
+ $element['lines'] []= $line;
- unset($rule);
+ continue;
}
- # List
+ # Lazy List Item
- # Unlike other types, consequent lines of type "list items" may not
- # belong to the same block.
-
- if (isset($line) and $line !== '' and preg_match('/^([ ]{0,3})(\d+[.]|[*+-])[ ](.*)/', $line, $matches)) # list item
+ if ($element['type'] === 'li')
{
- $list_item_indentation = $matches[1];
- $list_item_type = ($matches[2] === '-' or $matches[2] === '+' or $matches[2] === '*')
- ? 'ul'
- : 'ol';
-
- if (isset($list_items)) # subsequent
+ if (preg_match('/^([ ]{0,3})(\d+[.]|[*+-])[ ](.*)/', $line, $matches))
{
- if ($list_item_indentation === $list_indentation and $list_item_type === $list_type)
+ if ($element['indentation'] !== $matches[1])
{
- $list_items []= $list_item;
-
- $list_item = $matches[3];
+ $element['lines'] []= $line;
}
else
{
- $list_item .= "\n".$line;
+ unset($element['last']);
+
+ $elements []= $element;
+
+ $element = array(
+ 'type' => 'li',
+ 'indentation' => $matches[1],
+ 'last' => true,
+ 'lines' => array(
+ preg_replace('/^[ ]{0,4}/', '', $matches[3]),
+ ),
+ );
}
- }
- else # first
- {
- $list_indentation = $list_item_indentation;
- $list_type = $list_item_type;
- $list_item = $matches[3];
-
- $list_items = array();
+ continue;
}
- unset($line);
- }
- elseif (isset($list_items)) # incomplete list item
- {
- if (isset($line) and ($line === '' or $line[0] === ' '))
+ if (isset($element['interrupted']))
{
- $line and $line = preg_replace('/^[ ]{0,4}/', '', $line);;
-
- $list_item .= "\n".$line;
-
- unset($line);
- }
- else # line is consumed or does not belong to the list item
- {
- $list_item = rtrim($list_item, "\n");
-
- $list_items []= $list_item;
-
- $markup .= '<'.$list_type.'>'."\n";
-
- foreach ($list_items as $list_item)
+ if ($line[0] === ' ')
{
- $list_item_text = strpos($list_item, "\n") !== false
- ? $this->parse_lines($list_item, 'li')
- : $this->parse_inline_elements($list_item);
+ $element['lines'] []= '';
- $markup .= ''.$list_item_text.''."\n";
+ $line = preg_replace('/^[ ]{0,4}/', '', $line);;
+
+ $element['lines'] []= $line;
+
+ continue;
}
-
- $markup .= ''.$list_type.'>'."\n";
-
- unset($list_items);
- }
- }
-
- # Code Block
-
- if (isset($line) and $line !== '' and preg_match('/^[ ]{4}(.*)/', $line, $matches))
- {
- if (isset($code_block))
- {
- $code_block .= "\n".$matches[1];
}
else
{
- $code_block = $matches[1];
- }
-
- unset($line);
- }
- elseif (isset($code_block))
- {
- if (isset($line) and $line === '')
- {
- $code_block .= "\n";
+ $line = preg_replace('/^[ ]{0,4}/', '', $line);;
+
+ $element['lines'] []= $line;
- # ยป continue;
- }
- else
- {
- $code_block = rtrim($code_block);
-
- $code_block_text = htmlentities($code_block, ENT_NOQUOTES);
-
- # Decodes encoded escape sequences if present.
- strpos($code_block_text, "\x1A\\") !== FALSE and $code_block_text = strtr($code_block_text, $this->escape_sequence_map);
-
- $markup .= ''.$code_block_text.'
'."\n";
-
- unset($code_block);
+ continue;
}
}
- # Blockquote
+ # Quick Paragraph
- if (isset($line) and $line !== '' and preg_match('/^[ ]*>[ ]?(.*)/', $line, $matches))
+ if ($line[0] >= 'A')
{
- if (isset($blockquote))
- {
- $blockquote .= "\n".$matches[1];
- }
- else
- {
- $blockquote = $matches[1];
- }
-
- unset($line);
- }
- elseif (isset($blockquote))
- {
- if (isset($line) and $line === '')
- {
- $blockquote .= "\n";
- }
- else
- {
- $blockquote = $this->parse_lines($blockquote);
-
- $markup .= ''."\n".$blockquote.'
'."\n";
-
- unset($blockquote);
- }
+ goto paragraph; # trust me
}
- # Atx Heading
+ # Setext Header (---)
- if (isset($line) and $line !== '' and $line[0] === '#' and preg_match('/^(#{1,6})[ ]*(.+?)[ ]*#*$/', $line, $matches))
+ if ($element['type'] === 'p' and preg_match('/^[-]+[ ]*$/', $line))
{
- $atx_heading_level = strlen($matches[1]);
-
- $atx_heading = $this->parse_inline_elements($matches[2]);
-
- unset($line);
- }
- elseif (isset($atx_heading))
- {
- $markup .= ''.$atx_heading.''."\n";
-
- unset($atx_heading);
- }
-
- # Setext Heading (=)
-
- if (isset($line) and $line !== '' and isset($paragraph) and preg_match('/^[=]+[ ]*$/', $line))
- {
- $setext_heading_text = $this->parse_inline_elements($paragraph);
-
- $markup .= ''.$setext_heading_text.'
'."\n";
-
- unset($paragraph, $line);
+ $element['type'] = 'h.';
+ $element['level'] = 2;
continue;
}
- # Paragraph
+ # Horizontal Rule
- if (isset($simple_line))
+ if (preg_match('/^[ ]{0,3}([-*_])([ ]{0,2}\1){2,}[ ]*$/', $line))
{
- $line = $simple_line;
+ $elements []= $element;
- unset($simple_line);
+ $element = array(
+ 'type' => 'hr',
+ );
+
+ continue;
}
- if (isset($line) and $line !== '')
+ # List Item
+
+ if (preg_match('/^([ ]{0,3})(\d+[.]|[*+-])[ ](.*)/', $line, $matches))
{
- substr($line, -2) === ' ' and $line = substr_replace($line, '
', -2);
-
- if (isset($paragraph))
- {
- $paragraph .= "\n".$line;
- }
- else
- {
- $paragraph = $line;
- }
- }
- elseif (isset($paragraph))
- {
- $paragraph_text = $this->parse_inline_elements($paragraph);
+ $elements []= $element;
- if ($context === 'li')
+ $element = array(
+ 'type' => 'li',
+ 'ordered' => isset($matches[2][1]),
+ 'indentation' => $matches[1],
+ 'last' => true,
+ 'lines' => array(
+ preg_replace('/^[ ]{0,4}/', '', $matches[3]),
+ ),
+ );
+
+ continue;
+ }
+
+ # Code
+
+ if (preg_match('/^[ ]{4}(.*)/', $line, $matches))
+ {
+ if ($element['type'] === 'code')
{
- if ( ! $markup and $index + 1 === $line_count)
+ $element['text'] .= "\n".$matches[1];
+ }
+ else
+ {
+ $elements []= $element;
+
+ $element = array(
+ 'type' => 'code',
+ 'text' => $matches[1],
+ );
+ }
+
+ continue;
+ }
+
+ # Atx Header (#)
+
+ if ($line[0] === '#' and preg_match('/^(#{1,6})[ ]*(.+?)[ ]*#*$/', $line, $matches))
+ {
+ $elements []= $element;
+
+ $level = strlen($matches[1]);
+
+ $element = array(
+ 'type' => 'h.',
+ 'text' => $matches[2],
+ 'level' => $level,
+ );
+
+ continue;
+ }
+
+ # Blockquote
+
+ if (preg_match('/^[ ]*>[ ]?(.*)/', $line, $matches))
+ {
+ if ($element['type'] === 'blockquote')
+ {
+ if (isset($element['interrupted']))
{
- $text_is_simple = true;
- }
- else
- {
- $markup or $markup .= "\n";
+ $element['lines'] []= '';
+
+ unset($element['interrupted']);
}
- $markup .= isset($text_is_simple)
- ? $paragraph_text
- : ''.$paragraph_text.'
'."\n";
+ $element['lines'] []= $matches[1];
}
- else
+ else
{
- $markup .= ''.$paragraph_text.'
'."\n";
+ $elements []= $element;
+
+ $element = array(
+ 'type' => 'blockquote',
+ 'lines' => array(
+ $matches[1],
+ ),
+ );
}
- unset($paragraph);
+ continue;
+ }
+
+ # Setext Header (===)
+
+ if ($element['type'] === 'p' and preg_match('/^[=]+[ ]*$/', $line))
+ {
+ $element['type'] = 'h.';
+ $element['level'] = 1;
+
+ continue;
+ }
+
+ # ~
+
+ paragraph:
+
+ if ($element['type'] === 'p')
+ {
+ if (isset($element['interrupted']))
+ {
+ $elements []= $element;
+
+ $element['text'] = $line;
+
+ unset($element['interrupted']);
+ }
+ else
+ {
+ $element['text'] .= "\n".$line;
+ }
+ }
+ else
+ {
+ $elements []= $element;
+
+ $element = array(
+ 'type' => 'p',
+ 'text' => $line,
+ );
+ }
+ }
+
+ $elements []= $element;
+
+ array_shift($elements);
+
+ #
+ # ~
+ #
+
+ $markup = '';
+
+ foreach ($elements as $index => $element)
+ {
+ switch ($element['type'])
+ {
+ case 'li':
+
+ if (isset($element['ordered'])) # first
+ {
+ $list_type = $element['ordered'] ? 'ol' : 'ul';
+
+ $markup .= '<'.$list_type.'>'."\n";
+ }
+
+ if (isset($element['interrupted']) and ! isset($element['last']))
+ {
+ $element['lines'] []= '';
+ }
+
+ $text = $this->parse_block_elements($element['lines'], 'li');
+
+ $markup .= ''.$text.''."\n";
+
+ isset($element['last']) and $markup .= ''.$list_type.'>'."\n";
+
+ break;
+
+ case 'p':
+
+ $text = $this->parse_inline_elements($element['text']);
+
+ $text = preg_replace('/[ ]{2}\n/', '
'."\n", $text);
+
+ if ($context === 'li' and $index === 0)
+ {
+ if (isset($element['interrupted']))
+ {
+ $markup .= "\n".''.$text.'
'."\n";
+ }
+ else
+ {
+ $markup .= $text;
+ }
+ }
+ else
+ {
+ $markup .= ''.$text.'
'."\n";
+ }
+
+ break;
+
+ case 'code':
+
+ $text = rtrim($element['text'], "\n");
+
+ $text = htmlentities($text, ENT_NOQUOTES);
+
+ strpos($text, "\x1A\\") !== FALSE and $text = strtr($text, $this->escape_sequence_map);
+
+ $markup .= ''.$text.'
'."\n";
+
+ break;
+
+ case 'blockquote':
+
+ $text = $this->parse_block_elements($element['lines']);
+
+ $markup .= ''."\n".$text.'
'."\n";
+
+ break;
+
+ case 'h.':
+
+ $text = $this->parse_inline_elements($element['text']);
+
+ $markup .= ''.$text.''."\n";
+
+ break;
+
+ case 'hr':
+
+ $markup .= '
'."\n";
+
+ break;
}
}