improve parsing of emphasis

improve comments
parse code span after recursive types to resolve #44
2023-08-10 21:13:06 +03:00 · 2013-12-07 17:21:36 +02:00 · 2013-12-07 10:54:05 +02:00 · 2013-12-06 01:43:55 +02:00 · 2013-12-06 00:45:26 +02:00 · 2013-12-06 00:29:51 +02:00
22 changed files with 585 additions and 394 deletions
--- a/.travis.yml
+++ b/.travis.yml
@@ -5,7 +5,3 @@ php:
  - 5.4
  - 5.3
  - 5.2
-
-matrix:
-  allow_failures:
-    - php: 5.2
--- a/Parsedown.php
+++ b/Parsedown.php
@@ -46,17 +46,17 @@ class Parsedown

 	function parse($text)
 	{
-		# Removes UTF-8 BOM and marker characters.
+		# removes UTF-8 BOM and marker characters
 		$text = preg_replace('{^\xEF\xBB\xBF|\x1A}', '', $text);

-		# Removes \r characters.
+		# removes \r characters
 		$text = str_replace("\r\n", "\n", $text);
 		$text = str_replace("\r", "\n", $text);

-		# Replaces tabs with spaces.
+		# replaces tabs with spaces
 		$text = str_replace("\t", '    ', $text);

-		# Encodes escape sequences.
+		# encodes escape sequences

 		if (strpos($text, '\\') !== FALSE)
 		{
@@ -84,7 +84,7 @@ class Parsedown

 		$text = $this->parse_block_elements($lines);

-		# Decodes escape sequences (leaves out backslashes).
+		# decodes escape sequences

 		foreach ($this->escape_sequence_map as $code => $escape_sequence)
 		{
@@ -110,16 +110,40 @@ class Parsedown

 		foreach ($lines as $line)
 		{
-			# Block-Level HTML
+			# fenced elements

-			if ($element['type'] === 'block' and ! isset($element['closed']))
+			switch ($element['type'])
 			{
-				if (preg_match('{<'.$element['subtype'].'>$}', $line)) # <open>
+				case 'fenced_code_block':
+
+					if ( ! isset($element['closed']))
+					{
+						if (preg_match('/^[ ]*'.$element['fence'][0].'{3,}[ ]*$/', $line))
+						{
+							$element['closed'] = true;
+						}
+						else
+						{
+							$element['text'] !== '' and $element['text'] .= "\n";
+
+							$element['text'] .= $line;
+						}
+
+						continue 2;
+					}
+
+					break;
+
+				case 'markup':
+
+					if ( ! isset($element['closed']))
+					{
+						if (preg_match('{<'.$element['subtype'].'>$}', $line)) # opening tag
 						{
 							$element['depth']++;
 						}

-				if (preg_match('{</'.$element['subtype'].'>$}', $line)) # </close>
+						if (preg_match('{</'.$element['subtype'].'>$}', $line)) # closing tag
 						{
 							$element['depth'] > 0
 								? $element['depth']--
@@ -128,10 +152,13 @@ class Parsedown

 						$element['text'] .= "\n".$line;

-				continue;
+						continue 2;
 					}

-			# Empty
+					break;
+			}
+
+			# *

 			if ($line === '')
 			{
@@ -140,21 +167,25 @@ class Parsedown
 				continue;
 			}

-			# Lazy Blockquote
+			# composite elements

-			if ($element['type'] === 'blockquote' and ! isset($element['interrupted']))
+			switch ($element['type'])
+			{
+				case 'blockquote':
+
+					if ( ! isset($element['interrupted']))
 					{
 						$line = preg_replace('/^[ ]*>[ ]?/', '', $line);

 						$element['lines'] []= $line;

-				continue;
+						continue 2;
 					}

-			# Lazy List Item
+					break;
+
+				case 'li':

-			if ($element['type'] === 'li')
-			{
 					if (preg_match('/^([ ]{0,3})(\d+[.]|[*+-])[ ](.*)/', $line, $matches))
 					{
 						if ($element['indentation'] !== $matches[1])
@@ -177,7 +208,7 @@ class Parsedown
 							);
 						}

-					continue;
+						continue 2;
 					}

 					if (isset($element['interrupted']))
@@ -186,40 +217,49 @@ class Parsedown
 						{
 							$element['lines'] []= '';

-						$line = preg_replace('/^[ ]{0,4}/', '', $line);;
+							$line = preg_replace('/^[ ]{0,4}/', '', $line);

 							$element['lines'] []= $line;

-						continue;
+							unset($element['interrupted']);
+
+							continue 2;
 						}
 					}
 					else
 					{
-					$line = preg_replace('/^[ ]{0,4}/', '', $line);;
+						$line = preg_replace('/^[ ]{0,4}/', '', $line);

 						$element['lines'] []= $line;

-					continue;
-				}
+						continue 2;
 					}

-			# Quick Paragraph
+					break;
+			}

-			if ($line[0] >= 'a' or $line[0] >= 'A' and $line[0] <= 'Z')
+			# indentation sensitive types
+
+			$deindented_line = $line;
+
+			switch ($line[0])
 			{
-				goto paragraph;
+				case ' ':
+
+					# ~
+
+					$deindented_line = ltrim($line);
+
+					if ($deindented_line === '')
+					{
+						continue 2;
 					}

-			# Code Block
+					# code block

-			if ($line[0] === ' ' and preg_match('/^[ ]{4}(.*)/', $line, $matches))
+					if (preg_match('/^[ ]{4}(.*)/', $line, $matches))
 					{
-				if (trim($line) === '')
-				{
-					continue;
-				}
-
-				if ($element['type'] === 'code')
+						if ($element['type'] === 'code_block')
 						{
 							if (isset($element['interrupted']))
 							{
@@ -235,27 +275,21 @@ class Parsedown
 							$elements []= $element;

 							$element = array(
-						'type' => 'code',
+								'type' => 'code_block',
 								'text' => $matches[1],
 							);
 						}

-				continue;
+						continue 2;
 					}

-			# Setext Header (---)
+					break;

-			if ($line[0] === '-' and $element['type'] === 'p' and ! isset($element['interrupted']) and preg_match('/^[-]+[ ]*$/', $line))
-			{
-				$element['type'] = 'h.';
-				$element['level'] = 2;
+				case '#':

-				continue;
-			}
+					# atx heading (#)

-			# Atx Header (#)
-
-			if ($line[0] === '#' and preg_match('/^(#{1,6})[ ]*(.+?)[ ]*#*$/', $line, $matches))
+					if (preg_match('/^(#{1,6})[ ]*(.+?)[ ]*#*$/', $line, $matches))
 					{
 						$elements []= $element;

@@ -267,56 +301,85 @@ class Parsedown
 							'level' => $level,
 						);

-				continue;
+						continue 2;
 					}

-			# Setext Header (===)
+					break;
+
+				case '-':
+
+					# setext heading (---)
+
+					if ($line[0] === '-' and $element['type'] === 'p' and ! isset($element['interrupted']) and preg_match('/^[-]+[ ]*$/', $line))
+					{
+						$element['type'] = 'h.';
+						$element['level'] = 2;
+
+						continue 2;
+					}
+
+					break;
+
+				case '=':
+
+					# setext heading (===)

 					if ($line[0] === '=' and $element['type'] === 'p' and ! isset($element['interrupted']) and preg_match('/^[=]+[ ]*$/', $line))
 					{
 						$element['type'] = 'h.';
 						$element['level'] = 1;

-				continue;
+						continue 2;
 					}

-			# ~
-
-			$pure_line = $line[0] !== ' ' ? $line : ltrim($line);
-
-			if ($pure_line === '')
-			{
-				continue;
+					break;
 			}

-			# Link Reference
+			# indentation insensitive types

-			if ($pure_line[0] === '[' and preg_match('/^\[(.+?)\]:[ ]*([^ ]+)/', $pure_line, $matches))
+			switch ($deindented_line[0])
 			{
-				$label = strtolower($matches[1]);
-				$url = trim($matches[2], '<>');
+				case '<':

-				$this->reference_map[$label] = $url;
+					# self-closing tag

-				continue;
+					if (preg_match('{^<.+?/>$}', $deindented_line))
+					{
+						$elements []= $element;
+
+						$element = array(
+							'type' => '',
+							'text' => $deindented_line,
+						);
+
+						continue 2;
 					}

-			# Blockquote
+					# opening tag

-			if ($pure_line[0] === '>' and preg_match('/^>[ ]?(.*)/', $pure_line, $matches))
+					if (preg_match('{^<(\w+)(?:[ ].*?)?>}', $deindented_line, $matches))
 					{
-				if ($element['type'] === 'blockquote')
-				{
-					if (isset($element['interrupted']))
-					{
-						$element['lines'] []= '';
+						$elements []= $element;

-						unset($element['interrupted']);
+						$element = array(
+							'type' => 'markup',
+							'subtype' => strtolower($matches[1]),
+							'text' => $deindented_line,
+							'depth' => 0,
+						);
+
+						preg_match('{</'.$matches[1].'>\s*$}', $deindented_line) and $element['closed'] = true;
+
+						continue 2;
 					}

-					$element['lines'] []= $matches[1];
-				}
-				else
+					break;
+
+				case '>':
+
+					# quote
+
+					if (preg_match('/^>[ ]?(.*)/', $deindented_line, $matches))
 					{
 						$elements []= $element;

@@ -326,51 +389,57 @@ class Parsedown
 								$matches[1],
 							),
 						);
+
+						continue 2;
 					}

-				continue;
-			}
+					break;

-			# HTML
+				case '[':

-			if ($pure_line[0] === '<')
+					# reference
+
+					if (preg_match('/^\[(.+?)\]:[ ]*([^ ]+)/', $deindented_line, $matches))
 					{
-				# Block-Level HTML <self-closing/>
+						$label = strtolower($matches[1]);

-				if (preg_match('{^<.+?/>$}', $pure_line))
+						$this->reference_map[$label] = trim($matches[2], '<>');;
+
+						continue 2;
+					}
+
+					break;
+
+				case '`':
+				case '~':
+
+					# fenced code block
+
+					if (preg_match('/^([`]{3,}|[~]{3,})[ ]*(\S+)?[ ]*$/', $deindented_line, $matches))
 					{
 						$elements []= $element;

 						$element = array(
-						'type' => '',
-						'text' => $pure_line,
+							'type' => 'fenced_code_block',
+							'text' => '',
+							'fence' => $matches[1],
 						);

-					continue;
+						isset($matches[2]) and $element['language'] = $matches[2];
+
+						continue 2;
 					}

-				# Block-Level HTML <open>
+					break;

-				if (preg_match('{^<(\w+)(?:[ ].*?)?>}', $pure_line, $matches))
-				{
-					$elements []= $element;
+				case '*':
+				case '+':
+				case '-':
+				case '_':

-					$element = array(
-						'type' => 'block',
-						'subtype' => strtolower($matches[1]),
-						'text' => $pure_line,
-						'depth' => 0,
-					);
+					# hr

-					preg_match('{</'.$matches[1].'>\s*$}', $pure_line) and $element['closed'] = true;
-
-					continue;
-				}
-			}
-
-			# Horizontal Rule
-
-			if (preg_match('/^([-*_])([ ]{0,2}\1){2,}[ ]*$/', $pure_line))
+					if (preg_match('/^([-*_])([ ]{0,2}\1){2,}[ ]*$/', $deindented_line))
 					{
 						$elements []= $element;

@@ -378,31 +447,49 @@ class Parsedown
 							'type' => 'hr',
 						);

-				continue;
+						continue 2;
 					}

-			# List Item
+					# li

-			if (preg_match('/^([ ]*)(\d+[.]|[*+-])[ ](.*)/', $line, $matches))
+					if (preg_match('/^([ ]*)[*+-][ ](.*)/', $line, $matches))
 					{
 						$elements []= $element;

 						$element = array(
 							'type' => 'li',
-					'ordered' => isset($matches[2][1]),
+							'ordered' => false,
 							'indentation' => $matches[1],
 							'last' => true,
 							'lines' => array(
-						preg_replace('/^[ ]{0,4}/', '', $matches[3]),
+								preg_replace('/^[ ]{0,4}/', '', $matches[2]),
+							),
+						);
+
+						continue 2;
+					}
+			}
+
+			# li
+
+			if ($deindented_line[0] <= '9' and $deindented_line >= '0' and preg_match('/^([ ]*)\d+[.][ ](.*)/', $line, $matches))
+			{
+				$elements []= $element;
+
+				$element = array(
+					'type' => 'li',
+					'ordered' => true,
+					'indentation' => $matches[1],
+					'last' => true,
+					'lines' => array(
+						preg_replace('/^[ ]{0,4}/', '', $matches[2]),
 					),
 				);

 				continue;
 			}

-			# ~
-
-			paragraph:
+			# paragraph

 			if ($element['type'] === 'p')
 			{
@@ -432,7 +519,7 @@ class Parsedown

 		$elements []= $element;

-		array_shift($elements);
+		unset($elements[0]);

 		#
 		# ~
@@ -440,10 +527,71 @@ class Parsedown

 		$markup = '';

-		foreach ($elements as $index => $element)
+		foreach ($elements as $element)
 		{
 			switch ($element['type'])
 			{
+				case 'p':
+
+					$text = $this->parse_span_elements($element['text']);
+
+					$text = preg_replace('/[ ]{2}\n/', '<br />'."\n", $text);
+
+					if ($context === 'li' and $markup === '')
+					{
+						if (isset($element['interrupted']))
+						{
+							$markup .= "\n".'<p>'.$text.'</p>'."\n";
+						}
+						else
+						{
+							$markup .= $text;
+						}
+					}
+					else
+					{
+						$markup .= '<p>'.$text.'</p>'."\n";
+					}
+
+					break;
+
+				case 'blockquote':
+
+					$text = $this->parse_block_elements($element['lines']);
+
+					$markup .= '<blockquote>'."\n".$text.'</blockquote>'."\n";
+
+					break;
+
+				case 'code_block':
+				case 'fenced_code_block':
+
+					$text = htmlspecialchars($element['text'], ENT_NOQUOTES, 'UTF-8');
+
+					strpos($text, "\x1A\\") !== FALSE and $text = strtr($text, $this->escape_sequence_map);
+
+					$markup .= isset($element['language'])
+						? '<pre><code class="language-'.$element['language'].'">'.$text.'</code></pre>'
+						: '<pre><code>'.$text.'</code></pre>';
+
+					$markup .= "\n";
+
+					break;
+
+				case 'h.':
+
+					$text = $this->parse_span_elements($element['text']);
+
+					$markup .= '<h'.$element['level'].'>'.$text.'</h'.$element['level'].'>'."\n";
+
+					break;
+
+				case 'hr':
+
+					$markup .= '<hr />'."\n";
+
+					break;
+
 				case 'li':

 					if (isset($element['ordered'])) # first
@@ -466,62 +614,6 @@ class Parsedown

 					break;

-				case 'p':
-
-					$text = $this->parse_inline_elements($element['text']);
-
-					$text = preg_replace('/[ ]{2}\n/', '<br />'."\n", $text);
-
-					if ($context === 'li' and $index === 0)
-					{
-						if (isset($element['interrupted']))
-						{
-							$markup .= "\n".'<p>'.$text.'</p>'."\n";
-						}
-						else
-						{
-							$markup .= $text;
-						}
-					}
-					else
-					{
-						$markup .= '<p>'.$text.'</p>'."\n";
-					}
-
-					break;
-
-				case 'code':
-
-					$text = htmlentities($element['text'], ENT_NOQUOTES);
-
-					strpos($text, "\x1A\\") !== FALSE and $text = strtr($text, $this->escape_sequence_map);
-
-					$markup .= '<pre><code>'.$text.'</code></pre>'."\n";
-
-					break;
-
-				case 'blockquote':
-
-					$text = $this->parse_block_elements($element['lines']);
-
-					$markup .= '<blockquote>'."\n".$text.'</blockquote>'."\n";
-
-					break;
-
-				case 'h.':
-
-					$text = $this->parse_inline_elements($element['text']);
-
-					$markup .= '<h'.$element['level'].'>'.$text.'</h'.$element['level'].'>'."\n";
-
-					break;
-
-				case 'hr':
-
-					$markup .= '<hr />'."\n";
-
-					break;
-
 				default:

 					$markup .= $element['text']."\n";
@@ -531,46 +623,15 @@ class Parsedown
 		return $markup;
 	}

-	private function parse_inline_elements($text)
+	private function parse_span_elements($text)
 	{
 		$map = array();

 		$index = 0;

-		# Code Span
+		# inline link / inline image (recursive)

-		if (strpos($text, '`') !== FALSE and preg_match_all('/`(.+?)`/', $text, $matches, PREG_SET_ORDER))
-		{
-			foreach ($matches as $matches)
-			{
-				$element_text = $matches[1];
-				$element_text = htmlentities($element_text, ENT_NOQUOTES);
-
-				# Decodes escape sequences.
-
-				$this->escape_sequence_map
-					and strpos($element_text, "\x1A") !== FALSE
-					and $element_text = strtr($element_text, $this->escape_sequence_map);
-
-				# Composes element.
-
-				$element = '<code>'.$element_text.'</code>';
-
-				# Encodes element.
-
-				$code = "\x1A".'$'.$index;
-
-				$text = str_replace($matches[0], $code, $text);
-
-				$map[$code] = $element;
-
-				$index ++;
-			}
-		}
-
-		# Inline Link / Image
-
-		if (strpos($text, '](') !== FALSE and preg_match_all('/(!?)(\[((?:[^\[\]]|(?2))*)\])\((.*?)\)/', $text, $matches, PREG_SET_ORDER)) # inline
+		if (strpos($text, '](') !== FALSE and preg_match_all('/(!?)(\[((?:[^\[\]]|(?2))*)\])\((.*?)\)/', $text, $matches, PREG_SET_ORDER))
 		{
 			foreach ($matches as $matches)
 			{
@@ -582,9 +643,9 @@ class Parsedown
 				{
 					$element = '<img alt="'.$matches[3].'" src="'.$url.'">';
 				}
-				else
+				else # link
 				{
-					$element_text = $this->parse_inline_elements($matches[3]);
+					$element_text = $this->parse_span_elements($matches[3]);

 					$element = '<a href="'.$url.'">'.$element_text.'</a>';
 				}
@@ -601,7 +662,7 @@ class Parsedown
 			}
 		}

-		# Reference(d) Link / Image
+		# reference link / reference image (recursive)

 		if ($this->reference_map and strpos($text, '[') !== FALSE and preg_match_all('/(!?)\[(.+?)\](?:\n?[ ]?\[(.*?)\])?/ms', $text, $matches, PREG_SET_ORDER))
 		{
@@ -623,9 +684,9 @@ class Parsedown
 					{
 						$element = '<img alt="'.$matches[2].'" src="'.$url.'">';
 					}
-					else # anchor
+					else # link
 					{
-						$element_text = $this->parse_inline_elements($matches[2]);
+						$element_text = $this->parse_span_elements($matches[2]);

 						$element = '<a href="'.$url.'">'.$element_text.'</a>';
 					}
@@ -643,10 +704,46 @@ class Parsedown
 			}
 		}

-		# Automatic Links
+		# code span

-		if (strpos($text, '<') !== FALSE and preg_match_all('/<((https?|ftp|dict):[^\^\s]+?)>/i', $text, $matches, PREG_SET_ORDER))
+		if (strpos($text, '`') !== FALSE and preg_match_all('/`(.+?)`/', $text, $matches, PREG_SET_ORDER))
 		{
+			foreach ($matches as $matches)
+			{
+				$element_text = $matches[1];
+				$element_text = htmlspecialchars($element_text, ENT_NOQUOTES, 'UTF-8');
+
+				# decodes escape sequences
+
+				$this->escape_sequence_map
+					and strpos($element_text, "\x1A") !== FALSE
+					and $element_text = strtr($element_text, $this->escape_sequence_map);
+
+				# composes element
+
+				$element = '<code>'.$element_text.'</code>';
+
+				# encodes element
+
+				$code = "\x1A".'$'.$index;
+
+				$text = str_replace($matches[0], $code, $text);
+
+				$map[$code] = $element;
+
+				$index ++;
+			}
+		}
+
+		# automatic link
+
+		if (strpos($text, '://') !== FALSE)
+		{
+			switch (TRUE)
+			{
+				case preg_match_all('{<(https?:[/]{2}[^\s]+)>}i', $text, $matches, PREG_SET_ORDER):
+				case preg_match_all('{\b(https?:[/]{2}[^\s]+)\b}i', $text, $matches, PREG_SET_ORDER):
+
 					foreach ($matches as $matches)
 					{
 						$url = $matches[1];
@@ -667,6 +764,9 @@ class Parsedown

 						$index ++;
 					}
+
+					break;
+			}
 		}

 		# ~
@@ -676,16 +776,23 @@ class Parsedown

 		# ~

+		if (strpos($text, '~~') !== FALSE)
+		{
+			$text = preg_replace('/~~(?=\S)(.+?)(?<=\S)~~/s', '<del>$1</del>', $text);
+		}
+
 		if (strpos($text, '_') !== FALSE)
 		{
-			$text = preg_replace('/__(?=\S)(.+?)(?<=\S)__(?!_)/s', '<strong>$1</strong>', $text);
-			$text = preg_replace('/_(?=\S)(.+?)(?<=\S)_/s', '<em>$1</em>', $text);
+			$text = preg_replace('/__(?=\S)([^_]+?)(?<=\S)__/s', '<strong>$1</strong>', $text, -1, $count);
+			$text = preg_replace('/(\b|_)_(?=\S)([^_]+?)(?<=\S)_(\b|_)/s', '$1<em>$2</em>$3', $text);
+			$text = preg_replace('/__(?=\S)([^_]+?)(?<=\S)__/s', '<strong>$1</strong>', $text, -1, $count);
 		}

 		if (strpos($text, '*') !== FALSE)
 		{
-			$text = preg_replace('/\*\*(?=\S)(.+?)(?<=\S)\*\*(?!\*)/s', '<strong>$1</strong>', $text);
-			$text = preg_replace('/\*(?=\S)(.+?)(?<=\S)\*/s', '<em>$1</em>', $text);
+			$text = preg_replace('/\*\*(?=\S)([^*]+?)(?<=\S)\*\*/s', '<strong>$1</strong>', $text);
+			$text = preg_replace('/\*(?=\S)([^*]+?)(?<=\S)\*/s', '<em>$1</em>', $text);
+			$text = preg_replace('/\*\*(?=\S)([^*]+?)(?<=\S)\*\*/s', '<strong>$1</strong>', $text);
 		}

 		$text = strtr($text, $map);
--- a/README.md
+++ b/README.md
@@ -1,8 +1,20 @@
-## Parsedown PHP
+## Parsedown

-Parsedown PHP is a parser for Markdown. It reads Markdown the way people do. First, it breaks texts into lines. Then, it looks at how these lines start and relate to each other. Finally, it looks for special characters to identify inline elements. As a result, Parsedown PHP is (very) fast and consistent.
+Better [Markdown](http://daringfireball.net/projects/markdown/) parser for PHP.

-[Home](http://parsedown.org) &middot; [Demo](http://parsedown.org/explorer/) &middot; [Tests](http://parsedown.org/tests/)
+***
+
+[demo](http://parsedown.org/demo) &middot; [tests](http://parsedown.org/tests/)
+
+***
+
+### Features
+
+* [fast](http://parsedown.org/speed)
+* [consistent](http://parsedown.org/consistency)
+* [GitHub Flavored](https://help.github.com/articles/github-flavored-markdown)
+* [tested](https://travis-ci.org/erusev/parsedown) in PHP 5.2, 5.3, 5.4 and 5.5
+* friendly to international input

 ### Installation

--- a/tests/Test.php
+++ b/tests/Test.php
@@ -20,20 +20,29 @@ class Test extends PHPUnit_Framework_TestCase
 	{
 		$provider = array();

-		$DirectoryIterator = new DirectoryIterator(__DIR__ . '/' . self::provider_dir);
+		$path = dirname(__FILE__).'/';
+
+		$DirectoryIterator = new DirectoryIterator($path . '/' . self::provider_dir);

 		foreach ($DirectoryIterator as $Item)
 		{
-			if ($Item->isFile() and $Item->getExtension() === 'md')
+			if ($Item->isFile())
 			{
+				$filename = $Item->getFilename();
+
+				$extension = pathinfo($filename, PATHINFO_EXTENSION);
+
+				if ($extension !== 'md')
+					continue;
+
 				$basename = $Item->getBasename('.md');

-				$markdown = file_get_contents(__DIR__ . '/' . self::provider_dir . $basename . '.md');
+				$markdown = file_get_contents($path . '/' . self::provider_dir . $basename . '.md');

 				if (!$markdown)
 					continue;

-				$expected_markup = file_get_contents(__DIR__ . '/' . self::provider_dir . $basename . '.html');
+				$expected_markup = file_get_contents($path . '/' . self::provider_dir . $basename . '.html');
 				$expected_markup = str_replace("\r\n", "\n", $expected_markup);
 				$expected_markup = str_replace("\r", "\n", $expected_markup);

@@ -44,4 +53,3 @@ class Test extends PHPUnit_Framework_TestCase
 		return $provider;
 	}
 }
-
--- a/tests/data/em_strong.html
+++ b/tests/data/em_strong.html
@@ -1,5 +1,6 @@
-<p><strong><em>em strong</em></strong></p>
-<p><strong><em>one</em> at the start</strong></p>
-<p><strong>one at the <em>end</em></strong></p>
-<p><strong>one <em>in the</em> middle</strong></p>
-<p><strong>one with <em>asterisks</em></strong></p>
+<p><strong><em>em strong</em> strong</strong></p>
+<p><strong>strong <em>em strong</em></strong></p>
+<p><strong>strong <em>em strong</em> strong</strong></p>
+<p><strong><em>em strong</em> strong</strong></p>
+<p><strong>strong <em>em strong</em></strong></p>
+<p><strong>strong <em>em strong</em> strong</strong></p>
--- a/tests/data/em_strong.md
+++ b/tests/data/em_strong.md
@@ -1,9 +1,11 @@
-___em strong___
+___em strong_ strong__

-___one_ at the start__
+__strong _em strong___

-__one at the _end___
+__strong _em strong_ strong__

-__one _in the_ middle__
+***em strong* strong**

-**one with *asterisks***
+**strong *em strong***
+
+**strong *em strong* strong**
--- a/tests/data/emphasis.html
+++ b/tests/data/emphasis.html
@@ -2,4 +2,6 @@
 <p><em>multiline
 emphasis</em></p>
 <p>_ this _ is not an emphasis, neither is _ this_, _this _, or _this*</p>
+<p>this_is_not_an_emphasis</p>
 <p>an empty emphasis __ ** is not an emphasis</p>
+<p>*mixed *<em>double and</em> single asterisk** spans</p>
--- a/tests/data/emphasis.md
+++ b/tests/data/emphasis.md
@@ -5,4 +5,8 @@ emphasis_

 _ this _ is not an emphasis, neither is _ this_, _this _, or _this*

+this_is_not_an_emphasis
+
 an empty emphasis __ ** is not an emphasis
+
+*mixed **double and* single asterisk** spans
--- a/tests/data/fenced_code_block.html
+++ b/tests/data/fenced_code_block.html
@@ -0,0 +1,5 @@
+<pre><code>&lt;?php
+
+$message = 'fenced code block';
+echo $message;</code></pre>
+<pre><code>tilde</code></pre>
--- a/tests/data/fenced_code_block.md
+++ b/tests/data/fenced_code_block.md
@@ -0,0 +1,10 @@
+```
+<?php
+
+$message = 'fenced code block';
+echo $message;
+```
+
+~~~
+tilde
+~~~
--- a/tests/data/inline_link.html
+++ b/tests/data/inline_link.html
@@ -1,2 +1,3 @@
 <p><a href="http://example.com">link</a></p>
+<p><a href="http://example.com"><code>link</code></a></p>
 <p><a href="http://example.com"><img alt="MD Logo" src="http://parsedown.org/md.png"></a></p>
--- a/tests/data/inline_link.md
+++ b/tests/data/inline_link.md
@@ -1,3 +1,5 @@
 [link](http://example.com)

+[`link`](http://example.com)
+
 [![MD Logo](http://parsedown.org/md.png)](http://example.com)
--- a/tests/data/multiline_list_paragraph.html
+++ b/tests/data/multiline_list_paragraph.html
@@ -0,0 +1,7 @@
+<ul>
+<li>
+<p>li</p>
+<p>line
+line</p>
+</li>
+</ul>
--- a/tests/data/multiline_list_paragraph.md
+++ b/tests/data/multiline_list_paragraph.md
@ -0,0 +1,4 @@
+- li
+
+  line
+  line
--- a/tests/data/strikethrough.html
+++ b/tests/data/strikethrough.html
@@ -0,0 +1,3 @@
+<p><del>strikethrough</del></p>
+<p>in the <del>middle</del> of a sentence</p>
+<p>in the middle of a w<del>or</del>d</p>
--- a/tests/data/strikethrough.md
+++ b/tests/data/strikethrough.md
@@ -0,0 +1,5 @@
+~~strikethrough~~
+
+in the ~~middle~~ of a sentence
+
+in the middle of a w~~or~~d
--- a/tests/data/strong_em.html
+++ b/tests/data/strong_em.html
@@ -0,0 +1,6 @@
+<p><em><strong>strong em</strong></em> </p>
+<p><em>em <strong>strong em</strong></em></p>
+<p><em><strong>strong em</strong> em</em></p>
+<p><em><strong>strong em</strong></em></p>
+<p><em>em <strong>strong em</strong></em></p>
+<p><em><strong>strong em</strong> em</em></p>
--- a/tests/data/strong_em.md
+++ b/tests/data/strong_em.md
@@ -0,0 +1,11 @@
+***strong em*** 
+
+*em **strong em***
+
+***strong em** em*
+
+___strong em___
+
+_em __strong em___
+
+___strong em__ em_
--- a/tests/data/text_reference.html
+++ b/tests/data/text_reference.html
@@ -4,3 +4,4 @@
 <p><a href="http://example.com">multiline
 one</a> defined on 2 lines</p>
 <p><a href="http://example.com">one</a> with an upper case label</p>
+<p><a href="http://example.com"><code>link</code></a></p>
--- a/tests/data/text_reference.md
+++ b/tests/data/text_reference.md
@@ -14,3 +14,5 @@ one][website] defined on 2 lines
 [one][label] with an upper case label

 [LABEL]: http://example.com
+
+[`link`][website]
--- a/tests/data/url_autolinking.html
+++ b/tests/data/url_autolinking.html
@@ -0,0 +1 @@
+<p>Here's an autolink <a href="http://example.com">http://example.com</a>.</p>
--- a/tests/data/url_autolinking.md
+++ b/tests/data/url_autolinking.md
@@ -0,0 +1 @@
+Here's an autolink http://example.com.
Author	SHA1	Message	Date
Emanuil Rusev	51a08fad85	improve parsing of emphasis	2013-12-07 17:21:36 +02:00
Emanuil Rusev	7fb08f334a	improve comments	2013-12-07 10:54:05 +02:00
Emanuil Rusev	85ad014f74	parse code span after recursive types to resolve #44	2013-12-06 01:43:55 +02:00
Emanuil Rusev	22336a1bcc	simplify special characters test	2013-12-06 00:45:26 +02:00
Emanuil Rusev	f713e380ee	add comment for automatic link	2013-12-06 00:29:51 +02:00
Emanuil Rusev	5b01915a63	interrupted list items should not add nonexistent empty lines	2013-12-06 00:15:17 +02:00
Emanuil Rusev	18d112a614	improve readme	2013-12-03 23:19:50 +02:00
Emanuil Rusev	1b9641ad03	improve readme	2013-12-03 22:49:50 +02:00
Emanuil Rusev	8baf537c12	resolve #40	2013-12-02 23:26:43 +02:00
Emanuil Rusev	05823567bc	simplify comments	2013-12-02 23:02:15 +02:00
Emanuil Rusev	b7029ab176	improve readme	2013-12-01 00:10:30 +02:00
Emanuil Rusev	102a947c7a	improve readme	2013-11-23 15:58:58 +02:00
Emanuil Rusev	7bb70186c1	simplify test for em strong	2013-11-23 13:35:15 +02:00
Emanuil Rusev	3225c66863	*strong em inside of em* should produce valid markup	2013-11-23 13:19:06 +02:00
Emanuil Rusev	d6dc5ba25b	update introduction text to match website	2013-11-23 09:26:44 +02:00
Emanuil Rusev	f5451a9eff	Merge pull request #37 from hkdobrev/htmlspecialshars-utf8	2013-11-22 13:23:21 -08:00
Haralan Dobrev	849a89b121	Use UTF-8 encoding for htmlspecialchars. See #36 . Prior to PHP 5.4.0 the default encoding for `htmlentities()` and `htmlspecialchars` is "ISO-8859-1". For PHP 5.4+ is "UTF-8". This ensures always the right encoding is used no matter the PHP version and the locale settings.	2013-11-22 23:06:20 +02:00
Emanuil Rusev	28064a63b3	simplify encoding of special characters	2013-11-22 21:57:21 +02:00
Emanuil Rusev	800aac5b56	Merge pull request #36 from josephok/patch-1	2013-11-22 11:21:38 -08:00
josephok	b15d40e8a3	Update Parsedown.php Changes the htmlentities() to htmlspecialchars(). The htmlentities() has some problems encoding non-english words(like Chinese)	2013-11-22 23:05:26 +08:00
Emanuil Rusev	ddc5b7e2dd	implement URL auto-linking	2013-11-22 00:20:45 +02:00
Emanuil Rusev	5a563008aa	implement GFM strikethrough	2013-11-21 13:39:00 +02:00
Emanuil Rusev	b6f795962f	resolve #21	2013-11-21 00:59:30 +02:00
Emanuil Rusev	cdb2646063	update readme to match website	2013-11-20 23:10:03 +02:00
Emanuil Rusev	e3b8026e39	build should no longer allow failures	2013-11-18 22:39:44 +02:00
Emanuil Rusev	d96f668c42	update test case to make it run on PHP 5.2	2013-11-18 22:29:15 +02:00
Emanuil Rusev	96bf75bd91	remove goto to provide support for PHP 5.2	2013-11-18 21:42:00 +02:00
Emanuil Rusev	67b51794d8	implement fenced code block to resolve #2	2013-11-17 16:52:31 +02:00
Emanuil Rusev	a9d6232705	array_shift » unset to simplify code base and improve performance	2013-11-17 13:21:49 +02:00
Emanuil Rusev	b91629ad94	organize evaluation blocks into switch statements to improve code readability	2013-11-17 12:48:01 +02:00
Emanuil Rusev	24d300ea5d	$pure_line » $deindented_line	2013-11-17 01:52:40 +02:00
Emanuil Rusev	d54712b989	simplify comments	2013-11-17 01:52:40 +02:00
Emanuil Rusev	6ef043ba7d	arrange compile cases	2013-11-17 01:52:40 +02:00
Emanuil Rusev	fe27b70bdb	block » markup	2013-11-17 01:52:40 +02:00
Emanuil Rusev	18d3dbf4f6	simplify comments	2013-11-17 01:52:40 +02:00
Emanuil Rusev	4758f58f73	remove double semicolons	2013-11-17 01:52:40 +02:00
Emanuil Rusev	5fa3eb1b2f	parse_inline_elements » parse_span_elements to match the specs	2013-11-17 01:52:40 +02:00
Emanuil Rusev	38300323a6	simplify readme	2013-11-16 18:45:13 +02:00
Emanuil Rusev	96609329b9	improve readme	2013-11-16 09:51:01 +02:00
				`@ -0,0 +1 @@`
				`<p>Here's an autolink <a href="http://example.com">http://example.com</a>.</p>`