Merge pull request #37 from hkdobrev/htmlspecialshars-utf8

Use UTF-8 encoding for htmlspecialchars. See #36.
Prior to PHP 5.4.0, the default encoding for `htmlentities()` and `htmlspecialchars()` was "ISO-8859-1"; for PHP 5.4+ it is "UTF-8". Passing the encoding explicitly ensures the right encoding is always used, no matter the PHP version and the locale settings.
2023-08-10 21:13:06 +03:00 · 2013-11-22 13:23:21 -08:00 · 2013-11-22 23:06:20 +02:00 · 2013-11-22 21:57:21 +02:00 · 2013-11-22 11:21:38 -08:00 · 2013-11-22 23:05:26 +08:00
14 changed files with 491 additions and 353 deletions
--- a/.travis.yml
+++ b/.travis.yml
@ -4,8 +4,4 @@ php:
  - 5.5
  - 5.4
  - 5.3
-  - 5.2
-
-matrix:
-  allow_failures:
-    - php: 5.2
+  - 5.2
--- a/Parsedown.php
+++ b/Parsedown.php
@ -46,17 +46,17 @@ class Parsedown

 	function parse($text)
 	{
-		# Removes UTF-8 BOM and marker characters.
+		# removes UTF-8 BOM and marker characters
 		$text = preg_replace('{^\xEF\xBB\xBF|\x1A}', '', $text);

-		# Removes \r characters.
+		# removes \r characters
 		$text = str_replace("\r\n", "\n", $text);
 		$text = str_replace("\r", "\n", $text);

-		# Replaces tabs with spaces.
+		# replaces tabs with spaces
 		$text = str_replace("\t", '    ', $text);

-		# Encodes escape sequences.
+		# encodes escape sequences

 		if (strpos($text, '\\') !== FALSE)
 		{
@ -84,7 +84,7 @@ class Parsedown

 		$text = $this->parse_block_elements($lines);

-		# Decodes escape sequences (leaves out backslashes).
+		# decodes escape sequences

 		foreach ($this->escape_sequence_map as $code => $escape_sequence)
 		{
@ -110,28 +110,56 @@ class Parsedown

 		foreach ($lines as $line)
 		{
-			# Block-Level HTML
+			#
+			# fenced elements

-			if ($element['type'] === 'block' and ! isset($element['closed']))
+			switch ($element['type'])
 			{
-				if (preg_match('{<'.$element['subtype'].'>$}', $line)) # <open>
-				{
-					$element['depth']++;
-				}
+				case 'fenced_code_block':

-				if (preg_match('{</'.$element['subtype'].'>$}', $line)) # </close>
-				{
-					$element['depth'] > 0
-						? $element['depth']--
-						: $element['closed'] = true;
-				}
+					if ( ! isset($element['closed']))
+					{
+						if (preg_match('/^[ ]*'.$element['fence'][0].'{3,}[ ]*$/', $line))
+						{
+							$element['closed'] = true;
+						}
+						else
+						{
+							$element['text'] !== '' and $element['text'] .= "\n";

-				$element['text'] .= "\n".$line;
+							$element['text'] .= $line;
+						}

-				continue;
+						continue 2;
+					}
+
+					break;
+
+				case 'markup':
+
+					if ( ! isset($element['closed']))
+					{
+						if (preg_match('{<'.$element['subtype'].'>$}', $line)) # opening tag
+						{
+							$element['depth']++;
+						}
+
+						if (preg_match('{</'.$element['subtype'].'>$}', $line)) # closing tag
+						{
+							$element['depth'] > 0
+								? $element['depth']--
+								: $element['closed'] = true;
+						}
+
+						$element['text'] .= "\n".$line;
+
+						continue 2;
+					}
+
+					break;
 			}

-			# Empty
+			# *

 			if ($line === '')
 			{
@ -140,269 +168,330 @@ class Parsedown
 				continue;
 			}

-			# Lazy Blockquote
+			#
+			# composite elements

-			if ($element['type'] === 'blockquote' and ! isset($element['interrupted']))
+			switch ($element['type'])
 			{
-				$line = preg_replace('/^[ ]*>[ ]?/', '', $line);
+				case 'blockquote':

-				$element['lines'] []= $line;
-
-				continue;
-			}
-
-			# Lazy List Item
-
-			if ($element['type'] === 'li')
-			{
-				if (preg_match('/^([ ]{0,3})(\d+[.]|[*+-])[ ](.*)/', $line, $matches))
-				{
-					if ($element['indentation'] !== $matches[1])
+					if ( ! isset($element['interrupted']))
 					{
+						$line = preg_replace('/^[ ]*>[ ]?/', '', $line);
+
 						$element['lines'] []= $line;
+
+						continue 2;
+					}
+
+					break;
+
+				case 'li':
+
+					if (preg_match('/^([ ]{0,3})(\d+[.]|[*+-])[ ](.*)/', $line, $matches))
+					{
+						if ($element['indentation'] !== $matches[1])
+						{
+							$element['lines'] []= $line;
+						}
+						else
+						{
+							unset($element['last']);
+
+							$elements []= $element;
+
+							$element = array(
+								'type' => 'li',
+								'indentation' => $matches[1],
+								'last' => true,
+								'lines' => array(
+									preg_replace('/^[ ]{0,4}/', '', $matches[3]),
+								),
+							);
+						}
+
+						continue 2;
+					}
+
+					if (isset($element['interrupted']))
+					{
+						if ($line[0] === ' ')
+						{
+							$element['lines'] []= '';
+
+							$line = preg_replace('/^[ ]{0,4}/', '', $line);
+
+							$element['lines'] []= $line;
+
+							continue 2;
+						}
 					}
 					else
 					{
-						unset($element['last']);
+						$line = preg_replace('/^[ ]{0,4}/', '', $line);

+						$element['lines'] []= $line;
+
+						continue 2;
+					}
+
+					break;
+			}
+
+			#
+			# indentation sensitive types
+
+			$deindented_line = $line;
+
+			switch ($line[0])
+			{
+				case ' ':
+
+					# ~
+
+					$deindented_line = ltrim($line);
+
+					if ($deindented_line === '')
+					{
+						continue 2;
+					}
+
+					# code block
+
+					if (preg_match('/^[ ]{4}(.*)/', $line, $matches))
+					{
+						if ($element['type'] === 'code_block')
+						{
+							if (isset($element['interrupted']))
+							{
+								$element['text'] .= "\n";
+
+								unset ($element['interrupted']);
+							}
+
+							$element['text'] .= "\n".$matches[1];
+						}
+						else
+						{
+							$elements []= $element;
+
+							$element = array(
+								'type' => 'code_block',
+								'text' => $matches[1],
+							);
+						}
+
+						continue 2;
+					}
+
+					break;
+
+				case '#':
+
+					# atx heading (#)
+
+					if (preg_match('/^(#{1,6})[ ]*(.+?)[ ]*#*$/', $line, $matches))
+					{
+						$elements []= $element;
+
+						$level = strlen($matches[1]);
+
+						$element = array(
+							'type' => 'h.',
+							'text' => $matches[2],
+							'level' => $level,
+						);
+
+						continue 2;
+					}
+
+					break;
+
+				case '-':
+
+					# setext heading (---)
+
+					if ($line[0] === '-' and $element['type'] === 'p' and ! isset($element['interrupted']) and preg_match('/^[-]+[ ]*$/', $line))
+					{
+						$element['type'] = 'h.';
+						$element['level'] = 2;
+
+						continue 2;
+					}
+
+					break;
+
+				case '=':
+
+					# setext heading (===)
+
+					if ($line[0] === '=' and $element['type'] === 'p' and ! isset($element['interrupted']) and preg_match('/^[=]+[ ]*$/', $line))
+					{
+						$element['type'] = 'h.';
+						$element['level'] = 1;
+
+						continue 2;
+					}
+
+					break;
+			}
+
+			#
+			# indentation insensitive types
+
+			switch ($deindented_line[0])
+			{
+				case '<':
+
+					# self-closing tag
+
+					if (preg_match('{^<.+?/>$}', $deindented_line))
+					{
+						$elements []= $element;
+
+						$element = array(
+							'type' => '',
+							'text' => $deindented_line,
+						);
+
+						continue 2;
+					}
+
+					# opening tag
+
+					if (preg_match('{^<(\w+)(?:[ ].*?)?>}', $deindented_line, $matches))
+					{
+						$elements []= $element;
+
+						$element = array(
+							'type' => 'markup',
+							'subtype' => strtolower($matches[1]),
+							'text' => $deindented_line,
+							'depth' => 0,
+						);
+
+						preg_match('{</'.$matches[1].'>\s*$}', $deindented_line) and $element['closed'] = true;
+
+						continue 2;
+					}
+
+					break;
+
+				case '>':
+
+					# quote
+
+					if (preg_match('/^>[ ]?(.*)/', $deindented_line, $matches))
+					{
+						$elements []= $element;
+
+						$element = array(
+							'type' => 'blockquote',
+							'lines' => array(
+								$matches[1],
+							),
+						);
+
+						continue 2;
+					}
+
+					break;
+
+				case '[':
+
+					# reference
+
+					if (preg_match('/^\[(.+?)\]:[ ]*([^ ]+)/', $deindented_line, $matches))
+					{
+						$label = strtolower($matches[1]);
+
+						$this->reference_map[$label] = trim($matches[2], '<>');;
+
+						continue 2;
+					}
+
+					break;
+
+				case '`':
+				case '~':
+
+					# fenced code block
+
+					if (preg_match('/^([`]{3,}|[~]{3,})[ ]*(\S+)?[ ]*$/', $deindented_line, $matches))
+					{
+						$elements []= $element;
+
+						$element = array(
+							'type' => 'fenced_code_block',
+							'text' => '',
+							'fence' => $matches[1],
+						);
+
+						isset($matches[2]) and $element['language'] = $matches[2];
+
+						continue 2;
+					}
+
+					break;
+
+				case '*':
+				case '+':
+				case '-':
+				case '_':
+
+					# hr
+
+					if (preg_match('/^([-*_])([ ]{0,2}\1){2,}[ ]*$/', $deindented_line))
+					{
+						$elements []= $element;
+
+						$element = array(
+							'type' => 'hr',
+						);
+
+						continue 2;
+					}
+
+					# li
+
+					if (preg_match('/^([ ]*)[*+-][ ](.*)/', $line, $matches))
+					{
 						$elements []= $element;

 						$element = array(
 							'type' => 'li',
+							'ordered' => false,
 							'indentation' => $matches[1],
 							'last' => true,
 							'lines' => array(
-								preg_replace('/^[ ]{0,4}/', '', $matches[3]),
+								preg_replace('/^[ ]{0,4}/', '', $matches[2]),
 							),
 						);
+
+						continue 2;
 					}
-
-					continue;
-				}
-
-				if (isset($element['interrupted']))
-				{
-					if ($line[0] === ' ')
-					{
-						$element['lines'] []= '';
-
-						$line = preg_replace('/^[ ]{0,4}/', '', $line);;
-
-						$element['lines'] []= $line;
-
-						continue;
-					}
-				}
-				else
-				{
-					$line = preg_replace('/^[ ]{0,4}/', '', $line);;
-
-					$element['lines'] []= $line;
-
-					continue;
-				}
 			}

-			# Quick Paragraph
+			# li

-			if ($line[0] >= 'a' or $line[0] >= 'A' and $line[0] <= 'Z')
-			{
-				goto paragraph;
-			}
-
-			# Code Block
-
-			if ($line[0] === ' ' and preg_match('/^[ ]{4}(.*)/', $line, $matches))
-			{
-				if (trim($line) === '')
-				{
-					continue;
-				}
-
-				if ($element['type'] === 'code')
-				{
-					if (isset($element['interrupted']))
-					{
-						$element['text'] .= "\n";
-
-						unset ($element['interrupted']);
-					}
-
-					$element['text'] .= "\n".$matches[1];
-				}
-				else
-				{
-					$elements []= $element;
-
-					$element = array(
-						'type' => 'code',
-						'text' => $matches[1],
-					);
-				}
-
-				continue;
-			}
-
-			# Setext Header (---)
-
-			if ($line[0] === '-' and $element['type'] === 'p' and ! isset($element['interrupted']) and preg_match('/^[-]+[ ]*$/', $line))
-			{
-				$element['type'] = 'h.';
-				$element['level'] = 2;
-
-				continue;
-			}
-
-			# Atx Header (#)
-
-			if ($line[0] === '#' and preg_match('/^(#{1,6})[ ]*(.+?)[ ]*#*$/', $line, $matches))
-			{
-				$elements []= $element;
-
-				$level = strlen($matches[1]);
-
-				$element = array(
-					'type' => 'h.',
-					'text' => $matches[2],
-					'level' => $level,
-				);
-
-				continue;
-			}
-
-			# Setext Header (===)
-
-			if ($line[0] === '=' and $element['type'] === 'p' and ! isset($element['interrupted']) and preg_match('/^[=]+[ ]*$/', $line))
-			{
-				$element['type'] = 'h.';
-				$element['level'] = 1;
-
-				continue;
-			}
-
-			# ~
-
-			$pure_line = $line[0] !== ' ' ? $line : ltrim($line);
-
-			if ($pure_line === '')
-			{
-				continue;
-			}
-
-			# Link Reference
-
-			if ($pure_line[0] === '[' and preg_match('/^\[(.+?)\]:[ ]*([^ ]+)/', $pure_line, $matches))
-			{
-				$label = strtolower($matches[1]);
-				$url = trim($matches[2], '<>');
-
-				$this->reference_map[$label] = $url;
-
-				continue;
-			}
-
-			# Blockquote
-
-			if ($pure_line[0] === '>' and preg_match('/^>[ ]?(.*)/', $pure_line, $matches))
-			{
-				if ($element['type'] === 'blockquote')
-				{
-					if (isset($element['interrupted']))
-					{
-						$element['lines'] []= '';
-
-						unset($element['interrupted']);
-					}
-
-					$element['lines'] []= $matches[1];
-				}
-				else
-				{
-					$elements []= $element;
-
-					$element = array(
-						'type' => 'blockquote',
-						'lines' => array(
-							$matches[1],
-						),
-					);
-				}
-
-				continue;
-			}
-
-			# HTML
-
-			if ($pure_line[0] === '<')
-			{
-				# Block-Level HTML <self-closing/>
-
-				if (preg_match('{^<.+?/>$}', $pure_line))
-				{
-					$elements []= $element;
-
-					$element = array(
-						'type' => '',
-						'text' => $pure_line,
-					);
-
-					continue;
-				}
-
-				# Block-Level HTML <open>
-
-				if (preg_match('{^<(\w+)(?:[ ].*?)?>}', $pure_line, $matches))
-				{
-					$elements []= $element;
-
-					$element = array(
-						'type' => 'block',
-						'subtype' => strtolower($matches[1]),
-						'text' => $pure_line,
-						'depth' => 0,
-					);
-
-					preg_match('{</'.$matches[1].'>\s*$}', $pure_line) and $element['closed'] = true;
-
-					continue;
-				}
-			}
-
-			# Horizontal Rule
-
-			if (preg_match('/^([-*_])([ ]{0,2}\1){2,}[ ]*$/', $pure_line))
-			{
-				$elements []= $element;
-
-				$element = array(
-					'type' => 'hr',
-				);
-
-				continue;
-			}
-
-			# List Item
-
-			if (preg_match('/^([ ]*)(\d+[.]|[*+-])[ ](.*)/', $line, $matches))
+			if ($deindented_line[0] <= '9' and $deindented_line >= '0' and preg_match('/^([ ]*)\d+[.][ ](.*)/', $line, $matches))
 			{
 				$elements []= $element;

 				$element = array(
 					'type' => 'li',
-					'ordered' => isset($matches[2][1]),
+					'ordered' => true,
 					'indentation' => $matches[1],
 					'last' => true,
 					'lines' => array(
-						preg_replace('/^[ ]{0,4}/', '', $matches[3]),
+						preg_replace('/^[ ]{0,4}/', '', $matches[2]),
 					),
 				);

 				continue;
 			}

-			# ~
-
-			paragraph:
+			# paragraph

 			if ($element['type'] === 'p')
 			{
@ -432,7 +521,7 @@ class Parsedown

 		$elements []= $element;

-		array_shift($elements);
+		unset($elements[0]);

 		#
 		# ~
@ -440,10 +529,67 @@ class Parsedown

 		$markup = '';

-		foreach ($elements as $index => $element)
+		foreach ($elements as $element)
 		{
 			switch ($element['type'])
 			{
+				case 'p':
+
+					$text = $this->parse_span_elements($element['text']);
+
+					$text = preg_replace('/[ ]{2}\n/', '<br />'."\n", $text);
+
+					if ($context === 'li' and $markup === '')
+					{
+						if (isset($element['interrupted']))
+						{
+							$markup .= "\n".'<p>'.$text.'</p>'."\n";
+						}
+						else
+						{
+							$markup .= $text;
+						}
+					}
+					else
+					{
+						$markup .= '<p>'.$text.'</p>'."\n";
+					}
+
+					break;
+
+				case 'blockquote':
+
+					$text = $this->parse_block_elements($element['lines']);
+
+					$markup .= '<blockquote>'."\n".$text.'</blockquote>'."\n";
+
+					break;
+
+				case 'code_block':
+				case 'fenced_code_block':
+
+					$text = htmlspecialchars($element['text'], ENT_NOQUOTES, 'UTF-8');
+
+					strpos($text, "\x1A\\") !== FALSE and $text = strtr($text, $this->escape_sequence_map);
+
+					$markup .= '<pre><code>'.$text.'</code></pre>'."\n";
+
+					break;
+
+				case 'h.':
+
+					$text = $this->parse_span_elements($element['text']);
+
+					$markup .= '<h'.$element['level'].'>'.$text.'</h'.$element['level'].'>'."\n";
+
+					break;
+
+				case 'hr':
+
+					$markup .= '<hr />'."\n";
+
+					break;
+
 				case 'li':

 					if (isset($element['ordered'])) # first
@ -466,62 +612,6 @@ class Parsedown

 					break;

-				case 'p':
-
-					$text = $this->parse_inline_elements($element['text']);
-
-					$text = preg_replace('/[ ]{2}\n/', '<br />'."\n", $text);
-
-					if ($context === 'li' and $index === 0)
-					{
-						if (isset($element['interrupted']))
-						{
-							$markup .= "\n".'<p>'.$text.'</p>'."\n";
-						}
-						else
-						{
-							$markup .= $text;
-						}
-					}
-					else
-					{
-						$markup .= '<p>'.$text.'</p>'."\n";
-					}
-
-					break;
-
-				case 'code':
-
-					$text = htmlentities($element['text'], ENT_NOQUOTES);
-
-					strpos($text, "\x1A\\") !== FALSE and $text = strtr($text, $this->escape_sequence_map);
-
-					$markup .= '<pre><code>'.$text.'</code></pre>'."\n";
-
-					break;
-
-				case 'blockquote':
-
-					$text = $this->parse_block_elements($element['lines']);
-
-					$markup .= '<blockquote>'."\n".$text.'</blockquote>'."\n";
-
-					break;
-
-				case 'h.':
-
-					$text = $this->parse_inline_elements($element['text']);
-
-					$markup .= '<h'.$element['level'].'>'.$text.'</h'.$element['level'].'>'."\n";
-
-					break;
-
-				case 'hr':
-
-					$markup .= '<hr />'."\n";
-
-					break;
-
 				default:

 					$markup .= $element['text']."\n";
@ -531,32 +621,32 @@ class Parsedown
 		return $markup;
 	}

-	private function parse_inline_elements($text)
+	private function parse_span_elements($text)
 	{
 		$map = array();

 		$index = 0;

-		# Code Span
+		# code span

 		if (strpos($text, '`') !== FALSE and preg_match_all('/`(.+?)`/', $text, $matches, PREG_SET_ORDER))
 		{
 			foreach ($matches as $matches)
 			{
 				$element_text = $matches[1];
-				$element_text = htmlentities($element_text, ENT_NOQUOTES);
+				$element_text = htmlspecialchars($element_text, ENT_NOQUOTES, 'UTF-8');

-				# Decodes escape sequences.
+				# decodes escape sequences

 				$this->escape_sequence_map
 					and strpos($element_text, "\x1A") !== FALSE
 					and $element_text = strtr($element_text, $this->escape_sequence_map);

-				# Composes element.
+				# composes element

 				$element = '<code>'.$element_text.'</code>';

-				# Encodes element.
+				# encodes element

 				$code = "\x1A".'$'.$index;

@ -568,7 +658,7 @@ class Parsedown
 			}
 		}

-		# Inline Link / Image
+		# inline link or image

 		if (strpos($text, '](') !== FALSE and preg_match_all('/(!?)(\[((?:[^\[\]]|(?2))*)\])\((.*?)\)/', $text, $matches, PREG_SET_ORDER)) # inline
 		{
@ -584,7 +674,7 @@ class Parsedown
 				}
 				else
 				{
-					$element_text = $this->parse_inline_elements($matches[3]);
+					$element_text = $this->parse_span_elements($matches[3]);

 					$element = '<a href="'.$url.'">'.$element_text.'</a>';
 				}
@ -601,7 +691,7 @@ class Parsedown
 			}
 		}

-		# Reference(d) Link / Image
+		# reference link or image

 		if ($this->reference_map and strpos($text, '[') !== FALSE and preg_match_all('/(!?)\[(.+?)\](?:\n?[ ]?\[(.*?)\])?/ms', $text, $matches, PREG_SET_ORDER))
 		{
@ -625,7 +715,7 @@ class Parsedown
 					}
 					else # anchor
 					{
-						$element_text = $this->parse_inline_elements($matches[2]);
+						$element_text = $this->parse_span_elements($matches[2]);

 						$element = '<a href="'.$url.'">'.$element_text.'</a>';
 					}
@ -643,29 +733,35 @@ class Parsedown
 			}
 		}

-		# Automatic Links
-
-		if (strpos($text, '<') !== FALSE and preg_match_all('/<((https?|ftp|dict):[^\^\s]+?)>/i', $text, $matches, PREG_SET_ORDER))
+		if (strpos($text, '://') !== FALSE)
 		{
-			foreach ($matches as $matches)
+			switch (TRUE)
 			{
-				$url = $matches[1];
+				case preg_match_all('{<(https?:[/]{2}[^\s]+)>}i', $text, $matches, PREG_SET_ORDER):
+				case preg_match_all('{\b(https?:[/]{2}[^\s]+)\b}i', $text, $matches, PREG_SET_ORDER):

-				strpos($url, '&') !== FALSE and $url = preg_replace('/&(?!#?\w+;)/', '&amp;', $url);
+					foreach ($matches as $matches)
+					{
+						$url = $matches[1];

-				$element = '<a href=":href">:text</a>';
-				$element = str_replace(':text', $url, $element);
-				$element = str_replace(':href', $url, $element);
+						strpos($url, '&') !== FALSE and $url = preg_replace('/&(?!#?\w+;)/', '&amp;', $url);

-				# ~
+						$element = '<a href=":href">:text</a>';
+						$element = str_replace(':text', $url, $element);
+						$element = str_replace(':href', $url, $element);

-				$code = "\x1A".'$'.$index;
+						# ~

-				$text = str_replace($matches[0], $code, $text);
+						$code = "\x1A".'$'.$index;

-				$map[$code] = $element;
+						$text = str_replace($matches[0], $code, $text);

-				$index ++;
+						$map[$code] = $element;
+
+						$index ++;
+					}
+
+					break;
 			}
 		}

@ -676,10 +772,15 @@ class Parsedown

 		# ~

+		if (strpos($text, '~~') !== FALSE)
+		{
+			$text = preg_replace('/~~(?=\S)(.+?)(?<=\S)~~/s', '<del>$1</del>', $text);
+		}
+
 		if (strpos($text, '_') !== FALSE)
 		{
 			$text = preg_replace('/__(?=\S)(.+?)(?<=\S)__(?!_)/s', '<strong>$1</strong>', $text);
-			$text = preg_replace('/_(?=\S)(.+?)(?<=\S)_/s', '<em>$1</em>', $text);
+			$text = preg_replace('/\b_(?=\S)(.+?)(?<=\S)_\b/s', '<em>$1</em>', $text);
 		}

 		if (strpos($text, '*') !== FALSE)
@ -692,4 +793,4 @@ class Parsedown

 		return $text;
 	}
-}
+}
--- a/README.md
+++ b/README.md
@ -1,6 +1,6 @@
-## Parsedown PHP
+## Parsedown

-Parsedown PHP is a parser for Markdown. It reads Markdown the way people do. First, it breaks texts into lines. Then, it looks at how these lines start and relate to each other. Finally, it looks for special characters to identify inline elements. As a result, Parsedown PHP is (very) fast and consistent.
+Fast, consistent and easy to use [Markdown][1] parser for PHP.

 [Home](http://parsedown.org) &middot; [Demo](http://parsedown.org/explorer/) &middot; [Tests](http://parsedown.org/tests/)

@ -17,3 +17,5 @@ $result = Parsedown::instance()->parse($text);

 echo $result; # prints: <p>Hello <strong>Parsedown</strong>!</p>
 ```
+
+[1]: http://daringfireball.net/projects/markdown/
--- a/tests/Test.php
+++ b/tests/Test.php
@ -5,7 +5,7 @@ include 'Parsedown.php';
 class Test extends PHPUnit_Framework_TestCase
 {
 	const provider_dir = 'data/';
-	
+
 	/**
 	 * @dataProvider provider
 	 */
@ -15,33 +15,41 @@ class Test extends PHPUnit_Framework_TestCase

 		$this->assertEquals($expected_markup, $actual_markup);
 	}
-	
+
 	function provider()
 	{
 		$provider = array();
-		
-		$DirectoryIterator = new DirectoryIterator(__DIR__ . '/' . self::provider_dir);
-		
+
+		$path = dirname(__FILE__).'/';
+
+		$DirectoryIterator = new DirectoryIterator($path . '/' . self::provider_dir);
+
 		foreach ($DirectoryIterator as $Item)
 		{
-			if ($Item->isFile() and $Item->getExtension() === 'md')
+			if ($Item->isFile())
 			{
+				$filename = $Item->getFilename();
+
+				$extension = pathinfo($filename, PATHINFO_EXTENSION);
+
+				if ($extension !== 'md')
+					continue;
+
 				$basename = $Item->getBasename('.md');
-				
-				$markdown = file_get_contents(__DIR__ . '/' . self::provider_dir . $basename . '.md');
-				
+
+				$markdown = file_get_contents($path . '/' . self::provider_dir . $basename . '.md');
+
 				if (!$markdown)
 					continue;
-				
-				$expected_markup = file_get_contents(__DIR__ . '/' . self::provider_dir . $basename . '.html');
+
+				$expected_markup = file_get_contents($path . '/' . self::provider_dir . $basename . '.html');
 				$expected_markup = str_replace("\r\n", "\n", $expected_markup);
 				$expected_markup = str_replace("\r", "\n", $expected_markup);
-				
+
 				$provider [] = array($markdown, $expected_markup);
 			}
 		}
-		
+
 		return $provider;
 	}
-}
-
+}
--- a/tests/data/emphasis.html
+++ b/tests/data/emphasis.html
@ -2,4 +2,5 @@
 <p><em>multiline
 emphasis</em></p>
 <p>_ this _ is not an emphasis, neither is _ this_, _this _, or _this*</p>
+<p>this_is_not_an_emphasis</p>
 <p>an empty emphasis __ ** is not an emphasis</p>
--- a/tests/data/emphasis.md
+++ b/tests/data/emphasis.md
@ -5,4 +5,6 @@ emphasis_

 _ this _ is not an emphasis, neither is _ this_, _this _, or _this*

+this_is_not_an_emphasis
+
 an empty emphasis __ ** is not an emphasis
--- a/tests/data/fenced_code_block.html
+++ b/tests/data/fenced_code_block.html
@ -0,0 +1,5 @@
+<pre><code>&lt;?php
+
+$message = 'fenced code block';
+echo $message;</code></pre>
+<pre><code>tilde</code></pre>
--- a/tests/data/fenced_code_block.md
+++ b/tests/data/fenced_code_block.md
@ -0,0 +1,10 @@
+```
+<?php
+
+$message = 'fenced code block';
+echo $message;
+```
+
+~~~
+tilde
+~~~
--- a/tests/data/special_characters.html
+++ b/tests/data/special_characters.html
@ -1,4 +1,5 @@
 <p>AT&amp;T has an ampersand in their name</p>
+<pre><code>Let's play some cards ♠ ♣ ♥ ♦</code></pre>
 <p>AT&amp;T is another way to write it</p>
 <p>this &amp; that</p>
 <p>4 &lt; 5 and 6 > 5</p>
--- a/tests/data/special_characters.md
+++ b/tests/data/special_characters.md
@ -1,5 +1,7 @@
 AT&T has an ampersand in their name

+    Let's play some cards ♠ ♣ ♥ ♦
+
 AT&T is another way to write it

 this & that
--- a/tests/data/strikethrough.html
+++ b/tests/data/strikethrough.html
@ -0,0 +1,3 @@
+<p><del>strikethrough</del></p>
+<p>in the <del>middle</del> of a sentence</p>
+<p>in the middle of a w<del>or</del>d</p>
--- a/tests/data/strikethrough.md
+++ b/tests/data/strikethrough.md
@ -0,0 +1,5 @@
+~~strikethrough~~
+
+in the ~~middle~~ of a sentence
+
+in the middle of a w~~or~~d
--- a/tests/data/url_autolinking.html
+++ b/tests/data/url_autolinking.html
@ -0,0 +1 @@
+<p>Here's an autolink <a href="http://example.com">http://example.com</a>.</p>
--- a/tests/data/url_autolinking.md
+++ b/tests/data/url_autolinking.md
@ -0,0 +1 @@
+Here's an autolink http://example.com.
Author	SHA1	Message	Date
Emanuil Rusev	f5451a9eff	Merge pull request #37 from hkdobrev/htmlspecialshars-utf8	2013-11-22 13:23:21 -08:00
Haralan Dobrev	849a89b121	Use UTF-8 encoding for htmlspecialchars. See #36 . Prior to PHP 5.4.0 the default encoding for `htmlentities()` and `htmlspecialchars` is "ISO-8859-1". For PHP 5.4+ is "UTF-8". This ensures always the right encoding is used no matter the PHP version and the locale settings.	2013-11-22 23:06:20 +02:00
Emanuil Rusev	28064a63b3	simplify encoding of special characters	2013-11-22 21:57:21 +02:00
Emanuil Rusev	800aac5b56	Merge pull request #36 from josephok/patch-1	2013-11-22 11:21:38 -08:00
josephok	b15d40e8a3	Update Parsedown.php Changes the htmlentities() to htmlspecialchars(). The htmlentities() has some problems encoding non-english words(like Chinese)	2013-11-22 23:05:26 +08:00
Emanuil Rusev	ddc5b7e2dd	implement URL auto-linking	2013-11-22 00:20:45 +02:00
Emanuil Rusev	5a563008aa	implement GFM strikethrough	2013-11-21 13:39:00 +02:00
Emanuil Rusev	b6f795962f	resolve #21	2013-11-21 00:59:30 +02:00
Emanuil Rusev	cdb2646063	update readme to match website	2013-11-20 23:10:03 +02:00
Emanuil Rusev	e3b8026e39	build should no longer allow failures	2013-11-18 22:39:44 +02:00
Emanuil Rusev	d96f668c42	update test case to make it run on PHP 5.2	2013-11-18 22:29:15 +02:00
Emanuil Rusev	96bf75bd91	remove goto to provide support for PHP 5.2	2013-11-18 21:42:00 +02:00
Emanuil Rusev	67b51794d8	implement fenced code block to resolve #2	2013-11-17 16:52:31 +02:00
Emanuil Rusev	a9d6232705	array_shift » unset to simplify code base and improve performance	2013-11-17 13:21:49 +02:00
Emanuil Rusev	b91629ad94	organize evaluation blocks into switch statements to improve code readability	2013-11-17 12:48:01 +02:00
Emanuil Rusev	24d300ea5d	$pure_line » $deindented_line	2013-11-17 01:52:40 +02:00
Emanuil Rusev	d54712b989	simplify comments	2013-11-17 01:52:40 +02:00
Emanuil Rusev	6ef043ba7d	arrange compile cases	2013-11-17 01:52:40 +02:00
Emanuil Rusev	fe27b70bdb	block » markup	2013-11-17 01:52:40 +02:00
Emanuil Rusev	18d3dbf4f6	simplify comments	2013-11-17 01:52:40 +02:00
Emanuil Rusev	4758f58f73	remove double semicolons	2013-11-17 01:52:40 +02:00
Emanuil Rusev	5fa3eb1b2f	parse_inline_elements » parse_span_elements to match the specs	2013-11-17 01:52:40 +02:00
Emanuil Rusev	38300323a6	simplify readme	2013-11-16 18:45:13 +02:00
Emanuil Rusev	96609329b9	improve readme	2013-11-16 09:51:01 +02:00
				`@ -0,0 +1 @@`
				`<p>Here's an autolink <a href="http://example.com">http://example.com</a>.</p>`