1
0
mirror of https://github.com/erusev/parsedown.git synced 2023-08-10 21:13:06 +03:00
parsedown/Parsedown.php
Aidan Woods 054ba3c487
urlencode urls that are potentially unsafe:
this should break urls that attempt to include a protocol, or port (these are absolute URLs and should have a whitelisted protocol for use)
but URLs that are relative, or relative from the site root should be preserved (though characters non essential for the URL structure may be urlencoded)

this approach has significant advantages over attempting to locate something like `javascript:alert(1)` or `javascript:alert(1)` (which are both valid) because browsers have been known to ignore ridiculous characters when encountered (meaning something like `jav\ta\0\0script:alert(1)` would be xss :( ). Instead of trying to chase down a way to interpret a URL to decide whether there is a protocol, this approach ensures that two essential characters needed to achieve a colon are encoded `:` (obviously) and `;` (from `:`). If these characters appear in a relative URL then they are equivalent to their URL encoded form and so this change will be non breaking for that case.
2017-05-03 17:01:27 +01:00

1636 lines
39 KiB
PHP

<?php
#
#
# Parsedown
# http://parsedown.org
#
# (c) Emanuil Rusev
# http://erusev.com
#
# For the full license information, view the LICENSE file that was distributed
# with this source code.
#
#
class Parsedown
{
# ~
const version = '1.6.0';
# ~
function text($text)
{
# make sure no definitions are set
$this->DefinitionData = array();
# standardize line breaks
$text = str_replace(array("\r\n", "\r"), "\n", $text);
# remove surrounding line breaks
$text = trim($text, "\n");
# split text into lines
$lines = explode("\n", $text);
# iterate through lines to identify blocks
$markup = $this->lines($lines);
# trim line breaks
$markup = trim($markup, "\n");
return $markup;
}
#
# Setters
#
function setBreaksEnabled($breaksEnabled)
{
$this->breaksEnabled = $breaksEnabled;
return $this;
}
protected $breaksEnabled;
function setMarkupEscaped($markupEscaped)
{
$this->markupEscaped = $markupEscaped;
return $this;
}
protected $markupEscaped;
function setUrlsLinked($urlsLinked)
{
$this->urlsLinked = $urlsLinked;
return $this;
}
protected $urlsLinked = true;
function setSafeLinksEnabled($safeLinksEnabled)
{
$this->safeLinksEnabled = $safeLinksEnabled;
return $this;
}
protected $safeLinksEnabled = true;
protected $safeLinksWhitelist = array(
'http://',
'https://',
'ftp://',
'ftps://',
'mailto:',
'data:image/png;base64,',
'data:image/gif;base64,',
'data:image/jpeg;base64,',
);
#
# Lines
#
protected $BlockTypes = array(
'#' => array('Header'),
'*' => array('Rule', 'List'),
'+' => array('List'),
'-' => array('SetextHeader', 'Table', 'Rule', 'List'),
'0' => array('List'),
'1' => array('List'),
'2' => array('List'),
'3' => array('List'),
'4' => array('List'),
'5' => array('List'),
'6' => array('List'),
'7' => array('List'),
'8' => array('List'),
'9' => array('List'),
':' => array('Table'),
'<' => array('Comment', 'Markup'),
'=' => array('SetextHeader'),
'>' => array('Quote'),
'[' => array('Reference'),
'_' => array('Rule'),
'`' => array('FencedCode'),
'|' => array('Table'),
'~' => array('FencedCode'),
);
# ~
protected $unmarkedBlockTypes = array(
'Code',
);
#
# Blocks
#
protected function lines(array $lines)
{
$CurrentBlock = null;
foreach ($lines as $line)
{
if (chop($line) === '')
{
if (isset($CurrentBlock))
{
$CurrentBlock['interrupted'] = true;
}
continue;
}
if (strpos($line, "\t") !== false)
{
$parts = explode("\t", $line);
$line = $parts[0];
unset($parts[0]);
foreach ($parts as $part)
{
$shortage = 4 - mb_strlen($line, 'utf-8') % 4;
$line .= str_repeat(' ', $shortage);
$line .= $part;
}
}
$indent = 0;
while (isset($line[$indent]) and $line[$indent] === ' ')
{
$indent ++;
}
$text = $indent > 0 ? substr($line, $indent) : $line;
# ~
$Line = array('body' => $line, 'indent' => $indent, 'text' => $text);
# ~
if (isset($CurrentBlock['continuable']))
{
$Block = $this->{'block'.$CurrentBlock['type'].'Continue'}($Line, $CurrentBlock);
if (isset($Block))
{
$CurrentBlock = $Block;
continue;
}
else
{
if ($this->isBlockCompletable($CurrentBlock['type']))
{
$CurrentBlock = $this->{'block'.$CurrentBlock['type'].'Complete'}($CurrentBlock);
}
}
}
# ~
$marker = $text[0];
# ~
$blockTypes = $this->unmarkedBlockTypes;
if (isset($this->BlockTypes[$marker]))
{
foreach ($this->BlockTypes[$marker] as $blockType)
{
$blockTypes []= $blockType;
}
}
#
# ~
foreach ($blockTypes as $blockType)
{
$Block = $this->{'block'.$blockType}($Line, $CurrentBlock);
if (isset($Block))
{
$Block['type'] = $blockType;
if ( ! isset($Block['identified']))
{
$Blocks []= $CurrentBlock;
$Block['identified'] = true;
}
if ($this->isBlockContinuable($blockType))
{
$Block['continuable'] = true;
}
$CurrentBlock = $Block;
continue 2;
}
}
# ~
if (isset($CurrentBlock) and ! isset($CurrentBlock['type']) and ! isset($CurrentBlock['interrupted']))
{
$CurrentBlock['element']['text'] .= "\n".$text;
}
else
{
$Blocks []= $CurrentBlock;
$CurrentBlock = $this->paragraph($Line);
$CurrentBlock['identified'] = true;
}
}
# ~
if (isset($CurrentBlock['continuable']) and $this->isBlockCompletable($CurrentBlock['type']))
{
$CurrentBlock = $this->{'block'.$CurrentBlock['type'].'Complete'}($CurrentBlock);
}
# ~
$Blocks []= $CurrentBlock;
unset($Blocks[0]);
# ~
$markup = '';
foreach ($Blocks as $Block)
{
if (isset($Block['hidden']))
{
continue;
}
$markup .= "\n";
$markup .= isset($Block['markup']) ? $Block['markup'] : $this->element($Block['element']);
}
$markup .= "\n";
# ~
return $markup;
}
protected function isBlockContinuable($Type)
{
return method_exists($this, 'block'.$Type.'Continue');
}
protected function isBlockCompletable($Type)
{
return method_exists($this, 'block'.$Type.'Complete');
}
#
# Code
protected function blockCode($Line, $Block = null)
{
if (isset($Block) and ! isset($Block['type']) and ! isset($Block['interrupted']))
{
return;
}
if ($Line['indent'] >= 4)
{
$text = substr($Line['body'], 4);
$Block = array(
'element' => array(
'name' => 'pre',
'handler' => 'element',
'text' => array(
'name' => 'code',
'text' => $text,
),
),
);
return $Block;
}
}
protected function blockCodeContinue($Line, $Block)
{
if ($Line['indent'] >= 4)
{
if (isset($Block['interrupted']))
{
$Block['element']['text']['text'] .= "\n";
unset($Block['interrupted']);
}
$Block['element']['text']['text'] .= "\n";
$text = substr($Line['body'], 4);
$Block['element']['text']['text'] .= $text;
return $Block;
}
}
protected function blockCodeComplete($Block)
{
$text = $Block['element']['text']['text'];
$Block['element']['text']['text'] = $text;
return $Block;
}
#
# Comment
protected function blockComment($Line)
{
if ($this->markupEscaped)
{
return;
}
if (isset($Line['text'][3]) and $Line['text'][3] === '-' and $Line['text'][2] === '-' and $Line['text'][1] === '!')
{
$Block = array(
'markup' => $Line['body'],
);
if (preg_match('/-->$/', $Line['text']))
{
$Block['closed'] = true;
}
return $Block;
}
}
protected function blockCommentContinue($Line, array $Block)
{
if (isset($Block['closed']))
{
return;
}
$Block['markup'] .= "\n" . $Line['body'];
if (preg_match('/-->$/', $Line['text']))
{
$Block['closed'] = true;
}
return $Block;
}
#
# Fenced Code
protected function blockFencedCode($Line)
{
if (preg_match('/^['.$Line['text'][0].']{3,}[ ]*([\w-]+)?[ ]*$/', $Line['text'], $matches))
{
$Element = array(
'name' => 'code',
'text' => '',
);
if (isset($matches[1]))
{
$class = 'language-'.$matches[1];
$Element['attributes'] = array(
'class' => $class,
);
}
$Block = array(
'char' => $Line['text'][0],
'element' => array(
'name' => 'pre',
'handler' => 'element',
'text' => $Element,
),
);
return $Block;
}
}
protected function blockFencedCodeContinue($Line, $Block)
{
if (isset($Block['complete']))
{
return;
}
if (isset($Block['interrupted']))
{
$Block['element']['text']['text'] .= "\n";
unset($Block['interrupted']);
}
if (preg_match('/^'.$Block['char'].'{3,}[ ]*$/', $Line['text']))
{
$Block['element']['text']['text'] = substr($Block['element']['text']['text'], 1);
$Block['complete'] = true;
return $Block;
}
$Block['element']['text']['text'] .= "\n".$Line['body'];
return $Block;
}
protected function blockFencedCodeComplete($Block)
{
$text = $Block['element']['text']['text'];
$Block['element']['text']['text'] = $text;
return $Block;
}
#
# Header
protected function blockHeader($Line)
{
if (isset($Line['text'][1]))
{
$level = 1;
while (isset($Line['text'][$level]) and $Line['text'][$level] === '#')
{
$level ++;
}
if ($level > 6)
{
return;
}
$text = trim($Line['text'], '# ');
$Block = array(
'element' => array(
'name' => 'h' . min(6, $level),
'text' => $text,
'handler' => 'line',
),
);
return $Block;
}
}
#
# List
protected function blockList($Line)
{
list($name, $pattern) = $Line['text'][0] <= '-' ? array('ul', '[*+-]') : array('ol', '[0-9]+[.]');
if (preg_match('/^('.$pattern.'[ ]+)(.*)/', $Line['text'], $matches))
{
$Block = array(
'indent' => $Line['indent'],
'pattern' => $pattern,
'element' => array(
'name' => $name,
'handler' => 'elements',
),
);
if($name === 'ol')
{
$listStart = stristr($matches[0], '.', true);
if($listStart !== '1')
{
$Block['element']['attributes'] = array('start' => $listStart);
}
}
$Block['li'] = array(
'name' => 'li',
'handler' => 'li',
'text' => array(
$matches[2],
),
);
$Block['element']['text'] []= & $Block['li'];
return $Block;
}
}
protected function blockListContinue($Line, array $Block)
{
if ($Block['indent'] === $Line['indent'] and preg_match('/^'.$Block['pattern'].'(?:[ ]+(.*)|$)/', $Line['text'], $matches))
{
if (isset($Block['interrupted']))
{
$Block['li']['text'] []= '';
unset($Block['interrupted']);
}
unset($Block['li']);
$text = isset($matches[1]) ? $matches[1] : '';
$Block['li'] = array(
'name' => 'li',
'handler' => 'li',
'text' => array(
$text,
),
);
$Block['element']['text'] []= & $Block['li'];
return $Block;
}
if ($Line['text'][0] === '[' and $this->blockReference($Line))
{
return $Block;
}
if ( ! isset($Block['interrupted']))
{
$text = preg_replace('/^[ ]{0,4}/', '', $Line['body']);
$Block['li']['text'] []= $text;
return $Block;
}
if ($Line['indent'] > 0)
{
$Block['li']['text'] []= '';
$text = preg_replace('/^[ ]{0,4}/', '', $Line['body']);
$Block['li']['text'] []= $text;
unset($Block['interrupted']);
return $Block;
}
}
#
# Quote
protected function blockQuote($Line)
{
if (preg_match('/^>[ ]?(.*)/', $Line['text'], $matches))
{
$Block = array(
'element' => array(
'name' => 'blockquote',
'handler' => 'lines',
'text' => (array) $matches[1],
),
);
return $Block;
}
}
protected function blockQuoteContinue($Line, array $Block)
{
if ($Line['text'][0] === '>' and preg_match('/^>[ ]?(.*)/', $Line['text'], $matches))
{
if (isset($Block['interrupted']))
{
$Block['element']['text'] []= '';
unset($Block['interrupted']);
}
$Block['element']['text'] []= $matches[1];
return $Block;
}
if ( ! isset($Block['interrupted']))
{
$Block['element']['text'] []= $Line['text'];
return $Block;
}
}
#
# Rule
protected function blockRule($Line)
{
if (preg_match('/^(['.$Line['text'][0].'])([ ]*\1){2,}[ ]*$/', $Line['text']))
{
$Block = array(
'element' => array(
'name' => 'hr'
),
);
return $Block;
}
}
#
# Setext
protected function blockSetextHeader($Line, array $Block = null)
{
if ( ! isset($Block) or isset($Block['type']) or isset($Block['interrupted']))
{
return;
}
if (chop($Line['text'], $Line['text'][0]) === '')
{
$Block['element']['name'] = $Line['text'][0] === '=' ? 'h1' : 'h2';
return $Block;
}
}
#
# Markup
protected function blockMarkup($Line)
{
if ($this->markupEscaped)
{
return;
}
if (preg_match('/^<(\w*)(?:[ ]*'.$this->regexHtmlAttribute.')*[ ]*(\/)?>/', $Line['text'], $matches))
{
$element = strtolower($matches[1]);
if (in_array($element, $this->textLevelElements))
{
return;
}
$Block = array(
'name' => $matches[1],
'depth' => 0,
'markup' => $Line['text'],
);
$length = strlen($matches[0]);
$remainder = substr($Line['text'], $length);
if (trim($remainder) === '')
{
if (isset($matches[2]) or in_array($matches[1], $this->voidElements))
{
$Block['closed'] = true;
$Block['void'] = true;
}
}
else
{
if (isset($matches[2]) or in_array($matches[1], $this->voidElements))
{
return;
}
if (preg_match('/<\/'.$matches[1].'>[ ]*$/i', $remainder))
{
$Block['closed'] = true;
}
}
return $Block;
}
}
protected function blockMarkupContinue($Line, array $Block)
{
if (isset($Block['closed']))
{
return;
}
if (preg_match('/^<'.$Block['name'].'(?:[ ]*'.$this->regexHtmlAttribute.')*[ ]*>/i', $Line['text'])) # open
{
$Block['depth'] ++;
}
if (preg_match('/(.*?)<\/'.$Block['name'].'>[ ]*$/i', $Line['text'], $matches)) # close
{
if ($Block['depth'] > 0)
{
$Block['depth'] --;
}
else
{
$Block['closed'] = true;
}
}
if (isset($Block['interrupted']))
{
$Block['markup'] .= "\n";
unset($Block['interrupted']);
}
$Block['markup'] .= "\n".$Line['body'];
return $Block;
}
#
# Reference
protected function blockReference($Line)
{
if (preg_match('/^\[(.+?)\]:[ ]*<?(\S+?)>?(?:[ ]+["\'(](.+)["\')])?[ ]*$/', $Line['text'], $matches))
{
$id = strtolower($matches[1]);
$Data = array(
'url' => $matches[2],
'title' => null,
);
if (isset($matches[3]))
{
$Data['title'] = $matches[3];
}
$this->DefinitionData['Reference'][$id] = $Data;
$Block = array(
'hidden' => true,
);
return $Block;
}
}
#
# Table
protected function blockTable($Line, array $Block = null)
{
if ( ! isset($Block) or isset($Block['type']) or isset($Block['interrupted']))
{
return;
}
if (strpos($Block['element']['text'], '|') !== false and chop($Line['text'], ' -:|') === '')
{
$alignments = array();
$divider = $Line['text'];
$divider = trim($divider);
$divider = trim($divider, '|');
$dividerCells = explode('|', $divider);
foreach ($dividerCells as $dividerCell)
{
$dividerCell = trim($dividerCell);
if ($dividerCell === '')
{
continue;
}
$alignment = null;
if ($dividerCell[0] === ':')
{
$alignment = 'left';
}
if (substr($dividerCell, - 1) === ':')
{
$alignment = $alignment === 'left' ? 'center' : 'right';
}
$alignments []= $alignment;
}
# ~
$HeaderElements = array();
$header = $Block['element']['text'];
$header = trim($header);
$header = trim($header, '|');
$headerCells = explode('|', $header);
foreach ($headerCells as $index => $headerCell)
{
$headerCell = trim($headerCell);
$HeaderElement = array(
'name' => 'th',
'text' => $headerCell,
'handler' => 'line',
);
if (isset($alignments[$index]))
{
$alignment = $alignments[$index];
$HeaderElement['attributes'] = array(
'style' => 'text-align: '.$alignment.';',
);
}
$HeaderElements []= $HeaderElement;
}
# ~
$Block = array(
'alignments' => $alignments,
'identified' => true,
'element' => array(
'name' => 'table',
'handler' => 'elements',
),
);
$Block['element']['text'] []= array(
'name' => 'thead',
'handler' => 'elements',
);
$Block['element']['text'] []= array(
'name' => 'tbody',
'handler' => 'elements',
'text' => array(),
);
$Block['element']['text'][0]['text'] []= array(
'name' => 'tr',
'handler' => 'elements',
'text' => $HeaderElements,
);
return $Block;
}
}
protected function blockTableContinue($Line, array $Block)
{
if (isset($Block['interrupted']))
{
return;
}
if ($Line['text'][0] === '|' or strpos($Line['text'], '|'))
{
$Elements = array();
$row = $Line['text'];
$row = trim($row);
$row = trim($row, '|');
preg_match_all('/(?:(\\\\[|])|[^|`]|`[^`]+`|`)+/', $row, $matches);
foreach ($matches[0] as $index => $cell)
{
$cell = trim($cell);
$Element = array(
'name' => 'td',
'handler' => 'line',
'text' => $cell,
);
if (isset($Block['alignments'][$index]))
{
$Element['attributes'] = array(
'style' => 'text-align: '.$Block['alignments'][$index].';',
);
}
$Elements []= $Element;
}
$Element = array(
'name' => 'tr',
'handler' => 'elements',
'text' => $Elements,
);
$Block['element']['text'][1]['text'] []= $Element;
return $Block;
}
}
#
# ~
#
protected function paragraph($Line)
{
$Block = array(
'element' => array(
'name' => 'p',
'text' => $Line['text'],
'handler' => 'line',
),
);
return $Block;
}
#
# Inline Elements
#
protected $InlineTypes = array(
'"' => array('SpecialCharacter'),
'!' => array('Image'),
'&' => array('SpecialCharacter'),
'*' => array('Emphasis'),
':' => array('Url'),
'<' => array('UrlTag', 'EmailTag', 'Markup', 'SpecialCharacter'),
'>' => array('SpecialCharacter'),
'[' => array('Link'),
'_' => array('Emphasis'),
'`' => array('Code'),
'~' => array('Strikethrough'),
'\\' => array('EscapeSequence'),
);
# ~
protected $inlineMarkerList = '!"*_&[:<>`~\\';
#
# ~
#
public function line($text)
{
$markup = '';
# $excerpt is based on the first occurrence of a marker
while ($excerpt = strpbrk($text, $this->inlineMarkerList))
{
$marker = $excerpt[0];
$markerPosition = strpos($text, $marker);
$Excerpt = array('text' => $excerpt, 'context' => $text);
foreach ($this->InlineTypes[$marker] as $inlineType)
{
$Inline = $this->{'inline'.$inlineType}($Excerpt);
if ( ! isset($Inline))
{
continue;
}
# makes sure that the inline belongs to "our" marker
if (isset($Inline['position']) and $Inline['position'] > $markerPosition)
{
continue;
}
# sets a default inline position
if ( ! isset($Inline['position']))
{
$Inline['position'] = $markerPosition;
}
# the text that comes before the inline
$unmarkedText = substr($text, 0, $Inline['position']);
# compile the unmarked text
$markup .= $this->unmarkedText($unmarkedText);
# compile the inline
$markup .= isset($Inline['markup']) ? $Inline['markup'] : $this->element($Inline['element']);
# remove the examined text
$text = substr($text, $Inline['position'] + $Inline['extent']);
continue 2;
}
# the marker does not belong to an inline
$unmarkedText = substr($text, 0, $markerPosition + 1);
$markup .= $this->unmarkedText($unmarkedText);
$text = substr($text, $markerPosition + 1);
}
$markup .= $this->unmarkedText($text);
return $markup;
}
#
# ~
#
protected function inlineCode($Excerpt)
{
$marker = $Excerpt['text'][0];
if (preg_match('/^('.$marker.'+)[ ]*(.+?)[ ]*(?<!'.$marker.')\1(?!'.$marker.')/s', $Excerpt['text'], $matches))
{
$text = $matches[2];
$text = preg_replace("/[ ]*\n/", ' ', $text);
return array(
'extent' => strlen($matches[0]),
'element' => array(
'name' => 'code',
'text' => $text,
),
);
}
}
protected function inlineEmailTag($Excerpt)
{
if (strpos($Excerpt['text'], '>') !== false and preg_match('/^<((mailto:)?\S+?@\S+?)>/i', $Excerpt['text'], $matches))
{
$url = $matches[1];
if ( ! isset($matches[2]))
{
$url = 'mailto:' . $url;
}
return array(
'extent' => strlen($matches[0]),
'element' => array(
'name' => 'a',
'text' => $matches[1],
'attributes' => array(
'href' => $url,
),
),
);
}
}
protected function inlineEmphasis($Excerpt)
{
if ( ! isset($Excerpt['text'][1]))
{
return;
}
$marker = $Excerpt['text'][0];
if ($Excerpt['text'][1] === $marker and preg_match($this->StrongRegex[$marker], $Excerpt['text'], $matches))
{
$emphasis = 'strong';
}
elseif (preg_match($this->EmRegex[$marker], $Excerpt['text'], $matches))
{
$emphasis = 'em';
}
else
{
return;
}
return array(
'extent' => strlen($matches[0]),
'element' => array(
'name' => $emphasis,
'handler' => 'line',
'text' => $matches[1],
),
);
}
protected function inlineEscapeSequence($Excerpt)
{
if (isset($Excerpt['text'][1]) and in_array($Excerpt['text'][1], $this->specialCharacters))
{
return array(
'markup' => $Excerpt['text'][1],
'extent' => 2,
);
}
}
protected function inlineImage($Excerpt)
{
if ( ! isset($Excerpt['text'][1]) or $Excerpt['text'][1] !== '[')
{
return;
}
$Excerpt['text']= substr($Excerpt['text'], 1);
$Link = $this->inlineLink($Excerpt);
if ($Link === null)
{
return;
}
$Inline = array(
'extent' => $Link['extent'] + 1,
'element' => array(
'name' => 'img',
'attributes' => array(
'src' => $Link['element']['attributes']['href'],
'alt' => $Link['element']['text'],
),
),
);
$Inline['element']['attributes'] += $Link['element']['attributes'];
unset($Inline['element']['attributes']['href']);
return $Inline;
}
protected function inlineLink($Excerpt)
{
$Element = array(
'name' => 'a',
'handler' => 'line',
'text' => null,
'attributes' => array(
'href' => null,
'title' => null,
),
);
$extent = 0;
$remainder = $Excerpt['text'];
if (preg_match('/\[((?:[^][]++|(?R))*+)\]/', $remainder, $matches))
{
$Element['text'] = $matches[1];
$extent += strlen($matches[0]);
$remainder = substr($remainder, $extent);
}
else
{
return;
}
if (preg_match('/^[(]\s*+((?:[^ ()]++|[(][^ )]+[)])++)(?:[ ]+("[^"]*"|\'[^\']*\'))?\s*[)]/', $remainder, $matches))
{
$Element['attributes']['href'] = $matches[1];
if (isset($matches[2]))
{
$Element['attributes']['title'] = substr($matches[2], 1, - 1);
}
$extent += strlen($matches[0]);
}
else
{
if (preg_match('/^\s*\[(.*?)\]/', $remainder, $matches))
{
$definition = strlen($matches[1]) ? $matches[1] : $Element['text'];
$definition = strtolower($definition);
$extent += strlen($matches[0]);
}
else
{
$definition = strtolower($Element['text']);
}
if ( ! isset($this->DefinitionData['Reference'][$definition]))
{
return;
}
$Definition = $this->DefinitionData['Reference'][$definition];
$Element['attributes']['href'] = $Definition['url'];
$Element['attributes']['title'] = $Definition['title'];
}
return array(
'extent' => $extent,
'element' => $Element,
);
}
protected function inlineMarkup($Excerpt)
{
if ($this->markupEscaped or strpos($Excerpt['text'], '>') === false)
{
return;
}
if ($Excerpt['text'][1] === '/' and preg_match('/^<\/\w*[ ]*>/s', $Excerpt['text'], $matches))
{
return array(
'markup' => $matches[0],
'extent' => strlen($matches[0]),
);
}
if ($Excerpt['text'][1] === '!' and preg_match('/^<!---?[^>-](?:-?[^-])*-->/s', $Excerpt['text'], $matches))
{
return array(
'markup' => $matches[0],
'extent' => strlen($matches[0]),
);
}
if ($Excerpt['text'][1] !== ' ' and preg_match('/^<\w*(?:[ ]*'.$this->regexHtmlAttribute.')*[ ]*\/?>/s', $Excerpt['text'], $matches))
{
return array(
'markup' => $matches[0],
'extent' => strlen($matches[0]),
);
}
}
protected function inlineSpecialCharacter($Excerpt)
{
if ($Excerpt['text'][0] === '&' and ! preg_match('/^&#?\w+;/', $Excerpt['text']))
{
return array(
'markup' => '&amp;',
'extent' => 1,
);
}
$SpecialCharacter = array('>' => 'gt', '<' => 'lt', '"' => 'quot');
if (isset($SpecialCharacter[$Excerpt['text'][0]]))
{
return array(
'markup' => '&'.$SpecialCharacter[$Excerpt['text'][0]].';',
'extent' => 1,
);
}
}
protected function inlineStrikethrough($Excerpt)
{
if ( ! isset($Excerpt['text'][1]))
{
return;
}
if ($Excerpt['text'][1] === '~' and preg_match('/^~~(?=\S)(.+?)(?<=\S)~~/', $Excerpt['text'], $matches))
{
return array(
'extent' => strlen($matches[0]),
'element' => array(
'name' => 'del',
'text' => $matches[1],
'handler' => 'line',
),
);
}
}
protected function inlineUrl($Excerpt)
{
if ($this->urlsLinked !== true or ! isset($Excerpt['text'][2]) or $Excerpt['text'][2] !== '/')
{
return;
}
if (preg_match('/\bhttps?:[\/]{2}[^\s<]+\b\/*/ui', $Excerpt['context'], $matches, PREG_OFFSET_CAPTURE))
{
$url = $matches[0][0];
$Inline = array(
'extent' => strlen($matches[0][0]),
'position' => $matches[0][1],
'element' => array(
'name' => 'a',
'text' => $url,
'attributes' => array(
'href' => $url,
),
),
);
return $Inline;
}
}
protected function inlineUrlTag($Excerpt)
{
if (strpos($Excerpt['text'], '>') !== false and preg_match('/^<(\w+:\/{2}[^ >]+)>/i', $Excerpt['text'], $matches))
{
$url = $matches[1];
return array(
'extent' => strlen($matches[0]),
'element' => array(
'name' => 'a',
'text' => $url,
'attributes' => array(
'href' => $url,
),
),
);
}
}
# ~
protected function unmarkedText($text)
{
if ($this->breaksEnabled)
{
$text = preg_replace('/[ ]*\n/', "<br />\n", $text);
}
else
{
$text = preg_replace('/(?:[ ][ ]+|[ ]*\\\\)\n/', "<br />\n", $text);
$text = str_replace(" \n", "\n", $text);
}
return $text;
}
#
# Handlers
#
protected function element(array $Element)
{
$Element = $this->sanitiseElement($Element);
$markup = '<'.$Element['name'];
if (isset($Element['attributes']))
{
foreach ($Element['attributes'] as $name => $value)
{
if ($value === null)
{
continue;
}
$markup .= ' '.$name.'="'.self::escape($value).'"';
}
}
if (isset($Element['text']))
{
$markup .= '>';
if (isset($Element['handler']))
{
$markup .= $this->{$Element['handler']}($Element['text']);
}
else
{
$markup .= self::escape($Element['text'], true);
}
$markup .= '</'.$Element['name'].'>';
}
else
{
$markup .= ' />';
}
return $markup;
}
protected function elements(array $Elements)
{
$markup = '';
foreach ($Elements as $Element)
{
$markup .= "\n" . $this->element($Element);
}
$markup .= "\n";
return $markup;
}
# ~
protected function li($lines)
{
$markup = $this->lines($lines);
$trimmedMarkup = trim($markup);
if ( ! in_array('', $lines) and substr($trimmedMarkup, 0, 3) === '<p>')
{
$markup = $trimmedMarkup;
$markup = substr($markup, 3);
$position = strpos($markup, "</p>");
$markup = substr_replace($markup, '', $position, 4);
}
return $markup;
}
#
# Deprecated Methods
#
function parse($text)
{
$markup = $this->text($text);
return $markup;
}
protected function sanitiseElement(array $Element)
{
static $goodAttribute = '/^[a-zA-Z0-9][a-zA-Z0-9-_]*+$/';
static $safeUrlNameToAtt = array(
'a' => 'href',
'img' => 'src',
);
if (isset($safeUrlNameToAtt[$Element['name']]))
{
$Element = $this->filterUnsafeUrlInAttribute($Element, $safeUrlNameToAtt[$Element['name']]);
}
if ( ! empty($Element['attributes']))
{
foreach ($Element['attributes'] as $att => $val)
{
# filter out badly parsed attribute
if ( ! preg_match($goodAttribute, $att))
{
unset($Element['attributes'][$att]);
}
# dump onevent attribute
elseif (preg_match('/^on/i', $att))
{
unset($Element['attributes'][$att]);
}
}
}
return $Element;
}
protected function filterUnsafeUrlInAttribute(array $Element, $attribute)
{
if ($this->safeLinksEnabled)
{
$safe = false;
foreach ($this->safeLinksWhitelist as $scheme)
{
if (stripos($Element['attributes'][$attribute], $scheme) === 0)
{
$safe = true;
break;
}
}
if ( ! $safe)
{
$Element['attributes'][$attribute] = preg_replace_callback(
'/[^\/#?&=%]++/',
function (array $match)
{
return urlencode($match[0]);
},
$Element['attributes'][$attribute]
);
}
}
return $Element;
}
#
# Static Methods
#
protected static function escape($text, $allowQuotes = false)
{
return htmlspecialchars($text, $allowQuotes ? ENT_NOQUOTES : ENT_QUOTES, 'UTF-8');
}
static function instance($name = 'default')
{
if (isset(self::$instances[$name]))
{
return self::$instances[$name];
}
$instance = new static();
self::$instances[$name] = $instance;
return $instance;
}
private static $instances = array();
#
# Fields
#
protected $DefinitionData;
#
# Read-Only
protected $specialCharacters = array(
'\\', '`', '*', '_', '{', '}', '[', ']', '(', ')', '>', '#', '+', '-', '.', '!', '|',
);
protected $StrongRegex = array(
'*' => '/^[*]{2}((?:\\\\\*|[^*]|[*][^*]*[*])+?)[*]{2}(?![*])/s',
'_' => '/^__((?:\\\\_|[^_]|_[^_]*_)+?)__(?!_)/us',
);
protected $EmRegex = array(
'*' => '/^[*]((?:\\\\\*|[^*]|[*][*][^*]+?[*][*])+?)[*](?![*])/s',
'_' => '/^_((?:\\\\_|[^_]|__[^_]*__)+?)_(?!_)\b/us',
);
protected $regexHtmlAttribute = '[a-zA-Z_:][\w:.-]*(?:\s*=\s*(?:[^"\'=<>`\s]+|"[^"]*"|\'[^\']*\'))?';
protected $voidElements = array(
'area', 'base', 'br', 'col', 'command', 'embed', 'hr', 'img', 'input', 'link', 'meta', 'param', 'source',
);
protected $textLevelElements = array(
'a', 'br', 'bdo', 'abbr', 'blink', 'nextid', 'acronym', 'basefont',
'b', 'em', 'big', 'cite', 'small', 'spacer', 'listing',
'i', 'rp', 'del', 'code', 'strike', 'marquee',
'q', 'rt', 'ins', 'font', 'strong',
's', 'tt', 'kbd', 'mark',
'u', 'xm', 'sub', 'nobr',
'sup', 'ruby',
'var', 'span',
'wbr', 'time',
);
}