1
0
mirror of https://github.com/erusev/parsedown.git synced 2023-08-10 21:13:06 +03:00

Rewrite to use new internals

This commit is contained in:
Aidan Woods 2019-01-20 02:44:34 +00:00
parent 36cfb21908
commit e6e24a8d0d
No known key found for this signature in database
GPG Key ID: 9A6A8EFAA512BBB9

View File

@ -46,12 +46,24 @@ final class Parsedown
# ~ # ~
/** @var State */
private $State;
/**
 * Create a parser bound to a State.
 *
 * @param State|null $State state to use; a fresh State is created when
 *                          none is supplied
 */
public function __construct(State $State = null)
{
    # fall back to a default State when the caller did not provide one
    if (! isset($State)) {
        $State = new State;
    }

    $this->State = $State;
}
/**
* @param string $text
* @return string
*/
public function text($text) public function text($text)
{ {
$Elements = $this->textElements($text); $StateRenderables = $this->textElements($text);
# convert to markup # convert to markup
$markup = $this->elements($Elements); $markup = $this->elements($this->State, $StateRenderables);
# trim line breaks # trim line breaks
$markup = \trim($markup, "\n"); $markup = \trim($markup, "\n");
@ -59,83 +71,93 @@ final class Parsedown
return $markup; return $markup;
} }
/**
* @param string $text
* @return StateRenderable[]
*/
protected function textElements($text) protected function textElements($text)
{ {
# iterate through lines to identify blocks # iterate through lines to identify blocks
return $this->linesElements(Lines::fromTextLines($text, 0)); return $this->lines(Lines::fromTextLines($text, 0));
} }
# #
# Lines # Lines
# #
/** @var array<array-key, class-string<Block>[]> */
protected $BlockTypes = [ protected $BlockTypes = [
'#' => ['Header'], '#' => [Header::class],
'*' => ['Rule', 'List'], '*' => [Rule::class, TList::class],
'+' => ['List'], '+' => [TList::class],
'-' => ['SetextHeader', 'Table', 'Rule', 'List'], '-' => [SetextHeader::class, Table::class, Rule::class, TList::class],
'0' => ['List'], '0' => [TList::class],
'1' => ['List'], '1' => [TList::class],
'2' => ['List'], '2' => [TList::class],
'3' => ['List'], '3' => [TList::class],
'4' => ['List'], '4' => [TList::class],
'5' => ['List'], '5' => [TList::class],
'6' => ['List'], '6' => [TList::class],
'7' => ['List'], '7' => [TList::class],
'8' => ['List'], '8' => [TList::class],
'9' => ['List'], '9' => [TList::class],
':' => ['Table'], ':' => [Table::class],
'<' => ['Comment', 'Markup'], '<' => [Comment::class, BlockMarkup::class],
'=' => ['SetextHeader'], '=' => [SetextHeader::class],
'>' => ['Quote'], '>' => [BlockQuote::class],
'[' => ['Reference'], '[' => [Reference::class],
'_' => ['Rule'], '_' => [Rule::class],
'`' => ['FencedCode'], '`' => [FencedCode::class],
'|' => ['Table'], '|' => [Table::class],
'~' => ['FencedCode'], '~' => [FencedCode::class],
]; ];
# ~ # ~
/** @var class-string<Block>[] */
protected $unmarkedBlockTypes = [ protected $unmarkedBlockTypes = [
'Code', IndentedCode::class,
]; ];
# #
# Blocks # Blocks
# #
protected function lines(Lines $Lines) /**
* @param int $indentOffset
* @return StateRenderable[]
*/
public function lines(Lines $Lines, $indentOffset = 0)
{ {
return $this->elements($this->linesElements($Lines)); /** @var StateRenderable[] */
} $StateRenderables = [];
/** @var Block|null */
/** @param int $indentOffset */ $Block = null;
protected function linesElements(Lines $Lines, array $nonNestables = [], $indentOffset = 0) /** @var Block|null */
{
$Elements = [];
$CurrentBlock = null; $CurrentBlock = null;
foreach ($Lines->contexts() as $Context) { foreach ($Lines->contexts() as $Context) {
if (isset($CurrentBlock) && $Context->previousEmptyLines() > 0) { if (
$CurrentBlock['interrupted'] = $Context->previousEmptyLines(); isset($CurrentBlock)
&& $CurrentBlock instanceof ContinuableBlock
&& $Context->previousEmptyLines() > 0
) {
$CurrentBlock = $CurrentBlock->interrupted(true);
} }
$Line = $Context->line(); $Line = $Context->line();
if (isset($CurrentBlock['continuable'])) { if (
$methodName = 'block' . $CurrentBlock['type'] . 'Continue'; isset($CurrentBlock)
$Block = $this->$methodName($Context, $CurrentBlock); && $CurrentBlock instanceof ContinuableBlock
&& ! $CurrentBlock instanceof Paragraph
) {
$Block = $CurrentBlock->continue($Context);
if (isset($Block)) { if (isset($Block)) {
$CurrentBlock = $Block; $CurrentBlock = $Block;
continue; continue;
} else {
if ($this->isBlockCompletable($CurrentBlock['type'])) {
$methodName = 'block' . $CurrentBlock['type'] . 'Complete';
$CurrentBlock = $this->$methodName($CurrentBlock);
}
} }
} }
@ -157,21 +179,17 @@ final class Parsedown
# ~ # ~
foreach ($blockTypes as $blockType) { foreach ($blockTypes as $blockType) {
$Block = $this->{"block$blockType"}($Context, $CurrentBlock); $Block = $blockType::build($Context, $CurrentBlock, $this->State);
if (isset($Block)) { if (isset($Block)) {
$Block['type'] = $blockType; if ($Block instanceof StateUpdatingBlock) {
$this->State = $this->State->mergingWith(
if (! isset($Block['identified'])) { $Block->latestState()
if (isset($CurrentBlock)) { );
$Elements[] = $this->extractElement($CurrentBlock);
}
$Block['identified'] = true;
} }
if ($this->isBlockContinuable($blockType)) { if (isset($CurrentBlock) && ! $Block->acquiredPrevious()) {
$Block['continuable'] = true; $StateRenderables[] = $CurrentBlock->stateRenderable($this);
} }
$CurrentBlock = $Block; $CurrentBlock = $Block;
@ -182,331 +200,171 @@ final class Parsedown
# ~ # ~
if (isset($CurrentBlock) and $CurrentBlock['type'] === 'Paragraph') { if (isset($CurrentBlock) and $CurrentBlock instanceof Paragraph) {
$Block = $this->paragraphContinue($Context, $CurrentBlock); $Block = $CurrentBlock->continue($Context);
} }
if (isset($Block)) { if (isset($Block)) {
$CurrentBlock = $Block; $CurrentBlock = $Block;
} else { } else {
if (isset($CurrentBlock)) { if (isset($CurrentBlock)) {
$Elements[] = $this->extractElement($CurrentBlock); $StateRenderables[] = $CurrentBlock->stateRenderable($this);
} }
$CurrentBlock = $this->paragraph($Context); $CurrentBlock = Paragraph::build($Context);
$CurrentBlock['identified'] = true;
} }
} }
# ~ # ~
if (isset($CurrentBlock['continuable']) and $this->isBlockCompletable($CurrentBlock['type'])) {
$methodName = 'block' . $CurrentBlock['type'] . 'Complete';
$CurrentBlock = $this->$methodName($CurrentBlock);
}
# ~
if (isset($CurrentBlock)) { if (isset($CurrentBlock)) {
$Elements[] = $this->extractElement($CurrentBlock); $StateRenderables[] = $CurrentBlock->stateRenderable($this);
} }
# ~ # ~
return $Elements; return $StateRenderables;
}
/**
 * Pull the element out of a parsed component.
 *
 * Resolution order:
 *  1. an already present 'element' entry is returned as is;
 *  2. otherwise a 'markup' entry is wrapped as ['rawHtml' => markup];
 *  3. otherwise a 'hidden' entry yields an empty element ([]).
 *
 * @param array $Component parsed component
 * @return array|null the element; when none of the three keys is set the
 *                    (unset) 'element' entry is read and null comes back
 */
protected function extractElement(array $Component)
{
    if (isset($Component['element'])) {
        return $Component['element'];
    }

    if (isset($Component['markup'])) {
        return ['rawHtml' => $Component['markup']];
    }

    if (isset($Component['hidden'])) {
        return [];
    }

    # nothing usable was found: read the entry anyway so the outcome matches
    # a plain lookup of a missing/null 'element'
    return $Component['element'];
}
/**
 * Tell whether a block type has a continuation handler.
 *
 * @param string $Type block type name; the handler looked up is the method
 *                     named "block" + $Type + "Continue" on this object
 * @return bool true when that method exists
 */
protected function isBlockContinuable($Type)
{
    $handlerName = "block{$Type}Continue";

    return \method_exists($this, $handlerName);
}
/**
 * Tell whether a block type has a completion handler.
 *
 * The handler looked up is the method named 'block' . $Type . 'Complete'
 * on this object.
 *
 * @param string $Type block type name used to build the handler method name
 * @return bool true when that method exists
 */
protected function isBlockCompletable($Type)
{
return \method_exists($this, 'block' . $Type . 'Complete');
} }
# #
# Inline Elements # Inline Elements
# #
/** @var array<array-key, class-string<Inline>[]> */
protected $InlineTypes = [ protected $InlineTypes = [
'!' => ['Image'], '!' => [Image::class],
'&' => ['SpecialCharacter'], '&' => [SpecialCharacter::class],
'*' => ['Emphasis'], '*' => [Emphasis::class],
':' => ['Url'], ':' => [Url::class],
'<' => ['UrlTag', 'EmailTag', 'Markup'], '<' => [UrlTag::class, Email::class, InlineMarkup::class],
'[' => ['Link'], '[' => [Link::class],
'_' => ['Emphasis'], '_' => [Emphasis::class],
'`' => ['Code'], '`' => [Code::class],
'~' => ['Strikethrough'], '~' => [Strikethrough::class],
'\\' => ['EscapeSequence'], '\\' => [EscapeSequence::class],
]; ];
# ~ # ~
/** @var string */
protected $inlineMarkerList = '!*_&[:<`~\\'; protected $inlineMarkerList = '!*_&[:<`~\\';
# #
# ~ # ~
# #
public function line($text, $nonNestables = []) /**
* @param string $text
* @return string
*/
public function line($text)
{ {
return $this->elements($this->lineElements($text, $nonNestables)); return $this->elements($this->State, $this->lineElements($text));
} }
protected function lineElements($text, $nonNestables = []) /**
* @param string $text
* @return StateRenderable[]
*/
public function lineElements($text)
{ {
# standardize line breaks # standardize line breaks
$text = \str_replace(["\r\n", "\r"], "\n", $text); $text = \str_replace(["\r\n", "\r"], "\n", $text);
$Elements = []; /** @var StateRenderable[] */
$StateRenderables = [];
$nonNestables = (
empty($nonNestables)
? []
: \array_combine($nonNestables, $nonNestables)
);
# $excerpt is based on the first occurrence of a marker # $excerpt is based on the first occurrence of a marker
while ($excerpt = \strpbrk($text, $this->inlineMarkerList)) { for (
$marker = $excerpt[0]; $Excerpt = (new Excerpt($text, 0))->pushingOffsetTo($this->inlineMarkerList);
$Excerpt->text() !== '';
$markerPosition = \strlen($text) - \strlen($excerpt); $Excerpt = $Excerpt->pushingOffsetTo($this->inlineMarkerList)
) {
$Excerpt = ['text' => $excerpt, 'context' => $text]; $text = $Excerpt->text();
$marker = $text[0];
foreach ($this->InlineTypes[$marker] as $inlineType) { foreach ($this->InlineTypes[$marker] as $inlineType) {
# check to see if the current inline type is nestable in the current context # check to see if the current inline type is nestable in the current context
if (isset($nonNestables[$inlineType])) { $Inline = $inlineType::build($Excerpt, $this->State);
continue;
}
$Inline = $this->{"inline$inlineType"}($Excerpt);
if (! isset($Inline)) { if (! isset($Inline)) {
continue; continue;
} }
$startPosition = $Inline->modifyStartPositionTo();
if (! isset($startPosition)) {
$startPosition = $Excerpt->offset();
}
# makes sure that the inline belongs to "our" marker # makes sure that the inline belongs to "our" marker
if (isset($Inline['position']) and $Inline['position'] > $markerPosition) { if ($startPosition > $Excerpt->offset()) {
continue; continue;
} }
# sets a default inline position # the text that comes before the inline
# compile the unmarked text
if (! isset($Inline['position'])) { $StateRenderables[] = Plaintext::build($Excerpt->choppingUpToOffset($startPosition))
$Inline['position'] = $markerPosition; ->stateRenderable($this)
}
# cause the new element to 'inherit' our non nestables
$Inline['element']['nonNestables'] = isset($Inline['element']['nonNestables'])
? \array_merge($Inline['element']['nonNestables'], $nonNestables)
: $nonNestables
; ;
# the text that comes before the inline
$unmarkedText = \substr($text, 0, $Inline['position']);
# compile the unmarked text
$InlineText = $this->inlineText($unmarkedText);
$Elements[] = $InlineText['element'];
# compile the inline # compile the inline
$Elements[] = $this->extractElement($Inline); $StateRenderables[] = $Inline->stateRenderable($this);
# remove the examined text # remove the examined text
$text = \substr($text, $Inline['position'] + $Inline['extent']); /** @psalm-suppress LoopInvalidation */
$Excerpt = $Excerpt->choppingFromOffset($startPosition + $Inline->width());
continue 2; continue 2;
} }
# the marker does not belong to an inline if (! isset($startPosition)) {
$startPosition = $Excerpt->offset();
$unmarkedText = \substr($text, 0, $markerPosition + 1);
$InlineText = $this->inlineText($unmarkedText);
$Elements[] = $InlineText['element'];
$text = \substr($text, $markerPosition + 1);
}
$InlineText = $this->inlineText($text);
$Elements[] = $InlineText['element'];
foreach ($Elements as &$Element) {
if (! isset($Element['autobreak'])) {
$Element['autobreak'] = false;
} }
# the marker does not belong to an inline # the marker does not belong to an inline
$autoBreakNext = ( $StateRenderables[] = Plaintext::build($Excerpt->choppingUpToOffset($startPosition + 1))
isset($Element['autobreak']) ->stateRenderable($this)
? $Element['autobreak'] : isset($Element['name']) ;
);
// (autobreak === false) covers both sides of an element
$autoBreak = !$autoBreak ? $autoBreak : $autoBreakNext;
$markup .= ($autoBreak ? "\n" : '') . $this->element($Element); $text = \substr($Excerpt->text(), $startPosition + 1);
$autoBreak = $autoBreakNext; /** @psalm-suppress LoopInvalidation */
$Excerpt = $Excerpt->choppingFromOffset($startPosition + 1);
} }
$markup .= $autoBreak ? "\n" : ''; $StateRenderables[] = Plaintext::build($Excerpt->choppingFromOffset(0))
->stateRenderable($this)
;
return $markup; return $StateRenderables;
}
# ~
/**
 * Parse the given lines into elements, unwrapping a leading paragraph when
 * the lines contain no blank lines.
 *
 * NOTE(review): presumably used for list item content (going by the name
 * `li`) — confirm against the caller.
 *
 * @param Lines $Lines lines to parse
 * @return array elements produced by linesElements(), with the 'name' key
 *               removed from the first element when it was 'p' and $Lines
 *               has no blank lines
 */
protected function li(Lines $Lines)
{
$Elements = $this->linesElements($Lines);
# only when there are no blank lines: strip the 'p' name from the first
# element, leaving its remaining keys untouched
if (! $Lines->containsBlankLines()
and isset($Elements[0]) and isset($Elements[0]['name'])
and $Elements[0]['name'] === 'p'
) {
unset($Elements[0]['name']);
}
return $Elements;
} }
/** /**
* Replace occurrences $regexp with $Elements in $text. Return an array of * @param State $State
* elements representing the replacement. * @param StateRenderable[] $StateRenderables
* @return string
*/ */
protected static function pregReplaceElements($regexp, $Elements, $text) protected function elements(State $State, array $StateRenderables)
{ {
$newElements = []; return \array_reduce(
$StateRenderables,
while (\preg_match($regexp, $text, $matches, \PREG_OFFSET_CAPTURE)) { /**
$offset = $matches[0][1]; * @param string $html
$before = \substr($text, 0, $offset); * @return string
$after = \substr($text, $offset + \strlen($matches[0][0])); */
function ($html, StateRenderable $StateRenderable) use ($State) {
$newElements[] = ['text' => $before]; $Renderable = $StateRenderable->renderable($State);
return (
foreach ($Elements as $Element) { $html
$newElements[] = $Element; . ($Renderable instanceof Invisible ? '' : "\n")
} . $Renderable->getHtml()
);
$text = $after; },
} ''
);
$newElements[] = ['text' => $text];
return $newElements;
}
#
# Deprecated Methods
#
/**
 * Alias of text(), listed under the deprecated methods.
 *
 * @param string $text input handed straight to text()
 * @return string whatever text() returns for $text
 */
public function parse($text)
{
    return $this->text($text);
}
/**
 * Strip unsafe parts from an element.
 *
 * - An element without a 'name' loses its 'attributes' entirely.
 * - For 'a' and 'img' elements the 'href' / 'src' attribute is passed
 *   through filterUnsafeUrlInAttribute().
 * - Attributes whose name does not match the allowed pattern, or whose name
 *   starts with "on" (case-insensitively), are removed.
 *
 * @param array $Element element to sanitise
 * @return array the sanitised element
 */
protected function sanitiseElement(array $Element)
{
    static $attributeNamePattern = '/^[a-zA-Z0-9][a-zA-Z0-9-_]*+$/';
    static $urlAttributeByTag = [
        'a' => 'href',
        'img' => 'src',
    ];

    # nameless elements cannot carry attributes
    if (! isset($Element['name'])) {
        unset($Element['attributes']);

        return $Element;
    }

    if (isset($urlAttributeByTag[$Element['name']])) {
        $urlAttribute = $urlAttributeByTag[$Element['name']];
        $Element = $this->filterUnsafeUrlInAttribute($Element, $urlAttribute);
    }

    if (empty($Element['attributes'])) {
        return $Element;
    }

    foreach ($Element['attributes'] as $attributeName => $unusedValue) {
        # drop badly parsed attribute names as well as on* event handlers
        $isWellFormed = (bool) \preg_match($attributeNamePattern, $attributeName);

        if (! $isWellFormed || self::striAtStart($attributeName, 'on')) {
            unset($Element['attributes'][$attributeName]);
        }
    }

    return $Element;
}
/**
 * Neutralise a URL attribute unless it starts with a whitelisted scheme.
 *
 * The attribute value is compared (case-insensitively, at its start) with
 * every entry of $this->safeLinksWhitelist. On the first match the element
 * is returned untouched; with no match every ':' in the value is replaced
 * by '%3A'.
 *
 * @param array  $Element   element holding the attribute under 'attributes'
 * @param string $attribute name of the URL attribute to check
 * @return array the element, with the attribute possibly rewritten
 */
protected function filterUnsafeUrlInAttribute(array $Element, $attribute)
{
    foreach ($this->safeLinksWhitelist as $allowedScheme) {
        $hasAllowedScheme = self::striAtStart($Element['attributes'][$attribute], $allowedScheme);

        if ($hasAllowedScheme) {
            return $Element;
        }
    }

    # no whitelisted scheme matched: encode the colons in the value
    $neutralised = \str_replace(':', '%3A', $Element['attributes'][$attribute]);
    $Element['attributes'][$attribute] = $neutralised;

    return $Element;
}
#
# Static Methods
#
/**
 * HTML-escape text as UTF-8.
 *
 * @param string $text        text to escape
 * @param bool   $allowQuotes when truthy quotes are left as they are
 *                            (ENT_NOQUOTES); otherwise both single and
 *                            double quotes are escaped (ENT_QUOTES)
 * @return string the escaped text
 */
protected static function escape($text, $allowQuotes = false)
{
    $quoteFlag = \ENT_QUOTES;

    if ($allowQuotes) {
        $quoteFlag = \ENT_NOQUOTES;
    }

    return \htmlspecialchars($text, $quoteFlag, 'UTF-8');
}
/**
 * Case-insensitive "starts with" check.
 *
 * @param string $string haystack
 * @param string $needle prefix to look for
 * @return bool true when $string begins with $needle, ignoring case;
 *              false when $needle is longer than $string
 */
protected static function striAtStart($string, $needle)
{
    $needleLength = \strlen($needle);

    # a needle longer than the haystack can never be a prefix
    if ($needleLength > \strlen($string)) {
        return false;
    }

    $prefix = \substr($string, 0, $needleLength);

    return \strtolower($prefix) === \strtolower($needle);
}
/**
 * Return the shared instance registered under $name, creating it on first
 * use.
 *
 * @param string $name key into self::$instances
 * @return static the instance stored under $name
 */
public static function instance($name = 'default')
{
# reuse the instance already stored under this name, if any
if (isset(self::$instances[$name])) {
return self::$instances[$name];
}
# otherwise create one and remember it for later calls
$instance = new static();
self::$instances[$name] = $instance;
return $instance;
} }
} }