1
0
mirror of https://github.com/erusev/parsedown.git synced 2023-08-10 21:13:06 +03:00

Account for type 7 HTML blocks

These differ from type 6 blocks in a few ways (other than the HTML
element names given in the spec).

1. Type 7 blocks cannot interrupt paragraphs
2. Type 7 block must contain ONLY whitespace on the rest of the
   line, whereas type 6 blocks need only be followed by whitespace
   after the tag name, or have the opening tag be complete.
This commit is contained in:
Aidan Woods 2019-07-25 00:22:09 +02:00
parent f4fb5bd943
commit 34902bc80c
No known key found for this signature in database
GPG Key ID: 9A6A8EFAA512BBB9

View File

@ -15,18 +15,81 @@ use Erusev\Parsedown\State;
final class Markup implements ContinuableBlock
{
const REGEX_HTML_ATTRIBUTE = '[a-zA-Z_:][\w:.-]*+(?:\s*+=\s*+(?:[^"\'=<>`\s]+|"[^"]*+"|\'[^\']*+\'))?+';
private const REGEX_HTML_ATTRIBUTE = '[a-zA-Z_:][\w:.-]*+(?:\s*+=\s*+(?:[^"\'=<>`\s]+|"[^"]*+"|\'[^\']*+\'))?+';
/** @var array{2: string, 3: string, 4: string, 5: string} */
private static $simpleContainsEndConditions = [
private const BLOCK_ELEMENTS = [
'address' => true,
'article' => true,
'aside' => true,
'base' => true,
'basefont' => true,
'blockquote' => true,
'body' => true,
'caption' => true,
'center' => true,
'col' => true,
'colgroup' => true,
'dd' => true,
'details' => true,
'dialog' => true,
'dir' => true,
'div' => true,
'dl' => true,
'dt' => true,
'fieldset' => true,
'figcaption' => true,
'figure' => true,
'footer' => true,
'form' => true,
'frame' => true,
'frameset' => true,
'h1' => true,
'h2' => true,
'h3' => true,
'h4' => true,
'h5' => true,
'h6' => true,
'head' => true,
'header' => true,
'hr' => true,
'html' => true,
'iframe' => true,
'legend' => true,
'li' => true,
'link' => true,
'main' => true,
'menu' => true,
'menuitem' => true,
'nav' => true,
'noframes' => true,
'ol' => true,
'optgroup' => true,
'option' => true,
'p' => true,
'param' => true,
'section' => true,
'source' => true,
'summary' => true,
'table' => true,
'tbody' => true,
'td' => true,
'tfoot' => true,
'th' => true,
'thead' => true,
'title' => true,
'tr' => true,
'track' => true,
'ul' => true,
];
private const SIMPLE_CONTAINS_END_CONDITIONS = [
2 => '-->',
3 => '?>',
4 => '>',
5 => ']]>'
5 => ']]>',
];
/** @var array<string, string> */
private static $specialHtmlBlockTags = [
private const SPECIAL_HTML_BLOCK_TAGS = [
'script' => true,
'style' => true,
'pre' => true,
@ -87,17 +150,36 @@ final class Markup implements ContinuableBlock
return new self($rawLine, 5, self::closes12345TypeMarkup(5, $text));
}
if (\preg_match('/^<[\/]?+(\w++)(?:[ ]*+'.self::REGEX_HTML_ATTRIBUTE.')*+[ ]*+(\/)?>/', $text, $matches)) {
$element = \strtolower($matches[1]);
if (\preg_match('/^<([\/]?+)(\w++)(.*+)$/', $text, $matches)) {
$isClosing = ($matches[1] === '/');
$element = \strtolower($matches[2]);
$tail = $matches[3];
if (
\array_key_exists($element, Element::$TEXT_LEVEL_ELEMENTS)
|| \array_key_exists($element, self::$specialHtmlBlockTags)
if (\array_key_exists($element, self::BLOCK_ELEMENTS)
&& \preg_match('/^(?:\s|$|>|\/)/', $tail)
) {
return null;
return new self($rawLine, 6);
}
return new self($rawLine, 6);
if (! $isClosing && \preg_match(
'/^(?:[ ]*+'.self::REGEX_HTML_ATTRIBUTE.')*(?:[ ]*+)[\/]?+[>](.*+)$/',
$tail,
$matches
) || $isClosing && \preg_match(
'/^(?:[ ]*+)[\/]?+[>](.*+)$/',
$tail,
$matches
)
) {
$tail = $matches[1];
if (! \array_key_exists($element, self::SPECIAL_HTML_BLOCK_TAGS)
&& ! (isset($Block) && $Block instanceof Paragraph && $Context->previousEmptyLines() < 1)
&& \preg_match('/^\s*+$/', $tail)
) {
return new self($rawLine, 7);
}
}
}
return null;
@ -142,7 +224,7 @@ final class Markup implements ContinuableBlock
if (\preg_match('/<\/(?:script|pre|style)>/i', $text)) {
return true;
}
} elseif (\stripos($text, self::$simpleContainsEndConditions[$type]) !== false) {
} elseif (\stripos($text, self::SIMPLE_CONTAINS_END_CONDITIONS[$type]) !== false) {
return true;
}