mirror of
https://github.com/erusev/parsedown.git
synced 2023-08-10 21:13:06 +03:00
Account for type 7 HTML blocks
These differ from type 6 blocks in a few ways (other than the HTML element names given in the spec): 1. Type 7 blocks cannot interrupt paragraphs. 2. Type 7 blocks must contain ONLY whitespace on the rest of the line after the tag, whereas type 6 blocks need only have the tag name followed by whitespace, or have the opening tag be complete.
This commit is contained in:
parent
f4fb5bd943
commit
34902bc80c
@ -15,18 +15,81 @@ use Erusev\Parsedown\State;
|
|||||||
|
|
||||||
final class Markup implements ContinuableBlock
|
final class Markup implements ContinuableBlock
|
||||||
{
|
{
|
||||||
const REGEX_HTML_ATTRIBUTE = '[a-zA-Z_:][\w:.-]*+(?:\s*+=\s*+(?:[^"\'=<>`\s]+|"[^"]*+"|\'[^\']*+\'))?+';
|
private const REGEX_HTML_ATTRIBUTE = '[a-zA-Z_:][\w:.-]*+(?:\s*+=\s*+(?:[^"\'=<>`\s]+|"[^"]*+"|\'[^\']*+\'))?+';
|
||||||
|
|
||||||
/** @var array{2: string, 3: string, 4: string, 5: string} */
|
private const BLOCK_ELEMENTS = [
|
||||||
private static $simpleContainsEndConditions = [
|
'address' => true,
|
||||||
|
'article' => true,
|
||||||
|
'aside' => true,
|
||||||
|
'base' => true,
|
||||||
|
'basefont' => true,
|
||||||
|
'blockquote' => true,
|
||||||
|
'body' => true,
|
||||||
|
'caption' => true,
|
||||||
|
'center' => true,
|
||||||
|
'col' => true,
|
||||||
|
'colgroup' => true,
|
||||||
|
'dd' => true,
|
||||||
|
'details' => true,
|
||||||
|
'dialog' => true,
|
||||||
|
'dir' => true,
|
||||||
|
'div' => true,
|
||||||
|
'dl' => true,
|
||||||
|
'dt' => true,
|
||||||
|
'fieldset' => true,
|
||||||
|
'figcaption' => true,
|
||||||
|
'figure' => true,
|
||||||
|
'footer' => true,
|
||||||
|
'form' => true,
|
||||||
|
'frame' => true,
|
||||||
|
'frameset' => true,
|
||||||
|
'h1' => true,
|
||||||
|
'h2' => true,
|
||||||
|
'h3' => true,
|
||||||
|
'h4' => true,
|
||||||
|
'h5' => true,
|
||||||
|
'h6' => true,
|
||||||
|
'head' => true,
|
||||||
|
'header' => true,
|
||||||
|
'hr' => true,
|
||||||
|
'html' => true,
|
||||||
|
'iframe' => true,
|
||||||
|
'legend' => true,
|
||||||
|
'li' => true,
|
||||||
|
'link' => true,
|
||||||
|
'main' => true,
|
||||||
|
'menu' => true,
|
||||||
|
'menuitem' => true,
|
||||||
|
'nav' => true,
|
||||||
|
'noframes' => true,
|
||||||
|
'ol' => true,
|
||||||
|
'optgroup' => true,
|
||||||
|
'option' => true,
|
||||||
|
'p' => true,
|
||||||
|
'param' => true,
|
||||||
|
'section' => true,
|
||||||
|
'source' => true,
|
||||||
|
'summary' => true,
|
||||||
|
'table' => true,
|
||||||
|
'tbody' => true,
|
||||||
|
'td' => true,
|
||||||
|
'tfoot' => true,
|
||||||
|
'th' => true,
|
||||||
|
'thead' => true,
|
||||||
|
'title' => true,
|
||||||
|
'tr' => true,
|
||||||
|
'track' => true,
|
||||||
|
'ul' => true,
|
||||||
|
];
|
||||||
|
|
||||||
|
private const SIMPLE_CONTAINS_END_CONDITIONS = [
|
||||||
2 => '-->',
|
2 => '-->',
|
||||||
3 => '?>',
|
3 => '?>',
|
||||||
4 => '>',
|
4 => '>',
|
||||||
5 => ']]>'
|
5 => ']]>',
|
||||||
];
|
];
|
||||||
|
|
||||||
/** @var array<string, string> */
|
private const SPECIAL_HTML_BLOCK_TAGS = [
|
||||||
private static $specialHtmlBlockTags = [
|
|
||||||
'script' => true,
|
'script' => true,
|
||||||
'style' => true,
|
'style' => true,
|
||||||
'pre' => true,
|
'pre' => true,
|
||||||
@ -87,17 +150,36 @@ final class Markup implements ContinuableBlock
|
|||||||
return new self($rawLine, 5, self::closes12345TypeMarkup(5, $text));
|
return new self($rawLine, 5, self::closes12345TypeMarkup(5, $text));
|
||||||
}
|
}
|
||||||
|
|
||||||
if (\preg_match('/^<[\/]?+(\w++)(?:[ ]*+'.self::REGEX_HTML_ATTRIBUTE.')*+[ ]*+(\/)?>/', $text, $matches)) {
|
if (\preg_match('/^<([\/]?+)(\w++)(.*+)$/', $text, $matches)) {
|
||||||
$element = \strtolower($matches[1]);
|
$isClosing = ($matches[1] === '/');
|
||||||
|
$element = \strtolower($matches[2]);
|
||||||
|
$tail = $matches[3];
|
||||||
|
|
||||||
if (
|
if (\array_key_exists($element, self::BLOCK_ELEMENTS)
|
||||||
\array_key_exists($element, Element::$TEXT_LEVEL_ELEMENTS)
|
&& \preg_match('/^(?:\s|$|>|\/)/', $tail)
|
||||||
|| \array_key_exists($element, self::$specialHtmlBlockTags)
|
|
||||||
) {
|
) {
|
||||||
return null;
|
return new self($rawLine, 6);
|
||||||
}
|
}
|
||||||
|
|
||||||
return new self($rawLine, 6);
|
if (! $isClosing && \preg_match(
|
||||||
|
'/^(?:[ ]*+'.self::REGEX_HTML_ATTRIBUTE.')*(?:[ ]*+)[\/]?+[>](.*+)$/',
|
||||||
|
$tail,
|
||||||
|
$matches
|
||||||
|
) || $isClosing && \preg_match(
|
||||||
|
'/^(?:[ ]*+)[\/]?+[>](.*+)$/',
|
||||||
|
$tail,
|
||||||
|
$matches
|
||||||
|
)
|
||||||
|
) {
|
||||||
|
$tail = $matches[1];
|
||||||
|
|
||||||
|
if (! \array_key_exists($element, self::SPECIAL_HTML_BLOCK_TAGS)
|
||||||
|
&& ! (isset($Block) && $Block instanceof Paragraph && $Context->previousEmptyLines() < 1)
|
||||||
|
&& \preg_match('/^\s*+$/', $tail)
|
||||||
|
) {
|
||||||
|
return new self($rawLine, 7);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return null;
|
return null;
|
||||||
@ -142,7 +224,7 @@ final class Markup implements ContinuableBlock
|
|||||||
if (\preg_match('/<\/(?:script|pre|style)>/i', $text)) {
|
if (\preg_match('/<\/(?:script|pre|style)>/i', $text)) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
} elseif (\stripos($text, self::$simpleContainsEndConditions[$type]) !== false) {
|
} elseif (\stripos($text, self::SIMPLE_CONTAINS_END_CONDITIONS[$type]) !== false) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user