From 7d1b9ca5627bbc89231a51c8c98e55c9c9786b2f Mon Sep 17 00:00:00 2001 From: Aidan Woods Date: Mon, 11 Oct 2021 20:09:57 +0100 Subject: [PATCH] Better autolinks This doesn't follow gfm spec yet, work lifted from my code over at https://github.com/Parsemd/Parsemd-PHP/blob/8505e2737e43f89c09fc9597889e56a6f8e114b9/src/Parsers/GitHubFlavor/Inlines/AutoLink.php Fixes #505 Fixes #717 --- src/Components/Inlines/Url.php | 34 ++++++++++++++++++++++++++------- tests/data/url_autolinking.html | 8 +++++++- tests/data/url_autolinking.md | 14 +++++++++++++- 3 files changed, 47 insertions(+), 9 deletions(-) diff --git a/src/Components/Inlines/Url.php b/src/Components/Inlines/Url.php index 19f818e..8a84492 100644 --- a/src/Components/Inlines/Url.php +++ b/src/Components/Inlines/Url.php @@ -14,6 +14,9 @@ final class Url implements BacktrackingInline { use WidthTrait; + private const ABSOLUTE_URI = '[a-z][a-z0-9+.-]{1,31}:[^\s[:cntrl:]<>]*'; + private const NO_TRAILING_PUNCT = '(?context(), - $matches, - \PREG_OFFSET_CAPTURE - )) { + // this needs some work to follow spec + if ( + \preg_match( + '/'.self::ABSOLUTE_URI.self::NO_TRAILING_PUNCT.'/iu', + $Excerpt->context(), + $matches, + \PREG_OFFSET_CAPTURE + ) + ) { /** @var array{0: array{string, int}} $matches */ - return new self($matches[0][0], \intval($matches[0][1])); + $url = $matches[0][0]; + $position = \intval($matches[0][1]); + + if (\preg_match('/[)]++$/', $url, $matches)) { + $trailingParens = \strlen($matches[0]); + + $openingParens = \substr_count($url, '('); + $closingParens = \substr_count($url, ')'); + + if ($closingParens > $openingParens) { + $url = \substr($url, 0, -\min($trailingParens, $closingParens - $openingParens)); + } + } + + return new self($url, $position); } return null; diff --git a/tests/data/url_autolinking.html b/tests/data/url_autolinking.html index 8f937db..334355c 100644 --- a/tests/data/url_autolinking.html +++ b/tests/data/url_autolinking.html @@ -1,3 +1,9 @@

an autolink http://example.com

inside of parentheses (http://example.com)

-

trailing slash http://example.com/ and http://example.com/path/

\ No newline at end of file +

http://www.google.com/search?q=Markup+(business)

+

http://www.google.com/search?q=Markup+(business)))

+

(http://www.google.com/search?q=Markup+(business))

+

(http://www.google.com/search?q=Markup+(business)

+

trailing slash http://example.com/ and http://example.com/path/

+

trailing paren https://www.owasp.org/index.php/Cross-site_Scripting_(XSS)

+

complex link http://elk.canda.biz/app/kibana#/discover?_g=()&_a=(columns:!(_source),index:'deve-*',interval:auto,query:(query_string:(analyze_wildcard:!t,query:'*')),sort:!('@timestamp',desc))

\ No newline at end of file diff --git a/tests/data/url_autolinking.md b/tests/data/url_autolinking.md index 09b467b..825f17e 100644 --- a/tests/data/url_autolinking.md +++ b/tests/data/url_autolinking.md @@ -2,4 +2,16 @@ an autolink http://example.com inside of parentheses (http://example.com) -trailing slash http://example.com/ and http://example.com/path/ \ No newline at end of file +http://www.google.com/search?q=Markup+(business) + +http://www.google.com/search?q=Markup+(business))) + +(http://www.google.com/search?q=Markup+(business)) + +(http://www.google.com/search?q=Markup+(business) + +trailing slash http://example.com/ and http://example.com/path/ + +trailing paren https://www.owasp.org/index.php/Cross-site_Scripting_(XSS) + +complex link http://elk.canda.biz/app/kibana#/discover?_g=()&_a=(columns:!(_source),index:'deve-*',interval:auto,query:(query_string:(analyze_wildcard:!t,query:'*')),sort:!('@timestamp',desc)) \ No newline at end of file