From c5bc008db1a0bbabf1bdd926ed36bd993980743f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20Schw=C3=B6rer?= Date: Fri, 17 Jan 2020 00:21:41 +0100 Subject: [PATCH] update Parsedown + ParsedownExtra --- www/extern/Parsedown.php | 208 ++++++++++++++++++++++++++++++---- www/extern/ParsedownExtra.php | 18 ++- 2 files changed, 201 insertions(+), 25 deletions(-) diff --git a/www/extern/Parsedown.php b/www/extern/Parsedown.php index 757666e..1b9d6d5 100644 --- a/www/extern/Parsedown.php +++ b/www/extern/Parsedown.php @@ -17,7 +17,7 @@ class Parsedown { # ~ - const version = '1.6.0'; + const version = '1.7.4'; # ~ @@ -75,6 +75,32 @@ class Parsedown protected $urlsLinked = true; + function setSafeMode($safeMode) + { + $this->safeMode = (bool) $safeMode; + + return $this; + } + + protected $safeMode; + + protected $safeLinksWhitelist = array( + 'http://', + 'https://', + 'ftp://', + 'ftps://', + 'mailto:', + 'data:image/png;base64,', + 'data:image/gif;base64,', + 'data:image/jpeg;base64,', + 'irc:', + 'ircs:', + 'git:', + 'ssh:', + 'news:', + 'steam:', + ); + # # Lines # @@ -342,8 +368,6 @@ class Parsedown { $text = $Block['element']['text']['text']; - $text = htmlspecialchars($text, ENT_NOQUOTES, 'UTF-8'); - $Block['element']['text']['text'] = $text; return $Block; @@ -354,7 +378,7 @@ class Parsedown protected function blockComment($Line) { - if ($this->markupEscaped) + if ($this->markupEscaped or $this->safeMode) { return; } @@ -396,7 +420,7 @@ class Parsedown protected function blockFencedCode($Line) { - if (preg_match('/^['.$Line['text'][0].']{3,}[ ]*([\w-]+)?[ ]*$/', $Line['text'], $matches)) + if (preg_match('/^['.$Line['text'][0].']{3,}[ ]*([^`]+)?[ ]*$/', $Line['text'], $matches)) { $Element = array( 'name' => 'code', @@ -405,7 +429,21 @@ class Parsedown if (isset($matches[1])) { - $class = 'language-'.$matches[1]; + /** + * https://www.w3.org/TR/2011/WD-html5-20110525/elements.html#classes + * Every HTML element may have a class attribute specified. + * The attribute, if specified, must have a value that is a set + * of space-separated tokens representing the various classes + * that the element belongs to. + * [...] + * The space characters, for the purposes of this specification, + * are U+0020 SPACE, U+0009 CHARACTER TABULATION (tab), + * U+000A LINE FEED (LF), U+000C FORM FEED (FF), and + * U+000D CARRIAGE RETURN (CR). + */ + $language = substr($matches[1], 0, strcspn($matches[1], " \t\n\f\r")); + + $class = 'language-'.$language; $Element['attributes'] = array( 'class' => $class, @@ -457,8 +495,6 @@ class Parsedown { $text = $Block['element']['text']['text']; - $text = htmlspecialchars($text, ENT_NOQUOTES, 'UTF-8'); - $Block['element']['text']['text'] = $text; return $Block; @@ -515,10 +551,10 @@ class Parsedown ), ); - if($name === 'ol') + if($name === 'ol') { $listStart = stristr($matches[0], '.', true); - + if($listStart !== '1') { $Block['element']['attributes'] = array('start' => $listStart); @@ -547,6 +583,8 @@ class Parsedown { $Block['li']['text'] []= ''; + $Block['loose'] = true; + unset($Block['interrupted']); } @@ -595,6 +633,22 @@ class Parsedown } } + protected function blockListComplete(array $Block) + { + if (isset($Block['loose'])) + { + foreach ($Block['element']['text'] as &$li) + { + if (end($li['text']) !== '') + { + $li['text'] []= ''; + } + } + } + + return $Block; + } + # # Quote @@ -678,7 +732,7 @@ class Parsedown protected function blockMarkup($Line) { - if ($this->markupEscaped) + if ($this->markupEscaped or $this->safeMode) { return; } @@ -997,7 +1051,7 @@ class Parsedown # ~ # - public function line($text) + public function line($text, $nonNestables=array()) { $markup = ''; @@ -1013,6 +1067,13 @@ class Parsedown foreach ($this->InlineTypes[$marker] as $inlineType) { + # check to see if the current inline type is nestable in the current context + + if ( ! empty($nonNestables) and in_array($inlineType, $nonNestables)) + { + continue; + } + $Inline = $this->{'inline'.$inlineType}($Excerpt); if ( ! isset($Inline)) @@ -1034,6 +1095,13 @@ class Parsedown $Inline['position'] = $markerPosition; } + # cause the new element to 'inherit' our non nestables + + foreach ($nonNestables as $non_nestable) + { + $Inline['element']['nonNestables'][] = $non_nestable; + } + # the text that comes before the inline $unmarkedText = substr($text, 0, $Inline['position']); @@ -1074,7 +1142,6 @@ class Parsedown if (preg_match('/^('.$marker.'+)[ ]*(.+?)[ ]*(? 'a', 'handler' => 'line', + 'nonNestables' => array('Url', 'Link'), 'text' => null, 'attributes' => array( 'href' => null, @@ -1253,8 +1321,6 @@ class Parsedown $Element['attributes']['title'] = $Definition['title']; } - $Element['attributes']['href'] = str_replace(array('&', '<'), array('&', '<'), $Element['attributes']['href']); - return array( 'extent' => $extent, 'element' => $Element, @@ -1263,7 +1329,7 @@ class Parsedown protected function inlineMarkup($Excerpt) { - if ($this->markupEscaped or strpos($Excerpt['text'], '>') === false) + if ($this->markupEscaped or $this->safeMode or strpos($Excerpt['text'], '>') === false) { return; } @@ -1343,14 +1409,16 @@ class Parsedown if (preg_match('/\bhttps?:[\/]{2}[^\s<]+\b\/*/ui', $Excerpt['context'], $matches, PREG_OFFSET_CAPTURE)) { + $url = $matches[0][0]; + $Inline = array( 'extent' => strlen($matches[0][0]), 'position' => $matches[0][1], 'element' => array( 'name' => 'a', - 'text' => $matches[0][0], + 'text' => $url, 'attributes' => array( - 'href' => $matches[0][0], + 'href' => $url, ), ), ); @@ -1363,7 +1431,7 @@ class Parsedown { if (strpos($Excerpt['text'], '>') !== false and preg_match('/^<(\w+:\/{2}[^ >]+)>/i', $Excerpt['text'], $matches)) { - $url = str_replace(array('&', '<'), array('&', '<'), $matches[1]); + $url = $matches[1]; return array( 'extent' => strlen($matches[0]), @@ -1401,6 +1469,11 @@ class Parsedown protected function element(array $Element) { + if ($this->safeMode) + { + $Element = $this->sanitiseElement($Element); + } + $markup = '<'.$Element['name']; if (isset($Element['attributes'])) @@ -1412,21 +1485,45 @@ class Parsedown continue; } - $markup .= ' '.$name.'="'.$value.'"'; + $markup .= ' '.$name.'="'.self::escape($value).'"'; } } + $permitRawHtml = false; + if (isset($Element['text'])) + { + $text = $Element['text']; + } + // very strongly consider an alternative if you're writing an + // extension + elseif (isset($Element['rawHtml'])) + { + $text = $Element['rawHtml']; + $allowRawHtmlInSafeMode = isset($Element['allowRawHtmlInSafeMode']) && $Element['allowRawHtmlInSafeMode']; + $permitRawHtml = !$this->safeMode || $allowRawHtmlInSafeMode; + } + + if (isset($text)) { $markup .= '>'; + if (!isset($Element['nonNestables'])) + { + $Element['nonNestables'] = array(); + } + if (isset($Element['handler'])) { - $markup .= $this->{$Element['handler']}($Element['text']); + $markup .= $this->{$Element['handler']}($text, $Element['nonNestables']); + } + elseif (!$permitRawHtml) + { + $markup .= self::escape($text, true); } else { - $markup .= $Element['text']; + $markup .= $text; } $markup .= ''; @@ -1485,10 +1582,77 @@ class Parsedown return $markup; } + protected function sanitiseElement(array $Element) + { + static $goodAttribute = '/^[a-zA-Z0-9][a-zA-Z0-9-_]*+$/'; + static $safeUrlNameToAtt = array( + 'a' => 'href', + 'img' => 'src', + ); + + if (isset($safeUrlNameToAtt[$Element['name']])) + { + $Element = $this->filterUnsafeUrlInAttribute($Element, $safeUrlNameToAtt[$Element['name']]); + } + + if ( ! empty($Element['attributes'])) + { + foreach ($Element['attributes'] as $att => $val) + { + # filter out badly parsed attribute + if ( ! preg_match($goodAttribute, $att)) + { + unset($Element['attributes'][$att]); + } + # dump onevent attribute + elseif (self::striAtStart($att, 'on')) + { + unset($Element['attributes'][$att]); + } + } + } + + return $Element; + } + + protected function filterUnsafeUrlInAttribute(array $Element, $attribute) + { + foreach ($this->safeLinksWhitelist as $scheme) + { + if (self::striAtStart($Element['attributes'][$attribute], $scheme)) + { + return $Element; + } + } + + $Element['attributes'][$attribute] = str_replace(':', '%3A', $Element['attributes'][$attribute]); + + return $Element; + } + # # Static Methods # + protected static function escape($text, $allowQuotes = false) + { + return htmlspecialchars($text, $allowQuotes ? ENT_NOQUOTES : ENT_QUOTES, 'UTF-8'); + } + + protected static function striAtStart($string, $needle) + { + $len = strlen($needle); + + if ($len > strlen($string)) + { + return false; + } + else + { + return strtolower(substr($string, 0, $len)) === strtolower($needle); + } + } + static function instance($name = 'default') { if (isset(self::$instances[$name])) diff --git a/www/extern/ParsedownExtra.php b/www/extern/ParsedownExtra.php index be6966d..632ba84 100644 --- a/www/extern/ParsedownExtra.php +++ b/www/extern/ParsedownExtra.php @@ -17,13 +17,13 @@ class ParsedownExtra extends Parsedown { # ~ - const version = '0.7.0'; + const version = '0.8.1'; # ~ function __construct() { - if (parent::version < '1.5.0') + if (version_compare(parent::version, '1.7.4') < 0) { throw new Exception('ParsedownExtra requires a later version of Parsedown'); } @@ -206,6 +206,10 @@ class ParsedownExtra extends Parsedown { $Block = parent::blockHeader($Line); + if (! isset($Block)) { + return null; + } + if (preg_match('/[ #]*{('.$this->regexAttribute.'+)}[ ]*$/', $Block['element']['text'], $matches, PREG_OFFSET_CAPTURE)) { $attributeString = $matches[1][0]; @@ -238,6 +242,10 @@ class ParsedownExtra extends Parsedown { $Block = parent::blockSetextHeader($Line, $Block); + if (! isset($Block)) { + return null; + } + if (preg_match('/[ ]*{('.$this->regexAttribute.'+)}[ ]*$/', $Block['element']['text'], $matches, PREG_OFFSET_CAPTURE)) { $attributeString = $matches[1][0]; @@ -302,6 +310,10 @@ class ParsedownExtra extends Parsedown { $Link = parent::inlineLink($Excerpt); + if (! isset($Link)) { + return null; + } + $remainder = substr($Excerpt['text'], $Link['extent']); if (preg_match('/^[ ]*{('.$this->regexAttribute.'+)}/', $remainder, $matches)) @@ -420,7 +432,7 @@ class ParsedownExtra extends Parsedown $Element['text'][1]['text'] []= array( 'name' => 'li', 'attributes' => array('id' => 'fn:'.$definitionId), - 'text' => "\n".$text."\n", + 'rawHtml' => "\n".$text."\n", ); }