1
0

update Parsedown + ParsedownExtra

This commit is contained in:
Mike Schwörer 2020-01-17 00:21:41 +01:00
parent f5a9552dbd
commit c5bc008db1
Signed by: Mikescher
GPG Key ID: D3C7172E0A70F8CF
2 changed files with 201 additions and 25 deletions

View File

@ -17,7 +17,7 @@ class Parsedown
{ {
# ~ # ~
const version = '1.6.0'; const version = '1.7.4';
# ~ # ~
@ -75,6 +75,32 @@ class Parsedown
protected $urlsLinked = true; protected $urlsLinked = true;
/**
 * Switch safe mode on or off.
 *
 * The flag is read by the markup handlers (blockComment, blockMarkup,
 * inlineMarkup), which bail out while it is set, and by element(), which
 * runs every element through sanitiseElement() before rendering it.
 *
 * @param mixed $safeMode any value; it is cast to a boolean
 *
 * @return $this the same instance, so calls can be chained
 */
function setSafeMode($safeMode)
{
    $enabled = (bool) $safeMode;

    $this->safeMode = $enabled;

    return $this;
}
# Safe-mode flag; assigned (as a boolean) by setSafeMode().
protected $safeMode;
# URL prefixes that filterUnsafeUrlInAttribute() accepts unchanged.
# Each entry is compared case-insensitively against the start of the
# attribute value; a value matching none of them has its ':' characters
# replaced with '%3A'.
protected $safeLinksWhitelist = array(
'http://',
'https://',
'ftp://',
'ftps://',
'mailto:',
'data:image/png;base64,',
'data:image/gif;base64,',
'data:image/jpeg;base64,',
'irc:',
'ircs:',
'git:',
'ssh:',
'news:',
'steam:',
);
# #
# Lines # Lines
# #
@ -342,8 +368,6 @@ class Parsedown
{ {
$text = $Block['element']['text']['text']; $text = $Block['element']['text']['text'];
$text = htmlspecialchars($text, ENT_NOQUOTES, 'UTF-8');
$Block['element']['text']['text'] = $text; $Block['element']['text']['text'] = $text;
return $Block; return $Block;
@ -354,7 +378,7 @@ class Parsedown
protected function blockComment($Line) protected function blockComment($Line)
{ {
if ($this->markupEscaped) if ($this->markupEscaped or $this->safeMode)
{ {
return; return;
} }
@ -396,7 +420,7 @@ class Parsedown
protected function blockFencedCode($Line) protected function blockFencedCode($Line)
{ {
if (preg_match('/^['.$Line['text'][0].']{3,}[ ]*([\w-]+)?[ ]*$/', $Line['text'], $matches)) if (preg_match('/^['.$Line['text'][0].']{3,}[ ]*([^`]+)?[ ]*$/', $Line['text'], $matches))
{ {
$Element = array( $Element = array(
'name' => 'code', 'name' => 'code',
@ -405,7 +429,21 @@ class Parsedown
if (isset($matches[1])) if (isset($matches[1]))
{ {
$class = 'language-'.$matches[1]; /**
* https://www.w3.org/TR/2011/WD-html5-20110525/elements.html#classes
* Every HTML element may have a class attribute specified.
* The attribute, if specified, must have a value that is a set
* of space-separated tokens representing the various classes
* that the element belongs to.
* [...]
* The space characters, for the purposes of this specification,
* are U+0020 SPACE, U+0009 CHARACTER TABULATION (tab),
* U+000A LINE FEED (LF), U+000C FORM FEED (FF), and
* U+000D CARRIAGE RETURN (CR).
*/
$language = substr($matches[1], 0, strcspn($matches[1], " \t\n\f\r"));
$class = 'language-'.$language;
$Element['attributes'] = array( $Element['attributes'] = array(
'class' => $class, 'class' => $class,
@ -457,8 +495,6 @@ class Parsedown
{ {
$text = $Block['element']['text']['text']; $text = $Block['element']['text']['text'];
$text = htmlspecialchars($text, ENT_NOQUOTES, 'UTF-8');
$Block['element']['text']['text'] = $text; $Block['element']['text']['text'] = $text;
return $Block; return $Block;
@ -515,10 +551,10 @@ class Parsedown
), ),
); );
if($name === 'ol') if($name === 'ol')
{ {
$listStart = stristr($matches[0], '.', true); $listStart = stristr($matches[0], '.', true);
if($listStart !== '1') if($listStart !== '1')
{ {
$Block['element']['attributes'] = array('start' => $listStart); $Block['element']['attributes'] = array('start' => $listStart);
@ -547,6 +583,8 @@ class Parsedown
{ {
$Block['li']['text'] []= ''; $Block['li']['text'] []= '';
$Block['loose'] = true;
unset($Block['interrupted']); unset($Block['interrupted']);
} }
@ -595,6 +633,22 @@ class Parsedown
} }
} }
/**
 * Finalise a list block.
 *
 * When the block carries the 'loose' marker, every list item whose text
 * lines do not already end with an empty string gets one appended. A block
 * without the marker is returned unchanged.
 *
 * @param array $Block list block; items are read from $Block['element']['text']
 *
 * @return array the (possibly amended) block
 */
protected function blockListComplete(array $Block)
{
    # tight lists need no post-processing
    if ( ! isset($Block['loose']))
    {
        return $Block;
    }

    foreach ($Block['element']['text'] as $position => $unusedItem)
    {
        # inspect the stored item itself (not the loop copy) so the array
        # that ends up in the result is the one end() operated on
        $lastLine = end($Block['element']['text'][$position]['text']);

        if ($lastLine !== '')
        {
            $Block['element']['text'][$position]['text'] []= '';
        }
    }

    return $Block;
}
# #
# Quote # Quote
@ -678,7 +732,7 @@ class Parsedown
protected function blockMarkup($Line) protected function blockMarkup($Line)
{ {
if ($this->markupEscaped) if ($this->markupEscaped or $this->safeMode)
{ {
return; return;
} }
@ -997,7 +1051,7 @@ class Parsedown
# ~ # ~
# #
public function line($text) public function line($text, $nonNestables=array())
{ {
$markup = ''; $markup = '';
@ -1013,6 +1067,13 @@ class Parsedown
foreach ($this->InlineTypes[$marker] as $inlineType) foreach ($this->InlineTypes[$marker] as $inlineType)
{ {
# check to see if the current inline type is nestable in the current context
if ( ! empty($nonNestables) and in_array($inlineType, $nonNestables))
{
continue;
}
$Inline = $this->{'inline'.$inlineType}($Excerpt); $Inline = $this->{'inline'.$inlineType}($Excerpt);
if ( ! isset($Inline)) if ( ! isset($Inline))
@ -1034,6 +1095,13 @@ class Parsedown
$Inline['position'] = $markerPosition; $Inline['position'] = $markerPosition;
} }
# cause the new element to 'inherit' our non nestables
foreach ($nonNestables as $non_nestable)
{
$Inline['element']['nonNestables'][] = $non_nestable;
}
# the text that comes before the inline # the text that comes before the inline
$unmarkedText = substr($text, 0, $Inline['position']); $unmarkedText = substr($text, 0, $Inline['position']);
@ -1074,7 +1142,6 @@ class Parsedown
if (preg_match('/^('.$marker.'+)[ ]*(.+?)[ ]*(?<!'.$marker.')\1(?!'.$marker.')/s', $Excerpt['text'], $matches)) if (preg_match('/^('.$marker.'+)[ ]*(.+?)[ ]*(?<!'.$marker.')\1(?!'.$marker.')/s', $Excerpt['text'], $matches))
{ {
$text = $matches[2]; $text = $matches[2];
$text = htmlspecialchars($text, ENT_NOQUOTES, 'UTF-8');
$text = preg_replace("/[ ]*\n/", ' ', $text); $text = preg_replace("/[ ]*\n/", ' ', $text);
return array( return array(
@ -1193,6 +1260,7 @@ class Parsedown
$Element = array( $Element = array(
'name' => 'a', 'name' => 'a',
'handler' => 'line', 'handler' => 'line',
'nonNestables' => array('Url', 'Link'),
'text' => null, 'text' => null,
'attributes' => array( 'attributes' => array(
'href' => null, 'href' => null,
@ -1253,8 +1321,6 @@ class Parsedown
$Element['attributes']['title'] = $Definition['title']; $Element['attributes']['title'] = $Definition['title'];
} }
$Element['attributes']['href'] = str_replace(array('&', '<'), array('&amp;', '&lt;'), $Element['attributes']['href']);
return array( return array(
'extent' => $extent, 'extent' => $extent,
'element' => $Element, 'element' => $Element,
@ -1263,7 +1329,7 @@ class Parsedown
protected function inlineMarkup($Excerpt) protected function inlineMarkup($Excerpt)
{ {
if ($this->markupEscaped or strpos($Excerpt['text'], '>') === false) if ($this->markupEscaped or $this->safeMode or strpos($Excerpt['text'], '>') === false)
{ {
return; return;
} }
@ -1343,14 +1409,16 @@ class Parsedown
if (preg_match('/\bhttps?:[\/]{2}[^\s<]+\b\/*/ui', $Excerpt['context'], $matches, PREG_OFFSET_CAPTURE)) if (preg_match('/\bhttps?:[\/]{2}[^\s<]+\b\/*/ui', $Excerpt['context'], $matches, PREG_OFFSET_CAPTURE))
{ {
$url = $matches[0][0];
$Inline = array( $Inline = array(
'extent' => strlen($matches[0][0]), 'extent' => strlen($matches[0][0]),
'position' => $matches[0][1], 'position' => $matches[0][1],
'element' => array( 'element' => array(
'name' => 'a', 'name' => 'a',
'text' => $matches[0][0], 'text' => $url,
'attributes' => array( 'attributes' => array(
'href' => $matches[0][0], 'href' => $url,
), ),
), ),
); );
@ -1363,7 +1431,7 @@ class Parsedown
{ {
if (strpos($Excerpt['text'], '>') !== false and preg_match('/^<(\w+:\/{2}[^ >]+)>/i', $Excerpt['text'], $matches)) if (strpos($Excerpt['text'], '>') !== false and preg_match('/^<(\w+:\/{2}[^ >]+)>/i', $Excerpt['text'], $matches))
{ {
$url = str_replace(array('&', '<'), array('&amp;', '&lt;'), $matches[1]); $url = $matches[1];
return array( return array(
'extent' => strlen($matches[0]), 'extent' => strlen($matches[0]),
@ -1401,6 +1469,11 @@ class Parsedown
protected function element(array $Element) protected function element(array $Element)
{ {
if ($this->safeMode)
{
$Element = $this->sanitiseElement($Element);
}
$markup = '<'.$Element['name']; $markup = '<'.$Element['name'];
if (isset($Element['attributes'])) if (isset($Element['attributes']))
@ -1412,21 +1485,45 @@ class Parsedown
continue; continue;
} }
$markup .= ' '.$name.'="'.$value.'"'; $markup .= ' '.$name.'="'.self::escape($value).'"';
} }
} }
$permitRawHtml = false;
if (isset($Element['text'])) if (isset($Element['text']))
{
$text = $Element['text'];
}
// very strongly consider an alternative if you're writing an
// extension
elseif (isset($Element['rawHtml']))
{
$text = $Element['rawHtml'];
$allowRawHtmlInSafeMode = isset($Element['allowRawHtmlInSafeMode']) && $Element['allowRawHtmlInSafeMode'];
$permitRawHtml = !$this->safeMode || $allowRawHtmlInSafeMode;
}
if (isset($text))
{ {
$markup .= '>'; $markup .= '>';
if (!isset($Element['nonNestables']))
{
$Element['nonNestables'] = array();
}
if (isset($Element['handler'])) if (isset($Element['handler']))
{ {
$markup .= $this->{$Element['handler']}($Element['text']); $markup .= $this->{$Element['handler']}($text, $Element['nonNestables']);
}
elseif (!$permitRawHtml)
{
$markup .= self::escape($text, true);
} }
else else
{ {
$markup .= $Element['text']; $markup .= $text;
} }
$markup .= '</'.$Element['name'].'>'; $markup .= '</'.$Element['name'].'>';
@ -1485,10 +1582,77 @@ class Parsedown
return $markup; return $markup;
} }
/**
 * Remove potentially dangerous parts of an element before it is rendered.
 *
 * Two passes are applied:
 *  - for 'a' and 'img' elements, the 'href' / 'src' attribute is handed to
 *    filterUnsafeUrlInAttribute();
 *  - every attribute whose name fails the allowed-name pattern, or whose
 *    name starts with "on" (case-insensitive), is dropped.
 *
 * @param array $Element element with a 'name' and optional 'attributes'
 *
 * @return array the sanitised element
 */
protected function sanitiseElement(array $Element)
{
    static $validAttributeName = '/^[a-zA-Z0-9][a-zA-Z0-9-_]*+$/';
    static $urlAttributeByTag = array(
        'a' => 'href',
        'img' => 'src',
    );

    $tag = $Element['name'];

    if (isset($urlAttributeByTag[$tag]))
    {
        $Element = $this->filterUnsafeUrlInAttribute($Element, $urlAttributeByTag[$tag]);
    }

    # nothing left to inspect
    if (empty($Element['attributes']))
    {
        return $Element;
    }

    foreach ($Element['attributes'] as $attributeName => $unusedValue)
    {
        # a badly parsed name is rejected first; only well-formed names
        # are then checked for the on* event-handler prefix
        $isRejected = ! preg_match($validAttributeName, $attributeName)
            || self::striAtStart($attributeName, 'on');

        if ($isRejected)
        {
            unset($Element['attributes'][$attributeName]);
        }
    }

    return $Element;
}
/**
 * Neutralise a URL-bearing attribute unless it starts with a whitelisted prefix.
 *
 * The attribute value is compared (case-insensitively, at its start) with
 * each entry of $this->safeLinksWhitelist. On the first match the element is
 * returned untouched. Otherwise every ':' in the value is replaced with
 * '%3A', so the value can no longer begin with a "scheme:" prefix.
 *
 * An element that does not carry the attribute (or carries it as null) is
 * returned unchanged. Without this guard the lookup raises an undefined-index
 * notice, passes null to the string functions, and writes an empty-string
 * attribute into the element.
 *
 * @param array  $Element   element whose 'attributes' may hold the URL
 * @param string $attribute name of the attribute to check, e.g. 'href'
 *
 * @return array the element, with the attribute possibly rewritten
 */
protected function filterUnsafeUrlInAttribute(array $Element, $attribute)
{
    # no value to filter - do not invent one
    if ( ! isset($Element['attributes'][$attribute]))
    {
        return $Element;
    }

    $url = $Element['attributes'][$attribute];

    foreach ($this->safeLinksWhitelist as $scheme)
    {
        if (self::striAtStart($url, $scheme))
        {
            return $Element;
        }
    }

    # not whitelisted: encode the colons
    $Element['attributes'][$attribute] = str_replace(':', '%3A', $url);

    return $Element;
}
# #
# Static Methods # Static Methods
# #
/**
 * HTML-escape a string as UTF-8.
 *
 * @param string $text        text to escape
 * @param bool   $allowQuotes when true, quote characters are left as they
 *                            are (ENT_NOQUOTES); otherwise both double and
 *                            single quotes are converted (ENT_QUOTES)
 *
 * @return string the escaped text
 */
protected static function escape($text, $allowQuotes = false)
{
    if ($allowQuotes)
    {
        $quoteFlag = ENT_NOQUOTES;
    }
    else
    {
        $quoteFlag = ENT_QUOTES;
    }

    return htmlspecialchars($text, $quoteFlag, 'UTF-8');
}
/**
 * Case-insensitive "starts with" check.
 *
 * @param string $string haystack
 * @param string $needle prefix to look for
 *
 * @return bool true when $string begins with $needle, ignoring case;
 *              false when $needle is longer than $string or differs
 */
protected static function striAtStart($string, $needle)
{
    $needleLength = strlen($needle);

    # a haystack shorter than the needle can never start with it
    if (strlen($string) < $needleLength)
    {
        return false;
    }

    $prefix = substr($string, 0, $needleLength);

    return strtolower($prefix) === strtolower($needle);
}
static function instance($name = 'default') static function instance($name = 'default')
{ {
if (isset(self::$instances[$name])) if (isset(self::$instances[$name]))

View File

@ -17,13 +17,13 @@ class ParsedownExtra extends Parsedown
{ {
# ~ # ~
const version = '0.7.0'; const version = '0.8.1';
# ~ # ~
function __construct() function __construct()
{ {
if (parent::version < '1.5.0') if (version_compare(parent::version, '1.7.4') < 0)
{ {
throw new Exception('ParsedownExtra requires a later version of Parsedown'); throw new Exception('ParsedownExtra requires a later version of Parsedown');
} }
@ -206,6 +206,10 @@ class ParsedownExtra extends Parsedown
{ {
$Block = parent::blockHeader($Line); $Block = parent::blockHeader($Line);
if (! isset($Block)) {
return null;
}
if (preg_match('/[ #]*{('.$this->regexAttribute.'+)}[ ]*$/', $Block['element']['text'], $matches, PREG_OFFSET_CAPTURE)) if (preg_match('/[ #]*{('.$this->regexAttribute.'+)}[ ]*$/', $Block['element']['text'], $matches, PREG_OFFSET_CAPTURE))
{ {
$attributeString = $matches[1][0]; $attributeString = $matches[1][0];
@ -238,6 +242,10 @@ class ParsedownExtra extends Parsedown
{ {
$Block = parent::blockSetextHeader($Line, $Block); $Block = parent::blockSetextHeader($Line, $Block);
if (! isset($Block)) {
return null;
}
if (preg_match('/[ ]*{('.$this->regexAttribute.'+)}[ ]*$/', $Block['element']['text'], $matches, PREG_OFFSET_CAPTURE)) if (preg_match('/[ ]*{('.$this->regexAttribute.'+)}[ ]*$/', $Block['element']['text'], $matches, PREG_OFFSET_CAPTURE))
{ {
$attributeString = $matches[1][0]; $attributeString = $matches[1][0];
@ -302,6 +310,10 @@ class ParsedownExtra extends Parsedown
{ {
$Link = parent::inlineLink($Excerpt); $Link = parent::inlineLink($Excerpt);
if (! isset($Link)) {
return null;
}
$remainder = substr($Excerpt['text'], $Link['extent']); $remainder = substr($Excerpt['text'], $Link['extent']);
if (preg_match('/^[ ]*{('.$this->regexAttribute.'+)}/', $remainder, $matches)) if (preg_match('/^[ ]*{('.$this->regexAttribute.'+)}/', $remainder, $matches))
@ -420,7 +432,7 @@ class ParsedownExtra extends Parsedown
$Element['text'][1]['text'] []= array( $Element['text'][1]['text'] []= array(
'name' => 'li', 'name' => 'li',
'attributes' => array('id' => 'fn:'.$definitionId), 'attributes' => array('id' => 'fn:'.$definitionId),
'text' => "\n".$text."\n", 'rawHtml' => "\n".$text."\n",
); );
} }