Check object whitelist in HTMLFilter class, not EmbedFilter class

This commit is contained in:
Kijin Sung 2016-03-12 22:35:43 +09:00
parent 143b65e840
commit 6f53a3f068
3 changed files with 36 additions and 52 deletions

View file

@ -53,9 +53,7 @@ class EmbedFilter
*/
function check(&$content)
{
$this->checkObjectTag($content);
$this->checkEmbedTag($content);
$this->checkParamTag($content);
// This functionality has been moved to the HTMLFilter class.
}
/**
@ -64,8 +62,7 @@ class EmbedFilter
*/
function checkIframeTag(&$content)
{
// check in Purifier class
return;
// This functionality has been moved to the HTMLFilter class.
}
/**
@ -74,21 +71,7 @@ class EmbedFilter
*/
function checkObjectTag(&$content)
{
$content = preg_replace_callback('/<\s*object\s*[^>]+(?:\/?>?)/is', function($m) {
$html = Sunra\PhpSimple\HtmlDomParser::str_get_html($m[0]);
foreach ($html->find('object') as $element)
{
if ($element->data && !$this->isWhiteDomain($element->data))
{
return escape($m[0], false);
}
if ($element->type && !$this->isWhiteMimetype($element->type))
{
return escape($m[0], false);
}
}
return $m[0];
}, $content);
// This functionality has been moved to the HTMLFilter class.
}
/**
@ -97,21 +80,7 @@ class EmbedFilter
*/
function checkEmbedTag(&$content)
{
$content = preg_replace_callback('/<\s*embed\s*[^>]+(?:\/?>?)/is', function($m) {
$html = Sunra\PhpSimple\HtmlDomParser::str_get_html($m[0]);
foreach ($html->find('embed') as $element)
{
if ($element->src && !$this->isWhiteDomain($element->src))
{
return escape($m[0], false);
}
if ($element->type && !$this->isWhiteMimetype($element->type))
{
return escape($m[0], false);
}
}
return $m[0];
}, $content);
// This functionality has been moved to the HTMLFilter class.
}
/**
@ -120,20 +89,7 @@ class EmbedFilter
*/
function checkParamTag(&$content)
{
$content = preg_replace_callback('/<\s*param\s*[^>]+(?:\/?>?)/is', function($m) {
$html = Sunra\PhpSimple\HtmlDomParser::str_get_html($m[0]);
foreach ($html->find('param') as $element)
{
foreach (array('movie', 'src', 'href', 'url', 'source') as $attr)
{
if ($element->$attr && !$this->isWhiteDomain($element->$attr))
{
return escape($m[0], false);
}
}
}
return $m[0];
}, $content);
// This functionality has been moved to the HTMLFilter class.
}
/**

View file

@ -369,6 +369,22 @@ class HTMLFilter
}
}
/**
 * Get the object whitelist as a regular expression.
 *
 * Builds a '%'-delimited pattern matching URLs that begin with http:// or
 * https:// followed by one of the entries returned by
 * EmbedFilter::getWhiteUrlList(). Each entry is escaped with preg_quote()
 * so that it is matched literally.
 *
 * Blank entries are skipped, and if no usable entries remain a pattern that
 * can never match is returned. Without this, the alternation group would be
 * empty (or contain an empty alternative) and the resulting expression would
 * accept every http(s) URL, turning an empty whitelist into "allow all".
 *
 * @return string
 */
protected static function _getObjectWhitelist()
{
	$domains = \EmbedFilter::getInstance()->getWhiteUrlList();
	$result = array();
	foreach($domains as $domain)
	{
		// An empty alternative would match any URL, so never emit one.
		if($domain === null || $domain === '')
		{
			continue;
		}
		$result[] = preg_quote($domain, '%');
	}

	// Nothing is whitelisted: return a pattern that always fails to match.
	if(!count($result))
	{
		return '%^(?!)%';
	}

	return '%^https?://(' . implode('|', $result) . ')%';
}
/**
* Get the iframe whitelist as a regular expression.
*
@ -415,6 +431,21 @@ class HTMLFilter
return htmlspecialchars($matches[0], ENT_QUOTES, 'UTF-8');
}, $content);
// Remove object and embed URLs that are not allowed.
$whitelist = self::_getObjectWhitelist();
$content = preg_replace_callback('!<(object|embed|param)([^>]+)>!i', function($matches) use($whitelist) {
return preg_replace_callback('!([a-zA-Z0-9_-]+)="([^"]+)"!', function($attr) use($whitelist) {
if (in_array($attr[1], array('data', 'src', 'href', 'url', 'movie', 'source')))
{
if (!preg_match($whitelist, htmlspecialchars_decode($attr[2])))
{
return $attr[1] . '=""';
}
}
return $attr[0];
}, $matches[0]);
}, $content);
// Remove link URLs that may be CSRF attempts.
$content = preg_replace_callback('!\b(src|href|data|value)="([^"]+)"!i', function($matches) use($allow_acts, $deny_acts) {
$url = preg_replace('!\s+!', '', htmlspecialchars_decode(rawurldecode($matches[2])));

View file

@ -827,9 +827,6 @@ function purifierHtml(&$content)
*/
function removeHackTag($content)
{
$oEmbedFilter = EmbedFilter::getInstance();
$oEmbedFilter->check($content);
return Rhymix\Framework\Security\HTMLFilter::clean($content);
}