From 6f53a3f0689bb4d4aba37e9927f9021048d627bc Mon Sep 17 00:00:00 2001
From: Kijin Sung
Date: Sat, 12 Mar 2016 22:35:43 +0900
Subject: [PATCH] Check object whitelist in HTMLFilter class, not EmbedFilter class

---
Reviewer notes (not part of the commit message):
 - _getObjectWhitelist() now returns a never-matching pattern when the
   whitelist has no usable entries; '%^https?://()%' would accept any URL.
 - Attribute names are lowercased before the lookup, because the tag regex
   is case-insensitive but the attribute lookup was not.
 - The post-image blob id on the htmlfilter.php "index" line predates these
   changes.
 - NOTE(review): indentation was rebuilt as tabs; confirm against the tree,
   or apply with whitespace checks relaxed.

 classes/security/EmbedFilter.class.php   | 54 +++---------------------
 common/framework/security/htmlfilter.php | 44 +++++++++++++++++++
 common/legacy.php                        |  3 --
 3 files changed, 49 insertions(+), 52 deletions(-)

diff --git a/classes/security/EmbedFilter.class.php b/classes/security/EmbedFilter.class.php
index 97b3866f1..382bf0fe0 100644
--- a/classes/security/EmbedFilter.class.php
+++ b/classes/security/EmbedFilter.class.php
@@ -53,9 +53,7 @@ class EmbedFilter
 	 */
 	function check(&$content)
 	{
-		$this->checkObjectTag($content);
-		$this->checkEmbedTag($content);
-		$this->checkParamTag($content);
+		// This functionality has been moved to the HTMLFilter class.
 	}
 
 	/**
@@ -64,8 +62,7 @@ class EmbedFilter
 	 */
 	function checkIframeTag(&$content)
 	{
-		// check in Purifier class
-		return;
+		// This functionality has been moved to the HTMLFilter class.
 	}
 
 	/**
@@ -74,21 +71,7 @@ class EmbedFilter
 	 */
 	function checkObjectTag(&$content)
 	{
-		$content = preg_replace_callback('/<\s*object\s*[^>]+(?:\/?>?)/is', function($m) {
-			$html = Sunra\PhpSimple\HtmlDomParser::str_get_html($m[0]);
-			foreach ($html->find('object') as $element)
-			{
-				if ($element->data && !$this->isWhiteDomain($element->data))
-				{
-					return escape($m[0], false);
-				}
-				if ($element->type && !$this->isWhiteMimetype($element->type))
-				{
-					return escape($m[0], false);
-				}
-			}
-			return $m[0];
-		}, $content);
+		// This functionality has been moved to the HTMLFilter class.
 	}
 
 	/**
@@ -97,21 +80,7 @@ class EmbedFilter
 	 */
 	function checkEmbedTag(&$content)
 	{
-		$content = preg_replace_callback('/<\s*embed\s*[^>]+(?:\/?>?)/is', function($m) {
-			$html = Sunra\PhpSimple\HtmlDomParser::str_get_html($m[0]);
-			foreach ($html->find('embed') as $element)
-			{
-				if ($element->src && !$this->isWhiteDomain($element->src))
-				{
-					return escape($m[0], false);
-				}
-				if ($element->type && !$this->isWhiteMimetype($element->type))
-				{
-					return escape($m[0], false);
-				}
-			}
-			return $m[0];
-		}, $content);
+		// This functionality has been moved to the HTMLFilter class.
 	}
 
 	/**
@@ -120,20 +89,7 @@ class EmbedFilter
 	 */
 	function checkParamTag(&$content)
 	{
-		$content = preg_replace_callback('/<\s*param\s*[^>]+(?:\/?>?)/is', function($m) {
-			$html = Sunra\PhpSimple\HtmlDomParser::str_get_html($m[0]);
-			foreach ($html->find('param') as $element)
-			{
-				foreach (array('movie', 'src', 'href', 'url', 'source') as $attr)
-				{
-					if ($element->$attr && !$this->isWhiteDomain($element->$attr))
-					{
-						return escape($m[0], false);
-					}
-				}
-			}
-			return $m[0];
-		}, $content);
+		// This functionality has been moved to the HTMLFilter class.
 	}
 
 	/**
diff --git a/common/framework/security/htmlfilter.php b/common/framework/security/htmlfilter.php
index e791595eb..07fa31914 100644
--- a/common/framework/security/htmlfilter.php
+++ b/common/framework/security/htmlfilter.php
@@ -369,6 +369,28 @@ class HTMLFilter
 		}
 	}
 
+	/**
+	 * Get the object whitelist as a regular expression.
+	 *
+	 * If the whitelist has no usable entries, the returned pattern never
+	 * matches, so that every object/embed URL is rejected.
+	 *
+	 * @return string
+	 */
+	protected static function _getObjectWhitelist()
+	{
+		$domains = \EmbedFilter::getInstance()->getWhiteUrlList();
+		$result = array();
+		foreach($domains as $domain)
+		{
+			// An empty entry would turn into an alternative that matches every URL.
+			if (strval($domain) !== '')
+			{
+				$result[] = preg_quote($domain, '%');
+			}
+		}
+		if (!count($result))
+		{
+			// Nothing is allowed: '(?!)' is a lookahead that always fails.
+			return '%(?!)%';
+		}
+		return '%^https?://(' . implode('|', $result) . ')%';
+	}
+
 	/**
 	 * Get the iframe whitelist as a regular expression.
 	 *
@@ -415,6 +437,22 @@ class HTMLFilter
 			return htmlspecialchars($matches[0], ENT_QUOTES, 'UTF-8');
 		}, $content);
 
+		// Remove object and embed URLs that are not allowed.
+		$whitelist = self::_getObjectWhitelist();
+		$content = preg_replace_callback('!<(object|embed|param)([^>]+)>!i', function($matches) use($whitelist) {
+			return preg_replace_callback('!([a-zA-Z0-9_-]+)="([^"]+)"!', function($attr) use($whitelist) {
+				// Tag names are matched case-insensitively above, so attribute names must be too.
+				if (in_array(strtolower($attr[1]), array('data', 'src', 'href', 'url', 'movie', 'source'), true))
+				{
+					if (!preg_match($whitelist, htmlspecialchars_decode($attr[2])))
+					{
+						return $attr[1] . '=""';
+					}
+				}
+				return $attr[0];
+			}, $matches[0]);
+		}, $content);
+
 		// Remove link URLs that may be CSRF attempts.
 		$content = preg_replace_callback('!\b(src|href|data|value)="([^"]+)"!i', function($matches) use($allow_acts, $deny_acts) {
 			$url = preg_replace('!\s+!', '', htmlspecialchars_decode(rawurldecode($matches[2])));
diff --git a/common/legacy.php b/common/legacy.php
index 3ff522350..1830d3a90 100644
--- a/common/legacy.php
+++ b/common/legacy.php
@@ -827,9 +827,6 @@ function purifierHtml(&$content)
  */
 function removeHackTag($content)
 {
-	$oEmbedFilter = EmbedFilter::getInstance();
-	$oEmbedFilter->check($content);
-
 	return Rhymix\Framework\Security\HTMLFilter::clean($content);
 }
 