Add MediaFilter class to absorb most EmbedFilter functionality

This commit is contained in:
Kijin Sung 2016-03-13 09:51:15 +09:00
parent 3d3fa6d7d3
commit 7ecd9230c2
7 changed files with 266 additions and 1888 deletions

View file

@ -4,216 +4,78 @@
class EmbedFilter
{
/**
* allow script access list
* Deprecated properties
* @var array
*/
var $allowscriptaccessList = array();
var $whiteUrlList = array();
var $whiteIframeUrlList = array();
var $mimeTypeList = array();
var $extList = array();
var $parser = NULL;
/**
* Constructor.
*
* Calls _makeWhiteDomainList() with no arguments to populate the whitelist properties.
*
* @return void
*/
function __construct()
{
$this->_makeWhiteDomainList();
}
public $whiteUrlList = array();
public $whiteIframeUrlList = array();
public $mimeTypeList = array();
public $extList = array();
/**
* Return an EmbedFilter object.
*
* NOTE(review): two return strategies are present below: a shared instance
* cached in $GLOBALS['__EMBEDFILTER_INSTANCE__'], followed by `new self()`.
* This looks like the old and new versions of the method shown together;
* confirm which one is intended.
*
* @return EmbedFilter
*/
function getInstance()
{
// Create the shared instance the first time this method is called.
if(!isset($GLOBALS['__EMBEDFILTER_INSTANCE__']))
{
$GLOBALS['__EMBEDFILTER_INSTANCE__'] = new EmbedFilter();
}
return $GLOBALS['__EMBEDFILTER_INSTANCE__'];
// NOTE(review): unreachable as written, because it follows an unconditional return.
return new self();
}
/**
* Return the whitelist of URL prefixes for object/embed tags.
*
* NOTE(review): the first return hands back this instance's $whiteUrlList
* property; the second one, which delegates to MediaFilter::getObjectWhitelist(),
* is unreachable as written. Confirm which one is intended.
*
* @return array
*/
public function getWhiteUrlList()
{
return $this->whiteUrlList;
return Rhymix\Framework\Security\MediaFilter::getObjectWhitelist();
}
/**
* Return the whitelist of URL prefixes for iframe tags.
*
* NOTE(review): the first return hands back this instance's $whiteIframeUrlList
* property; the second one, which delegates to MediaFilter::getIframeWhitelist(),
* is unreachable as written. Confirm which one is intended.
*
* @return array
*/
public function getWhiteIframeUrlList()
{
return $this->whiteIframeUrlList;
return Rhymix\Framework\Security\MediaFilter::getIframeWhitelist();
}
/**
* Check whether a URL matches the object whitelist.
*
* Thin wrapper around MediaFilter::matchObjectWhitelist().
*
* @param string $urlAttribute
* @return bool
*/
function isWhiteDomain($urlAttribute)
{
return Rhymix\Framework\Security\MediaFilter::matchObjectWhitelist($urlAttribute);
}
/**
* Check whether a URL matches the iframe whitelist.
*
* Thin wrapper around MediaFilter::matchIframeWhitelist().
*
* @param string $urlAttribute
* @return bool
*/
function isWhiteIframeDomain($urlAttribute)
{
return Rhymix\Framework\Security\MediaFilter::matchIframeWhitelist($urlAttribute);
}
/**
* Always returns true; the $mimeType argument is not inspected.
*
* @param string $mimeType
* @return bool
*/
function isWhiteMimetype($mimeType)
{
return true;
}
/**
* Always returns true; the $ext argument is not inspected.
*
* @param string $ext
* @return bool
*/
function isWhiteExt($ext)
{
return true;
}
/**
* No-op: the body contains no statements, so $content is left untouched.
*
* @param $content Taken by reference; not modified.
* @return void
*/
function check(&$content)
{
// This functionality has been moved to the HTMLFilter class.
}
/**
* Check iframe tag in the content.
*
* No-op: the body contains no statements, so $content is left untouched.
*
* @param $content Taken by reference; not modified.
* @return void
*/
function checkIframeTag(&$content)
{
// This functionality has been moved to the HTMLFilter class.
}
/**
* Check object tag in the content.
*
* No-op: the body contains no statements, so $content is left untouched.
*
* @param $content Taken by reference; not modified.
* @return void
*/
function checkObjectTag(&$content)
{
// This functionality has been moved to the HTMLFilter class.
}
/**
* Check embed tag in the content.
*
* No-op: the body contains no statements, so $content is left untouched.
*
* @param $content Taken by reference; not modified.
* @return void
*/
function checkEmbedTag(&$content)
{
// This functionality has been moved to the HTMLFilter class.
}
/**
* Check param tag in the content.
*
* No-op: the body contains no statements, so $content is left untouched.
*
* @param $content Taken by reference; not modified.
* @return void
*/
function checkParamTag(&$content)
{
// This functionality has been moved to the HTMLFilter class.
}
/**
 * Tell whether a URL (object "data" attribute or embed "src" attribute)
 * starts with http:// or https:// followed by one of the prefixes in
 * $this->whiteUrlList. The comparison is case-insensitive.
 *
 * @param string $urlAttribute
 * @return bool
 */
function isWhiteDomain($urlAttribute)
{
	if(!is_array($this->whiteUrlList))
	{
		return FALSE;
	}

	foreach($this->whiteUrlList as $allowedPrefix)
	{
		$pattern = '@^https?://' . preg_quote($allowedPrefix, '@') . '@i';
		if(preg_match($pattern, $urlAttribute))
		{
			return TRUE;
		}
	}

	return FALSE;
}
/**
 * Tell whether an iframe "src" URL starts with http:// or https:// followed
 * by one of the prefixes in $this->whiteIframeUrlList. The comparison is
 * case-insensitive.
 *
 * @param string $urlAttribute
 * @return bool
 */
function isWhiteIframeDomain($urlAttribute)
{
	if(!is_array($this->whiteIframeUrlList))
	{
		return FALSE;
	}

	foreach($this->whiteIframeUrlList as $allowedPrefix)
	{
		$pattern = '@^https?://' . preg_quote($allowedPrefix, '@') . '@i';
		if(preg_match($pattern, $urlAttribute))
		{
			return TRUE;
		}
	}

	return FALSE;
}
/**
 * Tell whether a MIME type (object or embed "type" attribute) is present
 * as a key of $this->mimeTypeList.
 *
 * @param string $mimeType
 * @return bool
 */
function isWhiteMimetype($mimeType)
{
	return isset($this->mimeTypeList[$mimeType]);
}
/**
 * Tell whether a file extension is present as a key of $this->extList.
 *
 * @param string $ext
 * @return bool
 */
function isWhiteExt($ext)
{
	return isset($this->extList[$ext]);
}
/**
* Build the whitelist properties of this instance.
*
* The extension and MIME type lists always come from common/defaults/whitelist.php.
* The object and iframe URL lists come either from the $whitelist argument or,
* when it is NULL, from the defaults file plus the 'embedfilter.object' and
* 'embedfilter.iframe' configuration values. A leading http:// or https:// is
* stripped from argument- and config-supplied prefixes; default prefixes are
* used verbatim. Both lists are de-duplicated and sorted with natcasesort().
*
* @param $whitelist array Custom lists under the keys 'object' and 'iframe', or NULL to use the defaults
* @return void
*/
function _makeWhiteDomainList($whitelist = NULL)
{
$whiteUrlDefaultList = (include RX_BASEDIR . 'common/defaults/whitelist.php');
$this->extList = $whiteUrlDefaultList['extensions'];
$this->mimeTypeList = $whiteUrlDefaultList['mime'];
$this->whiteUrlList = array();
$this->whiteIframeUrlList = array();
if($whitelist !== NULL)
{
// Anything that is not an array with both keys is rebuilt from object properties.
// NOTE(review): an array that has only one of the two keys also lands here, and
// the isset() checks on ->object / ->iframe then presumably yield two empty lists,
// discarding the key that was supplied. Confirm that this is intended.
if(!is_array($whitelist) || !isset($whitelist['object']) || !isset($whitelist['iframe']))
{
$whitelist = array(
'object' => isset($whitelist->object) ? $whitelist->object : array(),
'iframe' => isset($whitelist->iframe) ? $whitelist->iframe : array(),
);
}
foreach ($whitelist['object'] as $prefix)
{
// Keep only the part after the scheme, if a scheme is present.
$this->whiteUrlList[] = preg_match('@^https?://(.*)$@i', $prefix, $matches) ? $matches[1] : $prefix;
}
foreach ($whitelist['iframe'] as $prefix)
{
$this->whiteIframeUrlList[] = preg_match('@^https?://(.*)$@i', $prefix, $matches) ? $matches[1] : $prefix;
}
}
else
{
// Default prefixes are copied without stripping a scheme.
foreach ($whiteUrlDefaultList['object'] as $prefix)
{
$this->whiteUrlList[] = $prefix;
}
foreach ($whiteUrlDefaultList['iframe'] as $prefix)
{
$this->whiteIframeUrlList[] = $prefix;
}
// Configured prefixes are appended to the defaults.
if ($embedfilter_object = config('embedfilter.object'))
{
foreach ($embedfilter_object as $prefix)
{
$this->whiteUrlList[] = preg_match('@^https?://(.*)$@i', $prefix, $matches) ? $matches[1] : $prefix;
}
}
if ($embedfilter_iframe = config('embedfilter.iframe'))
{
foreach ($embedfilter_iframe as $prefix)
{
$this->whiteIframeUrlList[] = preg_match('@^https?://(.*)$@i', $prefix, $matches) ? $matches[1] : $prefix;
}
}
}
// Remove duplicates, then sort case-insensitively in natural order (keys are preserved).
$this->whiteUrlList = array_unique($this->whiteUrlList);
$this->whiteIframeUrlList = array_unique($this->whiteIframeUrlList);
natcasesort($this->whiteUrlList);
natcasesort($this->whiteIframeUrlList);
}
}
/* End of file : EmbedFilter.class.php */
/* Location: ./classes/security/EmbedFilter.class.php */

File diff suppressed because it is too large Load diff

View file

@ -87,21 +87,6 @@ class HTMLFilter
return $output;
}
/**
 * Strip <object>, <embed> and multimedia_link <img> tags from HTML content,
 * putting the replacement string in their place.
 *
 * @param string $input
 * @param string $replacement
 * @return string
 */
public static function removeEmbeddedMedia($input, $replacement = '')
{
	// preg_replace() applies an array of patterns one after another, in order.
	$patterns = array(
		'!<object[^>]*>(.*?</object>)?!is',
		'!<embed[^>]*>(.*?</embed>)?!is',
		'!<img[^>]*editor_component="multimedia_link"[^>]*>(.*?</img>)?!is',
	);
	return preg_replace($patterns, $replacement, $input);
}
/**
* Get an instance of HTMLPurifier.
*
@ -136,7 +121,7 @@ class HTMLFilter
$config->set('HTML.SafeEmbed', true);
$config->set('HTML.SafeIframe', true);
$config->set('HTML.SafeObject', true);
$config->set('URI.SafeIframeRegexp', self::_getIframeWhitelist());
$config->set('URI.SafeIframeRegexp', MediaFilter::getIframeWhitelistRegex());
// Set the serializer path.
$config->set('Cache.SerializerPath', RX_BASEDIR . 'files/cache/htmlpurifier');
@ -384,38 +369,6 @@ class HTMLFilter
}
}
/**
 * Build a regular expression from the object whitelist of EmbedFilter.
 *
 * Every prefix is escaped for use inside a %-delimited pattern and the
 * prefixes are joined as alternatives after "http://" or "https://".
 *
 * @return string
 */
protected static function _getObjectWhitelist()
{
	$quoted = array_map(function($domain) {
		return preg_quote($domain, '%');
	}, \EmbedFilter::getInstance()->getWhiteUrlList());

	return '%^https?://(' . implode('|', $quoted) . ')%';
}
/**
 * Build a regular expression from the iframe whitelist of EmbedFilter.
 *
 * Every prefix is escaped for use inside a %-delimited pattern and the
 * prefixes are joined as alternatives after "http://" or "https://".
 *
 * @return string
 */
protected static function _getIframeWhitelist()
{
	$quoted = array_map(function($domain) {
		return preg_quote($domain, '%');
	}, \EmbedFilter::getInstance()->getWhiteIframeUrlList());

	return '%^https?://(' . implode('|', $quoted) . ')%';
}
/**
* Rhymix-specific preprocessing method.
*
@ -447,7 +400,7 @@ class HTMLFilter
}, $content);
// Remove object and embed URLs that are not allowed.
$whitelist = self::_getObjectWhitelist();
$whitelist = MediaFilter::getObjectWhitelistRegex();
$content = preg_replace_callback('!<(object|embed|param)([^>]+)>!i', function($matches) use($whitelist) {
return preg_replace_callback('!([a-zA-Z0-9_-]+)="([^"]+)"!', function($attr) use($whitelist) {
if (in_array($attr[1], array('data', 'src', 'href', 'url', 'movie', 'source')))

View file

@ -0,0 +1,180 @@
<?php
namespace Rhymix\Framework\Security;
/**
 * The media filter class.
 *
 * Holds the whitelists of URL prefixes (without scheme) that may be used in
 * iframe tags and in object/embed tags, exposes them as arrays and as regular
 * expressions, and can strip embedded media from HTML content.
 */
class MediaFilter
{
	/**
	 * Whitelists are cached here. They stay NULL until the first load.
	 */
	protected static $_iframe_whitelist;
	protected static $_object_whitelist;

	/**
	 * Get the iframe whitelist.
	 *
	 * @return array
	 */
	public static function getIframeWhitelist()
	{
		// empty() instead of count(): the cache is NULL before the first load,
		// and count(NULL) is a TypeError on PHP 8.
		if (empty(self::$_iframe_whitelist))
		{
			self::_loadWhitelists();
		}
		return self::$_iframe_whitelist;
	}

	/**
	 * Get the iframe whitelist as a regular expression.
	 *
	 * @return string
	 */
	public static function getIframeWhitelistRegex()
	{
		return self::_buildWhitelistRegex(self::getIframeWhitelist());
	}

	/**
	 * Get the object whitelist.
	 *
	 * @return array
	 */
	public static function getObjectWhitelist()
	{
		if (empty(self::$_object_whitelist))
		{
			self::_loadWhitelists();
		}
		return self::$_object_whitelist;
	}

	/**
	 * Get the object whitelist as a regular expression.
	 *
	 * @return string
	 */
	public static function getObjectWhitelistRegex()
	{
		return self::_buildWhitelistRegex(self::getObjectWhitelist());
	}

	/**
	 * Check if a URL matches the iframe whitelist.
	 *
	 * @param string $url
	 * @return bool
	 */
	public static function matchIframeWhitelist($url)
	{
		return preg_match(self::getIframeWhitelistRegex(), $url) ? true : false;
	}

	/**
	 * Check if a URL matches the object whitelist.
	 *
	 * @param string $url
	 * @return bool
	 */
	public static function matchObjectWhitelist($url)
	{
		return preg_match(self::getObjectWhitelistRegex(), $url) ? true : false;
	}

	/**
	 * Remove embedded media from HTML content.
	 *
	 * Removes <object> and <embed> tags (with their content up to the closing
	 * tag, when there is one) and <img> tags carrying
	 * editor_component="multimedia_link".
	 *
	 * @param string $input
	 * @param string $replacement
	 * @return string
	 */
	public static function removeEmbeddedMedia($input, $replacement = '')
	{
		$input = preg_replace('!<object[^>]*>(.*?</object>)?!is', $replacement, $input);
		$input = preg_replace('!<embed[^>]*>(.*?</embed>)?!is', $replacement, $input);
		$input = preg_replace('!<img[^>]*editor_component="multimedia_link"[^>]*>(.*?</img>)?!is', $replacement, $input);
		return $input;
	}

	/**
	 * Load whitelists.
	 *
	 * Without a custom whitelist, the defaults from common/defaults/whitelist.php
	 * are used, followed by the configured prefixes ('mediafilter.*', falling
	 * back to 'embedfilter.*'). A custom whitelist replaces both of them and may
	 * be an array or an object with 'iframe' and/or 'object' entries.
	 *
	 * @param array $custom_whitelist
	 * @return void
	 */
	protected static function _loadWhitelists($custom_whitelist = array())
	{
		$default_whitelist = (include RX_BASEDIR . 'common/defaults/whitelist.php');
		$iframe_whitelist = array();
		$object_whitelist = array();

		if (!empty($custom_whitelist))
		{
			// Accept both arrays and objects; a missing entry becomes an empty list.
			$custom_whitelist = (array)$custom_whitelist;
			$custom_iframe = isset($custom_whitelist['iframe']) ? (array)$custom_whitelist['iframe'] : array();
			$custom_object = isset($custom_whitelist['object']) ? (array)$custom_whitelist['object'] : array();

			foreach ($custom_iframe as $prefix)
			{
				$iframe_whitelist[] = self::_stripScheme($prefix);
			}
			foreach ($custom_object as $prefix)
			{
				$object_whitelist[] = self::_stripScheme($prefix);
			}
		}
		else
		{
			// Default prefixes are stored without a scheme and are used verbatim.
			foreach ($default_whitelist['iframe'] as $prefix)
			{
				$iframe_whitelist[] = $prefix;
			}
			foreach ($default_whitelist['object'] as $prefix)
			{
				$object_whitelist[] = $prefix;
			}

			if ($iframe_config = config('mediafilter.iframe') ?: config('embedfilter.iframe'))
			{
				foreach ($iframe_config as $prefix)
				{
					$iframe_whitelist[] = self::_stripScheme($prefix);
				}
			}
			if ($object_config = config('mediafilter.object') ?: config('embedfilter.object'))
			{
				foreach ($object_config as $prefix)
				{
					$object_whitelist[] = self::_stripScheme($prefix);
				}
			}
		}

		$object_whitelist = array_unique($object_whitelist);
		$iframe_whitelist = array_unique($iframe_whitelist);
		natcasesort($object_whitelist);
		natcasesort($iframe_whitelist);

		self::$_object_whitelist = $object_whitelist;
		self::$_iframe_whitelist = $iframe_whitelist;
	}

	/**
	 * Turn a list of URL prefixes into a regular expression.
	 *
	 * @param array $prefixes
	 * @return string
	 */
	protected static function _buildWhitelistRegex(array $prefixes)
	{
		// An empty alternation would match every http(s) URL, so an empty
		// whitelist gets a pattern that can never match.
		if (!count($prefixes))
		{
			return '%^(?!)%';
		}

		$quoted = array();
		foreach ($prefixes as $prefix)
		{
			$quoted[] = preg_quote($prefix, '%');
		}
		return '%^https?://(' . implode('|', $quoted) . ')%';
	}

	/**
	 * Remove a leading http:// or https:// from a URL prefix.
	 *
	 * @param string $prefix
	 * @return string
	 */
	protected static function _stripScheme($prefix)
	{
		return preg_match('@^https?://(.*)$@i', $prefix, $matches) ? $matches[1] : $prefix;
	}
}

View file

@ -1036,7 +1036,7 @@ function stripEmbedTagForAdmin(&$content, $writer_member_srl)
$security_msg = '<div style="border: 1px solid #DDD; background: #FAFAFA; text-align:center; margin: 1em 0;">' .
'<p style="margin: 1em;">' . lang('security_warning_embed') . '</p></div>';
$content = Rhymix\Framework\Security\HTMLFilter::removeEmbeddedMedia($content, $security_msg);
$content = Rhymix\Framework\Security\MediaFilter::removeEmbeddedMedia($content, $security_msg);
}
return;

View file

@ -64,20 +64,4 @@ class HTMLFilterTest extends \Codeception\TestCase\Test
$this->assertEquals($test[1], Rhymix\Framework\Security\HTMLFilter::clean($test[0]));
}
}
/**
 * HTMLFilter::removeEmbeddedMedia() must strip object, embed and
 * multimedia_link img tags and leave the surrounding markup in place.
 */
public function testRemoveEmbeddedMedia()
{
	$cases = array(
		'<div><object></object></div>' => '<div></div>',
		'<div><object><embed></embed></object></div>' => '<div></div>',
		'<div><object><param /></object></div>' => '<div></div>',
		'<div><img class="foo" editor_component="multimedia_link" /></div>' => '<div></div>',
		'<div><img editor_component="multimedia_link"></img></div>' => '<div></div>',
	);

	foreach ($cases as $html => $expected)
	{
		$actual = Rhymix\Framework\Security\HTMLFilter::removeEmbeddedMedia($html);
		$this->assertEquals($expected, $actual);
	}
}
}

View file

@ -0,0 +1,47 @@
<?php
class MediaFilterTest extends \Codeception\TestCase\Test
{
	/**
	 * The iframe and object whitelists are checked in the same three ways:
	 * as an array, as a regular expression, and by matching individual URLs.
	 */
	public function testWhitelists()
	{
		$filter = 'Rhymix\Framework\Security\MediaFilter';

		foreach (array('Iframe', 'Object') as $kind)
		{
			$getList = array($filter, 'get' . $kind . 'Whitelist');
			$getRegex = array($filter, 'get' . $kind . 'WhitelistRegex');
			$matchUrl = array($filter, 'match' . $kind . 'Whitelist');

			// Whitelist as array.
			$this->assertTrue(in_array('www.youtube.com/', call_user_func($getList)));
			$this->assertFalse(in_array('random-website.com/', call_user_func($getList)));

			// Whitelist as regex: the dots must be escaped.
			$this->assertTrue(strpos(call_user_func($getRegex), '|www\.youtube\.com/') !== false);
			$this->assertFalse(strpos(call_user_func($getRegex), 'www.youtube.com/') !== false);

			// Match individual URLs against the whitelist.
			$this->assertTrue(call_user_func($matchUrl, 'https://www.youtube.com/v'));
			$this->assertFalse(call_user_func($matchUrl, 'http://www-youtube.com/v'));
		}
	}

	/**
	 * MediaFilter::removeEmbeddedMedia() must strip object, embed and
	 * multimedia_link img tags and leave the surrounding markup in place.
	 */
	public function testRemoveEmbeddedMedia()
	{
		$cases = array(
			'<div><object></object></div>' => '<div></div>',
			'<div><object><embed></embed></object></div>' => '<div></div>',
			'<div><object><param /></object></div>' => '<div></div>',
			'<div><img class="foo" editor_component="multimedia_link" /></div>' => '<div></div>',
			'<div><img editor_component="multimedia_link"></img></div>' => '<div></div>',
		);

		foreach ($cases as $html => $expected)
		{
			$actual = Rhymix\Framework\Security\MediaFilter::removeEmbeddedMedia($html);
			$this->assertEquals($expected, $actual);
		}
	}
}