Merge pull request #660 from kijin/pr/htmlfilter-customizations

HTMLFilter 기능 개선
This commit is contained in:
Kijin Sung 2016-12-15 21:24:52 +09:00 committed by GitHub
commit fa25b1ba47
6 changed files with 124 additions and 18 deletions

View file

@ -103,6 +103,7 @@ return array(
'mediafilter' => array(
'iframe' => array(),
'object' => array(),
'classes' => array(),
),
'mobile' => array(
'enabled' => true,

View file

@ -2,6 +2,7 @@
namespace Rhymix\Framework\Filters;
use Rhymix\Framework\Config;
use Rhymix\Framework\Security;
use Rhymix\Framework\Storage;
@ -11,9 +12,9 @@ use Rhymix\Framework\Storage;
class HTMLFilter
{
/**
* HTMLPurifier instance is cached here.
* HTMLPurifier instances are cached here.
*/
protected static $_htmlpurifier;
protected static $_instances = array();
/**
* Pre-processing and post-processing filters are stored here.
@ -69,18 +70,26 @@ class HTMLFilter
* Filter HTML content to block XSS attacks.
*
* @param string $input
* @param bool $allow_editor_components (optional)
* @param bool $allow_widgets (optional)
* @return string
*/
public static function clean($input)
public static function clean($input, $allow_editor_components = true, $allow_widgets = false)
{
foreach (self::$_preproc as $callback)
{
$input = $callback($input);
}
$input = self::_preprocess($input);
$output = self::getHTMLPurifier()->purify($input);
$output = self::_postprocess($output);
$allowed_classes = Config::get('mediafilter.classes') ?: array();
if ($allow_widgets)
{
$allowed_classes[] = 'zbxe_widget_output';
}
$input = self::_preprocess($input, $allow_editor_components, $allow_widgets);
$output = self::getHTMLPurifier($allowed_classes)->purify($input);
$output = self::_postprocess($output, $allow_editor_components, $allow_widgets);
foreach (self::$_postproc as $callback)
{
@ -93,18 +102,24 @@ class HTMLFilter
/**
* Get an instance of HTMLPurifier.
*
* @param array $allowed_classes (optional)
* @return object
*/
public static function getHTMLPurifier()
public static function getHTMLPurifier($allowed_classes = array())
{
// Keep separate instances for different sets of allowed classes.
$allowed_classes = array_unique($allowed_classes);
sort($allowed_classes);
$key = sha1(serialize($allowed_classes));
// Create an instance with reasonable defaults.
if (self::$_htmlpurifier === null)
if (!isset(self::$_instances[$key]))
{
// Get the default configuration.
$config = \HTMLPurifier_Config::createDefault();
// Customize the default configuration.
$config->set('Attr.AllowedClasses', array());
$config->set('Attr.AllowedClasses', $allowed_classes);
$config->set('Attr.AllowedFrameTargets', array('_blank'));
$config->set('Attr.DefaultImageAlt', '');
$config->set('Attr.EnableID', true);
@ -144,11 +159,11 @@ class HTMLFilter
self::_supportCSS3($config);
// Cache our instance of HTMLPurifier.
self::$_htmlpurifier = new \HTMLPurifier($config);
self::$_instances[$key] = new \HTMLPurifier($config);
}
// Return the cached instance.
return self::$_htmlpurifier;
return self::$_instances[$key];
}
/**
@ -379,12 +394,17 @@ class HTMLFilter
* Rhymix-specific preprocessing method.
*
* @param string $content
* @param bool $allow_editor_components (optional)
* @param bool $allow_widgets (optional)
* @return string
*/
protected static function _preprocess($content)
protected static function _preprocess($content, $allow_editor_components = true, $allow_widgets = false)
{
// Encode widget and editor component properties so that they are not removed by HTMLPurifier.
$content = self::_encodeWidgetsAndEditorComponents($content);
if ($allow_editor_components || $allow_widgets)
{
$content = self::_encodeWidgetsAndEditorComponents($content, $allow_editor_components, $allow_widgets);
}
return $content;
}
@ -392,9 +412,11 @@ class HTMLFilter
* Rhymix-specific postprocessing method.
*
* @param string $content
* @param bool $allow_editor_components (optional)
* @param bool $allow_widgets (optional)
* @return string
*/
protected static function _postprocess($content)
protected static function _postprocess($content, $allow_editor_components = true, $allow_widgets = false)
{
// Define acts to allow and deny.
$allow_acts = array('procFileDownload');
@ -436,7 +458,7 @@ class HTMLFilter
}, $content);
// Restore widget and editor component properties.
$content = self::_decodeWidgetsAndEditorComponents($content);
$content = self::_decodeWidgetsAndEditorComponents($content, $allow_editor_components, $allow_widgets);
return $content;
}
@ -444,11 +466,27 @@ class HTMLFilter
* Encode widgets and editor components before processing.
*
* @param string $content
* @param bool $allow_editor_components (optional)
* @param bool $allow_widgets (optional)
* @return string
*/
protected static function _encodeWidgetsAndEditorComponents($content)
protected static function _encodeWidgetsAndEditorComponents($content, $allow_editor_components = true, $allow_widgets = false)
{
return preg_replace_callback('!<(div|img)([^>]*)(editor_component="[^"]+"|class="zbxe_widget_output")([^>]*)>!i', function($match) {
$regexp = array();
if ($allow_editor_components)
{
$regexp[] = 'editor_component="[^"]+"';
}
if ($allow_widgets)
{
$regexp[] = 'class="zbxe_widget_output"';
}
if (!count($regexp))
{
return $content;
}
return preg_replace_callback('!<(div|img)([^>]*)(' . implode('|', $regexp) . ')([^>]*)>!i', function($match) {
$tag = strtolower($match[1]);
$attrs = array();
$html = preg_replace_callback('!([a-zA-Z0-9_-]+)="([^"]+)"!', function($attr) use($tag, &$attrs) {
@ -477,10 +515,25 @@ class HTMLFilter
* Decode widgets and editor components after processing.
*
* @param string $content
* @param bool $allow_editor_components (optional)
* @param bool $allow_widgets (optional)
* @return string
*/
protected static function _decodeWidgetsAndEditorComponents($content)
protected static function _decodeWidgetsAndEditorComponents($content, $allow_editor_components = true, $allow_widgets = false)
{
if (!$allow_editor_components)
{
$content = preg_replace('!(<(?:div|img)[^>]*)\s(editor_component="(?:[^"]+)")!i', '$1', $content);
}
if (!$allow_widgets)
{
$content = preg_replace('!(<(?:div|img)[^>]*)\s(widget="(?:[^"]+)")!i', '$1blocked-$2', $content);
}
if (!$allow_editor_components && !$allow_widgets)
{
return $content;
}
return preg_replace_callback('!<(div|img)([^>]*)(\srx_encoded_properties="([^"]+)")!i', function($match) {
$attrs = array();
$decoded_properties = Security::decrypt($match[4]);

View file

@ -690,6 +690,14 @@ class adminAdminController extends admin
natcasesort($object_whitelist);
Rhymix\Framework\Config::set('mediafilter.object', array_values($object_whitelist));
// HTML classes
$classes = $vars->mediafilter_classes;
$classes = array_filter(array_map('trim', preg_split('/[\r\n]/', $classes)), function($item) {
return preg_match('/^[a-zA-Z0-9_-]+$/u', $item);
});
natcasesort($classes);
Rhymix\Framework\Config::set('mediafilter.classes', array_values($classes));
// Remove old embed filter
$config = Rhymix\Framework\Config::getAll();
unset($config['embedfilter']);

View file

@ -490,6 +490,7 @@ class adminAdminView extends admin
// Load embed filter.
context::set('mediafilter_iframe', implode(PHP_EOL, Rhymix\Framework\Filters\MediaFilter::getIframeWhitelist()));
context::set('mediafilter_object', implode(PHP_EOL, Rhymix\Framework\Filters\MediaFilter::getObjectWhitelist()));
context::set('mediafilter_classes', implode(PHP_EOL, Rhymix\Framework\Config::get('mediafilter.classes') ?: array()));
// Admin IP access control
$allowed_ip = Rhymix\Framework\Config::get('admin.allow');

View file

@ -19,6 +19,12 @@
<textarea name="mediafilter_object" id="mediafilter_object" rows="8" style="width:100%;">{$mediafilter_object}</textarea>
</div>
</div>
<div class="x_control-group">
<label class="x_control-label" for="mediafilter_classes">HTML class</label>
<div class="x_controls" style="margin-right:14px">
<textarea name="mediafilter_classes" id="mediafilter_classes" rows="4" style="width:100%;">{$mediafilter_classes}</textarea>
</div>
</div>
<div class="x_control-group">
<label class="x_control-label" for="admin_allowed_ip">{$lang->admin_ip_allow}</label>
<div class="x_controls">

View file

@ -59,6 +59,7 @@ class HTMLFilterTest extends \Codeception\TestCase\Test
)
);
config('mediafilter.classes', array());
foreach ($tests as $test)
{
$this->assertEquals($test[1], Rhymix\Framework\Filters\HTMLFilter::clean($test[0]));
@ -142,6 +143,19 @@ class HTMLFilterTest extends \Codeception\TestCase\Test
$this->assertEquals($target, Rhymix\Framework\Filters\HTMLFilter::clean($source));
}
public function testHTMLFilterAllowedClasses()
{
// Verifies that the 'mediafilter.classes' setting controls which class names survive HTMLFilter::clean().
// The same markup is filtered under both configurations.
$markup = '<p class="mytest">Hello World</p>';

// Empty whitelist: the class attribute is expected to be removed.
config('mediafilter.classes', array());
$filtered = Rhymix\Framework\Filters\HTMLFilter::clean($markup);
$this->assertEquals('<p>Hello World</p>', $filtered);

// Whitelist containing "mytest": the markup is expected to come back unchanged.
config('mediafilter.classes', array('mytest'));
$filtered = Rhymix\Framework\Filters\HTMLFilter::clean($markup);
$this->assertEquals($markup, $filtered);
}
public function testHTMLFilterEditorComponent()
{
$source = '<img somekey="somevalue" otherkey="othervalue" onmouseover="alert(\'xss\');" editor_component="component_name" src="./foo/bar.jpg" alt="My Picture" style="width:320px;height:240px;" width="320" height="240" />';
@ -159,6 +173,29 @@ class HTMLFilterTest extends \Codeception\TestCase\Test
$source = '<div editor_component="component_name" style="width:400px;height:300px;" draggable dropzone contextmenu="whatever"></div>';
$target = '<div editor_component="component_name" style="width:400px;height:300px;"></div>';
$this->assertEquals($target, Rhymix\Framework\Filters\HTMLFilter::clean($source));
$source = '<img somekey="somevalue" otherkey="othervalue" onmouseover="alert(\'xss\');" editor_component="component_name" src="./foo/bar.jpg" alt="My Picture" style="width:320px;height:240px;" width="320" height="240" />';
$target = '<img src="./foo/bar.jpg" alt="My Picture" style="width:320px;height:240px;" width="320" height="240" />';
$this->assertEquals($target, Rhymix\Framework\Filters\HTMLFilter::clean($source, false));
$source = '<img somekey="somevalue" otherkey="othervalue" onkeypress="alert(\'xss\');" editor_component="component_name" />';
$target = '';
$this->assertEquals($target, Rhymix\Framework\Filters\HTMLFilter::clean($source, false));
}
public function testHTMLFilterWidgetCode()
{
// Verifies how HTMLFilter::clean() treats widget placeholder markup (an <img> carrying
// class="zbxe_widget_output" plus widget attributes) depending on its $allow_widgets argument.

// Case 1: default arguments (widgets not allowed). The widget <img> is expected to be dropped entirely.
$source = '<p>Hello World</p><img class="zbxe_widget_output" widget="content" skin="default" colorset="white" widget_sequence="1234" widget_cache="1m" content_type="document" module_srls="56" list_type="normal" tab_type="none" markup_type="table" page_count="1" option_view="title,regdate,nickname" show_browser_title="Y" show_comment_count="Y" show_trackback_count="Y" show_category="Y" show_icon="Y" show_secret="N" order_target="regdate" order_type="desc" thumbnail_type="crop" />';
$target = '<p>Hello World</p>';
$this->assertEquals($target, Rhymix\Framework\Filters\HTMLFilter::clean($source));
// Case 2: widgets explicitly allowed (third argument true). The widget attributes are expected to be kept,
// and the expected output ends with src="", the widget class and alt="".
$source = '<p>Hello World</p><img class="zbxe_widget_output" widget="content" skin="default" colorset="white" widget_sequence="1234" widget_cache="1m" content_type="document" module_srls="56" list_type="normal" tab_type="none" markup_type="table" page_count="1" option_view="title,regdate,nickname" show_browser_title="Y" show_comment_count="Y" show_trackback_count="Y" show_category="Y" show_icon="Y" show_secret="N" order_target="regdate" order_type="desc" thumbnail_type="crop" />';
$target = '<p>Hello World</p><img widget="content" skin="default" colorset="white" widget_sequence="1234" widget_cache="1m" content_type="document" module_srls="56" list_type="normal" tab_type="none" markup_type="table" page_count="1" option_view="title,regdate,nickname" show_browser_title="Y" show_comment_count="Y" show_trackback_count="Y" show_category="Y" show_icon="Y" show_secret="N" order_target="regdate" order_type="desc" thumbnail_type="crop" src="" class="zbxe_widget_output" alt="" />';
$this->assertEquals($target, Rhymix\Framework\Filters\HTMLFilter::clean($source, true, true));
// Case 3: widgets allowed, but the input also carries an onmouseover handler. The expected output is the
// same as in case 2, i.e. the event handler attribute must not survive.
$source = '<p>Hello World</p><img class="zbxe_widget_output" widget="content" onmouseover="alert(\'xss\');" skin="default" colorset="white" widget_sequence="1234" widget_cache="1m" content_type="document" module_srls="56" list_type="normal" tab_type="none" markup_type="table" page_count="1" option_view="title,regdate,nickname" show_browser_title="Y" show_comment_count="Y" show_trackback_count="Y" show_category="Y" show_icon="Y" show_secret="N" order_target="regdate" order_type="desc" thumbnail_type="crop" />';
$target = '<p>Hello World</p><img widget="content" skin="default" colorset="white" widget_sequence="1234" widget_cache="1m" content_type="document" module_srls="56" list_type="normal" tab_type="none" markup_type="table" page_count="1" option_view="title,regdate,nickname" show_browser_title="Y" show_comment_count="Y" show_trackback_count="Y" show_category="Y" show_icon="Y" show_secret="N" order_target="regdate" order_type="desc" thumbnail_type="crop" src="" class="zbxe_widget_output" alt="" />';
$this->assertEquals($target, Rhymix\Framework\Filters\HTMLFilter::clean($source, true, true));
}
public function testHTMLFilterUserContentID()