mirror of
https://github.com/Lastorder-DC/rhymix.git
synced 2026-01-07 02:31:40 +09:00
Update HTMLPurifier configuration and allow HTML5
This commit is contained in:
parent
1b8a41b077
commit
b89818e13d
3 changed files with 281 additions and 154 deletions
|
|
@ -3,161 +3,14 @@
|
||||||
|
|
||||||
class Purifier
|
class Purifier
|
||||||
{
|
{
|
||||||
|
public static function getInstance()
|
||||||
private $_cacheDir;
|
|
||||||
private $_htmlPurifier;
|
|
||||||
private $_config;
|
|
||||||
private $_def;
|
|
||||||
|
|
||||||
public function __construct()
|
|
||||||
{
|
{
|
||||||
$this->_checkCacheDir();
|
return new self();
|
||||||
$this->_setConfig();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public function getInstance()
|
|
||||||
{
|
|
||||||
if(!isset($GLOBALS['__PURIFIER_INSTANCE__']))
|
|
||||||
{
|
|
||||||
$GLOBALS['__PURIFIER_INSTANCE__'] = new Purifier();
|
|
||||||
}
|
|
||||||
return $GLOBALS['__PURIFIER_INSTANCE__'];
|
|
||||||
}
|
|
||||||
|
|
||||||
private function _setConfig()
|
|
||||||
{
|
|
||||||
$this->_config = HTMLPurifier_Config::createDefault();
|
|
||||||
$this->_config->set('HTML.TidyLevel', 'light');
|
|
||||||
$this->_config->set('Output.FlashCompat', TRUE);
|
|
||||||
$this->_config->set('HTML.SafeObject', TRUE);
|
|
||||||
$this->_config->set('HTML.SafeEmbed', TRUE);
|
|
||||||
$this->_config->set('HTML.SafeIframe', TRUE);
|
|
||||||
$this->_config->set('URI.SafeIframeRegexp', $this->_getWhiteDomainRegexp());
|
|
||||||
$this->_config->set('Cache.SerializerPath', $this->_cacheDir);
|
|
||||||
$this->_config->set('Attr.AllowedFrameTargets', array('_blank'));
|
|
||||||
//$allowdClasses = array('emoticon');
|
|
||||||
//$this->_config->set('Attr.AllowedClasses', $allowdClasses);
|
|
||||||
$this->_def = $this->_config->getHTMLDefinition(TRUE);
|
|
||||||
}
|
|
||||||
|
|
||||||
private function _setDefinition(&$content)
|
|
||||||
{
|
|
||||||
// add attribute for edit component
|
|
||||||
$editComponentAttrs = $this->_searchEditComponent($content);
|
|
||||||
if(is_array($editComponentAttrs))
|
|
||||||
{
|
|
||||||
foreach($editComponentAttrs AS $k => $v)
|
|
||||||
{
|
|
||||||
$this->_def->addAttribute('img', $v, 'CDATA');
|
|
||||||
$this->_def->addAttribute('div', $v, 'CDATA');
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// add attribute for widget component
|
|
||||||
$widgetAttrs = $this->_searchWidget($content);
|
|
||||||
if(is_array($widgetAttrs))
|
|
||||||
{
|
|
||||||
foreach($widgetAttrs AS $k => $v)
|
|
||||||
{
|
|
||||||
$this->_def->addAttribute('img', $v, 'CDATA');
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Search attribute of edit component tag
|
|
||||||
* @param string $content
|
|
||||||
* @return array
|
|
||||||
*/
|
|
||||||
private function _searchEditComponent($content)
|
|
||||||
{
|
|
||||||
preg_match_all('!<(?:(div)|img)([^>]*)editor_component=([^>]*)>(?(1)(.*?)</div>)!is', $content, $m);
|
|
||||||
|
|
||||||
$attributeList = array();
|
|
||||||
if(is_array($m[2]))
|
|
||||||
{
|
|
||||||
foreach($m[2] as $key => $value)
|
|
||||||
{
|
|
||||||
unset($script, $m2);
|
|
||||||
$script = " {$m[2][$key]} editor_component={$m[3][$key]}";
|
|
||||||
|
|
||||||
if(preg_match_all('/([a-z0-9_-]+)="([^"]+)"/is', $script, $m2))
|
|
||||||
{
|
|
||||||
foreach($m2[1] as $value2)
|
|
||||||
{
|
|
||||||
//SECISSUE check style attr
|
|
||||||
if($value2 == 'style')
|
|
||||||
{
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
$attributeList[] = $value2;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return array_unique($attributeList);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Search edit component tag
|
|
||||||
* @param string $content
|
|
||||||
* @return array
|
|
||||||
*/
|
|
||||||
private function _searchWidget(&$content)
|
|
||||||
{
|
|
||||||
preg_match_all('!<(?:(div)|img)([^>]*)class="zbxe_widget_output"([^>]*)>(?(1)(.*?)</div>)!is', $content, $m);
|
|
||||||
|
|
||||||
$attributeList = array();
|
|
||||||
if(is_array($m[3]))
|
|
||||||
{
|
|
||||||
$content = str_replace('<img class="zbxe_widget_output"', '<img src="" class="zbxe_widget_output"', $content);
|
|
||||||
|
|
||||||
foreach($m[3] as $key => $value)
|
|
||||||
{
|
|
||||||
if (preg_match_all('/([a-z0-9_-]+)="([^"]+)"/is', $m[3][$key], $m2))
|
|
||||||
{
|
|
||||||
foreach($m2[1] as $value2)
|
|
||||||
{
|
|
||||||
//SECISSUE check style attr
|
|
||||||
if($value2 == 'style')
|
|
||||||
{
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
$attributeList[] = $value2;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return array_unique($attributeList);
|
|
||||||
}
|
|
||||||
|
|
||||||
private function _getWhiteDomainRegexp()
|
|
||||||
{
|
|
||||||
$oEmbedFilter = EmbedFilter::getInstance();
|
|
||||||
$whiteIframeUrlList = $oEmbedFilter->getWhiteIframeUrlList();
|
|
||||||
|
|
||||||
$whiteDomains = array();
|
|
||||||
foreach($whiteIframeUrlList as $domain)
|
|
||||||
{
|
|
||||||
$whiteDomains[] = preg_quote($domain, '%');
|
|
||||||
}
|
|
||||||
return '%^https?://(' . implode('|', $whiteDomains) . ')%';
|
|
||||||
}
|
|
||||||
|
|
||||||
private function _checkCacheDir()
|
|
||||||
{
|
|
||||||
// check htmlpurifier cache directory
|
|
||||||
$this->_cacheDir = _XE_PATH_ . 'files/cache/htmlpurifier';
|
|
||||||
FileHandler::makeDir($this->_cacheDir);
|
|
||||||
}
|
|
||||||
|
|
||||||
public function purify(&$content)
|
public function purify(&$content)
|
||||||
{
|
{
|
||||||
$this->_setDefinition($content);
|
$content = Rhymix\Framework\Security\HTMLFilter::clean($content);
|
||||||
$this->_htmlPurifier = new HTMLPurifier($this->_config);
|
|
||||||
|
|
||||||
$content = $this->_htmlPurifier->purify($content);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
275
common/framework/security/htmlfilter.php
Normal file
275
common/framework/security/htmlfilter.php
Normal file
|
|
@ -0,0 +1,275 @@
|
||||||
|
<?php
|
||||||
|
|
||||||
|
namespace Rhymix\Framework\Security;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The HTML filter class.
|
||||||
|
*/
|
||||||
|
class HTMLFilter
|
||||||
|
{
|
||||||
|
/**
|
||||||
|
* HTMLPurifier instance is cached here.
|
||||||
|
*/
|
||||||
|
protected static $_htmlpurifier;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Pre-processing and post-processing filters are stored here.
|
||||||
|
*/
|
||||||
|
protected static $_preproc = array();
|
||||||
|
protected static $_postproc = array();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Prepend a pre-processing filter.
|
||||||
|
*
|
||||||
|
* @param callable $callback
|
||||||
|
* @return void
|
||||||
|
*/
|
||||||
|
public static function prependPreFilter($callback)
|
||||||
|
{
|
||||||
|
array_unshift(self::$_preproc, $callback);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Append a pre-processing filter.
|
||||||
|
*
|
||||||
|
* @param callable $callback
|
||||||
|
* @return void
|
||||||
|
*/
|
||||||
|
public static function appendPreFilter($callback)
|
||||||
|
{
|
||||||
|
self::$_preproc[] = $callback;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Prepend a post-processing filter.
|
||||||
|
*
|
||||||
|
* @param callable $callback
|
||||||
|
* @return void
|
||||||
|
*/
|
||||||
|
public static function prependPostFilter($callback)
|
||||||
|
{
|
||||||
|
array_unshift(self::$_postproc, $callback);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Append a post-processing filter.
|
||||||
|
*
|
||||||
|
* @param callable $callback
|
||||||
|
* @return void
|
||||||
|
*/
|
||||||
|
public static function appendPostFilter($callback)
|
||||||
|
{
|
||||||
|
self::$_postproc[] = $callback;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Filter HTML content to block XSS attacks.
|
||||||
|
*
|
||||||
|
* @param string $input
|
||||||
|
* @return string
|
||||||
|
*/
|
||||||
|
public static function clean($input)
|
||||||
|
{
|
||||||
|
foreach (self::$_preproc as $callback)
|
||||||
|
{
|
||||||
|
$input = $callback($input);
|
||||||
|
}
|
||||||
|
|
||||||
|
$input = self::_encodeWidgetsAndEditorComponents($input);
|
||||||
|
$output = self::getHTMLPurifier()->purify($input);
|
||||||
|
$output = self::_decodeWidgetsAndEditorComponents($output);
|
||||||
|
|
||||||
|
foreach (self::$_postproc as $callback)
|
||||||
|
{
|
||||||
|
$output = $callback($output);
|
||||||
|
}
|
||||||
|
|
||||||
|
return $output;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get an instance of HTMLPurifier.
|
||||||
|
*
|
||||||
|
* @return object
|
||||||
|
*/
|
||||||
|
public static function getHTMLPurifier()
|
||||||
|
{
|
||||||
|
// Create an instance with reasonable defaults.
|
||||||
|
if (self::$_htmlpurifier === null)
|
||||||
|
{
|
||||||
|
// Get the default configuration.
|
||||||
|
$config = \HTMLPurifier_Config::createDefault();
|
||||||
|
|
||||||
|
// Customize the default configuration.
|
||||||
|
$config->set('Attr.AllowedFrameTargets', array('_blank'));
|
||||||
|
$config->set('Attr.DefaultImageAlt', '');
|
||||||
|
$config->set('Attr.EnableID', false);
|
||||||
|
$config->set('AutoFormat.AutoParagraph', false);
|
||||||
|
$config->set('AutoFormat.DisplayLinkURI', false);
|
||||||
|
$config->set('AutoFormat.Linkify', false);
|
||||||
|
$config->set('Core.Encoding', 'UTF-8');
|
||||||
|
$config->set('HTML.Doctype', 'XHTML 1.0 Transitional');
|
||||||
|
$config->set('HTML.FlashAllowFullScreen', true);
|
||||||
|
$config->set('HTML.MaxImgLength', null);
|
||||||
|
$config->set('CSS.MaxImgLength', null);
|
||||||
|
$config->set('CSS.Proprietary', true);
|
||||||
|
$config->set('Output.FlashCompat', true);
|
||||||
|
$config->set('Output.Newline', "\n");
|
||||||
|
$config->set('URI.MakeAbsolute', false);
|
||||||
|
|
||||||
|
// Allow embedding of external multimedia content.
|
||||||
|
$config->set('HTML.SafeEmbed', true);
|
||||||
|
$config->set('HTML.SafeIframe', true);
|
||||||
|
$config->set('HTML.SafeObject', true);
|
||||||
|
$config->set('URI.SafeIframeRegexp', self::_getIframeWhitelist());
|
||||||
|
|
||||||
|
// Set the serializer path.
|
||||||
|
$config->set('Cache.SerializerPath', RX_BASEDIR . 'files/cache/htmlpurifier');
|
||||||
|
\FileHandler::makeDir(RX_BASEDIR . 'files/cache/htmlpurifier');
|
||||||
|
|
||||||
|
// Modify the HTML definition to support editor components and widgets.
|
||||||
|
$def = $config->getHTMLDefinition(true);
|
||||||
|
$def->addAttribute('img', 'editor_component', 'Text');
|
||||||
|
$def->addAttribute('img', 'rx_encoded_properties', 'Text');
|
||||||
|
$def->addAttribute('div', 'rx_encoded_properties', 'Text');
|
||||||
|
|
||||||
|
// Support HTML5: Based on https://github.com/xemlock/htmlpurifier-html5
|
||||||
|
$def->addAttribute('img', 'srcset', 'Text');
|
||||||
|
$def->addAttribute('iframe', 'allowfullscreen', 'Bool');
|
||||||
|
$def->addElement('header', 'Block', 'Flow', 'Common');
|
||||||
|
$def->addElement('footer', 'Block', 'Flow', 'Common');
|
||||||
|
$def->addElement('nav', 'Block', 'Flow', 'Common');
|
||||||
|
$def->addElement('main', 'Block', 'Flow', 'Common');
|
||||||
|
$def->addElement('section', 'Block', 'Flow', 'Common');
|
||||||
|
$def->addElement('article', 'Block', 'Flow', 'Common');
|
||||||
|
$def->addElement('aside', 'Block', 'Flow', 'Common');
|
||||||
|
$def->addElement('address', 'Block', 'Flow', 'Common');
|
||||||
|
$def->addElement('figure', 'Block', 'Optional: (figcaption, Flow) | (Flow, figcaption) | Flow', 'Common');
|
||||||
|
$def->addElement('figcaption', 'Inline', 'Flow', 'Common');
|
||||||
|
$def->addElement('s', 'Inline', 'Inline', 'Common');
|
||||||
|
$def->addElement('var', 'Inline', 'Inline', 'Common');
|
||||||
|
$def->addElement('sub', 'Inline', 'Inline', 'Common');
|
||||||
|
$def->addElement('sup', 'Inline', 'Inline', 'Common');
|
||||||
|
$def->addElement('mark', 'Inline', 'Inline', 'Common');
|
||||||
|
$def->addElement('wbr', 'Inline', 'Empty', 'Core');
|
||||||
|
$def->addElement('ins', 'Block', 'Flow', 'Common', array('cite' => 'URI', 'datetime' => 'Text'));
|
||||||
|
$def->addElement('del', 'Block', 'Flow', 'Common', array('cite' => 'URI', 'datetime' => 'Text'));
|
||||||
|
$time = $def->addElement('time', 'Inline', 'Inline', 'Common', array('datetime' => 'Text', 'pubdate' => 'Bool'));
|
||||||
|
$time->excludes = array('time' => true);
|
||||||
|
$def->addElement('audio', 'Block', 'Optional: (source, Flow) | (Flow, source) | Flow', 'Common', array(
|
||||||
|
'src' => 'URI',
|
||||||
|
'type' => 'Text',
|
||||||
|
'preload' => 'Enum#auto,metadata,none',
|
||||||
|
'controls' => 'Bool',
|
||||||
|
'muted' => 'Bool',
|
||||||
|
'loop' => 'Bool',
|
||||||
|
));
|
||||||
|
$def->addElement('video', 'Block', 'Optional: (source, Flow) | (Flow, source) | Flow', 'Common', array(
|
||||||
|
'src' => 'URI',
|
||||||
|
'type' => 'Text',
|
||||||
|
'width' => 'Length',
|
||||||
|
'height' => 'Length',
|
||||||
|
'poster' => 'URI',
|
||||||
|
'preload' => 'Enum#auto,metadata,none',
|
||||||
|
'controls' => 'Bool',
|
||||||
|
'muted' => 'Bool',
|
||||||
|
'loop' => 'Bool',
|
||||||
|
));
|
||||||
|
$def->addElement('source', 'Block', 'Empty', 'Common', array(
|
||||||
|
'src' => 'URI',
|
||||||
|
'media' => 'Text',
|
||||||
|
'type' => 'Text',
|
||||||
|
));
|
||||||
|
$def->addElement('track', 'Block', 'Empty', 'Common', array(
|
||||||
|
'src' => 'URI',
|
||||||
|
'srclang' => 'Text',
|
||||||
|
'label' => 'Text',
|
||||||
|
'kind' => 'Enum#captions,chapters,descriptions,metadata,subtitles',
|
||||||
|
'default' => 'Bool',
|
||||||
|
));
|
||||||
|
|
||||||
|
// Cache our instance of HTMLPurifier.
|
||||||
|
self::$_htmlpurifier = new \HTMLPurifier($config);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Return the cached instance.
|
||||||
|
return self::$_htmlpurifier;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get the iframe whitelist as a regular expression.
|
||||||
|
*
|
||||||
|
* @return string
|
||||||
|
*/
|
||||||
|
protected static function _getIframeWhitelist()
|
||||||
|
{
|
||||||
|
$domains = \EmbedFilter::getInstance()->getWhiteIframeUrlList();
|
||||||
|
$result = array();
|
||||||
|
foreach($domains as $domain)
|
||||||
|
{
|
||||||
|
$result[] = preg_quote($domain, '%');
|
||||||
|
}
|
||||||
|
return '%^https?://(' . implode('|', $result) . ')%';
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Encode widgets and editor components before processing.
|
||||||
|
*
|
||||||
|
* @param string $content
|
||||||
|
* @return string
|
||||||
|
*/
|
||||||
|
protected static function _encodeWidgetsAndEditorComponents($content)
|
||||||
|
{
|
||||||
|
preg_match_all('!<(div|img)([^>]*)(editor_component="[^"]+"|class="zbxe_widget_output")([^>]*)>!i', $content, $matches, \PREG_SET_ORDER);
|
||||||
|
foreach ($matches as $match)
|
||||||
|
{
|
||||||
|
$attrs = array();
|
||||||
|
$html = $match[0];
|
||||||
|
preg_match_all('/([a-zA-Z0-9_-]+)="([^"]+)"/', $match[2] . ' ' . $match[4], $found_attrs, \PREG_SET_ORDER);
|
||||||
|
foreach ($found_attrs as $attr)
|
||||||
|
{
|
||||||
|
$attrkey = strtolower($attr[1]);
|
||||||
|
if (strtolower($match[1]) === 'img' && ($attrkey === 'width' || $attrkey === 'height' || $attrkey === 'alt'))
|
||||||
|
{
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if ($attrkey === 'src' || $attrkey === 'style' || substr($attrkey, 0, 2) === 'on')
|
||||||
|
{
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
$attrs[$attrkey] = htmlspecialchars_decode($attr[2]);
|
||||||
|
$html = str_replace($attr[0], '', $html);
|
||||||
|
}
|
||||||
|
if (strtolower($match[1]) === 'img' && !isset($attrs['src']))
|
||||||
|
{
|
||||||
|
//$html = substr($html, 0, 4) . ' src=""' . substr($html, 4);
|
||||||
|
}
|
||||||
|
$encoded_properties = base64_encode(json_encode($attrs));
|
||||||
|
$html = substr($html, 0, 4) . ' rx_encoded_properties="' . $encoded_properties . '"' . substr($html, 4);
|
||||||
|
$content = str_replace($match[0], $html, $content);
|
||||||
|
}
|
||||||
|
return $content;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Decode widgets and editor components after processing.
|
||||||
|
*
|
||||||
|
* @param string $content
|
||||||
|
* @return string
|
||||||
|
*/
|
||||||
|
protected static function _decodeWidgetsAndEditorComponents($content)
|
||||||
|
{
|
||||||
|
preg_match_all('!<(div|img)([^>]*)(rx_encoded_properties="([^"]+)")!i', $content, $matches, \PREG_SET_ORDER);
|
||||||
|
foreach ($matches as $match)
|
||||||
|
{
|
||||||
|
$attrs = array();
|
||||||
|
$decoded_properties = @json_decode(base64_decode($match[4])) ?: array();
|
||||||
|
foreach ($decoded_properties as $key => $val)
|
||||||
|
{
|
||||||
|
$attrs[] = $key . '="' . htmlspecialchars($val) . '"';
|
||||||
|
}
|
||||||
|
$content = str_replace($match[3], implode(' ', $attrs), $content);
|
||||||
|
}
|
||||||
|
return $content;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -815,8 +815,7 @@ function url_decode($str)
|
||||||
|
|
||||||
function purifierHtml(&$content)
|
function purifierHtml(&$content)
|
||||||
{
|
{
|
||||||
$oPurifier = Purifier::getInstance();
|
$content = Rhymix\Framework\Security\HTMLFilter::clean($content);
|
||||||
$oPurifier->purify($content);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -830,7 +829,7 @@ function removeHackTag($content)
|
||||||
$oEmbedFilter = EmbedFilter::getInstance();
|
$oEmbedFilter = EmbedFilter::getInstance();
|
||||||
$oEmbedFilter->check($content);
|
$oEmbedFilter->check($content);
|
||||||
|
|
||||||
purifierHtml($content);
|
$content = Rhymix\Framework\Security\HTMLFilter::clean($content);
|
||||||
|
|
||||||
// change the specific tags to the common texts
|
// change the specific tags to the common texts
|
||||||
$content = preg_replace('@<(\/?(?:html|body|head|title|meta|base|link|script|style|applet)(/*).*?>)@i', '<$1', $content);
|
$content = preg_replace('@<(\/?(?:html|body|head|title|meta|base|link|script|style|applet)(/*).*?>)@i', '<$1', $content);
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue