Absorb removeSrcHack() into HTMLFilter postprocessing method

This commit is contained in:
Kijin Sung 2016-03-12 22:15:55 +09:00
parent 598722b0cd
commit f577b456ec
4 changed files with 72 additions and 127 deletions

View file

@ -75,9 +75,9 @@ class HTMLFilter
$input = $callback($input);
}
$input = self::_encodeWidgetsAndEditorComponents($input);
$input = self::_preprocess($input);
$output = self::getHTMLPurifier()->purify($input);
$output = self::_decodeWidgetsAndEditorComponents($output);
$output = self::_postprocess($output);
foreach (self::$_postproc as $callback)
{
@ -385,6 +385,55 @@ class HTMLFilter
return '%^https?://(' . implode('|', $result) . ')%';
}
/**
 * Rhymix-specific preprocessing method.
 *
 * Applied to the content before it is handed to HTMLPurifier: unsupported
 * tags are turned into plain text, then widget and editor component
 * properties are encoded.
 *
 * @param string $content
 * @return string
 */
protected static function _preprocess($content)
{
    // Tags that Rhymix does not support inside content. HTMLPurifier may drop some of
    // them on its own; escaping them here makes them show up as literal text instead.
    $unsupported_tags = '!</?(?:html|body|head|title|meta|base|link|script|style|applet)\b[^>]*>!i';
    $escape_tag = function($found) {
        return htmlspecialchars($found[0], ENT_QUOTES, 'UTF-8');
    };
    $escaped = preg_replace_callback($unsupported_tags, $escape_tag, $content);

    // Encode widget and editor component properties so that HTMLPurifier does not remove them.
    return self::_encodeWidgetsAndEditorComponents($escaped);
}
/**
 * Rhymix-specific postprocessing method.
 *
 * Blanks out src/href/data/value attributes whose URL carries an "act"
 * parameter that could be used for CSRF, then restores the widget and
 * editor component properties.
 *
 * An attribute is blanked when any of its act parameters is on the deny
 * list, or starts with "proc" and is not on the allow list.
 *
 * @param string $content
 * @return string
 */
protected static function _postprocess($content)
{
    // Define acts to allow and deny.
    // The deny list is written in lowercase because it is compared case-insensitively below.
    $allow_acts = array('procFileDownload');
    $deny_acts = array('dispmemberlogout', 'displayoutpreview');

    // Remove URLs that may be CSRF attempts.
    // NOTE(review): only double-quoted attribute values are matched; presumably the purified
    // markup always quotes attributes this way -- confirm against HTMLPurifier's output.
    $content = preg_replace_callback('!\b(src|href|data|value)="([^"]+)"!i', function($matches) use($allow_acts, $deny_acts) {
        // Undo URL/entity encoding and strip whitespace so that obfuscated parameters are recognized.
        $url = preg_replace('!\s+!', '', htmlspecialchars_decode(rawurldecode($matches[2])));

        // Look at every act parameter, not just the first one: when a parameter is repeated,
        // PHP keeps the last value, so a harmless first act must not hide a harmful later one.
        // The value stops at '#' as well as '&' so that a URL fragment cannot disguise the act name.
        preg_match_all('!\bact=((disp|proc)[^&#]*)!i', $url, $found, PREG_SET_ORDER);
        foreach ($found as $urlmatches)
        {
            $act = $urlmatches[1];
            if (in_array($act, $allow_acts, true))
            {
                continue;
            }

            // The pattern above is case-insensitive, so the decision has to be as well.
            $is_proc = (strtolower($urlmatches[2]) === 'proc');
            if ($is_proc || in_array(strtolower($act), $deny_acts, true))
            {
                return $matches[1] . '=""';
            }
        }
        return $matches[0];
    }, $content);

    // Restore widget and editor component properties.
    $content = self::_decodeWidgetsAndEditorComponents($content);
    return $content;
}
/**
* Encode widgets and editor components before processing.
*
@ -402,20 +451,20 @@ class HTMLFilter
foreach ($found_attrs as $attr)
{
$attrkey = strtolower($attr[1]);
if (strtolower($match[1]) === 'img' && ($attrkey === 'width' || $attrkey === 'height' || $attrkey === 'alt'))
if (strtolower($match[1]) === 'img' && preg_match('/^(?:width|height|alt)$/', $attrkey))
{
continue;
}
if ($attrkey === 'src' || $attrkey === 'style' || substr($attrkey, 0, 2) === 'on')
if (preg_match('/^(?:on|data-|(?:src|style|class)$)/', $attrkey))
{
continue;
}
$attrs[$attrkey] = htmlspecialchars_decode($attr[2]);
$html = str_replace($attr[0], '', $html);
}
if (strtolower($match[1]) === 'img' && !isset($attrs['src']))
if (strtolower($match[1]) === 'img' && !preg_match('/\ssrc="/', $html))
{
//$html = substr($html, 0, 4) . ' src=""' . substr($html, 4);
$html = substr($html, 0, 4) . ' src=""' . substr($html, 4);
}
$encoded_properties = base64_encode(json_encode($attrs));
$html = substr($html, 0, 4) . ' rx_encoded_properties="' . $encoded_properties . '"' . substr($html, 4);

View file

@ -830,17 +830,7 @@ function removeHackTag($content)
$oEmbedFilter = EmbedFilter::getInstance();
$oEmbedFilter->check($content);
$content = Rhymix\Framework\Security\HTMLFilter::clean($content);
// change the specific tags to the common texts
$content = preg_replace('@<(\/?(?:html|body|head|title|meta|base|link|script|style|applet)(/*).*?>)@i', '&lt;$1', $content);
/**
* Remove code that abuses the admin session through the src attribute of image and video tags in postings
* - Issue reported by Sangwon Kim
*/
$content = preg_replace_callback('@<(/?)([a-z]+[0-9]?)((?>"[^"]*"|\'[^\']*\'|[^>])*?\b(?:on[a-z]+|data|style|background|href|(?:dyn|low)?src)\s*=[\s\S]*?)(/?)($|>|<)@i', 'removeSrcHack', $content);
return $content;
return Rhymix\Framework\Security\HTMLFilter::clean($content);
}
/**
@ -865,6 +855,17 @@ function blockWidgetCode($content)
return preg_replace('/(<(?:img|div)(?:[^>]*))(widget)(?:(=([^>]*?)>))/is', '$1blocked-widget$3', $content);
}
/**
 * Remove src hack (Deprecated)
 *
 * This version no longer filters anything: it hands the full match back
 * untouched, so using it as a replace callback leaves the text as it was.
 *
 * @param array $match Regex match array; only element 0 (the full match) is read
 * @return string The full match, unchanged
 */
function removeSrcHack($match)
{
    $whole_match = $match[0];
    return $whole_match;
}
/**
* Check uploaded file (Deprecated)
*
@ -876,111 +877,6 @@ function checkUploadedFile($file)
return true;
}
/**
 * Remove src hack (preg_replace_callback)
 *
 * Rebuilds an opening tag from its attributes, dropping the ones that look dangerous:
 * - event handlers (names starting with "on"),
 * - values that start with a "...script:" scheme once numeric entities are decoded,
 * - style/src/href (plus data on <object>, value on <param>) whose URL carries a
 *   blocked "act" parameter (CSRF),
 * - style values containing comment markers, newlines or expression(),
 * - "data:" URLs on <img>, and on data/src/href of <object>, <embed> and <a>.
 *
 * Attribute names are lowercased so that none of these checks can be bypassed
 * by changing the case of the name. Closing tags are returned unchanged.
 *
 * @param array $match Regex match: [0] full match, [1] "/" for a closing tag, [2] tag name,
 *                     [3] raw attribute text, [4] optional self-closing "/"
 * @return string The rebuilt tag, always terminated with ">"
 */
function removeSrcHack($match)
{
    $tag = strtolower($match[2]);

    // Closing tags carry nothing to filter.
    if($match[1])
    {
        return $match[0];
    }

    // Keep a self-closing slash, separated from the attributes by a space.
    $closer = $match[4] ? ' ' . $match[4] : $match[4];

    $attrs = array();
    if(preg_match_all('/([\w:-]+)\s*=(?:\s*(["\']))?(?(2)(.*?)\2|([^ ]+))/s', $match[3], $m))
    {
        foreach($m[1] as $idx => $name)
        {
            $name = strtolower($name);

            // Event handler attributes are never kept.
            if(strncmp($name, 'on', 2) === 0)
            {
                continue;
            }

            // Decode numeric character references so that an obfuscated scheme is visible.
            // Only the low byte of the code point is used: for hex that is the last two digits.
            $val = preg_replace_callback('/&#(?:x([a-fA-F0-9]+)|0*(\d+));/', function($n) {
                $code = ($n[1] !== '') ? hexdec(substr($n[1], -2)) : ((int)$n[2]) % 256;
                return chr($code);
            }, $m[3][$idx] . $m[4][$idx]);
            $val = preg_replace('/^\s+|[\t\n\r]+/', '', $val);

            if(preg_match('/^[a-z]+script:/i', $val))
            {
                continue;
            }

            $attrs[$name] = $val;
        }
    }

    //Remove ACT URL (CSRF)
    $except_act = array('procFileDownload');
    $block_act = array('dispMemberLogout', 'dispLayoutPreview');

    $filter_attrs = array('style', 'src', 'href');
    if($tag === 'object') $filter_attrs[] = 'data';
    if($tag === 'param') $filter_attrs[] = 'value';

    foreach($filter_attrs as $attr_name)
    {
        if(!isset($attrs[$attr_name])) continue;

        $attr_value = rawurldecode($attrs[$attr_name]);
        $attr_value = htmlspecialchars_decode($attr_value, ENT_COMPAT);
        $attr_value = preg_replace('/\s+|[\t\n\r]+/', '', $attr_value);

        // Nothing to check unless the URL actually carries an act parameter.
        if(!preg_match('@(\?|&|;)act=(disp|proc)([^&]*)@i', $attr_value, $actmatch))
        {
            continue;
        }

        $url_action = $actmatch[2] . $actmatch[3];
        if(in_array($url_action, $except_act, true))
        {
            continue;
        }
        if($actmatch[2] === 'proc' || in_array($url_action, $block_act, true))
        {
            unset($attrs[$attr_name]);
        }
    }

    if(isset($attrs['style']) && preg_match('@(?:/\*|\*/|\n|:\s*expression\s*\()@i', $attrs['style']))
    {
        unset($attrs['style']);
    }

    $rendered = array();
    foreach($attrs as $name => $val)
    {
        if(stripos($val, 'data:') === 0)
        {
            // On <img> no attribute may hold a data: URL.
            if($tag === 'img')
            {
                continue;
            }

            // On <object>, <embed> and <a> only the URL-bearing attributes are checked.
            $is_url_attr = ($name === 'data' || $name === 'src' || $name === 'href');
            if($is_url_attr && ($tag === 'object' || $tag === 'embed' || $tag === 'a'))
            {
                continue;
            }
        }

        $val = str_replace('"', '&quot;', $val);
        $rendered[] = $name . "=\"{$val}\"";
    }

    $attr_text = count($rendered) ? ' ' . implode(' ', $rendered) : '';

    return "<{$match[1]}{$tag}{$attr_text}{$closer}>";
}
/**
* Convert hexa value to RGB
*

View file

@ -11,7 +11,7 @@ class SecurityTest extends \Codeception\TestCase\Test
$this->assertEquals('foobar', Rhymix\Framework\Security::sanitize('foo<p>bar</p>', 'strip'));
// HTML (more thorough tests in HTMLFilterTest)
$this->assertEquals('<p>safe</p>', Rhymix\Framework\Security::sanitize('<p>safe<script>unsafe();</script></p>', 'html'));
$this->assertEquals('<p>safe&lt;script&gt;&lt;/script&gt;</p>', Rhymix\Framework\Security::sanitize('<p>safe<script></script></p>', 'html'));
// Filename (more thorough tests in FilenameFilterTest)
$this->assertEquals('foo(bar).xls', Rhymix\Framework\Security::sanitize('foo<bar>.xls', 'filename'));

View file

@ -43,19 +43,19 @@ class HTMLFilterTest extends \Codeception\TestCase\Test
// issue #1813 https://github.com/xpressengine/xe-core/issues/1813
array(
'<img src="?act=dispLayoutPreview" alt="dummy" />',
'<img alt="dummy" />'
'<img src="" alt="dummy" />'
),
array(
'<img src="?act =dispLayoutPreview" alt="dummy" />',
'<img alt="dummy" />'
'<img src="" alt="dummy" />'
),
array(
"<img src=\"?act\n=dispLayoutPreview\" alt=\"dummy\" />",
'<img alt="dummy" />'
'<img src="" alt="dummy" />'
),
array(
"<img src=\"?pam=act&a\nct =\r\n\tdispLayoutPreview\" alt=\"dummy\" />",
'<img alt="dummy" />'
'<img src="" alt="dummy" />'
)
);