Update embed filter to use Simple HTML DOM Parser

This commit is contained in:
Kijin Sung 2016-02-16 15:22:12 +09:00
parent be83a34b1e
commit e7bacf689a

View file

@ -1,8 +1,6 @@
<?php <?php
/* Copyright (C) NAVER <http://www.navercorp.com> */ /* Copyright (C) NAVER <http://www.navercorp.com> */
include _XE_PATH_ . 'classes/security/phphtmlparser/src/htmlparser.inc';
class EmbedFilter class EmbedFilter
{ {
@ -68,106 +66,9 @@ class EmbedFilter
$this->checkObjectTag($content); $this->checkObjectTag($content);
$this->checkEmbedTag($content); $this->checkEmbedTag($content);
$this->checkIframeTag($content);
$this->checkParamTag($content); $this->checkParamTag($content);
} }
/**
 * Check object tag in the content.
 *
 * Every `<object ...>` opening tag found in $content is inspected. When its
 * `data` attribute is rejected by isWhiteDomain() or its `type` attribute is
 * rejected by isWhiteMimetype(), every occurrence of that tag in $content is
 * replaced with its HTML-escaped form so it is shown as text instead of
 * being rendered.
 *
 * A rejection is final: once one attribute fails its check the tag is
 * escaped, so a later attribute that passes cannot re-allow it.
 *
 * @param string $content HTML to filter; modified in place
 * @return void
 */
function checkObjectTag(&$content)
{
	preg_match_all('/<\s*object\s*[^>]+(?:\/?>?)/is', $content, $m);
	if(empty($m[0]))
	{
		return;
	}

	foreach($m[0] AS $objectTag)
	{
		$isAllowed = true;
		$parser = new HtmlParser($objectTag);

		// Stop parsing as soon as the tag has been rejected.
		while($isAllowed && $parser->parse())
		{
			if(!is_array($parser->iNodeAttributes))
			{
				continue;
			}

			foreach($parser->iNodeAttributes AS $attrName => $attrValue)
			{
				// Empty attribute values are not checked.
				if(!$attrValue)
				{
					continue;
				}

				$attrName = strtolower($attrName);

				// data url check
				if($attrName == 'data' && !$this->isWhiteDomain($attrValue))
				{
					$isAllowed = false;
					break;
				}

				// mime type check
				if($attrName == 'type' && !$this->isWhiteMimetype($attrValue))
				{
					$isAllowed = false;
					break;
				}
			}
		}

		if(!$isAllowed)
		{
			// The final `false` disables double encoding of existing entities.
			$content = str_replace($objectTag, htmlspecialchars($objectTag, ENT_COMPAT | ENT_HTML401, 'UTF-8', false), $content);
		}
	}
}
/**
 * Check embed tag in the content.
 *
 * Every `<embed ...>` opening tag found in $content is inspected. When its
 * `src` attribute is rejected by isWhiteDomain() or its `type` attribute is
 * rejected by isWhiteMimetype(), every occurrence of that tag in $content is
 * replaced with its HTML-escaped form so it is shown as text instead of
 * being rendered.
 *
 * A rejection is final: once one attribute fails its check the tag is
 * escaped, so a later attribute that passes cannot re-allow it.
 *
 * @param string $content HTML to filter; modified in place
 * @return void
 */
function checkEmbedTag(&$content)
{
	preg_match_all('/<\s*embed\s*[^>]+(?:\/?>?)/is', $content, $m);
	if(empty($m[0]))
	{
		return;
	}

	foreach($m[0] AS $embedTag)
	{
		$isAllowed = TRUE;
		$parser = new HtmlParser($embedTag);

		// Stop parsing as soon as the tag has been rejected.
		while($isAllowed && $parser->parse())
		{
			if(!is_array($parser->iNodeAttributes))
			{
				continue;
			}

			foreach($parser->iNodeAttributes AS $attrName => $attrValue)
			{
				// Empty attribute values are not checked.
				if(!$attrValue)
				{
					continue;
				}

				$attrName = strtolower($attrName);

				// src url check
				if($attrName == 'src' && !$this->isWhiteDomain($attrValue))
				{
					$isAllowed = FALSE;
					break;
				}

				// mime type check
				if($attrName == 'type' && !$this->isWhiteMimetype($attrValue))
				{
					$isAllowed = FALSE;
					break;
				}
			}
		}

		if(!$isAllowed)
		{
			// The final `false` disables double encoding of existing entities.
			$content = str_replace($embedTag, htmlspecialchars($embedTag, ENT_COMPAT | ENT_HTML401, 'UTF-8', false), $content);
		}
	}
}
/** /**
* Check iframe tag in the content. * Check iframe tag in the content.
* @return void * @return void
@ -176,39 +77,52 @@ class EmbedFilter
{ {
// check in Purifier class // check in Purifier class
return; return;
}
preg_match_all('/<\s*iframe\s*[^>]+(?:\/?>?)/is', $content, $m); /**
$iframeTagList = $m[0]; * Check object tag in the content.
if($iframeTagList) * @return void
{ */
foreach($iframeTagList AS $key => $iframeTag) function checkObjectTag(&$content)
{
$content = preg_replace_callback('/<\s*object\s*[^>]+(?:\/?>?)/is', function($m) {
$html = Sunra\PhpSimple\HtmlDomParser::str_get_html($m[0]);
foreach ($html->find('object') as $element)
{ {
$isWhiteDomain = TRUE; if ($element->data && !$this->isWhiteDomain($element->data))
$ext = '';
$parser = new HtmlParser($iframeTag);
while($parser->parse())
{ {
if(is_array($parser->iNodeAttributes)) return escape($m[0], false);
{
foreach($parser->iNodeAttributes AS $attrName => $attrValue)
{
// src url check
if(strtolower($attrName) == 'src' && $attrValue)
{
$ext = strtolower(substr(strrchr($attrValue, "."), 1));
$isWhiteDomain = $this->isWhiteIframeDomain($attrValue);
}
}
}
} }
if ($element->type && !$this->isWhiteMimetype($element->type))
if(!$isWhiteDomain)
{ {
$content = str_replace($iframeTag, htmlspecialchars($iframeTag, ENT_COMPAT | ENT_HTML401, 'UTF-8', false), $content); return escape($m[0], false);
} }
} }
} return $m[0];
}, $content);
}
/**
* Check embed tag in the content.
* @return void
*/
function checkEmbedTag(&$content)
{
$content = preg_replace_callback('/<\s*embed\s*[^>]+(?:\/?>?)/is', function($m) {
$html = Sunra\PhpSimple\HtmlDomParser::str_get_html($m[0]);
foreach ($html->find('embed') as $element)
{
if ($element->src && !$this->isWhiteDomain($element->src))
{
return escape($m[0], false);
}
if ($element->type && !$this->isWhiteMimetype($element->type))
{
return escape($m[0], false);
}
}
return $m[0];
}, $content);
} }
/** /**
@ -217,36 +131,20 @@ class EmbedFilter
*/ */
function checkParamTag(&$content) function checkParamTag(&$content)
{ {
preg_match_all('/<\s*param\s*[^>]+(?:\/?>?)/is', $content, $m); $content = preg_replace_callback('/<\s*param\s*[^>]+(?:\/?>?)/is', function($m) {
$paramTagList = $m[0]; $html = Sunra\PhpSimple\HtmlDomParser::str_get_html($m[0]);
if($paramTagList) foreach ($html->find('param') as $element)
{
foreach($paramTagList AS $key => $paramTag)
{ {
$isWhiteDomain = TRUE; foreach (array('movie', 'src', 'href', 'url', 'source') as $attr)
$isWhiteExt = TRUE;
$ext = '';
$parser = new HtmlParser($paramTag);
while($parser->parse())
{ {
if($parser->iNodeAttributes['name'] && $parser->iNodeAttributes['value']) if ($element->$attr && !$this->isWhiteDomain($element->$attr))
{ {
$name = strtolower($parser->iNodeAttributes['name']); return escape($m[0], false);
if($name == 'movie' || $name == 'src' || $name == 'href' || $name == 'url' || $name == 'source')
{
$ext = strtolower(substr(strrchr($parser->iNodeAttributes['value'], "."), 1));
$isWhiteDomain = $this->isWhiteDomain($parser->iNodeAttributes['value']);
if(!$isWhiteDomain)
{
$content = str_replace($paramTag, htmlspecialchars($paramTag, ENT_COMPAT | ENT_HTML401, 'UTF-8', false), $content);
}
}
} }
} }
} }
} return $m[0];
}, $content);
} }
/** /**