Also filter XML and HTML file uploads

- Detect possible XML (including SVG) by actual content, not extension
- Check large files in overlapping chunks to reduce memory usage
- Check XML files for external entities
- Check HTML files for SSI and PHP code
This commit is contained in:
Kijin Sung 2018-09-18 02:45:48 +09:00
parent 60d390f52e
commit e03ccbd7a9

View file

@ -1,5 +1,4 @@
<?php <?php
/* Copyright (C) NAVER <http://www.navercorp.com> */
class UploadFileFilter class UploadFileFilter
{ {
@ -19,7 +18,7 @@ class UploadFileFilter
} }
// Return error if the file size is zero. // Return error if the file size is zero.
if (!filesize($file)) if (($filesize = filesize($file)) == 0)
{ {
return false; return false;
} }
@ -27,38 +26,120 @@ class UploadFileFilter
// Get the extension. // Get the extension.
$ext = $filename ? strtolower(substr(strrchr($filename, '.'), 1)) : ''; $ext = $filename ? strtolower(substr(strrchr($filename, '.'), 1)) : '';
// Check the first 4KB of the file for possible XML content.
$fp = fopen($file, 'rb');
$first4kb = fread($fp, 4096);
$is_xml = preg_match('/<(?:\?xml|!DOCTYPE|html|head|body|meta|script|svg)\b/i', $first4kb);
// Check SVG files. // Check SVG files.
if ($ext === 'svg' && !self::_checkSVG($file)) if (($ext === 'svg' || $is_xml) && !self::_checkSVG($fp, 0, $filesize))
{ {
fclose($fp);
return false;
}
// Check XML files.
if (($ext === 'xml' || $is_xml) && !self::_checkXML($fp, 0, $filesize))
{
fclose($fp);
return false;
}
// Check HTML files.
if (($ext === 'html' || $ext === 'shtml' || $ext === 'xhtml' || $ext === 'phtml' || $is_xml) && !self::_checkHTML($fp, 0, $filesize))
{
fclose($fp);
return false; return false;
} }
// Return true if everything is OK. // Return true if everything is OK.
fclose($fp);
return true; return true;
} }
/** /**
* Check SVG file for XSS or SSRF vulnerabilities (#1088, #1089) * Check SVG file for XSS or SSRF vulnerabilities (#1088, #1089)
* *
* @param string $file * @param resource $fp
* @param int $from
* @param int $to
* @return bool * @return bool
*/ */
protected static function _checkSVG($file) protected static function _checkSVG($fp, $from, $to)
{ {
$content = file_get_contents($file); if (self::_matchStream('/<script|xlink:href\s*=\s*"(?!data:)/i', $fp, $from, $to))
if (preg_match('/xlink:href\s*=\s*"(?!data:)/i', $content))
{
return false;
}
if (preg_match('/<script/i', $content))
{ {
return false; return false;
} }
return true; return true;
} }
/**
 * Scan an XML stream for entity declarations.
 *
 * The check fails as soon as the text "<!ENTITY" (matched
 * case-insensitively) is found in the scanned range; the search itself
 * is delegated to _matchStream().
 *
 * @param resource $fp Open stream to scan
 * @param int $from Start offset passed through to _matchStream()
 * @param int $to End offset passed through to _matchStream()
 * @return bool false if an entity declaration was found, true otherwise
 */
protected static function _checkXML($fp, $from, $to)
{
	$has_entity = self::_matchStream('/<!ENTITY/i', $fp, $from, $to);

	// The file passes the check only when no declaration was seen.
	return !$has_entity;
}
/**
 * Scan an HTML stream for embedded server-side code.
 *
 * The check fails when the scanned range contains either
 *  - a "<?" opener that is not immediately followed by the word "xml", or
 *  - a server-side include comment directive: "<!--#" followed by one of
 *    include, exec, echo, config, fsize, flastmod or printenv.
 * Matching is case-insensitive and is delegated to _matchStream().
 *
 * @param resource $fp Open stream to scan
 * @param int $from Start offset passed through to _matchStream()
 * @param int $to End offset passed through to _matchStream()
 * @return bool false if a forbidden construct was found, true otherwise
 */
protected static function _checkHTML($fp, $from, $to)
{
	$forbidden = '/<\?(?!xml\b)|<!--#(?:include|exec|echo|config|fsize|flastmod|printenv)\b/i';
	$found = self::_matchStream($forbidden, $fp, $from, $to);

	// The file passes the check only when nothing forbidden was seen.
	return !$found;
}
/**
 * Match a byte range of a stream against a regular expression.
 *
 * The range from $from (inclusive) to $to (exclusive) is scanned in
 * windows of up to $block_size + $overlap_size bytes, and the window
 * start advances by $block_size each time. This keeps memory usage
 * bounded for large files, and the overlap lets a match that straddles
 * a block boundary still be seen in one piece. A match that crosses a
 * boundary and is longer than the overlap can still be missed.
 *
 * No window reads past $to, and scanning stops once the window start
 * reaches $to, so a $to smaller than the stream length terminates
 * normally instead of re-reading the same position forever.
 *
 * The stream position is moved by this method and is not restored.
 *
 * @param string $regexp PCRE pattern, including delimiters and flags
 * @param resource $fp Open, seekable stream
 * @param int $from Offset of the first byte to scan
 * @param int $to Offset just past the last byte to scan
 * @param int $block_size (optional) Distance between window starts; values below 1 are treated as 1
 * @param int $overlap_size (optional) Extra bytes read past each block; negative values are treated as 0
 * @return bool true if any window matched, false otherwise
 */
protected static function _matchStream($regexp, $fp, $from, $to, $block_size = 16384, $overlap_size = 1024)
{
	// A non-positive block size would never advance the window.
	$block_size = max(1, (int)$block_size);
	$overlap_size = max(0, (int)$overlap_size);
	$to = (int)$to;

	for ($position = max(0, (int)$from); $position < $to; $position += $block_size)
	{
		fseek($fp, $position);

		// Never read beyond the end of the requested range.
		$length = min($block_size + $overlap_size, $to - $position);
		$content = fread($fp, $length);
		if ($content === false || $content === '')
		{
			// Read error, or the stream ended before $to.
			break;
		}

		if (preg_match($regexp, $content))
		{
			return true;
		}
	}

	return false;
}
} }
/* End of file : UploadFileFilter.class.php */ /* End of file : UploadFileFilter.class.php */