mirror of
https://github.com/Lastorder-DC/rhymix.git
synced 2026-01-07 02:31:40 +09:00
Merge pull request #283 from kijin/pr/improve-embed-filter
EmbedFilter에서 사용하던 오래된 라이브러리 제거
This commit is contained in:
commit
78c0417065
5 changed files with 62 additions and 782 deletions
|
|
@ -1,8 +1,6 @@
|
|||
<?php
|
||||
/* Copyright (C) NAVER <http://www.navercorp.com> */
|
||||
|
||||
include _XE_PATH_ . 'classes/security/phphtmlparser/src/htmlparser.inc';
|
||||
|
||||
class EmbedFilter
|
||||
{
|
||||
|
||||
|
|
@ -17,7 +15,6 @@ class EmbedFilter
|
|||
* @var int
|
||||
*/
|
||||
var $allowscriptaccessKey = 0;
|
||||
var $whiteUrlDefaultFile = './classes/security/conf/whitelist.php';
|
||||
var $whiteUrlList = array();
|
||||
var $whiteIframeUrlList = array();
|
||||
var $mimeTypeList = array();
|
||||
|
|
@ -68,106 +65,9 @@ class EmbedFilter
|
|||
|
||||
$this->checkObjectTag($content);
|
||||
$this->checkEmbedTag($content);
|
||||
$this->checkIframeTag($content);
|
||||
$this->checkParamTag($content);
|
||||
}
|
||||
|
||||
/**
 * Check object tag in the content.
 *
 * Every `<object ...>` opening tag matched in $content is parsed with
 * HtmlParser. When the tag's `data` URL is rejected by isWhiteDomain() or its
 * `type` is rejected by isWhiteMimetype(), every occurrence of that tag text
 * in $content is replaced by its HTML-escaped form so it is shown as text.
 *
 * @param string $content HTML to filter; modified in place
 * @return void
 */
function checkObjectTag(&$content)
{
	preg_match_all('/<\s*object\s*[^>]+(?:\/?>?)/is', $content, $m);
	if(!$m[0])
	{
		return;
	}

	foreach($m[0] as $objectTag)
	{
		$isWhiteDomain = TRUE;
		$isWhiteMimetype = TRUE;

		$parser = new HtmlParser($objectTag);
		while($parser->parse())
		{
			if(!is_array($parser->iNodeAttributes))
			{
				continue;
			}

			foreach($parser->iNodeAttributes as $attrName => $attrValue)
			{
				// Empty attribute values are never checked.
				if(!$attrValue)
				{
					continue;
				}

				$attrName = strtolower($attrName);
				if($attrName == 'data')
				{
					// data url check
					$isWhiteDomain = $this->isWhiteDomain($attrValue);
				}
				elseif($attrName == 'type')
				{
					// mime type check
					$isWhiteMimetype = $this->isWhiteMimetype($attrValue);
				}
			}
		}

		if(!$isWhiteDomain || !$isWhiteMimetype)
		{
			// double_encode = false: entities already present in the tag are not encoded twice.
			$content = str_replace($objectTag, htmlspecialchars($objectTag, ENT_COMPAT | ENT_HTML401, 'UTF-8', false), $content);
		}
	}
}
|
||||
|
||||
/**
 * Check embed tag in the content.
 *
 * Every `<embed ...>` opening tag matched in $content is parsed with
 * HtmlParser. When the tag's `src` URL is rejected by isWhiteDomain() or its
 * `type` is rejected by isWhiteMimetype(), every occurrence of that tag text
 * in $content is replaced by its HTML-escaped form so it is shown as text.
 *
 * @param string $content HTML to filter; modified in place
 * @return void
 */
function checkEmbedTag(&$content)
{
	preg_match_all('/<\s*embed\s*[^>]+(?:\/?>?)/is', $content, $m);
	if(!$m[0])
	{
		return;
	}

	foreach($m[0] as $embedTag)
	{
		$isWhiteDomain = TRUE;
		$isWhiteMimetype = TRUE;

		$parser = new HtmlParser($embedTag);
		while($parser->parse())
		{
			if(!is_array($parser->iNodeAttributes))
			{
				continue;
			}

			foreach($parser->iNodeAttributes as $attrName => $attrValue)
			{
				// Empty attribute values are never checked.
				if(!$attrValue)
				{
					continue;
				}

				$attrName = strtolower($attrName);
				if($attrName == 'src')
				{
					// src url check
					$isWhiteDomain = $this->isWhiteDomain($attrValue);
				}
				elseif($attrName == 'type')
				{
					// mime type check
					$isWhiteMimetype = $this->isWhiteMimetype($attrValue);
				}
			}
		}

		if(!$isWhiteDomain || !$isWhiteMimetype)
		{
			// double_encode = false: entities already present in the tag are not encoded twice.
			$content = str_replace($embedTag, htmlspecialchars($embedTag, ENT_COMPAT | ENT_HTML401, 'UTF-8', false), $content);
		}
	}
}
|
||||
|
||||
/**
|
||||
* Check iframe tag in the content.
|
||||
* @return void
|
||||
|
|
@ -176,39 +76,52 @@ class EmbedFilter
|
|||
{
|
||||
// check in Purifier class
|
||||
return;
|
||||
}
|
||||
|
||||
preg_match_all('/<\s*iframe\s*[^>]+(?:\/?>?)/is', $content, $m);
|
||||
$iframeTagList = $m[0];
|
||||
if($iframeTagList)
|
||||
{
|
||||
foreach($iframeTagList AS $key => $iframeTag)
|
||||
/**
|
||||
* Check object tag in the content.
|
||||
* @return void
|
||||
*/
|
||||
function checkObjectTag(&$content)
|
||||
{
|
||||
$content = preg_replace_callback('/<\s*object\s*[^>]+(?:\/?>?)/is', function($m) {
|
||||
$html = Sunra\PhpSimple\HtmlDomParser::str_get_html($m[0]);
|
||||
foreach ($html->find('object') as $element)
|
||||
{
|
||||
$isWhiteDomain = TRUE;
|
||||
$ext = '';
|
||||
|
||||
$parser = new HtmlParser($iframeTag);
|
||||
while($parser->parse())
|
||||
if ($element->data && !$this->isWhiteDomain($element->data))
|
||||
{
|
||||
if(is_array($parser->iNodeAttributes))
|
||||
{
|
||||
foreach($parser->iNodeAttributes AS $attrName => $attrValue)
|
||||
{
|
||||
// src url check
|
||||
if(strtolower($attrName) == 'src' && $attrValue)
|
||||
{
|
||||
$ext = strtolower(substr(strrchr($attrValue, "."), 1));
|
||||
$isWhiteDomain = $this->isWhiteIframeDomain($attrValue);
|
||||
}
|
||||
}
|
||||
}
|
||||
return escape($m[0], false);
|
||||
}
|
||||
|
||||
if(!$isWhiteDomain)
|
||||
if ($element->type && !$this->isWhiteMimetype($element->type))
|
||||
{
|
||||
$content = str_replace($iframeTag, htmlspecialchars($iframeTag, ENT_COMPAT | ENT_HTML401, 'UTF-8', false), $content);
|
||||
return escape($m[0], false);
|
||||
}
|
||||
}
|
||||
}
|
||||
return $m[0];
|
||||
}, $content);
|
||||
}
|
||||
|
||||
/**
 * Check embed tag in the content.
 *
 * Each `<embed ...>` opening tag matched in $content is parsed with
 * HtmlDomParser. The tag is replaced by escape($tag, false) when its `src`
 * is rejected by isWhiteDomain(), when its `type` is rejected by
 * isWhiteMimetype(), or when the tag cannot be parsed at all; otherwise it is
 * kept unchanged.
 *
 * @param string $content HTML to filter; modified in place
 * @return void
 */
function checkEmbedTag(&$content)
{
	$content = preg_replace_callback('/<\s*embed\s*[^>]+(?:\/?>?)/is', function($m) {
		$html = Sunra\PhpSimple\HtmlDomParser::str_get_html($m[0]);
		if (!$html)
		{
			// str_get_html() returns false instead of a DOM for input it refuses
			// (e.g. an oversized string). A tag that cannot be inspected cannot be
			// whitelisted, so fail closed rather than crash on $html->find().
			return escape($m[0], false);
		}

		foreach ($html->find('embed') as $element)
		{
			if ($element->src && !$this->isWhiteDomain($element->src))
			{
				return escape($m[0], false);
			}
			if ($element->type && !$this->isWhiteMimetype($element->type))
			{
				return escape($m[0], false);
			}
		}

		return $m[0];
	}, $content);
}
|
||||
|
||||
/**
|
||||
|
|
@ -217,36 +130,20 @@ class EmbedFilter
|
|||
*/
|
||||
function checkParamTag(&$content)
|
||||
{
|
||||
preg_match_all('/<\s*param\s*[^>]+(?:\/?>?)/is', $content, $m);
|
||||
$paramTagList = $m[0];
|
||||
if($paramTagList)
|
||||
{
|
||||
foreach($paramTagList AS $key => $paramTag)
|
||||
$content = preg_replace_callback('/<\s*param\s*[^>]+(?:\/?>?)/is', function($m) {
|
||||
$html = Sunra\PhpSimple\HtmlDomParser::str_get_html($m[0]);
|
||||
foreach ($html->find('param') as $element)
|
||||
{
|
||||
$isWhiteDomain = TRUE;
|
||||
$isWhiteExt = TRUE;
|
||||
$ext = '';
|
||||
|
||||
$parser = new HtmlParser($paramTag);
|
||||
while($parser->parse())
|
||||
foreach (array('movie', 'src', 'href', 'url', 'source') as $attr)
|
||||
{
|
||||
if($parser->iNodeAttributes['name'] && $parser->iNodeAttributes['value'])
|
||||
if ($element->$attr && !$this->isWhiteDomain($element->$attr))
|
||||
{
|
||||
$name = strtolower($parser->iNodeAttributes['name']);
|
||||
if($name == 'movie' || $name == 'src' || $name == 'href' || $name == 'url' || $name == 'source')
|
||||
{
|
||||
$ext = strtolower(substr(strrchr($parser->iNodeAttributes['value'], "."), 1));
|
||||
$isWhiteDomain = $this->isWhiteDomain($parser->iNodeAttributes['value']);
|
||||
|
||||
if(!$isWhiteDomain)
|
||||
{
|
||||
$content = str_replace($paramTag, htmlspecialchars($paramTag, ENT_COMPAT | ENT_HTML401, 'UTF-8', false), $content);
|
||||
}
|
||||
}
|
||||
return escape($m[0], false);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return $m[0];
|
||||
}, $content);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -359,8 +256,7 @@ class EmbedFilter
|
|||
*/
|
||||
function _makeWhiteDomainList($whitelist = NULL)
|
||||
{
|
||||
$whiteUrlDefaultFile = FileHandler::getRealPath($this->whiteUrlDefaultFile);
|
||||
$whiteUrlDefaultList = (include $whiteUrlDefaultFile);
|
||||
$whiteUrlDefaultList = (include RX_BASEDIR . 'common/defaults/whitelist.php');
|
||||
$this->extList = $whiteUrlDefaultList['extensions'];
|
||||
$this->mimeTypeList = $whiteUrlDefaultList['mime'];
|
||||
$this->whiteUrlList = array();
|
||||
|
|
|
|||
|
|
@ -1,48 +0,0 @@
|
|||
/* ====================================================================
|
||||
* Based on The Apache Software License, Version 1.1
|
||||
*
|
||||
* Copyright (c) 2003 Jose Solorzano. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
*
|
||||
* 3. The end-user documentation included with the redistribution,
|
||||
* if any, must include the following acknowledgment:
|
||||
* "This product includes software developed by
|
||||
* Jose Solorzano."
|
||||
* Alternately, this acknowledgment may appear in the software itself,
|
||||
* if and wherever such third-party acknowledgments normally appear.
|
||||
*
|
||||
* 4. The name "Jose Solorzano" must not be used to endorse or promote
|
||||
* products derived from this software without prior written
|
||||
* permission.
|
||||
*
|
||||
* 5. Products derived from this software may not be called "Jose Solorzano",
|
||||
* nor may "Jose Solorzano" appear in their name, without prior written
|
||||
* permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
|
||||
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
||||
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL STARNETSYS, LLC. OR
|
||||
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
|
||||
* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
|
||||
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
* ====================================================================
|
||||
*
|
||||
*/
|
||||
|
||||
|
|
@ -1,214 +0,0 @@
|
|||
<?php

/*
 * Copyright (c) 2003 Jose Solorzano. All rights reserved.
 * Redistribution of source must retain this copyright notice.
 */

// include_once: the parser file declares a class and constants, so loading it
// a second time (e.g. after another script already included it) must be a no-op.
include_once ("htmlparser.inc");
|
||||
|
||||
/**
 * Class Html2Text. (HtmlParser example.)
 * Converts HTML to ASCII attempting to preserve
 * document structure.
 * To use, create an instance of Html2Text passing
 * the text to convert and the desired maximum
 * number of characters per line. Then invoke
 * convert() which returns ASCII text.
 */
class Html2Text
{
    // Private fields

    /** Line currently being assembled. */
    public $iCurrentLine = "";
    /** Last word or TOKEN_* marker fetched; retried when it did not fit on the line. */
    public $iCurrentWord = "";
    /** Words of the text node currently being consumed. */
    public $iCurrentWordArray;
    /** Index of the next word to take from $iCurrentWordArray. */
    public $iCurrentWordIndex;
    /** True while inside a <script> element; words are dropped meanwhile. */
    public $iInScript;
    /** True while the current text node still has words left to hand out. */
    public $iInText = false;
    /** Nesting depth of <ul>/<ol> lists. */
    public $iListLevel = 0;
    public $iHtmlText;
    public $iMaxColumns;
    public $iHtmlParser;

    // Constants (integers, so they can never be identical to a word, which is a string)

    public $TOKEN_BR = 0;
    public $TOKEN_P = 1;
    public $TOKEN_LI = 2;
    public $TOKEN_AFTERLI = 3;
    public $TOKEN_UL = 4;
    public $TOKEN_ENDUL = 5;

    /**
     * @param string $aHtmlText   HTML to convert
     * @param int    $aMaxColumns maximum number of characters per output line
     */
    public function __construct($aHtmlText, $aMaxColumns)
    {
        $this->iHtmlText = $aHtmlText;
        $this->iMaxColumns = $aMaxColumns;
    }

    /**
     * Parses the HTML given to the constructor and returns the text form,
     * one "\r\n"-terminated line per getLine() result.
     *
     * @return string
     */
    public function convert()
    {
        $this->iHtmlParser = new HtmlParser($this->iHtmlText);
        $wholeText = "";
        while (($line = $this->getLine()) !== false) {
            $wholeText .= ($line . "\r\n");
        }
        return $wholeText;
    }

    /**
     * Assembles and returns the next output line, or false when the input is
     * exhausted and nothing is left to flush.
     *
     * @return string|false
     */
    public function getLine()
    {
        while (true) {
            // The pending word comes first: it is either fresh from getWord()
            // or the one that did not fit on the previous line.
            if (!$this->addWordToLine($this->iCurrentWord)) {
                $retvalue = $this->iCurrentLine;
                $this->iCurrentLine = "";
                return $retvalue;
            }
            $word = $this->getWord();
            if ($word === false) {
                if ($this->iCurrentLine == "") {
                    break;
                }
                // Flush the last, partially filled line.
                $retvalue = $this->iCurrentLine;
                $this->iCurrentLine = "";
                $this->iInText = false;
                $this->iCurrentWord = "";
                return $retvalue;
            }
        }
        return false;
    }

    /**
     * Tries to append $word (a string, or one of the TOKEN_* markers) to the
     * current line.
     *
     * @param string|int $word
     * @return bool false when the current line must be emitted first
     */
    public function addWordToLine($word)
    {
        if ($this->iInScript) {
            return true;
        }
        $prevLine = $this->iCurrentLine;
        if ($word === $this->TOKEN_BR) {
            $this->iCurrentWord = "";
            return false;
        }
        // <p>, <ul>/<ol> and their end tags end the line and queue one more break.
        if ($word === $this->TOKEN_P
            || $word === $this->TOKEN_UL
            || $word === $this->TOKEN_ENDUL
        ) {
            $this->iCurrentWord = $this->TOKEN_BR;
            return false;
        }
        if ($word === $this->TOKEN_LI) {
            $this->iCurrentWord = $this->TOKEN_AFTERLI;
            return false;
        }
        $toAdd = $word;
        if ($word === $this->TOKEN_AFTERLI) {
            $toAdd = "";
        }
        if ($prevLine != "") {
            $prevLine .= " ";
        } else {
            $prevLine = $this->getIndentation($word === $this->TOKEN_AFTERLI);
        }
        $candidateLine = $prevLine . $toAdd;
        if (strlen($candidateLine) > $this->iMaxColumns && $prevLine != "") {
            return false;
        }
        $this->iCurrentLine = $candidateLine;
        return true;
    }

    /**
     * Advances the parser and returns the next word or TOKEN_* marker, or
     * false once the parser reports the end of the input.
     *
     * @return string|int|false
     */
    public function getWord()
    {
        while (true) {
            if ($this->iHtmlParser->iNodeType == NODE_TYPE_TEXT) {
                if (!$this->iInText) {
                    $words = $this->splitWords($this->iHtmlParser->iNodeValue);
                    $this->iCurrentWordArray = $words;
                    $this->iCurrentWordIndex = 0;
                    $this->iInText = true;
                }
                if ($this->iCurrentWordIndex < count($this->iCurrentWordArray)) {
                    $this->iCurrentWord = $this->iCurrentWordArray[$this->iCurrentWordIndex++];
                    return $this->iCurrentWord;
                } else {
                    $this->iInText = false;
                }
            } else if ($this->iHtmlParser->iNodeType == NODE_TYPE_ELEMENT) {
                if (strcasecmp($this->iHtmlParser->iNodeName, "br") == 0) {
                    $this->iHtmlParser->parse();
                    $this->iCurrentWord = $this->TOKEN_BR;
                    return $this->iCurrentWord;
                } else if (strcasecmp($this->iHtmlParser->iNodeName, "p") == 0) {
                    $this->iHtmlParser->parse();
                    $this->iCurrentWord = $this->TOKEN_P;
                    return $this->iCurrentWord;
                } else if (strcasecmp($this->iHtmlParser->iNodeName, "script") == 0) {
                    $this->iHtmlParser->parse();
                    $this->iCurrentWord = "";
                    $this->iInScript = true;
                    return $this->iCurrentWord;
                } else if (strcasecmp($this->iHtmlParser->iNodeName, "ul") == 0 || strcasecmp($this->iHtmlParser->iNodeName, "ol") == 0) {
                    $this->iHtmlParser->parse();
                    $this->iCurrentWord = $this->TOKEN_UL;
                    $this->iListLevel++;
                    return $this->iCurrentWord;
                } else if (strcasecmp($this->iHtmlParser->iNodeName, "li") == 0) {
                    $this->iHtmlParser->parse();
                    $this->iCurrentWord = $this->TOKEN_LI;
                    return $this->iCurrentWord;
                }
            } else if ($this->iHtmlParser->iNodeType == NODE_TYPE_ENDELEMENT) {
                if (strcasecmp($this->iHtmlParser->iNodeName, "script") == 0) {
                    $this->iHtmlParser->parse();
                    $this->iCurrentWord = "";
                    $this->iInScript = false;
                    return $this->iCurrentWord;
                } else if (strcasecmp($this->iHtmlParser->iNodeName, "ul") == 0 || strcasecmp($this->iHtmlParser->iNodeName, "ol") == 0) {
                    $this->iHtmlParser->parse();
                    $this->iCurrentWord = $this->TOKEN_ENDUL;
                    if ($this->iListLevel > 0) {
                        $this->iListLevel--;
                    }
                    return $this->iCurrentWord;
                }
            }
            // Any other node (or an exhausted text node): move on to the next one.
            if (!$this->iHtmlParser->parse()) {
                break;
            }
        }
        return false;
    }

    /**
     * Splits $text on runs of spaces, tabs, CR and LF and passes every piece
     * through htmlDecode().
     *
     * @param string $text
     * @return array
     */
    public function splitWords($text)
    {
        // preg_split() replaces split(), which no longer exists in PHP 7+.
        $words = preg_split('/[ \t\r\n]+/', $text);
        foreach ($words as $idx => $word) {
            $words[$idx] = $this->htmlDecode($word);
        }
        return $words;
    }

    /**
     * Placeholder for entity decoding; currently returns $text unchanged.
     *
     * @param string $text
     * @return string
     */
    public function htmlDecode($text)
    {
        // TBD
        return $text;
    }

    /**
     * Builds the prefix for a new line from the current list nesting level.
     *
     * @param bool $hasLI true when the line starts a list item (gets a "- " bullet)
     * @return string
     */
    public function getIndentation($hasLI)
    {
        $indent = "";
        for ($idx = 0; $idx < ($this->iListLevel - 1); $idx++) {
            $indent .= " ";
        }
        if ($this->iListLevel > 0) {
            $indent = $hasLI ? ($indent . "- ") : ($indent . " ");
        }
        return $indent;
    }
}
|
||||
|
|
@ -1,365 +0,0 @@
|
|||
<?php
|
||||
|
||||
/*
|
||||
* Copyright (c) 2003 Jose Solorzano. All rights reserved.
|
||||
* Redistribution of source must retain this copyright notice.
|
||||
*
|
||||
* Jose Solorzano (http://jexpert.us) is a software consultant.
|
||||
*
|
||||
* Contributions by:
|
||||
* - Leo West (performance improvements)
|
||||
*/
|
||||
|
||||
// Node type codes; HtmlParser stores one of these in $iNodeType.
const NODE_TYPE_START = 0;
const NODE_TYPE_ELEMENT = 1;
const NODE_TYPE_ENDELEMENT = 2;
const NODE_TYPE_TEXT = 3;
const NODE_TYPE_COMMENT = 4;
const NODE_TYPE_DONE = 5;
|
||||
|
||||
/**
 * Class HtmlParser.
 * To use, create an instance of the class passing
 * HTML text. Then invoke parse() until it's false.
 * When parse() returns true, $iNodeType, $iNodeName
 * $iNodeValue and $iNodeAttributes are updated.
 *
 * To create an HtmlParser instance you may also
 * use convenience functions HtmlParser_ForFile
 * and HtmlParser_ForURL.
 */
class HtmlParser
{
    /**
     * Field iNodeType.
     * May be one of the NODE_TYPE_* constants above.
     */
    public $iNodeType;

    /**
     * Field iNodeName.
     * For elements, it's the name of the element.
     */
    public $iNodeName = "";

    /**
     * Field iNodeValue.
     * For text nodes, it's the text.
     */
    public $iNodeValue = "";

    /**
     * Field iNodeAttributes.
     * A string-indexed array containing attribute values
     * of the current node. Indexes are always lowercase.
     */
    public $iNodeAttributes;

    // The following fields should be
    // considered private:

    public $iHtmlText;
    public $iHtmlTextLength;
    public $iHtmlTextIndex = 0;
    /** Declared by the original code but never read or written by this class. */
    public $iHtmlCurrentChar;
    /** Character at $iHtmlTextIndex, or the integer -1 once past the end of the text. */
    public $iCurrentChar;
    public $BOE_ARRAY;
    public $B_ARRAY;
    public $BOS_ARRAY;

    /**
     * Constructor.
     * Constructs an HtmlParser instance with
     * the HTML text given.
     *
     * @param string $aHtmlText
     */
    public function __construct($aHtmlText)
    {
        $this->iHtmlText = (string) $aHtmlText;
        $this->iHtmlTextLength = strlen($this->iHtmlText);
        $this->iNodeAttributes = array();
        $this->setTextIndex(0);

        // Delimiter sets: Blanks-Or-Equals, Blanks, Blanks-Or-Slash.
        $this->BOE_ARRAY = array(" ", "\t", "\r", "\n", "=");
        $this->B_ARRAY = array(" ", "\t", "\r", "\n");
        $this->BOS_ARRAY = array(" ", "\t", "\r", "\n", "/");
    }

    /**
     * Method parse.
     * Parses the next node. Returns false only if
     * the end of the HTML text has been reached.
     * Updates values of iNode* fields.
     *
     * @return bool
     */
    public function parse()
    {
        $text = $this->skipToElement();
        if ($text !== "") {
            $this->iNodeType = NODE_TYPE_TEXT;
            $this->iNodeName = "Text";
            $this->iNodeValue = $text;
            return true;
        }
        return $this->readTag();
    }

    /** Resets $iNodeAttributes to an empty array. */
    public function clearAttributes()
    {
        $this->iNodeAttributes = array();
    }

    /**
     * Reads the tag starting at the current "<" and fills the iNode* fields
     * as an element, end element, comment or (for an invalid tag name) text.
     *
     * @return bool false when the current character is not "<" (end of input)
     */
    public function readTag()
    {
        if ($this->iCurrentChar !== "<") {
            $this->iNodeType = NODE_TYPE_DONE;
            return false;
        }
        $this->clearAttributes();
        $this->skipMaxInTag("<", 1);
        if ($this->iCurrentChar === '/') {
            $this->moveNext();
            $name = $this->skipToBlanksInTag();
            $this->iNodeType = NODE_TYPE_ENDELEMENT;
            $this->iNodeName = $name;
            $this->iNodeValue = "";
            $this->skipEndOfTag();
            return true;
        }
        $name = $this->skipToBlanksOrSlashInTag();
        if (!$this->isValidTagIdentifier($name)) {
            $comment = false;
            if (strpos($name, "!--") === 0) {
                if (strpos($name, "--", 3) === (strlen($name) - 2)) {
                    // Whole comment was read as the "name", e.g. <!--x-->.
                    $this->iNodeType = NODE_TYPE_COMMENT;
                    $this->iNodeName = "Comment";
                    $this->iNodeValue = "<" . $name . ">";
                    $comment = true;
                } else {
                    $rest = $this->skipToStringInTag("-->");
                    if ($rest != "") {
                        $this->iNodeType = NODE_TYPE_COMMENT;
                        $this->iNodeName = "Comment";
                        $this->iNodeValue = "<" . $name . $rest;
                        // Already skipped end of tag
                        return true;
                    }
                }
            }
            if (!$comment) {
                $this->iNodeType = NODE_TYPE_TEXT;
                $this->iNodeName = "Text";
                $this->iNodeValue = "<" . $name;
                return true;
            }
        } else {
            $this->iNodeType = NODE_TYPE_ELEMENT;
            $this->iNodeValue = "";
            $this->iNodeName = $name;
            // Each attribute must be preceded by at least one blank.
            while ($this->skipBlanksInTag()) {
                $attrName = $this->skipToBlanksOrEqualsInTag();
                if ($attrName != "" && $attrName != "/") {
                    $this->skipBlanksInTag();
                    if ($this->iCurrentChar === "=") {
                        $this->skipEqualsInTag();
                        $this->skipBlanksInTag();
                        $value = $this->readValueInTag();
                        $this->iNodeAttributes[strtolower($attrName)] = $value;
                    } else {
                        // Attribute without a value, e.g. <input disabled>.
                        $this->iNodeAttributes[strtolower($attrName)] = "";
                    }
                }
            }
        }
        $this->skipEndOfTag();
        return true;
    }

    /**
     * @param string $name
     * @return bool true when $name consists only of letters, digits, "_" and "-"
     */
    public function isValidTagIdentifier($name)
    {
        // preg_match() replaces ereg(), which no longer exists in PHP 7+.
        // The D modifier keeps "$" from matching before a trailing newline.
        return preg_match('/^[A-Za-z0-9_\-]+$/D', $name) === 1;
    }

    /** @return bool true when at least one blank was skipped */
    public function skipBlanksInTag()
    {
        return "" != ($this->skipInTag($this->B_ARRAY));
    }

    public function skipToBlanksOrEqualsInTag()
    {
        return $this->skipToInTag($this->BOE_ARRAY);
    }

    public function skipToBlanksInTag()
    {
        return $this->skipToInTag($this->B_ARRAY);
    }

    public function skipToBlanksOrSlashInTag()
    {
        return $this->skipToInTag($this->BOS_ARRAY);
    }

    public function skipEqualsInTag()
    {
        return $this->skipMaxInTag("=", 1);
    }

    /**
     * Reads an attribute value: quoted with " or ', or unquoted up to the
     * next blank. Reading also stops at ">" in every case.
     *
     * @return string
     */
    public function readValueInTag()
    {
        $ch = $this->iCurrentChar;
        if ($ch === "\"" || $ch === "'") {
            $this->skipMaxInTag($ch, 1);
            $value = $this->skipToInTag($ch);
            $this->skipMaxInTag($ch, 1);
            return $value;
        }
        return $this->skipToBlanksInTag();
    }

    /**
     * Moves the read position to $index and refreshes $iCurrentChar.
     *
     * @param int $index
     */
    public function setTextIndex($index)
    {
        $this->iHtmlTextIndex = $index;
        if ($index >= $this->iHtmlTextLength) {
            $this->iCurrentChar = -1;
        } else {
            // Square brackets: the {} string-offset syntax no longer exists in PHP 8.
            $this->iCurrentChar = $this->iHtmlText[$index];
        }
    }

    /** @return bool false when already at the end of the text */
    public function moveNext()
    {
        if ($this->iHtmlTextIndex < $this->iHtmlTextLength) {
            $this->setTextIndex($this->iHtmlTextIndex + 1);
            return true;
        }
        return false;
    }

    /** Advances past the next ">" (or to the end of the text if there is none). */
    public function skipEndOfTag()
    {
        while (($ch = $this->iCurrentChar) !== -1) {
            $this->moveNext();
            if ($ch === ">") {
                return;
            }
        }
    }

    /**
     * Consumes consecutive characters that are in $chars, stopping at ">".
     *
     * @param array|string $chars
     * @return string the characters consumed
     */
    public function skipInTag($chars)
    {
        $chars = $this->charList($chars);
        $sb = "";
        while (($ch = $this->iCurrentChar) !== -1) {
            if ($ch === ">" || !in_array($ch, $chars, true)) {
                return $sb;
            }
            $sb .= $ch;
            $this->moveNext();
        }
        return $sb;
    }

    /**
     * Like skipInTag(), but consumes at most $maxChars characters.
     *
     * @param array|string $chars
     * @param int          $maxChars
     * @return string the characters consumed
     */
    public function skipMaxInTag($chars, $maxChars)
    {
        $chars = $this->charList($chars);
        $sb = "";
        $count = 0;
        while (($ch = $this->iCurrentChar) !== -1 && $count++ < $maxChars) {
            if ($ch === ">" || !in_array($ch, $chars, true)) {
                return $sb;
            }
            $sb .= $ch;
            $this->moveNext();
        }
        return $sb;
    }

    /**
     * Consumes characters up to (not including) the first one that is ">"
     * or is in $chars.
     *
     * @param array|string $chars
     * @return string the characters consumed
     */
    public function skipToInTag($chars)
    {
        $chars = $this->charList($chars);
        $sb = "";
        while (($ch = $this->iCurrentChar) !== -1) {
            if ($ch === ">" || in_array($ch, $chars, true)) {
                return $sb;
            }
            $sb .= $ch;
            $this->moveNext();
        }
        return $sb;
    }

    /**
     * Consumes and returns the text up to the next "<" or the end of input.
     *
     * @return string
     */
    public function skipToElement()
    {
        $sb = "";
        while (($ch = $this->iCurrentChar) !== -1) {
            if ($ch === "<") {
                return $sb;
            }
            $sb .= $ch;
            $this->moveNext();
        }
        return $sb;
    }

    /**
     * Returns text between current position and $needle,
     * inclusive, or "" if not found. The current index is moved to a point
     * after the location of $needle, or not moved at all
     * if nothing is found.
     */
    public function skipToStringInTag($needle)
    {
        $pos = strpos($this->iHtmlText, $needle, $this->iHtmlTextIndex);
        if ($pos === false) {
            return "";
        }
        $top = $pos + strlen($needle);
        $retvalue = substr($this->iHtmlText, $this->iHtmlTextIndex, $top - $this->iHtmlTextIndex);
        $this->setTextIndex($top);
        return $retvalue;
    }

    /**
     * Normalises a delimiter argument to a list of one-character strings.
     * Several methods are called with a bare string such as "<" or "="; the
     * original code ran count() on it, which throws a TypeError on PHP 8.
     *
     * @param array|string $chars
     * @return array
     */
    private function charList($chars)
    {
        if (is_array($chars)) {
            return $chars;
        }
        $chars = (string) $chars;
        return $chars === "" ? array() : str_split($chars);
    }
}
|
||||
|
||||
/**
 * Builds an HtmlParser over the contents of a file.
 * Delegates to HtmlParser_ForURL(), passing $fileName through unchanged.
 *
 * @param string $fileName
 * @return HtmlParser
 */
function HtmlParser_ForFile($fileName)
{
    $parser = HtmlParser_ForURL($fileName);

    return $parser;
}
|
||||
|
||||
/**
 * Builds an HtmlParser over everything that can be read from $url.
 *
 * $url is handed to fopen() as is. When it cannot be opened or read, the
 * returned parser is built over an empty string, so its first parse() call
 * returns false.
 *
 * NOTE(review): fopen() accepts local paths and stream wrappers alike; do not
 * pass caller-controlled input here without validating it first.
 *
 * @param string $url
 * @return HtmlParser
 */
function HtmlParser_ForURL($url)
{
    $fp = fopen($url, "r");
    if ($fp === false) {
        // fopen() has already raised its warning; do not go on to
        // fread()/fclose() with a non-resource.
        return new HtmlParser("");
    }

    // Reads up to EOF; a manual fread() loop that stops at the first empty
    // chunk can end early on streams that deliver data in bursts.
    $content = stream_get_contents($fp);
    fclose($fp);

    return new HtmlParser($content === false ? "" : $content);
}
|
||||
|
||||
// Closing tag omitted on purpose: this file contains only PHP code.
|
||||
|
|
@ -1,8 +1,15 @@
|
|||
<?php
|
||||
|
||||
/**
|
||||
* Rhymix Default iframe/object/embed Whitelist
|
||||
*
|
||||
* Copyright (c) Rhymix Developers and Contributors
|
||||
*/
|
||||
return array(
|
||||
|
||||
// Allowed domains in <object> or <embed> tag
|
||||
/**
|
||||
* Allowed domains in <object> or <embed> tag
|
||||
*/
|
||||
'object' => array(
|
||||
// YouTube
|
||||
'www.youtube.com/',
|
||||
|
|
@ -49,7 +56,9 @@ return array(
|
|||
'sbsplayer.sbs.co.kr/',
|
||||
),
|
||||
|
||||
// Allowed domains in <iframe> tag
|
||||
/**
|
||||
* Allowed domains in <iframe> tag
|
||||
*/
|
||||
'iframe' => array(
|
||||
// YouTube
|
||||
'www.youtube.com/',
|
||||
|
|
@ -69,7 +78,9 @@ return array(
|
|||
'afree.ca/',
|
||||
),
|
||||
|
||||
// Allowed extensions
|
||||
/**
|
||||
* Allowed extensions in <object> or <embed> tag
|
||||
*/
|
||||
'extensions' => array(
|
||||
'123' => 1,
|
||||
'3ds' => 1,
|
||||
Loading…
Add table
Add a link
Reference in a new issue