mirror of
https://github.com/Lastorder-DC/rhymix.git
synced 2026-01-16 17:59:55 +09:00
issue 2239 Object, Embed, Iframe source allow by white list
git-svn-id: http://xe-core.googlecode.com/svn/branches/1.5.3.2@11024 201d5d3c-b55e-5fd7-737f-ddc643e51545
This commit is contained in:
parent
8e64ac6e2b
commit
927c944ff3
25 changed files with 1044 additions and 2 deletions
365
classes/security/phphtmlparser/src/htmlparser.inc
Executable file
365
classes/security/phphtmlparser/src/htmlparser.inc
Executable file
|
|
@ -0,0 +1,365 @@
|
|||
<?php
|
||||
|
||||
/*
|
||||
* Copyright (c) 2003 Jose Solorzano. All rights reserved.
|
||||
* Redistribution of source must retain this copyright notice.
|
||||
*
|
||||
* Jose Solorzano (http://jexpert.us) is a software consultant.
|
||||
*
|
||||
* Contributions by:
|
||||
* - Leo West (performance improvements)
|
||||
*/
|
||||
|
||||
// Node type codes stored in HtmlParser::$iNodeType after each parse() call.
define('NODE_TYPE_START', 0);       // not assigned anywhere in this file
define('NODE_TYPE_ELEMENT', 1);     // opening tag, e.g. <a href="...">
define('NODE_TYPE_ENDELEMENT', 2);  // closing tag, e.g. </a>
define('NODE_TYPE_TEXT', 3);        // text between tags, or an unparsable "<..." fragment
define('NODE_TYPE_COMMENT', 4);     // <!-- ... -->
define('NODE_TYPE_DONE', 5);        // end of input reached
|
||||
|
||||
/**
|
||||
* Class HtmlParser.
|
||||
* To use, create an instance of the class passing
|
||||
* HTML text. Then invoke parse() until it's false.
|
||||
* When parse() returns true, $iNodeType, $iNodeName
|
||||
* $iNodeValue and $iNodeAttributes are updated.
|
||||
*
|
||||
* To create an HtmlParser instance you may also
|
||||
* use convenience functions HtmlParser_ForFile
|
||||
* and HtmlParser_ForURL.
|
||||
*/
|
||||
class HtmlParser {

    /**
     * Type of the node produced by the last parse() call.
     * One of the NODE_TYPE_* constants.
     */
    public $iNodeType;

    /**
     * Name of the current node: the tag name for elements and end
     * elements, "Text" for text nodes, "Comment" for comments.
     */
    public $iNodeName = "";

    /**
     * Value of the current node: the text of a text node or the full
     * source of a comment; empty for elements and end elements.
     */
    public $iNodeValue = "";

    /**
     * Attributes of the current element, keyed by lowercased attribute
     * name. Attributes written without a value map to "".
     */
    public $iNodeAttributes;

    // The following fields should be considered private.

    /** The HTML text being parsed. */
    public $iHtmlText;

    /** Length of $iHtmlText in bytes. */
    public $iHtmlTextLength;

    /** Byte offset of the current read position in $iHtmlText. */
    public $iHtmlTextIndex = 0;

    /** Never assigned by this class; kept so existing references do not break. */
    public $iHtmlCurrentChar;

    /** Character at $iHtmlTextIndex, or the integer -1 once past the end of the text. */
    public $iCurrentChar = -1;

    /** Blank characters plus "=". */
    public $BOE_ARRAY;

    /** Blank characters. */
    public $B_ARRAY;

    /** Blank characters plus "/". */
    public $BOS_ARRAY;

    /**
     * Constructs an HtmlParser instance for the HTML text given.
     *
     * @param string $aHtmlText HTML source to tokenize.
     */
    public function __construct ($aHtmlText) {
        $this->iHtmlText = (string) $aHtmlText;
        $this->iHtmlTextLength = strlen($this->iHtmlText);
        $this->iNodeAttributes = array();

        $this->BOE_ARRAY = array (" ", "\t", "\r", "\n", "=" );
        $this->B_ARRAY = array (" ", "\t", "\r", "\n" );
        $this->BOS_ARRAY = array (" ", "\t", "\r", "\n", "/" );

        $this->setTextIndex (0);
    }

    /**
     * Legacy PHP 4-style constructor name. PHP 8 no longer treats a method
     * named after the class as a constructor, so the real work lives in
     * __construct(); this method only keeps explicit calls working.
     *
     * @param string $aHtmlText HTML source to tokenize.
     */
    public function HtmlParser ($aHtmlText) {
        $this->__construct ($aHtmlText);
    }

    /**
     * Parses the next node and updates the iNode* fields.
     *
     * @return bool false only when the end of the HTML text has been reached.
     */
    public function parse() {
        $text = $this->skipToElement();
        if ($text !== "") {
            $this->iNodeType = NODE_TYPE_TEXT;
            $this->iNodeName = "Text";
            $this->iNodeValue = $text;
            return true;
        }
        return $this->readTag();
    }

    /** Resets the attribute map of the current node. */
    public function clearAttributes() {
        $this->iNodeAttributes = array();
    }

    /**
     * Reads one tag (element, end element or comment) starting at the
     * current "<". Anything that starts with "<" but is not a recognisable
     * tag is reported as a text node.
     *
     * @return bool false when the current character is not "<" (end of input).
     */
    public function readTag() {
        if ($this->iCurrentChar !== "<") {
            $this->iNodeType = NODE_TYPE_DONE;
            return false;
        }
        $this->clearAttributes();
        $tagStart = $this->iHtmlTextIndex;
        $this->skipMaxInTag ("<", 1);

        if ($this->iCurrentChar === "/") {
            $this->moveNext();
            $name = $this->skipToBlanksInTag();
            $this->iNodeType = NODE_TYPE_ENDELEMENT;
            $this->iNodeName = $name;
            $this->iNodeValue = "";
            $this->skipEndOfTag();
            return true;
        }

        $name = $this->skipToBlanksOrSlashInTag();
        if ($this->isValidTagIdentifier ($name)) {
            $this->iNodeType = NODE_TYPE_ELEMENT;
            $this->iNodeValue = "";
            $this->iNodeName = $name;
            $this->readAttributesInTag();
            $this->skipEndOfTag();
            return true;
        }

        if (strpos($name, "!--") === 0 && $this->readCommentFrom ($tagStart)) {
            return true;
        }

        // Not a tag and not a terminated comment: hand the fragment back as text.
        $this->iNodeType = NODE_TYPE_TEXT;
        $this->iNodeName = "Text";
        $this->iNodeValue = "<" . $name;
        return true;
    }

    /**
     * Reads a comment whose "<" is at $tagStart. The comment ends at the
     * first "-->" found from $tagStart + 2 onwards, so "<!-->" and "<!--->"
     * count as complete comments and a name such as "!--a--b--" cannot make
     * the comment run on to a later "-->".
     *
     * @param int $tagStart Offset of the "<" that opens the comment.
     * @return bool true if a terminated comment was read; false (position
     *              unchanged) when no "-->" follows.
     */
    private function readCommentFrom ($tagStart) {
        $end = strpos ($this->iHtmlText, "-->", $tagStart + 2);
        if ($end === false) {
            return false;
        }
        $end += 3;
        $this->iNodeType = NODE_TYPE_COMMENT;
        $this->iNodeName = "Comment";
        $this->iNodeValue = substr ($this->iHtmlText, $tagStart, $end - $tagStart);
        $this->setTextIndex ($end);
        return true;
    }

    /**
     * Reads every attribute of the current element into $iNodeAttributes,
     * stopping at ">" or at the end of the text.
     *
     * Blanks and "/" both separate attributes, and an attribute may follow
     * a quoted value without any separator, so markup such as
     * <x/a=1> or <x a="1"b=2> does not hide attributes from the caller.
     * When a name is repeated the first value is kept.
     */
    private function readAttributesInTag() {
        $nameEnd = array (" ", "\t", "\r", "\n", "=", "/");
        while (true) {
            $this->skipInTag ($this->BOS_ARRAY);
            if ($this->iCurrentChar === -1 || $this->iCurrentChar === ">") {
                return;
            }
            $attrName = $this->skipToInTag ($nameEnd);
            if ($attrName === "") {
                // Stray "=" with no name before it: drop it so the loop always advances.
                $this->moveNext();
                continue;
            }
            $this->skipBlanksInTag();
            $value = "";
            if ($this->iCurrentChar === "=") {
                $this->skipEqualsInTag();
                $this->skipBlanksInTag();
                $value = $this->readValueInTag();
            }
            $key = strtolower($attrName);
            if (!array_key_exists ($key, $this->iNodeAttributes)) {
                $this->iNodeAttributes[$key] = $value;
            }
        }
    }

    /**
     * @param string $name Candidate tag name.
     * @return bool true if $name consists only of letters, digits, "_" and "-".
     */
    public function isValidTagIdentifier ($name) {
        return preg_match ('/^[A-Za-z0-9_-]+$/D', $name) === 1;
    }

    /** Skips blanks; returns true if at least one character was skipped. */
    public function skipBlanksInTag() {
        return "" !== $this->skipInTag ($this->B_ARRAY);
    }

    /** Returns the text up to the next blank, "=" or ">". */
    public function skipToBlanksOrEqualsInTag() {
        return $this->skipToInTag ($this->BOE_ARRAY);
    }

    /** Returns the text up to the next blank or ">". */
    public function skipToBlanksInTag() {
        return $this->skipToInTag ($this->B_ARRAY);
    }

    /** Returns the text up to the next blank, "/" or ">". */
    public function skipToBlanksOrSlashInTag() {
        return $this->skipToInTag ($this->BOS_ARRAY);
    }

    /** Skips a single "=" if it is the current character. */
    public function skipEqualsInTag() {
        return $this->skipMaxInTag ("=", 1);
    }

    /**
     * Reads an attribute value at the current position.
     *
     * A quoted value runs to its closing quote, even when it contains ">".
     * If the closing quote is missing, the value ends at the next ">"
     * instead. An unquoted value runs to the next blank or ">".
     *
     * @return string The value without its quotes.
     */
    public function readValueInTag() {
        $quote = $this->iCurrentChar;
        if ($quote !== "\"" && $quote !== "'") {
            return $this->skipToBlanksInTag();
        }

        $from = $this->iHtmlTextIndex + 1;
        $to = strpos ($this->iHtmlText, $quote, $from);
        if ($to === false) {
            // Unterminated quote: recover at the end of the tag.
            $this->moveNext();
            return $this->skipToInTag ($quote);
        }
        $value = substr ($this->iHtmlText, $from, $to - $from);
        $this->setTextIndex ($to + 1);
        return $value;
    }

    /**
     * Moves the read position to $index and refreshes $iCurrentChar,
     * which becomes -1 when $index is outside the text.
     *
     * @param int $index New byte offset.
     */
    public function setTextIndex ($index) {
        $this->iHtmlTextIndex = $index;
        if ($index >= 0 && $index < $this->iHtmlTextLength) {
            $this->iCurrentChar = $this->iHtmlText[$index];
        }
        else {
            $this->iCurrentChar = -1;
        }
    }

    /**
     * Advances the read position by one character.
     *
     * @return bool false if the position was already at the end of the text.
     */
    public function moveNext() {
        if ($this->iHtmlTextIndex < $this->iHtmlTextLength) {
            $this->setTextIndex ($this->iHtmlTextIndex + 1);
            return true;
        }
        return false;
    }

    /** Moves just past the next ">", or to the end of the text if there is none. */
    public function skipEndOfTag() {
        if ($this->iCurrentChar === -1) {
            return;
        }
        $pos = strpos ($this->iHtmlText, ">", $this->iHtmlTextIndex);
        $this->setTextIndex ($pos === false ? $this->iHtmlTextLength : $pos + 1);
    }

    /**
     * Tells whether $ch is one of $chars.
     *
     * @param string       $ch    A single character.
     * @param array|string $chars Array of characters, or a string whose
     *                            characters are the candidates.
     * @return bool
     */
    private function matchesAny ($ch, $chars) {
        if (is_array($chars)) {
            return in_array ($ch, $chars, true);
        }
        return strpos ((string) $chars, $ch) !== false;
    }

    /**
     * Consumes consecutive characters that are in $chars, stopping at ">".
     *
     * @param array|string $chars Characters to skip.
     * @return string The characters consumed.
     */
    public function skipInTag ($chars) {
        $sb = "";
        while (($ch = $this->iCurrentChar) !== -1
            && $ch !== ">"
            && $this->matchesAny ($ch, $chars)) {
            $sb .= $ch;
            $this->moveNext();
        }
        return $sb;
    }

    /**
     * Like skipInTag(), but consumes at most $maxChars characters.
     *
     * @param array|string $chars    Characters to skip.
     * @param int          $maxChars Upper bound on consumed characters.
     * @return string The characters consumed.
     */
    public function skipMaxInTag ($chars, $maxChars) {
        $sb = "";
        while (strlen($sb) < $maxChars
            && ($ch = $this->iCurrentChar) !== -1
            && $ch !== ">"
            && $this->matchesAny ($ch, $chars)) {
            $sb .= $ch;
            $this->moveNext();
        }
        return $sb;
    }

    /**
     * Consumes characters until one of $chars or ">" is the current character.
     *
     * @param array|string $chars Characters that stop the scan.
     * @return string The characters consumed (the stop character is not included).
     */
    public function skipToInTag ($chars) {
        $sb = "";
        while (($ch = $this->iCurrentChar) !== -1
            && $ch !== ">"
            && !$this->matchesAny ($ch, $chars)) {
            $sb .= $ch;
            $this->moveNext();
        }
        return $sb;
    }

    /**
     * Consumes text up to the next "<" or the end of the text.
     *
     * @return string The text consumed; "" if already at "<" or at the end.
     */
    public function skipToElement() {
        if ($this->iCurrentChar === -1) {
            return "";
        }
        $pos = strpos ($this->iHtmlText, "<", $this->iHtmlTextIndex);
        $end = ($pos === false) ? $this->iHtmlTextLength : $pos;
        $text = (string) substr ($this->iHtmlText, $this->iHtmlTextIndex, $end - $this->iHtmlTextIndex);
        $this->setTextIndex ($end);
        return $text;
    }

    /**
     * Returns text between current position and $needle,
     * inclusive, or "" if not found. The current index is moved to a point
     * after the location of $needle, or not moved at all
     * if nothing is found.
     *
     * @param string $needle Text to search for.
     * @return string
     */
    public function skipToStringInTag ($needle) {
        $pos = strpos ($this->iHtmlText, $needle, $this->iHtmlTextIndex);
        if ($pos === false) {
            return "";
        }
        $top = $pos + strlen($needle);
        $retvalue = substr ($this->iHtmlText, $this->iHtmlTextIndex, $top - $this->iHtmlTextIndex);
        $this->setTextIndex ($top);
        return $retvalue;
    }
}
|
||||
|
||||
/**
 * Creates an HtmlParser for the contents of the given file name.
 * Delegates to HtmlParser_ForURL(), passing $fileName through unchanged.
 */
function HtmlParser_ForFile ($fileName) {
    $parser = HtmlParser_ForURL ($fileName);
    return $parser;
}
|
||||
|
||||
/**
 * Creates an HtmlParser for the content read from $url.
 *
 * The location is opened with fopen() in read mode and read in 8192-byte
 * chunks until a read returns no data. If the location cannot be opened,
 * fopen() reports its usual warning and the parser is created over empty
 * text, so the first parse() call returns false.
 *
 * @param string $url Location to open with fopen().
 * @return HtmlParser
 */
function HtmlParser_ForURL ($url) {
    $content = "";
    $fp = fopen ($url, "r");
    if ($fp !== false) {
        while (true) {
            $data = fread ($fp, 8192);
            // fread() yields false on failure and "" when nothing more is available.
            if ($data === false || $data === "") {
                break;
            }
            $content .= $data;
        }
        fclose ($fp);
    }
    return new HtmlParser ($content);
}
|
||||
|
||||
?>
|
||||
Loading…
Add table
Add a link
Reference in a new issue