Replace XE XML parser with new implementation based on SimpleXML

This commit is contained in:
Kijin Sung 2025-05-17 12:44:04 +09:00
parent a1f452fa83
commit db72b670d8
2 changed files with 127 additions and 216 deletions

View file

@ -1,240 +1,46 @@
<?php <?php
/** /**
* Xml_Node_ class * XML Parser class from XE
* Element node or attribute node.
* @author NAVER (developers@xpressengine.com)
* @package /classes/xml
* @version 0.1
*/
#[AllowDynamicProperties]
class Xml_Node_
{
/** In PHP5 this will silence E_STRICT warnings
* for undeclared properties.
* No effect in PHP4
*/
function __get($name)
{
return NULL;
}
}
/**
* XmlParser class
* Class parsing a given xmlrpc request and creating a data object
* @remarks <pre>{
* This class may drop unsupported XML language attributes when multiple language attributes are given.
* For example, if xml:lang values 'ko, en, ch, jp, ...' are given in an XML file, only ko will be kept and all other language
* attributes will be ignored when ko is the only supported language. It seems to work fine now, but we did not scrutinize any potential side effects.
* }</pre>
* *
* @author NAVER (developers@xpressengine.com) * Renamed because of conflict with built-in XMLParser class in PHP 8+
* @package /classes/xml *
* @version 0.1 * @deprecated
*/ */
class XeXmlParser class XeXmlParser
{ {
/** /**
* Xml parser * Load an XML file.
* @var resource *
* @deprecated
* @param string $filename
* @return ?object
*/ */
var $oParser = NULL; public static function loadXmlFile($filename): ?object
/**
* Input xml
* @var string
*/
var $input = NULL;
/**
* Output object in array
* @var array
*/
var $output = array();
/**
* The default language type
* @var string
*/
var $lang = "en";
/**
* Load a xml file specified by a filename and parse it to Return the resultant data object
* @param string $filename a file path of file
* @return object|null Returns a data object containing data extracted from a xml file or NULL if a specified file does not exist
*/
function loadXmlFile($filename)
{ {
if(!file_exists($filename)) $filename = strval($filename);
if (file_exists($filename))
{ {
return; return Rhymix\Framework\Parsers\XEXMLParser::loadXMLFile($filename);
}
$buff = FileHandler::readFile($filename);
$oXmlParser = new self();
return $oXmlParser->parse($buff);
}
/**
* Parse xml data to extract values from it and construct data object
* @param string $input a data buffer containing xml data
* @param mixed $arg1 ???
* @param mixed $arg2 ???
* @return object|null Returns a resultant data object or NULL in case of error
*/
function parse($input = '', $arg1 = NULL, $arg2 = NULL)
{
// Save the compile starting time for debugging
$start = microtime(true);
$this->lang = Context::getLangType();
$this->input = $input ? $input : $GLOBALS['HTTP_RAW_POST_DATA'];
$this->input = str_replace(array('', ''), array('', ''), $this->input);
// extracts a supported language
preg_match_all("/xml:lang=\"([^\"].+)\"/i", $this->input, $matches);
// extracts the supported language when xml:lang is used
if(count($matches[1]) && $supported_lang = array_unique($matches[1]))
{
$tmpLangList = array_flip($supported_lang);
// if lang of the first log-in user doesn't exist, apply en by default if exists. Otherwise apply the first lang.
if(!isset($tmpLangList[$this->lang]))
{
if(isset($tmpLangList['en']))
{
$this->lang = 'en';
}
else
{
$this->lang = array_shift($supported_lang);
}
}
// uncheck the language if no specific language is set.
} }
else else
{ {
$this->lang = ''; return null;
}
$this->oParser = xml_parser_create('UTF-8');
//xml_set_object($this->oParser, $this);
xml_set_element_handler($this->oParser, [$this, "_tagOpen"], [$this, "_tagClosed"]);
xml_set_character_data_handler($this->oParser, [$this, "_tagBody"]);
xml_parse($this->oParser, $this->input);
xml_parser_free($this->oParser);
if(!count($this->output))
{
return;
}
$output = array_shift($this->output);
// Save compile starting time for debugging
if (!isset($GLOBALS['__xmlparse_elapsed__']))
{
$GLOBALS['__xmlparse_elapsed__'] = 0;
}
$GLOBALS['__xmlparse_elapsed__'] += microtime(true) - $start;
return $output;
}
/**
* Start element handler.
* @param resource $parse an instance of parser
* @param string $node_name a name of node
* @param array $attrs attributes to be set
* @return array
*/
function _tagOpen($parser, $node_name, $attrs)
{
$obj = new Xml_Node_();
$obj->node_name = strtolower($node_name);
$obj->attrs = $this->_arrToAttrsObj($attrs);
$this->output[] = $obj;
}
/**
* Character data handler
* Variable in the last element of this->output
* @param resource $parse an instance of parser
* @param string $body a data to be added
* @return void
*/
function _tagBody($parser, $body)
{
//if(!trim($body)) return;
$this->output[count($this->output) - 1]->body .= $body;
}
/**
* End element handler
* @param resource $parse an instance of parser
* @param string $node_name name of xml node
* @return void
*/
function _tagClosed($parser, $node_name)
{
$node_name = strtolower($node_name);
$cur_obj = array_pop($this->output);
$parent_obj = &$this->output[count($this->output) - 1];
if($this->lang && $cur_obj->attrs->{'xml:lang'} && $cur_obj->attrs->{'xml:lang'} != $this->lang)
{
return;
}
if($this->lang && ($parent_obj->{$node_name}->attrs->{'xml:lang'} ?? null) && $parent_obj->{$node_name}->attrs->{'xml:lang'} != $this->lang)
{
return;
}
if(isset($parent_obj->{$node_name}))
{
$tmp_obj = $parent_obj->{$node_name};
if(is_array($tmp_obj))
{
$parent_obj->{$node_name}[] = $cur_obj;
}
else
{
$parent_obj->{$node_name} = array($tmp_obj, $cur_obj);
}
}
else
{
if(!is_object($parent_obj))
{
$parent_obj = (object) $parent_obj;
}
$parent_obj->{$node_name} = $cur_obj;
} }
} }
/** /**
* Method to transfer values in an array to a data object * Load an XML string.
* @param array $arr data array *
* @return Xml_Node_ object * @deprecated
* @param string $input
* @return ?object
*/ */
function _arrToAttrsObj($arr) function parse($input = ''): ?object
{ {
$output = new Xml_Node_(); $input = strval($input !== '' ? $input : $GLOBALS['HTTP_RAW_POST_DATA']);
foreach($arr as $key => $val) return Rhymix\Framework\Parsers\XEXMLParser::loadXMLString($input);
{
$key = strtolower($key);
$output->{$key} = $val;
}
return $output;
} }
} }
/** /**

View file

@ -0,0 +1,105 @@
<?php
namespace Rhymix\Framework\Parsers;
/**
* Generic XML parser that produces output identical to XE's XML parser.
*/
class XEXMLParser
{
	/**
	 * Load an XML file and convert it to a tree of plain objects.
	 *
	 * @param string $filename Path of the XML file to read
	 * @param string $lang Language code used to filter children carrying xml:lang;
	 *                     when empty, the current context language is used
	 * @return ?object Converted tree, or null if the file cannot be read or parsed
	 */
	public static function loadXMLFile(string $filename, string $lang = ''): ?object
	{
		$content = file_get_contents($filename);

		// file_get_contents() returns false on failure; do not forward a
		// boolean into the string-typed parameter of loadXMLString().
		if ($content === false)
		{
			return null;
		}

		return self::loadXMLString($content, $lang);
	}

	/**
	 * Load an XML string and convert it to a tree of plain objects.
	 *
	 * The returned object has a single property named after the root element,
	 * holding the converted root node (see _recursiveConvert()).
	 *
	 * @param string $content XML source
	 * @param string $lang Language code used to filter children carrying xml:lang;
	 *                     when empty, the current context language is used
	 * @return ?object Converted tree, or null if the content is empty or not well-formed
	 */
	public static function loadXMLString(string $content, string $lang = ''): ?object
	{
		// Apply transformations identical to XE's XML parser.
		$content = str_replace([chr(1), chr(2)], '', $content);
		if (trim($content) === '')
		{
			return null;
		}

		// Collect libxml errors internally so that malformed input results in
		// a null return value instead of PHP warnings. The previous setting is
		// always restored, and errors are cleared only if we enabled collection.
		$previous_setting = libxml_use_internal_errors(true);
		try
		{
			$xml = simplexml_load_string($content);
		}
		finally
		{
			if (!$previous_setting)
			{
				libxml_clear_errors();
			}
			libxml_use_internal_errors($previous_setting);
		}
		if ($xml === false)
		{
			return null;
		}

		// Get the current language.
		$lang = $lang ?: (\Context::getLangType() ?: 'en');

		// Create the result object, keyed by the name of the root element.
		$result = new \stdClass;
		$result->{$xml->getName()} = self::_recursiveConvert($xml, $lang);
		return $result;
	}

	/**
	 * Convert an XML node recursively.
	 *
	 * Each node becomes an object with the properties node_name, attrs and
	 * body, plus one property per child element name. When several children
	 * share a name, that property holds an array of nodes in document order.
	 *
	 * @param \SimpleXMLElement $element Element to convert
	 * @param string $lang Children whose xml:lang differs from this value are skipped
	 * @return object
	 */
	protected static function _recursiveConvert(\SimpleXMLElement $element, string $lang): \stdClass
	{
		// Create the basic structure of the node.
		$node = new \stdClass;
		$node->node_name = $element->getName();
		$node->attrs = new \stdClass;
		$node->body = trim((string) $element);

		// Add attributes without a namespace.
		foreach ($element->attributes() as $key => $val)
		{
			$node->attrs->{$key} = trim((string) $val);
		}

		// Add attributes with the xml: prefix (such as xml:lang), keeping the prefix in the key.
		foreach ($element->attributes('xml', true) as $key => $val)
		{
			$node->attrs->{"xml:$key"} = trim((string) $val);
		}

		// Recursively process child elements.
		foreach ($element->children() as $child)
		{
			// Skip children that do not match the language.
			// Children without xml:lang are always kept.
			$child_attrs = $child->attributes('xml', true);
			if (isset($child_attrs['lang']) && (string) $child_attrs['lang'] !== $lang)
			{
				continue;
			}

			$name = $child->getName();
			$converted = self::_recursiveConvert($child, $lang);

			if (!isset($node->{$name}))
			{
				// First child with this name: store the node itself.
				$node->{$name} = $converted;
			}
			elseif (is_array($node->{$name}))
			{
				// Third and later child with this name: append to the list.
				$node->{$name}[] = $converted;
			}
			else
			{
				// Second child with this name: promote the property to a list.
				$node->{$name} = [$node->{$name}, $converted];
			}
		}

		return $node;
	}
}