Viewing file: Tokenizer.php (10.58 KB) -rw-rw-rw- Select action/file-type: (+) | (+) | (+) | Code (+) | Session (+) | (+) | SDB (+) | (+) | (+) | (+) | (+) | (+) |
<?PHP /* vim: set expandtab tabstop=4 shiftwidth=4: */ // +----------------------------------------------------------------------+ // | PHP Version 4 | // +----------------------------------------------------------------------+ // | Copyright (c) 1997-2002 The PHP Group | // +----------------------------------------------------------------------+ // | This source file is subject to version 2.0 of the PHP license, | // | that is bundled with this package in the file LICENSE, and is | // | available at through the world-wide-web at | // | http://www.php.net/license/2_02.txt. | // | If you did not receive a copy of the PHP license and are unable to | // | obtain it through the world-wide-web, please send a note to | // | license@php.net so we can mail you a copy immediately. | // +----------------------------------------------------------------------+ // | Authors: Stephan Schmidt <schst@php.net> | // +----------------------------------------------------------------------+
/** * XML/Beautifier/Tokenizer.php * * @category XML * @package XML_Beautifier * @author Stephan Schmidt <schst@php.net> * @todo tokenize DTD * @todo check for xml:space attribute */
/** * XML_Parser is needed to parse the document */ require_once 'XML/Parser.php'; /** * Tokenizer for XML_Beautifier * * This class breaks an XML document in seperate tokens * that will be rendered by an XML_Beautifier renderer. * * @category XML * @package XML_Beautifier * @author Stephan Schmidt <schst@php.net> */ class XML_Beautifier_Tokenizer extends XML_Parser {
/** * current depth * @var integer * @access private */ var $_depth = 0;
/** * stack for all found elements * @var array * @access private */ var $_struct = array();
/** * current parsing mode * @var string * @access private */ var $_mode = "xml"; /** * Tokenize a document * * @param string $document filename or XML document * @param boolean $isFile flag to indicate whether the first parameter is a file */ function tokenize( $document, $isFile = true ) { $this->folding = false; $this->XML_Parser(); $this->_resetVars(); if( $isFile === true ) { $this->setInputFile($document); $result = $this->parse(); } else { $result = $this->parseString($document); } if ($this->isError($result)) { return $result; }
return $this->_struct; } /** * Start element handler for XML parser * * @access protected * @param object $parser XML parser object * @param string $element XML element * @param array $attribs attributes of XML tag * @return void */ function startHandler($parser, $element, $attribs) { $struct = array( "type" => XML_BEAUTIFIER_ELEMENT, "tagname" => $element, "attribs" => $attribs, "contains" => XML_BEAUTIFIER_EMPTY, "depth" => $this->_depth++, "children" => array() );
array_push($this->_struct,$struct); }
/** * End element handler for XML parser * * @access protected * @param object XML parser object * @param string * @return void */ function endHandler($parser, $element) { $struct = array_pop($this->_struct); if ($struct["depth"] > 0) { $parent = array_pop($this->_struct); array_push($parent["children"], $struct); $parent["contains"] = $parent["contains"] | XML_BEAUTIFIER_ELEMENT; array_push($this->_struct, $parent); } else { array_push($this->_struct, $struct); } $this->_depth--; }
/** * Handler for character data * * @access protected * @param object XML parser object * @param string CDATA * @return void */ function cdataHandler($parser, $cdata) { if ((string)$cdata === '') { return true; }
$struct = array( "type" => XML_BEAUTIFIER_CDATA, "data" => $cdata, "depth" => $this->_depth );
$this->_appendToParent($struct); }
/** * Handler for processing instructions * * @access protected * @param object XML parser object * @param string target * @param string data * @return void */ function piHandler($parser, $target, $data) { $struct = array( "type" => XML_BEAUTIFIER_PI, "target" => $target, "data" => $data, "depth" => $this->_depth );
$this->_appendToParent($struct); } /** * Handler for external entities * * @access protected * @param object XML parser object * @param string target * @param string data * @return void */ function entityrefHandler($parser, $open_entity_names, $base, $system_id, $public_id) { $struct = array( "type" => XML_BEAUTIFIER_ENTITY, "name" => $open_entity_names, "depth" => $this->_depth );
$this->_appendToParent($struct); return true; }
/** * Handler for all other stuff * * @access protected * @param object XML parser object * @param string data * @return void */ function defaultHandler($parser, $data) { switch ($this->_mode) { case "xml": $this->_handleXMLDefault($data); break; case "doctype": $this->_handleDoctype($data); break; } }
/** * handler for all data inside the doctype declaration * * @access private * @param string data * @todo improve doctype parsing to split the declaration into seperate tokens */ function _handleDoctype($data) { if (eregi(">", $data)) { $last = $this->_getLastToken(); if ($last["data"] == "]" ) { $this->_mode = "xml"; } }
$struct = array( "type" => XML_BEAUTIFIER_DT_DECLARATION, "data" => $data, "depth" => $this->_depth ); $this->_appendToParent($struct); } /** * handler for all default XML data * * @access private * @param string data */ function _handleXMLDefault($data) { /* * handle comment */ if (strncmp("<!--", $data, 4) == 0) { $regs = array(); eregi("<!--(.+)-->", $data, $regs); $comment = trim($regs[1]); $struct = array( "type" => XML_BEAUTIFIER_COMMENT, "data" => $comment, "depth" => $this->_depth ); /* * handle XML declaration */ } elseif (strncmp("<?", $data, 2) == 0) { preg_match_all('/([a-zA-Z_]+)="((?:\\\.|[^"\\\])*)"/', $data, $match); $cnt = count($match[1]); $attribs = array(); for ($i = 0; $i < $cnt; $i++) { $attribs[$match[1][$i]] = $match[2][$i]; }
if (!isset($attribs["version"])) { $attribs["version"] = "1.0"; } if (!isset($attribs["encoding"])) { $attribs["encoding"] = "UTF-8"; } if (!isset($attribs["standalone"])) { $attribs["standalone"] = true; } $struct = array( "type" => XML_BEAUTIFIER_XML_DECLARATION, "version" => $attribs["version"], "encoding" => $attribs["encoding"], "standalone" => $attribs["standalone"], "depth" => $this->_depth ); } elseif (eregi("^<!DOCTYPE", $data)) { $this->_mode = "doctype"; $struct = array( "type" => XML_BEAUTIFIER_DT_DECLARATION, "data" => $data, "depth" => $this->_depth ); } else { /* * handle all other data */ $struct = array( "type" => XML_BEAUTIFIER_DEFAULT, "data" => $data, "depth" => $this->_depth ); } $this->_appendToParent($struct); return true; } /** * append a struct to the last struct on the stack * * @access private * @param array $struct structure to append */ function _appendToParent($struct) { if ($this->_depth > 0) { $parent = array_pop($this->_struct); array_push($parent["children"], $struct); $parent["contains"] = $parent["contains"] | $struct["type"]; array_push($this->_struct, $parent); return true; } array_push($this->_struct, $struct); }
/** * get the last token * * @access private * @return array */ function _getLastToken() { $parent = array_pop($this->_struct); if (isset($parent["children"]) && is_array($parent["children"])) { $last = array_pop($parent["children"]); array_push($parent["children"], $last); } else { $last = $parent; } array_push($this->_struct, $parent); return $last; } /** * reset all used object properties * * This method is called before parsing a new document * * @access private */ function _resetVars() { $this->_depth = 0; $this->_struct = array(); $this->_mode = "xml"; } } ?>
|