Viewing file: StringParser.php (22.1 KB) -rw-rw-rw- Select action/file-type: (+) | (+) | (+) | Code (+) | Session (+) | (+) | SDB (+) | (+) | (+) | (+) | (+) | (+) |
<?php /** * Zend Framework * * LICENSE * * This source file is subject to the new BSD license that is bundled * with this package in the file LICENSE.txt. * It is also available through the world-wide-web at this URL: * http://framework.zend.com/license/new-bsd * If you did not receive a copy of the license and are unable to * obtain it through the world-wide-web, please send an email * to license@zend.com so we can send you a copy immediately. * * @category Zend * @package Zend_Pdf * @copyright Copyright (c) 2005-2009 Zend Technologies USA Inc. (http://www.zend.com) * @license http://framework.zend.com/license/new-bsd New BSD License * @version $Id: StringParser.php 18985 2009-11-14 18:51:34Z alexander $ */
/** Internally used classes */ require_once 'Zend/Pdf/Element/Array.php'; require_once 'Zend/Pdf/Element/String/Binary.php'; require_once 'Zend/Pdf/Element/Boolean.php'; require_once 'Zend/Pdf/Element/Dictionary.php'; require_once 'Zend/Pdf/Element/Name.php'; require_once 'Zend/Pdf/Element/Null.php'; require_once 'Zend/Pdf/Element/Numeric.php'; require_once 'Zend/Pdf/Element/Object.php'; require_once 'Zend/Pdf/Element/Object/Stream.php'; require_once 'Zend/Pdf/Element/Reference.php'; require_once 'Zend/Pdf/Element/String.php';
/**
 * PDF string parser
 *
 * Splits a PDF source string into lexemes and builds Zend_Pdf_Element objects
 * from them. The parser keeps a cursor ($offset) into the source ($data); every
 * read/skip method advances that cursor.
 *
 * @package    Zend_Pdf
 * @copyright  Copyright (c) 2005-2009 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 */
class Zend_Pdf_StringParser
{
    /**
     * Source PDF
     *
     * @var string
     */
    public $data = '';

    /**
     * Current position in a data
     *
     * @var integer
     */
    public $offset = 0;

    /**
     * Current reference context
     *
     * @var Zend_Pdf_Element_Reference_Context
     */
    private $_context = null;

    /**
     * Array of elements of the currently parsed object/trailer
     *
     * @var array
     */
    private $_elements = array();

    /**
     * PDF objects factory.
     *
     * @var Zend_Pdf_ElementFactory_Interface
     */
    private $_objFactory = null;


    /**
     * Clean up resources.
     *
     * Clear current state to remove cyclic object references
     */
    public function cleanUp()
    {
        $this->_context    = null;
        $this->_elements   = array();
        $this->_objFactory = null;
    }

    /**
     * Character with code $chCode is white space
     *
     * @param integer $chCode
     * @return boolean
     */
    public static function isWhiteSpace($chCode)
    {
        return $chCode == 0x00    // null character
            || $chCode == 0x09    // Tab
            || $chCode == 0x0A    // Line feed
            || $chCode == 0x0C    // Form Feed
            || $chCode == 0x0D    // Carriage return
            || $chCode == 0x20;   // Space
    }

    /**
     * Character with code $chCode is a delimiter character
     *
     * @param integer $chCode
     * @return boolean
     */
    public static function isDelimiter($chCode)
    {
        return $chCode == 0x28    // '('
            || $chCode == 0x29    // ')'
            || $chCode == 0x3C    // '<'
            || $chCode == 0x3E    // '>'
            || $chCode == 0x5B    // '['
            || $chCode == 0x5D    // ']'
            || $chCode == 0x7B    // '{'
            || $chCode == 0x7D    // '}'
            || $chCode == 0x2F    // '/'
            || $chCode == 0x25;   // '%'
    }

    /**
     * Skip white space
     *
     * Advances the offset past white space characters. When $skipComment is
     * true, '%' comments (up to, but not including, the next CR or LF) are
     * skipped as well, together with any white space that follows them.
     *
     * @param boolean $skipComment
     */
    public function skipWhiteSpace($skipComment = true)
    {
        $length = strlen($this->data);

        while (true) {
            $this->offset += strspn($this->data, "\x00\t\n\f\r ", $this->offset);

            if (!$skipComment
                || $this->offset >= $length
                || $this->data[$this->offset] != '%'
            ) {
                // Either comments are not skipped, the end of data is reached,
                // or a non white space character not equal to '%' is found
                return;
            }

            // Skip comment body; the line terminator is consumed as white
            // space on the next iteration
            $this->offset += strcspn($this->data, "\r\n", $this->offset);
        }
    }

    /**
     * Skip comment
     *
     * Advances the offset to the next line feed or carriage return (which is
     * not consumed), or to the end of data if there is none.
     */
    public function skipComment()
    {
        // The previous implementation tested "!= LF || != CR", which is always
        // true, and therefore always ran to the end of data.
        $this->offset += strcspn($this->data, "\r\n", $this->offset);
    }

    /**
     * Read comment line
     *
     * Skips leading white space (but not comments). If the next character is
     * '%', returns everything from it up to, but not including, the next
     * CR or LF. Otherwise returns an empty string.
     *
     * @return string
     */
    public function readComment()
    {
        $this->skipWhiteSpace(false);

        /** Check if it's a comment line (and that there is anything left to read) */
        if ($this->offset >= strlen($this->data) || $this->data[$this->offset] != '%') {
            return '';
        }

        $start = $this->offset;
        $this->offset += strcspn($this->data, "\r\n", $this->offset);

        return substr($this->data, $start, $this->offset - $start);
    }

    /**
     * Returns next lexeme from a pdf stream
     *
     * White space and comments preceding the lexeme are skipped. A lexeme is
     * either '<<', '>>', a single delimiter character, or a run of characters
     * up to the next delimiter or white space. An empty string is returned
     * when the end of data is reached.
     *
     * @return string
     */
    public function readLexeme()
    {
        // Inlined equivalent of $this->skipWhiteSpace()
        while (true) {
            $this->offset += strspn($this->data, "\x00\t\n\f\r ", $this->offset);

            if ($this->offset < strlen($this->data) && $this->data[$this->offset] == '%') {
                $this->offset += strcspn($this->data, "\r\n", $this->offset);
            } else {
                break;
            }
        }

        if ($this->offset >= strlen($this->data)) {
            return '';
        }

        if (strpos('()<>[]{}/%', $this->data[$this->offset]) !== false) {
            // Delimiter: '<<' and '>>' are two-character lexemes,
            // every other delimiter is a lexeme on its own
            $pair = substr($this->data, $this->offset, 2);
            if ($pair === '<<' || $pair === '>>') {
                $this->offset += 2;
                return $pair;
            }

            return $this->data[$this->offset++];
        }

        $start = $this->offset;
        $this->offset += strcspn($this->data, "()<>[]{}/%\x00\t\n\f\r ", $this->offset);

        return substr($this->data, $start, $this->offset - $start);
    }

    /**
     * Read elemental object from a PDF stream
     *
     * Every element created here is also appended to the internal list of
     * elements of the currently parsed object.
     *
     * Note: readElement() method is a public method and could be invoked from other classes.
     * If readElement() is used not by Zend_Pdf_StringParser::getObject() method, then we should not care
     * about _elements member management.
     *
     * @param string|null $nextLexeme  Lexeme that has already been read, or null to read the next one
     * @return Zend_Pdf_Element
     * @throws Zend_Pdf_Exception
     */
    public function readElement($nextLexeme = null)
    {
        if ($nextLexeme === null) {
            $nextLexeme = $this->readLexeme();
        }

        switch ($nextLexeme) {
            case '(':
                return ($this->_elements[] = $this->_readString());

            case '<':
                return ($this->_elements[] = $this->_readBinaryString());

            case '/':
                return ($this->_elements[] = new Zend_Pdf_Element_Name(
                                                Zend_Pdf_Element_Name::unescape( $this->readLexeme() )
                                                                      ));

            case '[':
                return ($this->_elements[] = $this->_readArray());

            case '<<':
                return ($this->_elements[] = $this->_readDictionary());

            case ')':
                // fall through to next case
            case '>':
                // fall through to next case
            case ']':
                // fall through to next case
            case '>>':
                // fall through to next case
            case '{':
                // fall through to next case
            case '}':
                require_once 'Zend/Pdf/Exception.php';
                throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X.',
                                                $this->offset));

            default:
                if (strcasecmp($nextLexeme, 'true') == 0) {
                    return ($this->_elements[] = new Zend_Pdf_Element_Boolean(true));
                } else if (strcasecmp($nextLexeme, 'false') == 0) {
                    return ($this->_elements[] = new Zend_Pdf_Element_Boolean(false));
                } else if (strcasecmp($nextLexeme, 'null') == 0) {
                    return ($this->_elements[] = new Zend_Pdf_Element_Null());
                }

                // "<digits> <digits> R" is a reference, anything else is
                // handed over to the numeric element
                $ref = $this->_readReference($nextLexeme);
                if ($ref !== null) {
                    return ($this->_elements[] = $ref);
                }

                return ($this->_elements[] = $this->_readNumeric($nextLexeme));
        }
    }

    /**
     * Read string PDF object
     * Also reads trailing ')' from a pdf stream
     *
     * The opening '(' must already have been consumed. Nested parentheses are
     * balanced and the character following a backslash is never treated as a
     * parenthesis.
     *
     * @return Zend_Pdf_Element_String
     * @throws Zend_Pdf_Exception
     */
    private function _readString()
    {
        $start          = $this->offset;
        $openedBrackets = 1;

        $this->offset += strcspn($this->data, '()\\', $this->offset);

        while ($this->offset < strlen($this->data)) {
            switch (ord( $this->data[$this->offset] )) {
                case 0x28: // '(' - opened bracket in the string, needs balanced pair.
                    $this->offset++;
                    $openedBrackets++;
                    break;

                case 0x29: // ')' - pair to the opened bracket
                    $this->offset++;
                    $openedBrackets--;
                    break;

                case 0x5C: // '\\' - escape sequence, skip next char from a check
                    $this->offset += 2;
                    break;
            }

            if ($openedBrackets == 0) {
                break; // end of string
            }

            $this->offset += strcspn($this->data, '()\\', $this->offset);
        }

        if ($openedBrackets != 0) {
            require_once 'Zend/Pdf/Exception.php';
            throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Unexpected end of file while string reading. Offset - 0x%X. \')\' expected.', $start));
        }

        // The closing ')' has been consumed, but is not a part of the string
        return new Zend_Pdf_Element_String(Zend_Pdf_Element_String::unescape( substr($this->data,
                                                                                 $start,
                                                                                 $this->offset - $start - 1) ));
    }

    /**
     * Read binary string PDF object
     * Also reads trailing '>' from a pdf stream
     *
     * The opening '<' must already have been consumed.
     *
     * @return Zend_Pdf_Element_String_Binary
     * @throws Zend_Pdf_Exception
     */
    private function _readBinaryString()
    {
        $start = $this->offset;

        $this->offset += strspn($this->data, "\x00\t\n\f\r 0123456789abcdefABCDEF", $this->offset);

        // End of data is reached only if there is no character left at all.
        // (Comparing against strlen() - 1 rejected a valid string whose
        // closing '>' is the very last character of the data.)
        if ($this->offset >= strlen($this->data)) {
            require_once 'Zend/Pdf/Exception.php';
            throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Unexpected end of file while reading binary string. Offset - 0x%X. \'>\' expected.', $start));
        }

        if ($this->data[$this->offset++] != '>') {
            require_once 'Zend/Pdf/Exception.php';
            throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Unexpected character while binary string reading. Offset - 0x%X.', $this->offset));
        }

        // The closing '>' has been consumed, but is not a part of the string
        return new Zend_Pdf_Element_String_Binary(
                       Zend_Pdf_Element_String_Binary::unescape( substr($this->data,
                                                                        $start,
                                                                        $this->offset - $start - 1) ));
    }

    /**
     * Read array PDF object
     * Also reads trailing ']' from a pdf stream
     *
     * The opening '[' must already have been consumed.
     *
     * @return Zend_Pdf_Element_Array
     * @throws Zend_Pdf_Exception
     */
    private function _readArray()
    {
        $elements = array();

        while ( strlen($nextLexeme = $this->readLexeme()) != 0 ) {
            if ($nextLexeme != ']') {
                $elements[] = $this->readElement($nextLexeme);
            } else {
                return new Zend_Pdf_Element_Array($elements);
            }
        }

        // An empty lexeme means the end of data was reached before ']'
        require_once 'Zend/Pdf/Exception.php';
        throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Unexpected end of file while array reading. Offset - 0x%X. \']\' expected.', $this->offset));
    }

    /**
     * Read dictionary PDF object
     * Also reads trailing '>>' from a pdf stream
     *
     * The opening '<<' must already have been consumed. Entries are read as
     * name/value pairs; the key must be a name object.
     *
     * @return Zend_Pdf_Element_Dictionary
     * @throws Zend_Pdf_Exception
     */
    private function _readDictionary()
    {
        $dictionary = new Zend_Pdf_Element_Dictionary();

        while ( strlen($nextLexeme = $this->readLexeme()) != 0 ) {
            if ($nextLexeme != '>>') {
                // Remember where the key starts for the error message
                $nameStart = $this->offset - strlen($nextLexeme);

                $name  = $this->readElement($nextLexeme);
                $value = $this->readElement();

                if (!$name instanceof Zend_Pdf_Element_Name) {
                    require_once 'Zend/Pdf/Exception.php';
                    throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Name object expected while dictionary reading. Offset - 0x%X.', $nameStart));
                }

                $dictionary->add($name, $value);
            } else {
                return $dictionary;
            }
        }

        // An empty lexeme means the end of data was reached before '>>'
        require_once 'Zend/Pdf/Exception.php';
        throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Unexpected end of file while dictionary reading. Offset - 0x%X. \'>>\' expected.', $this->offset));
    }

    /**
     * Read reference PDF object
     *
     * Tries to read "<object number> <generation number> R". If the input
     * does not match, the offset is reset to its value at the time of the
     * call and null is returned.
     *
     * @param string|null $nextLexeme  Lexeme that has already been read, or null to read the next one
     * @return Zend_Pdf_Element_Reference|null
     */
    private function _readReference($nextLexeme = null)
    {
        $start = $this->offset;

        if ($nextLexeme === null) {
            $objNum = $this->readLexeme();
        } else {
            $objNum = $nextLexeme;
        }
        if (!ctype_digit($objNum)) { // it's not a reference
            $this->offset = $start;
            return null;
        }

        $genNum = $this->readLexeme();
        if (!ctype_digit($genNum)) { // it's not a reference
            $this->offset = $start;
            return null;
        }

        $rMark = $this->readLexeme();
        if ($rMark != 'R') { // it's not a reference
            $this->offset = $start;
            return null;
        }

        return new Zend_Pdf_Element_Reference((int)$objNum, (int)$genNum, $this->_context, $this->_objFactory->resolve());
    }

    /**
     * Read numeric PDF object
     *
     * @param string|null $nextLexeme  Lexeme that has already been read, or null to read the next one
     * @return Zend_Pdf_Element_Numeric
     */
    private function _readNumeric($nextLexeme = null)
    {
        if ($nextLexeme === null) {
            $nextLexeme = $this->readLexeme();
        }

        return new Zend_Pdf_Element_Numeric($nextLexeme);
    }

    /**
     * Read indirect object from a PDF stream
     *
     * Expects "<object number> <generation number> obj <value>" at $offset,
     * followed either by "endobj" or by a stream section. On success the
     * parser offset is restored to the value it had before the call.
     *
     * @param integer $offset
     * @param Zend_Pdf_Element_Reference_Context $context
     * @return Zend_Pdf_Element_Object
     * @throws Zend_Pdf_Exception
     */
    public function getObject($offset, Zend_Pdf_Element_Reference_Context $context)
    {
        if ($offset === null) {
            return new Zend_Pdf_Element_Null();
        }

        // Save current offset to make getObject() reentrant
        $offsetSave = $this->offset;

        $this->offset    = $offset;
        $this->_context  = $context;
        $this->_elements = array();

        $objNum = $this->readLexeme();
        if (!ctype_digit($objNum)) {
            require_once 'Zend/Pdf/Exception.php';
            throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. Object number expected.', $this->offset - strlen($objNum)));
        }

        $genNum = $this->readLexeme();
        if (!ctype_digit($genNum)) {
            require_once 'Zend/Pdf/Exception.php';
            throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. Object generation number expected.', $this->offset - strlen($genNum)));
        }

        $objKeyword = $this->readLexeme();
        if ($objKeyword != 'obj') {
            require_once 'Zend/Pdf/Exception.php';
            throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. \'obj\' keyword expected.', $this->offset - strlen($objKeyword)));
        }

        $objValue = $this->readElement();

        $nextLexeme = $this->readLexeme();

        if ($nextLexeme == 'endobj') {
            /**
             * Object is not generated by factory (thus it's not marked as modified object).
             * But factory is assigned to the object.
             */
            $obj = new Zend_Pdf_Element_Object($objValue, (int)$objNum, (int)$genNum, $this->_objFactory->resolve());
        } else {
            /**
             * It's a stream object
             */
            $obj = $this->_readStreamObject($objValue, $nextLexeme, (int)$objNum, (int)$genNum);
        }

        // Every element collected while parsing belongs to the new object
        foreach ($this->_elements as $element) {
            $element->setParentObject($obj);
        }

        // Restore offset value
        $this->offset = $offsetSave;

        return $obj;
    }

    /**
     * Read the stream part of an indirect object
     *
     * Called by getObject() after the object value has been read and the
     * following lexeme turned out not to be 'endobj'. Reads
     * "stream<EOL><data>endstream endobj", where the amount of data is taken
     * from the 'Length' entry of the stream dictionary.
     *
     * @param Zend_Pdf_Element $objValue  Value read after the 'obj' keyword
     * @param string $nextLexeme          Lexeme read after the value
     * @param integer $objNum
     * @param integer $genNum
     * @return Zend_Pdf_Element_Object_Stream
     * @throws Zend_Pdf_Exception
     */
    private function _readStreamObject($objValue, $nextLexeme, $objNum, $genNum)
    {
        if ($nextLexeme != 'stream') {
            require_once 'Zend/Pdf/Exception.php';
            throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. \'endobj\' or \'stream\' keywords expected.', $this->offset - strlen($nextLexeme)));
        }

        if (!$objValue instanceof Zend_Pdf_Element_Dictionary) {
            require_once 'Zend/Pdf/Exception.php';
            throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. Stream extent must be preceded by stream dictionary.', $this->offset - strlen($nextLexeme)));
        }

        /**
         * References are automatically dereferenced at this moment.
         */
        $streamLength = $objValue->Length->value;

        /**
         * 'stream' keyword must be followed by either cr-lf sequence or lf character only.
         * This restriction gives the possibility to recognize all cases exactly
         *
         * substr() is used instead of direct indexing, so a 'stream' keyword at the
         * very end of data is reported as a syntax error without reading beyond
         * the end of the string.
         */
        if (substr($this->data, $this->offset, 2) === "\r\n") {
            $this->offset += 2;
        } else if (substr($this->data, $this->offset, 1) === "\n") {
            $this->offset++;
        } else {
            require_once 'Zend/Pdf/Exception.php';
            throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. \'stream\' must be followed by either cr-lf sequence or lf character only.', $this->offset - strlen($nextLexeme)));
        }

        $dataOffset = $this->offset;

        // Jump over the stream data; it is not tokenized
        $this->offset += $streamLength;

        $nextLexeme = $this->readLexeme();
        if ($nextLexeme != 'endstream') {
            require_once 'Zend/Pdf/Exception.php';
            throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. \'endstream\' keyword expected.', $this->offset - strlen($nextLexeme)));
        }

        $nextLexeme = $this->readLexeme();
        if ($nextLexeme != 'endobj') {
            require_once 'Zend/Pdf/Exception.php';
            throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. \'endobj\' keyword expected.', $this->offset - strlen($nextLexeme)));
        }

        return new Zend_Pdf_Element_Object_Stream(substr($this->data,
                                                         $dataOffset,
                                                         $streamLength),
                                                  $objNum,
                                                  $genNum,
                                                  $this->_objFactory->resolve(),
                                                  $objValue);
    }

    /**
     * Get length of source string
     *
     * @return integer
     */
    public function getLength()
    {
        return strlen($this->data);
    }

    /**
     * Get source string
     *
     * @return string
     */
    public function getString()
    {
        return $this->data;
    }

    /**
     * Parse integer value from a binary stream
     *
     * Reads $size consecutive bytes starting at $offset and combines them
     * into one integer, most significant byte first.
     *
     * @param string  $stream
     * @param integer $offset
     * @param integer $size
     * @return integer
     */
    public static function parseIntFromStream($stream, $offset, $size)
    {
        $value = 0;
        for ($count = 0; $count < $size; $count++) {
            $value *= 256;
            $value += ord($stream[$offset + $count]);
        }

        return $value;
    }

    /**
     * Set current context
     *
     * @param Zend_Pdf_Element_Reference_Context $context
     */
    public function setContext(Zend_Pdf_Element_Reference_Context $context)
    {
        $this->_context = $context;
    }

    /**
     * Object constructor
     *
     * Note: PHP duplicates string, which is sent by value, only if it's updated.
     * Thus we don't need to care about overhead
     *
     * @param string $source
     * @param Zend_Pdf_ElementFactory_Interface $factory
     */
    public function __construct($source, Zend_Pdf_ElementFactory_Interface $factory)
    {
        $this->data        = $source;
        $this->_objFactory = $factory;
    }
}
|