Viewing file: Analyzer.php (5.26 KB) -rw-rw-rw- Select action/file-type: (+) | (+) | (+) | Code (+) | Session (+) | (+) | SDB (+) | (+) | (+) | (+) | (+) | (+) |
<?php /** * Zend Framework * * LICENSE * * This source file is subject to the new BSD license that is bundled * with this package in the file LICENSE.txt. * It is also available through the world-wide-web at this URL: * http://framework.zend.com/license/new-bsd * If you did not receive a copy of the license and are unable to * obtain it through the world-wide-web, please send an email * to license@zend.com so we can send you a copy immediately. * * @category Zend * @package Zend_Search_Lucene * @subpackage Analysis * @copyright Copyright (c) 2005-2009 Zend Technologies USA Inc. (http://www.zend.com) * @license http://framework.zend.com/license/new-bsd New BSD License * @version $Id: Analyzer.php 18947 2009-11-12 11:57:17Z alexander $ */
/** User land classes and interfaces turned on by Zend/Search/Analyzer.php file inclusion. */ /** @todo Section should be removed with ZF 2.0 release as obsolete */ if (!defined('ZEND_SEARCH_LUCENE_COMMON_ANALYZER_PROCESSED')) { /** Zend_Search_Lucene_Analysis_Analyzer_Common_Utf8 */ require_once 'Zend/Search/Lucene/Analysis/Analyzer/Common/Utf8.php';
/** Zend_Search_Lucene_Analysis_Analyzer_Common_Utf8_CaseInsensitive */ require_once 'Zend/Search/Lucene/Analysis/Analyzer/Common/Utf8/CaseInsensitive.php';
/** Zend_Search_Lucene_Analysis_Analyzer_Common_Utf8Num */ require_once 'Zend/Search/Lucene/Analysis/Analyzer/Common/Utf8Num.php';
/** Zend_Search_Lucene_Analysis_Analyzer_Common_Utf8Num_CaseInsensitive */ require_once 'Zend/Search/Lucene/Analysis/Analyzer/Common/Utf8Num/CaseInsensitive.php';
/** Zend_Search_Lucene_Analysis_Analyzer_Common_Text */ require_once 'Zend/Search/Lucene/Analysis/Analyzer/Common/Text.php';
/** Zend_Search_Lucene_Analysis_Analyzer_Common_Text_CaseInsensitive */ require_once 'Zend/Search/Lucene/Analysis/Analyzer/Common/Text/CaseInsensitive.php';
/** Zend_Search_Lucene_Analysis_Analyzer_Common_TextNum */ require_once 'Zend/Search/Lucene/Analysis/Analyzer/Common/TextNum.php';
/** Zend_Search_Lucene_Analysis_Analyzer_Common_TextNum_CaseInsensitive */ require_once 'Zend/Search/Lucene/Analysis/Analyzer/Common/TextNum/CaseInsensitive.php'; }
/**
 * Base class for text analyzers.
 *
 * An analyzer encapsulates the policy used to extract index terms from text.
 *
 * Note:
 * The Java implementation of Lucene is stream oriented, which lets it work
 * efficiently with huge documents (more than 20Mb). The engine itself is not
 * targeted at such documents, so the Zend_Search_Lucene analysis API operates
 * on plain strings and arrays instead.
 *
 * @category   Zend
 * @package    Zend_Search_Lucene
 * @subpackage Analysis
 * @copyright  Copyright (c) 2005-2009 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 */
abstract class Zend_Search_Lucene_Analysis_Analyzer
{
    /**
     * Default analyzer instance; created on demand by getDefault() when no
     * analyzer has been registered through setDefault().
     *
     * @var Zend_Search_Lucene_Analysis_Analyzer
     */
    private static $_defaultImpl;

    /**
     * Text currently being analyzed (null until setInput() is called).
     *
     * @var string
     */
    protected $_input = null;

    /**
     * Encoding of the text stored in $_input.
     *
     * @var string
     */
    protected $_encoding = '';

    /**
     * Split a text into terms.
     *
     * Loads the text via setInput() and then drains nextToken() until it
     * yields null, collecting every token in order.
     *
     * Tokens are returned in UTF-8 (internal Zend_Search_Lucene encoding).
     *
     * @param string $data     Text to tokenize
     * @param string $encoding Encoding of $data
     * @return array Array of Zend_Search_Lucene_Analysis_Token objects
     */
    public function tokenize($data, $encoding = '')
    {
        $this->setInput($data, $encoding);

        $tokens = array();
        for ($token = $this->nextToken(); $token !== null; $token = $this->nextToken()) {
            $tokens[] = $token;
        }

        return $tokens;
    }

    /**
     * Tokenization stream API: provide the input text.
     *
     * Stores the text and its encoding, then calls reset() so the concrete
     * analyzer can restart its token stream.
     *
     * @param string $data     Text to analyze
     * @param string $encoding Encoding of $data
     */
    public function setInput($data, $encoding = '')
    {
        $this->_encoding = $encoding;
        $this->_input    = $data;

        $this->reset();
    }

    /**
     * Tokenization stream API: reset the token stream.
     *
     * Called by setInput() after the input has been stored.
     */
    abstract public function reset();

    /**
     * Tokenization stream API: fetch the next token.
     *
     * Returns null once the end of the stream has been reached.
     *
     * Tokens are returned in UTF-8 (internal Zend_Search_Lucene encoding).
     *
     * @return Zend_Search_Lucene_Analysis_Token|null
     */
    abstract public function nextToken();

    /**
     * Register the analyzer that getDefault() will hand out.
     *
     * @param Zend_Search_Lucene_Analysis_Analyzer $analyzer
     */
    public static function setDefault(Zend_Search_Lucene_Analysis_Analyzer $analyzer)
    {
        self::$_defaultImpl = $analyzer;
    }

    /**
     * Return the default analyzer.
     *
     * When no analyzer has been registered yet, a
     * Zend_Search_Lucene_Analysis_Analyzer_Common_Text_CaseInsensitive
     * instance is created, remembered and returned.
     *
     * @return Zend_Search_Lucene_Analysis_Analyzer
     */
    public static function getDefault()
    {
        /** Zend_Search_Lucene_Analysis_Analyzer_Common_Text_CaseInsensitive */
        require_once 'Zend/Search/Lucene/Analysis/Analyzer/Common/Text/CaseInsensitive.php';

        // An analyzer is already registered: hand it out unchanged.
        if (self::$_defaultImpl instanceof self) {
            return self::$_defaultImpl;
        }

        self::$_defaultImpl = new Zend_Search_Lucene_Analysis_Analyzer_Common_Text_CaseInsensitive();

        return self::$_defaultImpl;
    }
}
|