Viewing file: QueryToken.php (7.3 KB) -rw-rw-rw- Select action/file-type: (+) | (+) | (+) | Code (+) | Session (+) | (+) | SDB (+) | (+) | (+) | (+) | (+) | (+) |
<?php /** * Zend Framework * * LICENSE * * This source file is subject to the new BSD license that is bundled * with this package in the file LICENSE.txt. * It is also available through the world-wide-web at this URL: * http://framework.zend.com/license/new-bsd * If you did not receive a copy of the license and are unable to * obtain it through the world-wide-web, please send an email * to license@zend.com so we can send you a copy immediately. * * @category Zend * @package Zend_Search_Lucene * @subpackage Search * @copyright Copyright (c) 2005-2009 Zend Technologies USA Inc. (http://www.zend.com) * @license http://framework.zend.com/license/new-bsd New BSD License * @version $Id: QueryToken.php 16541 2009-07-07 06:59:03Z bkarwin $ */
/**
 * A single lexeme of a search query string.
 *
 * The constructor receives a coarse token category (TC_* constant) plus the
 * raw token text and resolves them to a fine-grained token type (TT_* constant).
 *
 * @category   Zend
 * @package    Zend_Search_Lucene
 * @subpackage Search
 * @copyright  Copyright (c) 2005-2009 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 */
class Zend_Search_Lucene_Search_QueryToken
{
    /**
     * Token types.
     */
    const TT_WORD             = 0;  // Word
    const TT_PHRASE           = 1;  // Phrase (one or several quoted words)
    const TT_FIELD            = 2;  // Field name in 'field:word', field:<phrase> or field:(<subquery>) pairs
    const TT_FIELD_INDICATOR  = 3;  // ':'
    const TT_REQUIRED         = 4;  // '+'
    const TT_PROHIBITED       = 5;  // '-'
    const TT_FUZZY_PROX_MARK  = 6;  // '~'
    const TT_BOOSTING_MARK    = 7;  // '^'
    const TT_RANGE_INCL_START = 8;  // '['
    const TT_RANGE_INCL_END   = 9;  // ']'
    const TT_RANGE_EXCL_START = 10; // '{'
    const TT_RANGE_EXCL_END   = 11; // '}'
    const TT_SUBQUERY_START   = 12; // '('
    const TT_SUBQUERY_END     = 13; // ')'
    const TT_AND_LEXEME       = 14; // 'AND' or 'and' (also '&&')
    const TT_OR_LEXEME        = 15; // 'OR'  or 'or'  (also '||')
    const TT_NOT_LEXEME       = 16; // 'NOT' or 'not' (also '!')
    const TT_TO_LEXEME        = 17; // 'TO'  or 'to'
    const TT_NUMBER           = 18; // Number, like: 10, 0.8, .64, ....

    /**
     * Returns all possible lexeme types.
     * It's used for syntax analyzer state machine initialization
     *
     * @return array List of every TT_* constant value, in declaration order.
     */
    public static function getTypes()
    {
        return array(
            self::TT_WORD,
            self::TT_PHRASE,
            self::TT_FIELD,
            self::TT_FIELD_INDICATOR,
            self::TT_REQUIRED,
            self::TT_PROHIBITED,
            self::TT_FUZZY_PROX_MARK,
            self::TT_BOOSTING_MARK,
            self::TT_RANGE_INCL_START,
            self::TT_RANGE_INCL_END,
            self::TT_RANGE_EXCL_START,
            self::TT_RANGE_EXCL_END,
            self::TT_SUBQUERY_START,
            self::TT_SUBQUERY_END,
            self::TT_AND_LEXEME,
            self::TT_OR_LEXEME,
            self::TT_NOT_LEXEME,
            self::TT_TO_LEXEME,
            self::TT_NUMBER
        );
    }

    /**
     * Token categories (the coarse classification passed to the constructor).
     */
    const TC_WORD           = 0; // Word
    const TC_PHRASE         = 1; // Phrase (one or several quoted words)
    const TC_NUMBER         = 2; // Numbers, which are used with syntax elements. Ex. roam~0.8
    const TC_SYNTAX_ELEMENT = 3; // : + - ( ) [ ] { } ! || && ~ ^

    /**
     * Token type (one of the TT_* constants).
     *
     * @var integer
     */
    public $type;

    /**
     * Token text, stored exactly as passed to the constructor.
     *
     * @var string
     */
    public $text;

    /**
     * Token position within query (1-based).
     *
     * @var integer
     */
    public $position;

    /**
     * QueryToken constructor. Resolves a token category and its text
     * to a concrete token type.
     *
     * Words spelled 'and', 'or', 'not' or 'to' (in any letter case) become the
     * corresponding operator lexemes; every other word is a plain TT_WORD.
     *
     * @param integer $tokenCategory One of the TC_* constants.
     * @param string  $tokenText     Raw token text.
     * @param integer $position      Token position; the stored position is this value plus one.
     * @throws Zend_Search_Lucene_Exception If the category is unknown, or if a
     *                                      syntax element's text is not a recognized lexeme.
     */
    public function __construct($tokenCategory, $tokenText, $position)
    {
        $this->text     = $tokenText;
        $this->position = $position + 1; // Start from 1

        switch ($tokenCategory) {
            case self::TC_WORD:
                // Operator keywords are matched case-insensitively; lowercase once.
                switch (strtolower($tokenText)) {
                    case 'and':
                        $this->type = self::TT_AND_LEXEME;
                        break;

                    case 'or':
                        $this->type = self::TT_OR_LEXEME;
                        break;

                    case 'not':
                        $this->type = self::TT_NOT_LEXEME;
                        break;

                    case 'to':
                        $this->type = self::TT_TO_LEXEME;
                        break;

                    default:
                        $this->type = self::TT_WORD;
                }
                break;

            case self::TC_PHRASE:
                $this->type = self::TT_PHRASE;
                break;

            case self::TC_NUMBER:
                $this->type = self::TT_NUMBER;
                break;

            case self::TC_SYNTAX_ELEMENT:
                switch ($tokenText) {
                    case ':':
                        $this->type = self::TT_FIELD_INDICATOR;
                        break;

                    case '+':
                        $this->type = self::TT_REQUIRED;
                        break;

                    case '-':
                        $this->type = self::TT_PROHIBITED;
                        break;

                    case '~':
                        $this->type = self::TT_FUZZY_PROX_MARK;
                        break;

                    case '^':
                        $this->type = self::TT_BOOSTING_MARK;
                        break;

                    case '[':
                        $this->type = self::TT_RANGE_INCL_START;
                        break;

                    case ']':
                        $this->type = self::TT_RANGE_INCL_END;
                        break;

                    case '{':
                        $this->type = self::TT_RANGE_EXCL_START;
                        break;

                    case '}':
                        $this->type = self::TT_RANGE_EXCL_END;
                        break;

                    case '(':
                        $this->type = self::TT_SUBQUERY_START;
                        break;

                    case ')':
                        $this->type = self::TT_SUBQUERY_END;
                        break;

                    // Symbolic spellings of the boolean operators.
                    case '!':
                        $this->type = self::TT_NOT_LEXEME;
                        break;

                    case '&&':
                        $this->type = self::TT_AND_LEXEME;
                        break;

                    case '||':
                        $this->type = self::TT_OR_LEXEME;
                        break;

                    default:
                        // Exception class is loaded lazily, only on the error path.
                        require_once 'Zend/Search/Lucene/Exception.php';
                        throw new Zend_Search_Lucene_Exception('Unrecognized query syntax lexeme: \'' . $tokenText . '\'');
                }
                break;

            default:
                // Exception class is loaded lazily, only on the error path.
                require_once 'Zend/Search/Lucene/Exception.php';
                throw new Zend_Search_Lucene_Exception('Unrecognized lexeme type: \'' . $tokenCategory . '\'');
        }
    }
}
|