Viewing file: Term.php (3.65 KB) -rw-rw-rw- Select action/file-type: (+) | (+) | (+) | Code (+) | Session (+) | (+) | SDB (+) | (+) | (+) | (+) | (+) | (+) |
<?php /** * Zend Framework * * LICENSE * * This source file is subject to the new BSD license that is bundled * with this package in the file LICENSE.txt. * It is also available through the world-wide-web at this URL: * http://framework.zend.com/license/new-bsd * If you did not receive a copy of the license and are unable to * obtain it through the world-wide-web, please send an email * to license@zend.com so we can send you a copy immediately. * * @category Zend * @package Zend_Search_Lucene * @subpackage Index * @copyright Copyright (c) 2005-2009 Zend Technologies USA Inc. (http://www.zend.com) * @license http://framework.zend.com/license/new-bsd New BSD License * @version $Id: Term.php 16541 2009-07-07 06:59:03Z bkarwin $ */
/**
 * A Term represents a word from text. This is the unit of search. It is
 * composed of two elements, the text of the word, as a string, and the name of
 * the field that the text occurred in, an interned string.
 *
 * Note that terms may represent more than words from text fields, but also
 * things like dates, email addresses, urls, etc.
 *
 * @category   Zend
 * @package    Zend_Search_Lucene
 * @subpackage Index
 * @copyright  Copyright (c) 2005-2009 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 */
class Zend_Search_Lucene_Index_Term
{
    /**
     * Field name or field number (depending on context)
     *
     * @var mixed
     */
    public $field;

    /**
     * Term value
     *
     * @var string
     */
    public $text;

    /**
     * Object constructor
     *
     * @param string $text  Term value
     * @param mixed  $field Field name or number; when null, the value of
     *                      Zend_Search_Lucene::getDefaultSearchField() is used
     */
    public function __construct($text, $field = null)
    {
        $this->field = ($field === null)? Zend_Search_Lucene::getDefaultSearchField() : $field;
        $this->text  = $text;
    }

    /**
     * Returns term key
     *
     * The key is the field, a NUL byte and the term text concatenated.
     *
     * @return string
     */
    public function key()
    {
        return $this->field . chr(0) . $this->text;
    }

    /**
     * Get term prefix
     *
     * Returns the leading part of $str that holds at most $length characters.
     * The byte width of each character is derived from its first byte only;
     * the following bytes are not validated. Scanning stops in front of a
     * character whose derived width would run past the end of the string.
     *
     * @param string  $str
     * @param integer $length Maximum number of characters in the prefix
     * @return string
     */
    public static function getPrefix($str, $length)
    {
        // The byte length never changes inside the loop, so compute it once.
        $strBytes    = strlen($str);
        $prefixBytes = 0;
        $prefixChars = 0;

        while ($prefixBytes < $strBytes  &&  $prefixChars < $length) {
            // Width is decided by the lead byte:
            // 11110xxx and above -> 4, 1110xxxx -> 3, 110xxxxx -> 2, otherwise 1.
            $leadByte = ord($str[$prefixBytes]);
            if ($leadByte >= 0xF0) {
                $charBytes = 4;
            } else if ($leadByte >= 0xE0) {
                $charBytes = 3;
            } else if ($leadByte >= 0xC0) {
                $charBytes = 2;
            } else {
                $charBytes = 1;
            }

            if ($prefixBytes + $charBytes > $strBytes) {
                // wrong character: the sequence is cut off by the end of the string
                break;
            }

            $prefixChars++;
            $prefixBytes += $charBytes;
        }

        return substr($str, 0, $prefixBytes);
    }

    /**
     * Get UTF-8 string length
     *
     * Counts characters using the same lead-byte width rule as getPrefix().
     * A trailing character whose derived width would run past the end of the
     * string is not counted.
     *
     * @param string $str
     * @return integer Number of characters
     */
    public static function getLength($str)
    {
        // The byte length never changes inside the loop, so compute it once.
        $strBytes = strlen($str);
        $bytes    = 0;
        $chars    = 0;

        while ($bytes < $strBytes) {
            // Width is decided by the lead byte:
            // 11110xxx and above -> 4, 1110xxxx -> 3, 110xxxxx -> 2, otherwise 1.
            $leadByte = ord($str[$bytes]);
            if ($leadByte >= 0xF0) {
                $charBytes = 4;
            } else if ($leadByte >= 0xE0) {
                $charBytes = 3;
            } else if ($leadByte >= 0xC0) {
                $charBytes = 2;
            } else {
                $charBytes = 1;
            }

            if ($bytes + $charBytes > $strBytes) {
                // wrong character: the sequence is cut off by the end of the string
                break;
            }

            $chars++;
            $bytes += $charBytes;
        }

        return $chars;
    }
}
|