Quote:
Originally Posted by lucassus
You could write a text analyzer that replaces non-standard characters with their equivalents. For instance, you can replace
'ą' with 'a', or use a more complex mapping such as 'ą' to 'xxxaxxx', and reverse it during search.
|
Code:
/**
 * Converts Polish text to a pure-ASCII form that can be indexed and searched,
 * and converts that form back again.
 *
 * Each Polish letter listed in $_find is written as a marker of the form
 * 'xxx' . <ASCII letter> . 'xxx' (for example 'ą' becomes 'xxxaxxx').
 *
 * Caveat: ordinary text that already contains such a marker sequence cannot be
 * told apart from an encoded letter and will be changed by unsimplify().
 */
class Lucene_Helper {
    /**
     * Polish letters handled by this helper: lowercase first, then uppercase.
     *
     * @var array
     */
    protected $_find = array('ą','ż','ś','ź','ę','ć','ń','ó','ł','Ą','Ż','Ś','Ź','Ę','Ć','Ń','Ó','Ł');

    /**
     * ASCII stand-ins, position-aligned with $_find.
     *
     * Note that 'ź'/'Ź' are paired with 'x'/'X' rather than 'z'/'Z', which keeps
     * their markers distinct from the markers of 'ż'/'Ż'.
     *
     * @var array
     */
    protected $_replace = array('a','z','s','x','e','c','n','o','l','A','Z','S','X','E','C','N','O','L');

    /**
     * Replaces every letter from $_find with its marker and then converts the
     * rest of the string to ASCII.
     *
     * @param string $string UTF-8 text
     * @return string ASCII-only text
     */
    public function simplify($string) {
        // Array-form str_replace applies the pairs one after another, in order.
        $string = str_replace($this->_find, $this->_markers(), $string);

        // Whatever non-ASCII characters are left are transliterated by iconv;
        // the exact output for them depends on the iconv implementation/locale.
        $ascii = iconv('UTF-8', 'ASCII//TRANSLIT', $string);
        if ($ascii === false) {
            // iconv gave up (e.g. malformed UTF-8). Still honour the documented
            // string return type: swap each remaining non-ASCII byte for '?'.
            $ascii = preg_replace('/[^\x00-\x7F]/', '?', $string);
        }
        return $ascii;
    }

    /**
     * Turns the markers produced by simplify() back into Polish letters.
     *
     * No charset conversion is done here: simplified text is ASCII, which is
     * already valid UTF-8, and any other characters are passed through as-is.
     *
     * @param string $string text containing markers
     * @return string UTF-8 text with the markers replaced
     */
    public function unsimplify($string) {
        return str_replace($this->_markers(), $this->_find, $string);
    }

    /**
     * Builds the marker for every entry of $_replace, in the same order.
     *
     * @return array
     */
    private function _markers() {
        $markers = array();
        foreach ($this->_replace as $key => $letter) {
            $markers[$key] = 'xxx' . $letter . 'xxx';
        }
        return $markers;
    }
}
During indexing:
Code:
// Index the simplified (ASCII) form of each text field as an UnStored field.
$luceneHelper = new Lucene_Helper();
foreach (array('subject', 'body') as $fieldName) {
    $simplified = $luceneHelper->simplify($this->$fieldName);
    $doc->addField(Zend_Search_Lucene_Field::UnStored($fieldName, $simplified));
}
During search:
Code:
// Run the user's query through the same simplify() step used for the indexed
// fields, so the query terms and the indexed terms are written the same way.
$luceneHelper = new Lucene_Helper();
$queryStr = $luceneHelper->simplify('out query with zażółć gęsią jaźń ;)');
$query = Zend_Search_Lucene_Search_QueryParser::parse($queryStr);
In the view you can highlight matches:
Code:
// Highlighting runs on the simplified text, because the query was parsed from
// simplified input. The result is then passed through unsimplify() so the
// marker sequences are shown as Polish letters again.
$luceneHelper = new Lucene_Helper();
$post = $postDAO->find($post_id)->current();
$highlightedBody = $luceneHelper->unsimplify(
    $this->query->highlightMatches($luceneHelper->simplify($post->body))
);
$highlightedSubject = $luceneHelper->unsimplify(
    $this->query->highlightMatches($luceneHelper->simplify($post->subject))
);
I hope this helps.