Sorry for the time it took me to answer. I rewrote many parts of my search method to try to find where the issue comes from.
Since my last post, I tried removing searchable fields from my index, moving the index onto a RAM disk, and halving my dataset. The bottleneck appears to be the find() method. With a 300 MB index, my search method eats up 1 GB of RAM and 99% of my CPU for 30 seconds.
Here is my code:
Code:
// Validate the calling consumer before doing any work.
$this->_checkConsumer();

// Set up the file-backed cache that keeps search hits between requests.
// Entries live for three days and are (un)serialized automatically, so
// plain PHP arrays can be stored and loaded directly.
$cacheLifetime = 3*(24*60*60); // three days, in seconds
$cache = Zend_Cache::factory(
    'Core',
    'File',
    array(
        'caching'                 => true,
        'cache_id_prefix'         => 'search_',
        'lifetime'                => $cacheLifetime,
        'automatic_serialization' => true,
        'ignore_user_abort'       => true,
    ),
    array(
        'cache_dir'              => '/tmp',
        'hashed_directory_level' => 1,
    )
);
// Read the pagination settings for this request.
// NOTE(review): the arguments (100, 1) are presumably the default results-per-page
// and default page number -- confirm against _getPagination().
$pagination = $this->_getPagination(100, 1);

// Never serve more than 100 results per page.
if ($pagination['rpp'] > 100) {
    $pagination['rpp'] = 100;
}

// Build the keyword part of the query: each space-separated term becomes a
// quoted phrase and all phrases are joined with AND.
// Empty terms (leading, trailing or repeated spaces) are dropped so they do
// not turn into empty "" phrases.
$terms = preg_split('/ +/', urldecode($keywords), -1, PREG_SPLIT_NO_EMPTY);

$phrases = array();
foreach ($terms as $term) {
    // Escape backslashes and double quotes so a term cannot close its own
    // phrase and inject query syntax. The backslash is replaced first, so the
    // backslashes added for the quotes are not escaped a second time.
    $escaped   = str_replace(array('\\', '"'), array('\\\\', '\\"'), $term);
    $phrases[] = '"' . $escaped . '"';
}

// With no usable term, fall back to the empty phrase so that the " AND (...)"
// filter clauses appended afterwards still have a left-hand operand.
$query = $phrases ? implode(' AND ', $phrases) : '""';
$front  = Zend_Controller_Front::getInstance();
$params = $front->getRequest()->getParams();

// Optional filters. Each request parameter carries a comma-separated list of
// values; the list becomes one parenthesised clause on a single index field
// and is appended to the main query with AND.
// Inside a clause, countries are alternatives (OR) whereas categories and
// subcategories must all match (AND).
$filters = array(
    // request parameter          index field      operator between values
    array('filter_by_country',     'countries',     ' OR '),
    array('filter_by_category',    'categories',    ' AND '),
    array('filter_by_subcategory', 'subcategories', ' AND '),
);

foreach ($filters as $filter) {
    list($paramName, $field, $operator) = $filter;

    // Skip filters that were not supplied (or were supplied empty).
    if (empty($params[$paramName])) {
        continue;
    }

    $clauses = array();
    foreach (explode(',', $params[$paramName]) as $value) {
        $clauses[] = "{$field}:{$value}";
    }

    $query .= " AND (" . implode($operator, $clauses) . ")";
}
// Hits are cached per query string; the md5 digest of the query is the cache id.
$cachedItemID = md5($query);
$hits = $cache->load($cachedItemID);

// A cache miss is reported as boolean false. Compare strictly: a query that
// legitimately matched nothing is cached as an empty array, and a loose
// "!$hits" test would treat that as a miss and re-run the search every time.
if ($hits === false) {
    // Cache miss: open the index and run the search.
    $index = Zend_Search_Lucene::open('../data/products_index');

    // Limit the returned result set to prevent request timeouts.
    Zend_Search_Lucene::setResultSetLimit(500);

    $found = $index->find($query);

    // Keep only the two fields needed later, as plain arrays, so the cached
    // payload stays small and does not hold on to the hit objects.
    // NOTE(review): reading stored fields from each hit presumably loads the
    // underlying document one by one -- worth profiling alongside find().
    $hits = array();
    foreach ($found as $hitID => $hit) {
        $hits[$hitID] = array(
            'productID' => $hit->productID,
            'price'     => $hit->price,
        );
    }
    unset($found);

    // Store the reduced hit list for subsequent identical queries.
    $cache->save($hits, $cachedItemID);
}
// Tally the categories ($fcat) and subcategories ($fsub) found across all
// hits. Both arrays are keyed by id and carry the English and French labels
// plus an occurrence count; subcategories also record their parent category.
// Every (vendor, category) row of every existing product adds one to the
// corresponding counts.
// NOTE(review): this loads one product model per hit on every request, cached
// hits included -- likely a noticeable share of the total request time.
$fcat = array();
$fsub = array();

foreach ($hits as $hit) {
    $product = new MyProject_Model_Product();
    $product->findByID($hit['productID']);

    // Hits whose product can no longer be found contribute nothing.
    if (!$product->exists()) {
        continue;
    }

    foreach ($product->getVendors() as $vendor) {
        foreach ($vendor->getCategories() as $category) {
            // Row layout as used below: 0 = category id, 1/2 = category labels
            // (English/French), 3 = subcategory id, 4/5 = subcategory labels.
            $categoryID    = $category[0];
            $subcategoryID = $category[3];

            $fcat[$categoryID]['label_english'] = $category[1];
            $fcat[$categoryID]['label_french']  = $category[2];
            $fcat[$categoryID]['count'] = isset($fcat[$categoryID]['count'])
                ? $fcat[$categoryID]['count'] + 1
                : 1;

            $fsub[$subcategoryID]['label_english'] = $category[4];
            $fsub[$subcategoryID]['label_french']  = $category[5];
            $fsub[$subcategoryID]['count'] = isset($fsub[$subcategoryID]['count'])
                ? $fsub[$subcategoryID]['count'] + 1
                : 1;
            $fsub[$subcategoryID]['parent'] = $categoryID;
        }
    }
}
// Create the answer with the total hit count and the pagination settings,
// then attach the category/subcategory facets.
$answer = new MyProject_Rest_Answer_ProductsSearchResult(
    count($hits),
    $pagination['rpp'],
    $pagination['page']
);
$answer->appendCategories($fcat, $fsub);

// Translate (rpp, page) into a slice of the hit list. Both values are clamped
// at zero so that an out-of-range page or page size can never turn into a
// negative offset/length, which array_slice() would count from the end.
$pageSize = max(0, (int) $pagination['rpp']);
$offset   = max(0, ($pageSize * (int) $pagination['page']) - $pageSize);

// Decide the order of the hit ids. Only the two price sorts are recognised;
// any other (or missing) "sort" value keeps the order in which the hits were
// returned by the search instead of producing an empty page.
$sortMode = empty($params['sort']) ? '' : $params['sort'];

if ($sortMode === 'price-asc' || $sortMode === 'price-desc') {
    // Map hit id => price, then sort by price while keeping the ids as keys.
    $prices = array();
    foreach ($hits as $hitID => $hit) {
        $prices[$hitID] = $hit['price'];
    }

    if ($sortMode === 'price-asc') {
        asort($prices, SORT_NUMERIC);
    } else {
        arsort($prices, SORT_NUMERIC);
    }

    $orderedHitIDs = array_keys($prices);
} else {
    $orderedHitIDs = array_keys($hits);
}

// Load and append only the products that belong to the requested page.
foreach (array_slice($orderedHitIDs, $offset, $pageSize) as $hitID) {
    $product = new MyProject_Model_Product();
    $product->findByID($hits[$hitID]['productID']);
    $answer->append($product);
}

return $answer->toXML();