includes/search.php
changeset 326 ab66d6d1f1f4
parent 322 5f1cd51bf1be
child 334 c72b545f1304
child 339 5d62ef764b0d
equal deleted inserted replaced
314:474f8be55943 326:ab66d6d1f1f4
     1 <?php
     1 <?php
     2 
     2 
     3 /*
     3 /*
     4  * Enano - an open-source CMS capable of wiki functions, Drupal-like sidebar blocks, and everything in between
     4  * Enano - an open-source CMS capable of wiki functions, Drupal-like sidebar blocks, and everything in between
     5  * Version 1.1.1
     5  * Version 1.0.3 (Dyrad)
     6  * Copyright (C) 2006-2007 Dan Fuhry
     6  * Copyright (C) 2006-2007 Dan Fuhry
     7  * search.php - algorithm used to search pages
     7  * search.php - algorithm used to search pages
     8  *
     8  *
     9  * This program is Free Software; you can redistribute and/or modify it under the terms of the GNU General Public License
     9  * This program is Free Software; you can redistribute and/or modify it under the terms of the GNU General Public License
    10  * as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version.
    10  * as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version.
   192       if ( !$case_sensitive )
   192       if ( !$case_sensitive )
   193         $term = strtolower($term);
   193         $term = strtolower($term);
   194       $where_any[] = $term;
   194       $where_any[] = $term;
   195     }
   195     }
   196 
   196 
   197     $col_word = ( $case_sensitive ) ? 'word' : 'lcase(word)';
   197     $col_word = ( $case_sensitive ) ? 'word' : ENANO_SQLFUNC_LOWERCASE . '(word)';
   198     $where_any = ( count($where_any) > 0 ) ? '( ' . $col_word . ' = \'' . implode('\' OR ' . $col_word . ' = \'', $where_any) . '\' )' : '';
   198     $where_any = ( count($where_any) > 0 ) ? '( ' . $col_word . ' = \'' . implode('\' OR ' . $col_word . ' = \'', $where_any) . '\' )' : '';
   199 
   199 
   200     // generate query
   200     // generate query
   201     // using a GROUP BY here ensures that the same word with a different case isn't counted as 2 words - it's all melted back
   201     // using a GROUP BY here ensures that the same word with a different case isn't counted as 2 words - it's all melted back
   202     // into one later in the processing stages
   202     // into one later in the processing stages
   351   // at this stage we can search the full page_text column instead of relying on a word list.
   351   // at this stage we can search the full page_text column instead of relying on a word list.
   352   //
   352   //
   353 
   353 
   354   // We can skip this stage if none of these special terms apply
   354   // We can skip this stage if none of these special terms apply
   355 
   355 
   356   $text_col = ( $case_sensitive ) ? 'page_text' : 'lcase(page_text)';
   356   $text_col = ( $case_sensitive ) ? 'page_text' : ENANO_SQLFUNC_LOWERCASE . '(page_text)';
   357   $name_col = ( $case_sensitive ) ? 'name' : 'lcase(name)';
   357   $name_col = ( $case_sensitive ) ? 'name' : ENANO_SQLFUNC_LOWERCASE . '(name)';
   358   $text_col_join = ( $case_sensitive ) ? 't.page_text' : 'lcase(t.page_text)';
   358   $text_col_join = ( $case_sensitive ) ? 't.page_text' : ENANO_SQLFUNC_LOWERCASE . '(t.page_text)';
   359   $name_col_join = ( $case_sensitive ) ? 'p.name' : 'lcase(p.name)';
   359   $name_col_join = ( $case_sensitive ) ? 'p.name' : ENANO_SQLFUNC_LOWERCASE . '(p.name)';
       
   360     
       
   361   $concat_column = ( ENANO_DBLAYER == 'MYSQL' ) ?
       
   362     'CONCAT(\'ns=\',t.namespace,\';pid=\',t.page_id)' :
       
   363     "'ns=' || t.namespace || ';pid=' || t.page_id";
   360 
   364 
   361   if ( count($query_phrase['any']) > 0 || count($query_phrase['req']) > 0 )
   365   if ( count($query_phrase['any']) > 0 || count($query_phrase['req']) > 0 )
   362   {
   366   {
   363 
   367 
   364     $where_any = array();
   368     $where_any = array();
   382       $where_req[] = "( $text_col LIKE '%$term%' OR $name_col LIKE '%$term%' )";
   386       $where_req[] = "( $text_col LIKE '%$term%' OR $name_col LIKE '%$term%' )";
   383     }
   387     }
   384     $and_clause = ( $where_any != '' ) ? 'AND ' : '';
   388     $and_clause = ( $where_any != '' ) ? 'AND ' : '';
   385     $where_req = ( count($where_req) > 0 ) ? "{$and_clause}" . implode(" AND\n  ", $where_req) : '';
   389     $where_req = ( count($where_req) > 0 ) ? "{$and_clause}" . implode(" AND\n  ", $where_req) : '';
   386 
   390 
   387     $sql = 'SELECT CONCAT("ns=",t.namespace,";pid=",t.page_id) AS id, p.name FROM ' . table_prefix . "page_text AS t\n"
   391     $sql = 'SELECT ' . $concat_column . ' AS id, p.name FROM ' . table_prefix . "page_text AS t\n"
   388             . "  LEFT JOIN " . table_prefix . "pages AS p\n"
   392             . "  LEFT JOIN " . table_prefix . "pages AS p\n"
   389             . "    ON ( p.urlname = t.page_id AND p.namespace = t.namespace )\n"
   393             . "    ON ( p.urlname = t.page_id AND p.namespace = t.namespace )\n"
   390             . "  WHERE\n  $where_any\n  $where_req;";
   394             . "  WHERE\n  $where_any\n  $where_req;";
   391     if ( !($q = $db->sql_unbuffered_query($sql)) )
   395     if ( !($q = $db->sql_unbuffered_query($sql)) )
   392       $db->_die('Error is in perform_search(), includes/search.php, query 2. Parsed query dump follows:<pre>(indexable) ' . htmlspecialchars(print_r($query, true)) . '(non-indexable) ' . htmlspecialchars(print_r($query_phrase, true)) . '</pre>');
   396       $db->_die('Error is in perform_search(), includes/search.php, query 2. Parsed query dump follows:<pre>(indexable) ' . htmlspecialchars(print_r($query, true)) . '(non-indexable) ' . htmlspecialchars(print_r($query_phrase, true)) . '</pre>');
   439   $text_where = array();
   443   $text_where = array();
   440   foreach ( $scores as $page_id => $_ )
   444   foreach ( $scores as $page_id => $_ )
   441   {
   445   {
   442     $text_where[] = $db->escape($page_id);
   446     $text_where[] = $db->escape($page_id);
   443   }
   447   }
   444   $text_where = '( CONCAT("ns=",t.namespace,";pid=",t.page_id) = \'' . implode('\' OR CONCAT("ns=",t.namespace,";pid=",t.page_id) = \'', $text_where) . '\' )';
   448   $text_where = '( ' . $concat_column . ' = \'' . implode('\' OR ' . $concat_column . ' = \'', $text_where) . '\' )';
   445 
   449 
   446   if ( count($query['not']) > 0 )
   450   if ( count($query['not']) > 0 )
   447     $text_where .= ' AND';
   451     $text_where .= ' AND';
   448 
   452 
   449   $where_not = array();
   453   $where_not = array();
   454       $term = strtolower($term);
   458       $term = strtolower($term);
   455     $where_not[] = $term;
   459     $where_not[] = $term;
   456   }
   460   }
   457   $where_not = ( count($where_not) > 0 ) ? "$text_col NOT LIKE '%" . implode("%' AND $text_col NOT LIKE '%", $where_not) . "%'" : '';
   461   $where_not = ( count($where_not) > 0 ) ? "$text_col NOT LIKE '%" . implode("%' AND $text_col NOT LIKE '%", $where_not) . "%'" : '';
   458 
   462 
   459   $sql = 'SELECT CONCAT("ns=",t.namespace,";pid=",t.page_id) AS id, t.page_id, t.namespace, CHAR_LENGTH(t.page_text) AS page_length, t.page_text, p.name AS page_name FROM ' . table_prefix . "page_text AS t
   463   $sql = 'SELECT ' . $concat_column . ' AS id, t.page_id, t.namespace, CHAR_LENGTH(t.page_text) AS page_length, t.page_text, p.name AS page_name FROM ' . table_prefix . "page_text AS t
   460             LEFT JOIN " . table_prefix . "pages AS p
   464             LEFT JOIN " . table_prefix . "pages AS p
   461               ON ( p.urlname = t.page_id AND p.namespace = t.namespace )
   465               ON ( p.urlname = t.page_id AND p.namespace = t.namespace )
   462             WHERE $text_where $where_not;";
   466             WHERE $text_where $where_not;";
   463   if ( !($q = $db->sql_unbuffered_query($sql)) )
   467   if ( !($q = $db->sql_unbuffered_query($sql)) )
   464     $db->_die('Error is in perform_search(), includes/search.php, query 3');
   468     $db->_die('Error is in perform_search(), includes/search.php, query 3');
   567 
   571 
   568   // Sort scores array
   572   // Sort scores array
   569   arsort($scores);
   573   arsort($scores);
   570 
   574 
   571   // Divisor for calculating relevance scores
   575   // Divisor for calculating relevance scores
   572   $divisor = ( count($query['any']) + count($query_phrase['any']) + count($query['req']) + count($query_phrase['not']) ) * 1.5;
   576   $divisor = ( count($query['any']) + count($query_phrase['any']) + count($query['req']) + count($query['not']) ) * 1.5;
   573 
   577 
   574   foreach ( $scores as $page_id => $score )
   578   foreach ( $scores as $page_id => $score )
   575   {
   579   {
   576     if ( !isset($page_data[$page_id]) )
   580     if ( !isset($page_data[$page_id]) )
   577       // It's possible that $scores contains a score for a page that was later eliminated because it contained a disallowed term
   581       // It's possible that $scores contains a score for a page that was later eliminated because it contained a disallowed term
   845           if ( $i - 75 > 0 )
   849           if ( $i - 75 > 0 )
   846           {
   850           {
   847             // Navigate backwards until a space character is found
   851             // Navigate backwards until a space character is found
   848             $chunk = substr($pt, 0, ( $i - 75 ));
   852             $chunk = substr($pt, 0, ( $i - 75 ));
   849             $final_chunk = $chunk;
   853             $final_chunk = $chunk;
   850             for ( $j = strlen($chunk); $j > 0; $j = $j - 1 )
   854             for ( $j = strlen($chunk) - 1; $j > 0; $j = $j - 1 )
   851             {
   855             {
   852               if ( in_array($chunk{$j}, $space_chars) )
   856               if ( in_array($chunk{$j}, $space_chars) )
   853               {
   857               {
   854                 $final_chunk = substr($chunk, $j + 1);
   858                 $final_chunk = substr($chunk, $j + 1);
   855                 break;
   859                 break;