1 <?php |
1 <?php |
2 |
2 |
3 /* |
3 /* |
4 * Enano - an open-source CMS capable of wiki functions, Drupal-like sidebar blocks, and everything in between |
4 * Enano - an open-source CMS capable of wiki functions, Drupal-like sidebar blocks, and everything in between |
5 * Version 1.1.1 |
5 * Version 1.0.3 (Dyrad) |
6 * Copyright (C) 2006-2007 Dan Fuhry |
6 * Copyright (C) 2006-2007 Dan Fuhry |
7 * search.php - algorithm used to search pages |
7 * search.php - algorithm used to search pages |
8 * |
8 * |
9 * This program is Free Software; you can redistribute and/or modify it under the terms of the GNU General Public License |
9 * This program is Free Software; you can redistribute and/or modify it under the terms of the GNU General Public License |
10 * as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. |
10 * as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. |
192 if ( !$case_sensitive ) |
192 if ( !$case_sensitive ) |
193 $term = strtolower($term); |
193 $term = strtolower($term); |
194 $where_any[] = $term; |
194 $where_any[] = $term; |
195 } |
195 } |
196 |
196 |
197 $col_word = ( $case_sensitive ) ? 'word' : 'lcase(word)'; |
197 $col_word = ( $case_sensitive ) ? 'word' : ENANO_SQLFUNC_LOWERCASE . '(word)'; |
198 $where_any = ( count($where_any) > 0 ) ? '( ' . $col_word . ' = \'' . implode('\' OR ' . $col_word . ' = \'', $where_any) . '\' )' : ''; |
198 $where_any = ( count($where_any) > 0 ) ? '( ' . $col_word . ' = \'' . implode('\' OR ' . $col_word . ' = \'', $where_any) . '\' )' : ''; |
199 |
199 |
200 // generate query |
200 // generate query |
201 // using a GROUP BY here ensures that the same word with a different case isn't counted as 2 words - it's all melted back |
201 // using a GROUP BY here ensures that the same word with a different case isn't counted as 2 words - it's all melted back |
202 // into one later in the processing stages |
202 // into one later in the processing stages |
351 // at this stage we can search the full page_text column instead of relying on a word list. |
351 // at this stage we can search the full page_text column instead of relying on a word list. |
352 // |
352 // |
353 |
353 |
354 // We can skip this stage if none of these special terms apply |
354 // We can skip this stage if none of these special terms apply |
355 |
355 |
356 $text_col = ( $case_sensitive ) ? 'page_text' : 'lcase(page_text)'; |
356 $text_col = ( $case_sensitive ) ? 'page_text' : ENANO_SQLFUNC_LOWERCASE . '(page_text)'; |
357 $name_col = ( $case_sensitive ) ? 'name' : 'lcase(name)'; |
357 $name_col = ( $case_sensitive ) ? 'name' : ENANO_SQLFUNC_LOWERCASE . '(name)'; |
358 $text_col_join = ( $case_sensitive ) ? 't.page_text' : 'lcase(t.page_text)'; |
358 $text_col_join = ( $case_sensitive ) ? 't.page_text' : ENANO_SQLFUNC_LOWERCASE . '(t.page_text)'; |
359 $name_col_join = ( $case_sensitive ) ? 'p.name' : 'lcase(p.name)'; |
359 $name_col_join = ( $case_sensitive ) ? 'p.name' : ENANO_SQLFUNC_LOWERCASE . '(p.name)'; |
|
360 |
|
361 $concat_column = ( ENANO_DBLAYER == 'MYSQL' ) ? |
|
362 'CONCAT(\'ns=\',t.namespace,\';pid=\',t.page_id)' : |
|
363 "'ns=' || t.namespace || ';pid=' || t.page_id"; |
360 |
364 |
361 if ( count($query_phrase['any']) > 0 || count($query_phrase['req']) > 0 ) |
365 if ( count($query_phrase['any']) > 0 || count($query_phrase['req']) > 0 ) |
362 { |
366 { |
363 |
367 |
364 $where_any = array(); |
368 $where_any = array(); |
382 $where_req[] = "( $text_col LIKE '%$term%' OR $name_col LIKE '%$term%' )"; |
386 $where_req[] = "( $text_col LIKE '%$term%' OR $name_col LIKE '%$term%' )"; |
383 } |
387 } |
384 $and_clause = ( $where_any != '' ) ? 'AND ' : ''; |
388 $and_clause = ( $where_any != '' ) ? 'AND ' : ''; |
385 $where_req = ( count($where_req) > 0 ) ? "{$and_clause}" . implode(" AND\n ", $where_req) : ''; |
389 $where_req = ( count($where_req) > 0 ) ? "{$and_clause}" . implode(" AND\n ", $where_req) : ''; |
386 |
390 |
387 $sql = 'SELECT CONCAT("ns=",t.namespace,";pid=",t.page_id) AS id, p.name FROM ' . table_prefix . "page_text AS t\n" |
391 $sql = 'SELECT ' . $concat_column . ' AS id, p.name FROM ' . table_prefix . "page_text AS t\n" |
388 . " LEFT JOIN " . table_prefix . "pages AS p\n" |
392 . " LEFT JOIN " . table_prefix . "pages AS p\n" |
389 . " ON ( p.urlname = t.page_id AND p.namespace = t.namespace )\n" |
393 . " ON ( p.urlname = t.page_id AND p.namespace = t.namespace )\n" |
390 . " WHERE\n $where_any\n $where_req;"; |
394 . " WHERE\n $where_any\n $where_req;"; |
391 if ( !($q = $db->sql_unbuffered_query($sql)) ) |
395 if ( !($q = $db->sql_unbuffered_query($sql)) ) |
392 $db->_die('Error is in perform_search(), includes/search.php, query 2. Parsed query dump follows:<pre>(indexable) ' . htmlspecialchars(print_r($query, true)) . '(non-indexable) ' . htmlspecialchars(print_r($query_phrase, true)) . '</pre>'); |
396 $db->_die('Error is in perform_search(), includes/search.php, query 2. Parsed query dump follows:<pre>(indexable) ' . htmlspecialchars(print_r($query, true)) . '(non-indexable) ' . htmlspecialchars(print_r($query_phrase, true)) . '</pre>'); |
439 $text_where = array(); |
443 $text_where = array(); |
440 foreach ( $scores as $page_id => $_ ) |
444 foreach ( $scores as $page_id => $_ ) |
441 { |
445 { |
442 $text_where[] = $db->escape($page_id); |
446 $text_where[] = $db->escape($page_id); |
443 } |
447 } |
444 $text_where = '( CONCAT("ns=",t.namespace,";pid=",t.page_id) = \'' . implode('\' OR CONCAT("ns=",t.namespace,";pid=",t.page_id) = \'', $text_where) . '\' )'; |
448 $text_where = '( ' . $concat_column . ' = \'' . implode('\' OR ' . $concat_column . ' = \'', $text_where) . '\' )'; |
445 |
449 |
446 if ( count($query['not']) > 0 ) |
450 if ( count($query['not']) > 0 ) |
447 $text_where .= ' AND'; |
451 $text_where .= ' AND'; |
448 |
452 |
449 $where_not = array(); |
453 $where_not = array(); |
454 $term = strtolower($term); |
458 $term = strtolower($term); |
455 $where_not[] = $term; |
459 $where_not[] = $term; |
456 } |
460 } |
457 $where_not = ( count($where_not) > 0 ) ? "$text_col NOT LIKE '%" . implode("%' AND $text_col NOT LIKE '%", $where_not) . "%'" : ''; |
461 $where_not = ( count($where_not) > 0 ) ? "$text_col NOT LIKE '%" . implode("%' AND $text_col NOT LIKE '%", $where_not) . "%'" : ''; |
458 |
462 |
459 $sql = 'SELECT CONCAT("ns=",t.namespace,";pid=",t.page_id) AS id, t.page_id, t.namespace, CHAR_LENGTH(t.page_text) AS page_length, t.page_text, p.name AS page_name FROM ' . table_prefix . "page_text AS t |
463 $sql = 'SELECT ' . $concat_column . ' AS id, t.page_id, t.namespace, CHAR_LENGTH(t.page_text) AS page_length, t.page_text, p.name AS page_name FROM ' . table_prefix . "page_text AS t |
460 LEFT JOIN " . table_prefix . "pages AS p |
464 LEFT JOIN " . table_prefix . "pages AS p |
461 ON ( p.urlname = t.page_id AND p.namespace = t.namespace ) |
465 ON ( p.urlname = t.page_id AND p.namespace = t.namespace ) |
462 WHERE $text_where $where_not;"; |
466 WHERE $text_where $where_not;"; |
463 if ( !($q = $db->sql_unbuffered_query($sql)) ) |
467 if ( !($q = $db->sql_unbuffered_query($sql)) ) |
464 $db->_die('Error is in perform_search(), includes/search.php, query 3'); |
468 $db->_die('Error is in perform_search(), includes/search.php, query 3'); |
567 |
571 |
568 // Sort scores array |
572 // Sort scores array |
569 arsort($scores); |
573 arsort($scores); |
570 |
574 |
571 // Divisor for calculating relevance scores |
575 // Divisor for calculating relevance scores |
572 $divisor = ( count($query['any']) + count($query_phrase['any']) + count($query['req']) + count($query_phrase['not']) ) * 1.5; |
576 $divisor = ( count($query['any']) + count($query_phrase['any']) + count($query['req']) + count($query['not']) ) * 1.5; |
573 |
577 |
574 foreach ( $scores as $page_id => $score ) |
578 foreach ( $scores as $page_id => $score ) |
575 { |
579 { |
576 if ( !isset($page_data[$page_id]) ) |
580 if ( !isset($page_data[$page_id]) ) |
577 // It's possible that $scores contains a score for a page that was later eliminated because it contained a disallowed term |
581 // It's possible that $scores contains a score for a page that was later eliminated because it contained a disallowed term |
845 if ( $i - 75 > 0 ) |
849 if ( $i - 75 > 0 ) |
846 { |
850 { |
847 // Navigate backwards until a space character is found |
851 // Navigate backwards until a space character is found |
848 $chunk = substr($pt, 0, ( $i - 75 )); |
852 $chunk = substr($pt, 0, ( $i - 75 )); |
849 $final_chunk = $chunk; |
853 $final_chunk = $chunk; |
850 for ( $j = strlen($chunk); $j > 0; $j = $j - 1 ) |
854 for ( $j = strlen($chunk) - 1; $j > 0; $j = $j - 1 ) |
851 { |
855 { |
852 if ( in_array($chunk{$j}, $space_chars) ) |
856 if ( in_array($chunk{$j}, $space_chars) ) |
853 { |
857 { |
854 $final_chunk = substr($chunk, $j + 1); |
858 $final_chunk = substr($chunk, $j + 1); |
855 break; |
859 break; |