# HG changeset patch # User Dan # Date 1195675986 18000 # Node ID 2d4bb97fa75a7f1a8a08cc9f4f4c7a2ed37eb2af # Parent 96524a56d475f7749c3df300841d84a62008c924# Parent e0ec986c0af3c8d2b2f845859ed34ffb70caeb74 Merging in new search algo from Scribus diff -r 96524a56d475 -r 2d4bb97fa75a includes/clientside/css/enano-shared.css --- a/includes/clientside/css/enano-shared.css Wed Nov 21 15:11:51 2007 -0500 +++ b/includes/clientside/css/enano-shared.css Wed Nov 21 15:13:06 2007 -0500 @@ -175,11 +175,19 @@ div.jswindow div.content { padding: 10px; margin: 0; background-color: #FFFFFF; } /* Search results */ +div.search-result, div.search-hibar, div.search-lobar { + font-family: arial, helvetica, sans-serif; + font-size: 8pt; +} div.search-result h3 { font-size: 14pt; margin: 10px 0 0 0; } div.search-result h3 a { color: blue !important; font-weight: normal; padding-bottom: 0; } -div.search-result p { margin: 10px 0 0 0 !important; font-family: arial, helvetica, sans-serif; font-size: 10pt; } -div.search-result span.search-result-info { color: green; } +div.search-result p { margin: 10px 0 0 0 !important; font-family: arial, helvetica, sans-serif; font-size: small; } +div.search-result span.search-result-info { color: #7777CC; } +div.search-result span.search-result-url { color: green; } div.search-result span.search-term, div.search-result span.title-search-term { background-color: #FFFFC0; font-weight: bold; } +div.search-result span.search-result-annotation { font-size: 8pt; } +div.search-hibar { border-top: 1px solid #3366CC; margin-top: 10px; color: #000; background-color: #D5DFF3; padding: 3px; vertical-align: middle; } +div.search-lobar { background-color: #E5EFFF; margin: 0; padding: 5px; } /* * Search box diff -r 96524a56d475 -r 2d4bb97fa75a includes/common.php --- a/includes/common.php Wed Nov 21 15:11:51 2007 -0500 +++ b/includes/common.php Wed Nov 21 15:13:06 2007 -0500 @@ -156,14 +156,6 @@ // Low level maintenance // -// If the search algorithm backend has been changed, empty out the search cache (the two cache formats are incompatible with each other) -if ( getConfig('last_search_algo') != SEARCH_MODE ) -{ - if ( !$db->sql_query('DELETE FROM '.table_prefix.'search_cache;') ) - $db->_die(); - setConfig('last_search_algo', SEARCH_MODE); -} - // If the AES key size has been changed, bail out and fast if ( !getConfig('aes_key_size') ) { @@ -212,7 +204,6 @@ table_prefix.'groups', table_prefix.'group_members', table_prefix.'acl', - table_prefix.'search_cache', table_prefix.'page_groups', table_prefix.'page_group_members', table_prefix.'tags' diff -r 96524a56d475 -r 2d4bb97fa75a includes/constants.php --- a/includes/constants.php Wed Nov 21 15:11:51 2007 -0500 +++ b/includes/constants.php Wed Nov 21 15:13:06 2007 -0500 @@ -70,7 +70,6 @@ define('MAX_PMS_PER_BATCH', 7); // The maximum number of users that users can send PMs to in one go; restriction does not apply to users with mod_misc rights define('SEARCH_RESULTS_PER_PAGE', 10); define('MYSQL_MAX_PACKET_SIZE', 1048576); // 1MB; this is the default in MySQL 4.x I think -define('SEARCH_MODE', 'FULLTEXT'); // Can be FULLTEXT or BUILTIN // Sidebar diff -r 96524a56d475 -r 2d4bb97fa75a includes/dbal.php --- a/includes/dbal.php Wed Nov 21 15:11:51 2007 -0500 +++ b/includes/dbal.php Wed Nov 21 15:13:06 2007 -0500 @@ -142,7 +142,21 @@ if ( !defined('ENANO_INSTALLED') && !defined('MIDGET_INSTALLED') && !defined('IN_ENANO_INSTALL') ) { dc_here('dbal: oops, looks like Enano isn\'t set up. Constants ENANO_INSTALLED, MIDGET_INSTALLED, and IN_ENANO_INSTALL are all undefined.'); - header('Location: install.php'); + // scriptPath isn't set yet - we need to autodetect it to avoid infinite redirects + if ( !defined('scriptPath') ) + { + if ( isset($_SERVER['PATH_INFO']) ) + { + $_SERVER['REQUEST_URI'] = preg_replace(';' . preg_quote($_SERVER['PATH_INFO']) . '$;', '', $_SERVER['REQUEST_URI']); + } + $sp = dirname($_SERVER['REQUEST_URI']); + if($sp == '/' || $sp == '\\') $sp = ''; + define('scriptPath', $sp); + define('contentPath', "$sp/index.php?title="); + } + $loc = scriptPath . '/install.php'; + // header("Location: $loc"); + redirect($loc, 'Enano not installed', 'We can\'t seem to find an Enano installation (valid config file). You will be transferred to the installation wizard momentarily...', 3); exit; } $this->_conn = @mysql_connect($dbhost, $dbuser, $dbpasswd); diff -r 96524a56d475 -r 2d4bb97fa75a includes/functions.php --- a/includes/functions.php Wed Nov 21 15:11:51 2007 -0500 +++ b/includes/functions.php Wed Nov 21 15:13:06 2007 -0500 @@ -282,6 +282,16 @@ header('Location: ' . $url); header('HTTP/1.1 307 Temporary Redirect'); } + + if ( !is_object($template) ) + { + $template = new template_nodb(); + $template->load_theme('oxygen', 'bleu', false); + $template->tpl_strings['SITE_NAME'] = 'Enano'; + $template->tpl_strings['SITE_DESC'] = 'This site is experiencing a critical error and cannot load.'; + $template->tpl_strings['COPYRIGHT'] = '© ' . date('Y'); + $template->tpl_strings['PAGE_NAME'] = htmlspecialchars($title); + } $template->add_header(''); $template->add_header(' '); + + if ( get_class($template) == 'template_nodb' ) + $template->init_vars(); $template->tpl_strings['PAGE_NAME'] = $title; $template->header(true); - echo '

' . $message . '

If you are not redirected within ' . ( $timeout + 1 ) . ' seconds, please click here.

'; + echo '

' . $message . '

If you are not redirected within ' . $timeout . ' seconds, please click here.

'; $template->footer(true); $db->close(); @@ -2317,6 +2330,7 @@ $blk = $template->makeParserText($block); $inner = ''; $cls = 'row2'; + $total = $num_pages * $perpage - $perpage; if ( $start > 0 ) { $url = sprintf($result_url, abs($start - $perpage)); @@ -2399,8 +2413,6 @@ $inner .= $blk->run(); } - $total = $num_pages * $perpage - $perpage; - if ( $this_page < $num_pages ) { // $cls = ( $cls == 'row1' ) ? 'row2' : 'row1'; @@ -2423,7 +2435,9 @@ if ( $start < $total ) { - $url = sprintf($result_url, abs($start + $perpage)); + $link_offset = abs($start + $perpage); + // i'm tired of debugging a defective sprintf + $url = htmlspecialchars(sprintf($result_url, strval($link_offset))); $link = "Next »"; $cls = ( $cls == 'row1' ) ? 'row2' : 'row1'; $blk->assign_vars(array( @@ -2437,7 +2451,9 @@ $paginator = "\n$begin$inner$end\n"; if ( $total > 1 ) + { $out .= $paginator; + } $cls = 'row2'; diff -r 96524a56d475 -r 2d4bb97fa75a includes/paths.php --- a/includes/paths.php Wed Nov 21 15:11:51 2007 -0500 +++ b/includes/paths.php Wed Nov 21 15:13:06 2007 -0500 @@ -658,14 +658,14 @@ } /** - * Fetches a MySQL search query to use for Searcher::searchMySQL() + * Generates an SQL query to grab all of the text */ function fetch_page_search_resource() { global $db, $session, $paths, $template, $plugins; // Common objects // sha1('') returns "da39a3ee5e6b4b0d3255bfef95601890afd80709" - $texts = 'SELECT t.page_text,CONCAT(\'ns=\',t.namespace,\';pid=\',t.page_id) FROM '.table_prefix.'page_text AS t + $texts = 'SELECT t.page_text, CONCAT(\'ns=\',t.namespace,\';pid=\',t.page_id) AS page_idstring, t.page_id, t.namespace FROM '.table_prefix.'page_text AS t LEFT JOIN '.table_prefix.'pages AS p ON ( t.page_id=p.urlname AND t.namespace=p.namespace ) WHERE p.namespace=t.namespace @@ -685,9 +685,25 @@ $texts = Array(); $textq = $db->sql_unbuffered_query($this->fetch_page_search_resource()); if(!$textq) $db->_die(''); - while($row = $db->fetchrow_num()) + while($row = $db->fetchrow()) { - $texts[(string)$row[1]] = $row[0]; + if ( isset($this->nslist[$row['namespace']]) ) + { + $idstring = $this->nslist[$row['namespace']] . sanitize_page_id($row['page_id']); + if ( isset($this->pages[$idstring]) ) + { + $page = $this->pages[$idstring]; + } + else + { + $page = array('name' => dirtify_page_id($row['page_id'])); + } + } + else + { + $page = array('name' => dirtify_page_id($row['page_id'])); + } + $texts[(string)$row['page_idstring']] = $row['page_text'] . ' ' . $page['name']; } $search->buildIndex($texts); // echo '
'.print_r($search->index, true).'
'; @@ -725,10 +741,17 @@ { return $db->get_error(); } + if ( $db->numrows() < 1 ) + return 'E: No rows'; + $idstring = $this->nslist[$namespace] . sanitize_page_id($page_id); + if ( !isset($this->pages[$idstring]) ) + { + return 'E: Can\'t find page metadata'; + } $row = $db->fetchrow(); $db->free_result(); $search = new Searcher(); - $search->buildIndex(Array("ns={$namespace};pid={$page_id}"=>$row['page_text'])); + $search->buildIndex(Array("ns={$namespace};pid={$page_id}"=>$row['page_text'] . ' ' . $this->pages[$idstring]['name'])); $new_index = $search->index; $keys = array_keys($search->index); @@ -739,20 +762,6 @@ } $keys = "word=0x" . implode ( " OR word=0x", $keys ) . ""; - // Zap the cache - $cache = array_keys($search->index); - if ( count($cache) < 1 ) - { - return false; - } - foreach ( $cache as $key => $_unused ) - { - $cache[$key] = $db->escape( $cache[$key] ); - } - $cache = "query LIKE '%" . implode ( "%' OR query LIKE '%", $cache ) . "%'"; - $sql = 'DELETE FROM '.table_prefix.'search_cache WHERE '.$cache; - $db->sql_query($sql); - $query = $db->sql_query('SELECT word,page_names FROM '.table_prefix.'search_index WHERE '.$keys.';'); while($row = $db->fetchrow()) @@ -785,7 +794,7 @@ } - /** + /* * Creates an instance of the Searcher class, including index info * @return object */ diff -r 96524a56d475 -r 2d4bb97fa75a includes/search.php --- a/includes/search.php Wed Nov 21 15:11:51 2007 -0500 +++ b/includes/search.php Wed Nov 21 15:13:06 2007 -0500 @@ -31,15 +31,13 @@ } /** - * Algorithm to actually do the searching. This system usually works pretty fast (tested and developed on a site with 22 pages) but one - * caveat of this algorithm is that it has to load the entire index into memory. It also requires manual parsing of the search query - * which can be quite CPU-intensive. On the flip side this algorithm is extremely flexible and can be adapted for other uses very easily. + * In Enano versions prior to 1.0.2, this class provided a search function that was keyword-based and allowed boolean searches. It was + * cut from Coblynau and replaced with perform_search(), later in this file, because of speed issues. Now mostly deprecated. The only + * thing remaining is the buildIndex function, which is still used by the path manager and the new search framework. * - * Most of the time, this system is disabled. It is only used when MySQL can't or won't allow FULLTEXT indices. - * * @package Enano * @subpackage Page management frontend - * @license GNU General Public License http://enanocms.org/Special:GNU_General_Public_License + * @license GNU General Public License */ class Searcher @@ -50,30 +48,11 @@ var $warnings; var $match_case = false; - function __construct() - { - $this->warnings = Array(); - } - - function Searcher() - { - $this->__construct(); - } - - function warn($t) - { - if(!in_array($t, $this->warnings)) $this->warnings[] = $t; - } - - function convertCase($text) - { - return ( $this->match_case ) ? $text : strtolower($text); - } - function buildIndex($texts) { $this->index = Array(); - + $stopwords = get_stopwords(); + foreach($texts as $i => $l) { $seed = md5(microtime(true) . mt_rand()); @@ -96,7 +75,7 @@ $words = explode(' ', $letters); foreach($words as $c => $w) { - if(strlen($w) < 4) + if(strlen($w) < 2 || in_array($w, $stopwords)) unset($words[$c]); else $words[$c] = $w; @@ -121,516 +100,808 @@ $this->index[$k] = implode(',', $this->index[$k]); } } +} + +/** + * Searches the site for the specified string and returns an array with each value being an array filled with the following: + * page_id: string, self-explanatory + * namespace: string, self-explanatory + * page_length: integer, the length of the full page in bytes + * page_text: string, the contents of the page (trimmed to ~150 bytes if necessary) + * score: numerical relevance score, 1-100, rounded to 2 digits and calculated based on which terms were present and which were not + * @param string Search query + * @param string Will be filled with any warnings encountered whilst parsing the query + * @param bool Case sensitivity - defaults to false + * @return array + */ + +function perform_search($query, &$warnings, $case_sensitive = false) +{ + global $db, $session, $paths, $template, $plugins; // Common objects + $warnings = array(); - function search($query, $texts) + $query = parse_search_query($query, $warnings); + + // Segregate search terms containing spaces + $query_phrase = array( + 'any' => array(), + 'req' => array() + ); + + foreach ( $query['any'] as $i => $_ ) { - - // OK, let's establish some basics here. Here is the procedure for performing the search: - // * search for items that matches all the terms in the correct order. - // * search for items that match in any order - // * eliminate one term and do the loop all over - - $this->results = Array(); - $query = $this->parseQuery($query); - $querybak = $query; - for($i = sizeof($query['any'])-1; $i >= 0; $i--) + $term =& $query['any'][$i]; + $term = trim($term); + // the indexer only indexes words a-z with apostrophes + if ( preg_match('/[^A-Za-z\']/', $term) ) + { + $query_phrase['any'][] = $term; + unset($term, $query['any'][$i]); + } + } + unset($term); + $query['any'] = array_values($query['any']); + + foreach ( $query['req'] as $i => $_ ) + { + $term =& $query['req'][$i]; + $term = trim($term); + if ( preg_match('/[^A-Za-z\']/', $term) ) { - $res = $this->performCoreSearch($query, $texts, true); - $this->results = enano_safe_array_merge($this->results, $res); - $res = $this->performCoreSearch($query, $texts, false); - $this->results = enano_safe_array_merge($this->results, $res); - unset($query['any'][$i]); + $query_phrase['req'][] = $term; + unset($term, $query['req'][$i]); } - - // Last resort - search for any of the terms instead of all of 'em - $res = $this->performCoreSearch($querybak, $texts, false, true); - $this->results = enano_safe_array_merge($this->results, $res); - - $this->highlightResults($querybak); + } + unset($term); + $query['req'] = array_values($query['req']); + + $results = array(); + $scores = array(); + + // FIXME: Update to use FULLTEXT algo when available. + + // Build an SQL query to load from the index table + if ( count($query['any']) < 1 && count($query['req']) < 1 && count($query_phrase['any']) < 1 && count($query_phrase['req']) < 1 ) + { + // This is both because of technical restrictions and devastation that would occur on shared servers/large sites. + $warnings[] = 'You need to have at least one keyword in your search query. Searching only for pages not containing a term is not allowed.'; + return array(); } - // $texts should be a textual MySQL query! - // @todo document - function searchMySQL($query, $texts) + // + // STAGE 1 + // Get all possible result pages from the search index. Tally which pages have the most words, and later sort them by boolean relevance + // + + // Skip this if no indexable words are included + + if ( count($query['any']) > 0 || count($query['req']) > 0 ) { - global $db; - // OK, let's establish some basics here. Here is the procedure for performing the search: - // * search for items that matches all the terms in the correct order. - // * search for items that match in any order - // * eliminate one term and do the loop all over - - $this->results = Array(); - $query = $this->parseQuery($query); - $querytmp = $query; - $querybak = $query; - for($i = sizeof($querytmp['any'])-1; $i >= 0; $i--) + $where_any = array(); + foreach ( $query['any'] as $term ) { - $res = $this->performCoreSearchMySQL($querytmp, $texts, true); - $this->results = enano_safe_array_merge($this->results, $res); - $res = $this->performCoreSearchMySQL($querytmp, $texts, false); - $this->results = enano_safe_array_merge($this->results, $res); - unset($querytmp['any'][$i]); + $term = escape_string_like($term); + if ( !$case_sensitive ) + $term = strtolower($term); + $where_any[] = $term; + } + foreach ( $query['req'] as $term ) + { + $term = escape_string_like($term); + if ( !$case_sensitive ) + $term = strtolower($term); + $where_any[] = $term; } - // Last resort - search for any of the terms instead of all of 'em - $res = $this->performCoreSearchMySQL($querybak, $texts, false, true); - $this->results = enano_safe_array_merge($this->results, $res); + $col_word = ( $case_sensitive ) ? 'word' : 'lcase(word)'; + $where_any = ( count($where_any) > 0 ) ? '( ' . $col_word . ' = \'' . implode('\' OR ' . $col_word . ' = \'', $where_any) . '\' )' : ''; + + // generate query + // using a GROUP BY here ensures that the same word with a different case isn't counted as 2 words - it's all melted back + // into one later in the processing stages + $group_by = ( $case_sensitive ) ? '' : ' GROUP BY lcase(word);'; + $sql = "SELECT word, page_names FROM " . table_prefix . "search_index WHERE {$where_any}{$group_by}"; + if ( !($q = $db->sql_unbuffered_query($sql)) ) + $db->_die('Error is in perform_search(), includes/search.php, query 1'); - $this->highlightResults($querybak); - } - - /** - * This method assumes that $query is already parsed and $texts is an (associative) array of possible results - * @param array $query A search query parsed with Searcher::parseQuery() - * @param array $texts The list of possible results - * @param bool $exact_order If true, only matches results with the terms in the same order as the terms in the query - * @return array An associative array of results - * @access private - */ - function performCoreSearch($query, $texts, $exact_order = false, $any = false) - { - $textkeys = array_keys($texts); - $results = Array(); - if($exact_order) + $word_tracking = array(); + if ( $row = $db->fetchrow() ) { - $query = $this->concatQueryTerms($query); - } - $query['trm'] = array_merge($query['any'], $query['req']); - # Find all remotely possible results first - // Single-word terms - foreach($this->index as $term => $keys) - { - foreach($query['trm'] as $userterm) + do { - if($this->convertCase($userterm) == $this->convertCase($term)) + // get page list + $pages =& $row['page_names']; + $ns_list = '(' . implode('|', array_keys($paths->nslist)) . ')'; + if ( strpos($pages, ',') ) { - $k = explode(',', $keys); - foreach($k as $idxkey) + // the term occurs in more than one page + + // Find page IDs that contain commas + // This should never happen because commas are escaped by sanitize_page_id(). Nevertheless for compatibility with older + // databases, and to alleviate the concerns of hackers, we'll accommodate for page IDs with commas here by checking for + // IDs that don't match the pattern for stringified page ID + namespace. If it doesn't match, that means it's a continuation + // of the previous ID and should be concatenated to the previous entry. + $matches = explode(',', $pages); + $prev = false; + foreach ( $matches as $i => $_ ) { - if(isset($texts[$idxkey])) + $match =& $matches[$i]; + if ( !preg_match("/^ns=$ns_list;pid=(.+)$/", $match) && $prev ) { - $results[$idxkey] = $texts[$idxkey]; + $matches[$prev] .= ',' . $match; + unset($match, $matches[$i]); + continue; + } + $prev = $i; + } + unset($match); + + // Iterate through each of the results, assigning scores based on how many times the page has shown up. + // This works because this phase of the search is strongly word-based not page-based. If a page shows up + // multiple times while fetching the result rows from the search_index table, it simply means that page + // contains more than one of the terms the user searched for. + + foreach ( $matches as $match ) + { + if ( isset($scores[$match]) ) + { + $scores[$match]++; } else { - if(preg_match('#^([0-9]+)$#', $idxkey)) - { - $idxkey = intval($idxkey); - if(isset($texts[$idxkey])) $results[$idxkey] = $texts[$idxkey]; - } + $scores[$match] = 1; } - } - } - } - } - // Quoted terms - foreach($query['trm'] as $userterm) - { - if(!preg_match('/[\s"\'~`!@#\$%\^&\*\(\)\{\}:;<>,.\/\?_-]/', $userterm)) continue; - foreach($texts as $k => $t) - { - if(strstr($this->convertCase($t), $this->convertCase($userterm))) - { - // We have a match! - if(!isset($results[$k])) $results[$k] = $t; - } - } - } - // Remove excluded terms - foreach($results as $k => $r) - { - foreach($query['not'] as $not) - { - if(strstr($this->convertCase($r), $this->convertCase($not))) unset($results[$k]); - } - } - if(!$any) - { - // Remove results not containing all terms - foreach($results as $k => $r) - { - foreach($query['any'] as $term) - { - if(!strstr($this->convertCase($r), $this->convertCase($term))) unset($results[$k]); - } - } - } - // Remove results not containing all required terms - foreach($results as $k => $r) - { - foreach($query['req'] as $term) - { - if(!strstr($this->convertCase($r), $this->convertCase($term))) unset($results[$k]); - } - } - return $results; - } - - /** - * This is the same as performCoreSearch, but $texts should be a MySQL result resource. This can save tremendous amounts of memory on large sites. - * @param array $query A search query parsed with Searcher::parseQuery() - * @param string $texts A text MySQL query that selects the text as the first column and the index key as the second column - * @param bool $exact_order If true, only matches results with the terms in the same order as the terms in the query - * @return array An associative array of results - * @access private - */ - function performCoreSearchMySQL($query, $texts, $exact_order = false, $any = false) - { - global $db; - $results = Array(); - if($exact_order) - { - $query = $this->concatQueryTerms($query); - } - $query['trm'] = array_merge($query['any'], $query['req']); - # Find all remotely possible results first - $texts = $db->sql_query($texts); - if ( !$texts ) - $db->_die('The error is in the search engine.'); - if ( $r = $db->fetchrow_num($texts) ) - { - do - { - foreach($this->index as $term => $keys) - { - foreach($query['trm'] as $userterm) - { - if($this->convertCase($userterm) == $this->convertCase($term)) + if ( isset($word_tracking[$match]) ) { - $k = explode(',', $keys); - foreach($k as $idxkey) - { - $row[0] = $r[0]; - $row[1] = $r[1]; - if(!isset($row[1])) - { - echo('PHP PARSER BUG: $row[1] is set but not set... includes/search.php:'.__LINE__); - $GLOBALS['template']->footer(); - exit; - } - if($row[1] == $idxkey) - $results[$idxkey] = $row[0]; - else - { - if(preg_match('#^([0-9]+)$#', $idxkey)) - { - $idxkey = intval($idxkey); - if($row[1] == $idxkey) $results[$idxkey] = $row[0]; - } - } - } + $word_tracking[$match][] = $row['word']; + } + else + { + $word_tracking[$match] = array($row['word']); } } } - // Quoted terms - foreach($query['trm'] as $userterm) + else { - if(!preg_match('/[\s"\'~`!@#\$%\^&\*\(\)\{\}:;<>,.\/\?_-]/', $userterm)) continue; - if(strstr($this->convertCase($r[0]), $this->convertCase($userterm))) + // the term only occurs in one page + if ( isset($scores[$pages]) ) { - // We have a match! - if(!isset($results[$r[1]])) $results[$r[1]] = $r[0]; + $scores[$pages]++; } - } - } while( $r = $db->fetchrow_num($texts) ); - } - // Remove excluded terms - foreach($results as $k => $r) - { - foreach($query['not'] as $not) - { - if(strstr($this->convertCase($r), $this->convertCase($not))) unset($results[$k]); - } - } - if(!$any) - { - // Remove results not containing all terms - foreach($results as $k => $r) - { - foreach($query['any'] as $term) - { - if(!strstr($this->convertCase($r), $this->convertCase($term))) unset($results[$k]); + else + { + $scores[$pages] = 1; + } + if ( isset($word_tracking[$pages]) ) + { + $word_tracking[$pages][] = $row['word']; + } + else + { + $word_tracking[$pages] = array($row['word']); + } } } - } - // Remove results not containing all terms - foreach($results as $k => $r) - { - foreach($query['req'] as $term) - { - if(!strstr($this->convertCase($r), $this->convertCase($term))) unset($results[$k]); - } + while ( $row = $db->fetchrow() ); } - return $results; - } - - function concatQueryTerms($query) - { - $tmp = implode(' ', $query['any']); - unset($query['any']); - $query['any'] = Array(0 => $tmp); - return $query; - } + $db->free_result(); - /** - * Builds a basic assoc array with a more organized version of the query - */ - - function parseQuery($query) - { - $ret = array( - 'any' => array(), - 'req' => array(), - 'not' => array() - ); - $terms = array(); - $in_quote = false; - $start_term = 0; - $just_finished = false; - for ( $i = 0; $i < strlen($query); $i++ ) + // + // STAGE 2: FIRST ELIMINATION ROUND + // Iterate through the list of required terms. If a given page is not found to have the required term, eliminate it + // + + foreach ( $query['req'] as $term ) { - $chr = $query{$i}; - $prev = ( $i > 0 ) ? $query{ $i - 1 } : ''; - $next = ( ( $i + 1 ) < strlen($query) ) ? $query{ $i + 1 } : ''; - - if ( ( $chr == ' ' && !$in_quote ) || ( $i + 1 == strlen ( $query ) ) ) + foreach ( $word_tracking as $i => $page ) { - $len = ( $next == '' ) ? $i + 1 : $i - $start_term; - $word = substr ( $query, $start_term, $len ); - $terms[] = $word; - $start_term = $i + 1; - } - - elseif ( $chr == '"' && $in_quote && $prev != '\\' ) - { - $word = substr ( $query, $start_term, $i - $start_term + 1 ); - $start_pos = ( $next == ' ' ) ? $i + 2 : $i + 1; - $in_quote = false; - } - - elseif ( $chr == '"' && !$in_quote ) - { - $in_quote = true; - $start_pos = $i; - } - - } - - $ticker = 0; - - foreach ( $terms as $element => $__unused ) - { - $atom =& $terms[$element]; - - $ticker++; - - if ( $ticker == 20 ) - { - $this->warn('Some of your search terms were excluded because searches are limited to 20 terms to prevent excessive server load.'); - break; - } - - if ( substr ( $atom, 0, 2 ) == '+"' && substr ( $atom, ( strlen ( $atom ) - 1 ), 1 ) == '"' ) - { - $word = substr ( $atom, 2, ( strlen( $atom ) - 3 ) ); - if ( strlen ( $word ) < 4 ) - { - $this->warn('One or more of your search terms was excluded because terms must be at least 4 characters in length.'); - $ticker--; - continue; - } - if(in_array($word, $ret['req'])) - { - $this->warn('One or more of your search terms was excluded because duplicate terms were encountered.'); - $ticker--; - continue; - } - $ret['req'][] = $word; - } - elseif ( substr ( $atom, 0, 2 ) == '-"' && substr ( $atom, ( strlen ( $atom ) - 1 ), 1 ) == '"' ) - { - $word = substr ( $atom, 2, ( strlen( $atom ) - 3 ) ); - if ( strlen ( $word ) < 4 ) + if ( !in_array($term, $page) ) { - $this->warn('One or more of your search terms was excluded because terms must be at least 4 characters in length.'); - $ticker--; - continue; - } - if(in_array($word, $ret['not'])) - { - $this->warn('One or more of your search terms was excluded because duplicate terms were encountered.'); - $ticker--; - continue; - } - $ret['not'][] = $word; - } - elseif ( substr ( $atom, 0, 1 ) == '+' ) - { - $word = substr ( $atom, 1 ); - if ( strlen ( $word ) < 4 ) - { - $this->warn('One or more of your search terms was excluded because terms must be at least 4 characters in length.'); - $ticker--; - continue; - } - if(in_array($word, $ret['req'])) - { - $this->warn('One or more of your search terms was excluded because duplicate terms were encountered.'); - $ticker--; - continue; - } - $ret['req'][] = $word; - } - elseif ( substr ( $atom, 0, 1 ) == '-' ) - { - $word = substr ( $atom, 1 ); - if ( strlen ( $word ) < 4 ) - { - $this->warn('One or more of your search terms was excluded because terms must be at least 4 characters in length.'); - $ticker--; - continue; - } - if(in_array($word, $ret['not'])) - { - $this->warn('One or more of your search terms was excluded because duplicate terms were encountered.'); - $ticker--; - continue; - } - $ret['not'][] = $word; - } - elseif ( substr ( $atom, 0, 1 ) == '"' && substr ( $atom, ( strlen($atom) - 1 ), 1 ) == '"' ) - { - $word = substr ( $atom, 1, ( strlen ( $atom ) - 2 ) ); - if ( strlen ( $word ) < 4 ) - { - $this->warn('One or more of your search terms was excluded because terms must be at least 4 characters in length.'); - $ticker--; - continue; - } - if(in_array($word, $ret['any'])) - { - $this->warn('One or more of your search terms was excluded because duplicate terms were encountered.'); - $ticker--; - continue; - } - $ret['any'][] = $word; - } - else - { - $word = $atom; - if ( strlen ( $word ) < 4 ) - { - $this->warn('One or more of your search terms was excluded because terms must be at least 4 characters in length.'); - $ticker--; - continue; - } - if(in_array($word, $ret['any'])) - { - $this->warn('One or more of your search terms was excluded because duplicate terms were encountered.'); - $ticker--; - continue; - } - $ret['any'][] = $word; - } - } - return $ret; - } - - function highlightResults($query, $starttag = '', $endtag = '') - { - $query['trm'] = array_merge($query['any'], $query['req']); - //die('
'.print_r($query, true).'
'); - foreach($query['trm'] as $q) - { - foreach($this->results as $k => $r) - { - $startplace = 0; - //$this->results[$k] = htmlspecialchars($this->results[$k]); - for($i = 0; $i < strlen($r); $i++) - { - $word = substr($r, $i, strlen($q)); - if($this->convertCase($word) == $this->convertCase($q)) - { - $word = $starttag . $word . $endtag; - $this->results[$k] = substr($r, 0, $i) . $word . substr($r, $i + strlen($q), strlen($r)+999999); - $startplace = $i - 75; - if($startplace < 0) $startplace = 0; - $this->results[$k] = '...'.trim(substr($this->results[$k], $startplace, strlen($word) + 150)).'...'; - continue 2; - } + unset($word_tracking[$i], $scores[$i]); } } } } + // + // STAGE 3: PHRASE SEARCHING + // Use LIKE to find pages with specified phrases. We can do a super-picky single query without another elimination round because + // at this stage we can search the full page_text column instead of relying on a word list. + // + + // We can skip this stage if none of these special terms apply + + $text_col = ( $case_sensitive ) ? 'page_text' : 'lcase(page_text)'; + + if ( count($query_phrase['any']) > 0 || count($query_phrase['req']) > 0 ) + { + + $where_any = array(); + foreach ( $query_phrase['any'] as $term ) + { + $term = escape_string_like($term); + if ( !$case_sensitive ) + $term = strtolower($term); + $where_any[] = $term; + } + + $where_any = ( count($where_any) > 0 ) ? "( $text_col LIKE '%" . implode("%' OR $text_col LIKE '%", $where_any) . "%' )" : ''; + + // Also do required columns, but use AND to ensure that all required terms are included + $where_req = array(); + foreach ( $query_phrase['req'] as $term ) + { + $term = escape_string_like($term); + if ( !$case_sensitive ) + $term = strtolower($term); + $where_req[] = $term; + } + $and_clause = ( $where_any != '' ) ? 'AND ' : ''; + $where_req = ( count($where_req) > 0 ) ? "{$and_clause}$text_col LIKE '%" . implode("%' AND $text_col LIKE '%", $where_req) . "%'" : ''; + + $sql = 'SELECT CONCAT("ns=",namespace,";pid=",page_id) AS id FROM ' . table_prefix . "page_text WHERE $where_any $where_req;"; + if ( !($q = $db->sql_unbuffered_query($sql)) ) + $db->_die('Error is in perform_search(), includes/search.php, query 2. Parsed query dump follows:
(indexable) ' . htmlspecialchars(print_r($query, true)) . '(non-indexable) ' . htmlspecialchars(print_r($query_phrase, true)) . '
'); + + if ( $row = $db->fetchrow() ) + { + do + { + $id =& $row['id']; + if ( isset($scores[$id]) ) + { + $scores[$id]++; + } + else + { + $scores[$id] = 1; + } + } + while ( $row = $db->fetchrow() ); + } + $db->free_result(); + } + + // + // STAGE 4 - SELECT PAGE TEXT AND ELIMINATE NOTS + // At this point, we have a complete list of all the possible pages. Now we want to obtain the page text, and within the same query + // eliminate any terms that shouldn't be in there. + // + + // Generate master word list for the highlighter + $word_list = array_values(array_merge($query['any'], $query['req'], $query_phrase['any'], $query_phrase['req'])); + + $text_where = array(); + foreach ( $scores as $page_id => $_ ) + { + $text_where[] = $db->escape($page_id); + } + $text_where = '( CONCAT("ns=",t.namespace,";pid=",t.page_id) = \'' . implode('\' OR CONCAT("ns=",t.namespace,";pid=",t.page_id) = \'', $text_where) . '\' )'; + + if ( count($query['not']) > 0 ) + $text_where .= ' AND'; + + $where_not = array(); + foreach ( $query['not'] as $term ) + { + $term = escape_string_like($term); + if ( !$case_sensitive ) + $term = strtolower($term); + $where_not[] = $term; + } + $where_not = ( count($where_not) > 0 ) ? "$text_col NOT LIKE '%" . implode("%' AND $text_col NOT LIKE '%", $where_not) . "%'" : ''; + + $sql = 'SELECT CONCAT("ns=",t.namespace,";pid=",t.page_id) AS id, t.page_id, t.namespace, CHAR_LENGTH(t.page_text) AS page_length, t.page_text, p.name AS page_name FROM ' . table_prefix . "page_text AS t + LEFT JOIN " . table_prefix . "pages AS p + ON ( p.urlname = t.page_id AND p.namespace = t.namespace ) + WHERE $text_where $where_not;"; + if ( !($q = $db->sql_unbuffered_query($sql)) ) + $db->_die('Error is in perform_search(), includes/search.php, query 3'); + + $page_data = array(); + if ( $row = $db->fetchrow() ) + { + do + { + $row['page_text'] = htmlspecialchars($row['page_text']); + $row['page_name'] = htmlspecialchars($row['page_name']); + + // Highlight results (this is wonderfully automated) + $row['page_text'] = highlight_and_clip_search_result($row['page_text'], $word_list, $case_sensitive); + if ( strlen($row['page_text']) > 250 && !preg_match('/^\.\.\.(.+)\.\.\.$/', $row['page_text']) ) + { + $row['page_text'] = substr($row['page_text'], 0, 150) . '...'; + } + $row['page_name'] = highlight_search_result($row['page_name'], $word_list, $case_sensitive); + + $page_data[$row['id']] = $row; + } + while ( $row = $db->fetchrow() ); + } + $db->free_result(); + + // + // STAGE 5 - SPECIAL PAGE TITLE SEARCH + // Iterate through $paths->pages and check the titles for search terms. Score accordingly. + // + + foreach ( $paths->pages as $page ) + { + if ( $page['namespace'] != 'Special' ) + continue; + $idstring = 'ns=' . $page['namespace'] . ';pid=' . $page['urlname_nons']; + $any = array_merge($query['any'], $query_phrase['any']); + foreach ( $any as $term ) + { + if ( $case_sensitive ) + { + if ( strstr($page['name'], $term) || strstr($page['urlname_nons'], $term) ) + { + ( isset($scores[$idstring]) ) ? $scores[$idstring]++ : $scores[$idstring] = 1; + } + } + else + { + if ( strstr(strtolower($page['name']), strtolower($term)) || strstr(strtolower($page['urlname_nons']), strtolower($term)) ) + { + ( isset($scores[$idstring]) ) ? $scores[$idstring]++ : $scores[$idstring] = 1; + } + } + } + if ( isset($scores[$idstring]) ) + { + $page_data[$idstring] = array( + 'page_name' => $page['name'], + 'page_text' => '', + 'page_id' => $page['urlname_nons'], + 'namespace' => $page['namespace'], + 'score' => $scores[$idstring], + 'page_length' => 1, + 'page_note' => '[Special page]' + ); + } + } + + // + // STAGE 6 - SECOND ELIMINATION ROUND + // Iterate through the list of required terms. If a given page is not found to have the required term, eliminate it + // + + $required = array_merge($query['req'], $query_phrase['req']); + foreach ( $required as $term ) + { + foreach ( $page_data as $id => $page ) + { + if ( ( $page['namespace'] == 'Special' || ( $page['namespace'] != 'Special' && !strstr($page['page_text'], $term) ) ) && !strstr($page['page_id'], $term) && !strstr($page['page_name'], $term) ) + { + unset($page_data[$id]); + } + } + } + + // At this point, all of our normal results are in. However, we can also allow plugins to hook into the system and score their own + // pages and add text, etc. as necessary. + // Plugins are COMPLETELY responsible for using the search terms and handling Boolean logic properly + + $code = $plugins->setHook('search_global_inner'); + foreach ( $code as $cmd ) + { + eval($cmd); + } + + // a marvelous debugging aid :-) + // die('
' . htmlspecialchars(print_r($page_data, true)) . '
'); + + // + // STAGE 7 - HIGHLIGHT, TRIM, AND SCORE RESULTS + // We now have the complete results of the search. We need to trim text down to show only portions of the page containing search + // terms, highlight any search terms within the page, and sort the final results array in descending order of score. + // + + // Sort scores array + arsort($scores); + + // Divisor for calculating relevance scores + $divisor = count($query['any']) + count($query_phrase['any']) + count($query['req']) + count($query_phrase['not']); + + foreach ( $scores as $page_id => $score ) + { + if ( !isset($page_data[$page_id]) ) + // It's possible that $scores contains a score for a page that was later eliminated because it contained a disallowed term + continue; + + // Make a copy of the datum, then delete the original (it frees up a LOT of RAM) + $datum = $page_data[$page_id]; + unset($page_data[$page_id]); + + // This is an internal value used for sorting - it's no longer needed. + unset($datum['id']); + + // Calculate score + if ( $score > $divisor ) + $score = $divisor; + $datum['score'] = round($score / $divisor, 2) * 100; + + // Store it in our until-now-unused results array + $results[] = $datum; + } + + // Our work here is done. :-D + return $results; } /** - * Developer-friendly way to do searches. :-) Uses the MySQL FULLTEXT index type. - * @package Enano - * @subpackage Search + * Parses a search query into an associative array. The resultant array will be filled with the following values, each an array: + * any: Search terms that can optionally be present + * req: Search terms that must be present + * not: Search terms that should not be present + * @param string Search query + * @param array Will be filled with parser warnings, such as query too short, words too short, etc. + * @return array */ -class MySQL_Fulltext_Search { - - /** - * Performs a search. - * @param string The search query - * @return resource MySQL result resource - this is an UNBUFFERED query. - */ - - function search($query) +function parse_search_query($query, &$warnings) +{ + $stopwords = get_stopwords(); + $ret = array( + 'any' => array(), + 'req' => array(), + 'not' => array() + ); + $warnings = array(); + $terms = array(); + $in_quote = false; + $start_term = 0; + $just_finished = false; + for ( $i = 0; $i < strlen($query); $i++ ) { - global $db, $session, $paths, $template, $plugins; // Common objects + $chr = $query{$i}; + $prev = ( $i > 0 ) ? $query{ $i - 1 } : ''; + $next = ( ( $i + 1 ) < strlen($query) ) ? $query{ $i + 1 } : ''; - $fulltext_col = 'MATCH(t.page_id,t.namespace,p.name,t.page_text) AGAINST (\'' . $db->escape($query) . '\' IN BOOLEAN MODE)'; - $sql = "SELECT t.page_text,CONCAT('ns=',t.namespace,';pid=',t.page_id) AS page_identifier, $fulltext_col AS score, CHAR_LENGTH(t.page_text) AS length FROM ".table_prefix."page_text AS t - LEFT JOIN ".table_prefix."pages AS p - ON ( p.urlname=t.page_id AND p.namespace=t.namespace) - WHERE $fulltext_col > 0 - AND p.visible=1 - ORDER BY score DESC;"; - $q = $db->sql_unbuffered_query($sql); - if ( !$q ) - $db->_die(); + if ( ( $chr == ' ' && !$in_quote ) || ( $i + 1 == strlen ( $query ) ) ) + { + $len = ( $next == '' ) ? $i + 1 : $i - $start_term; + $word = substr ( $query, $start_term, $len ); + $terms[] = $word; + $start_term = $i + 1; + } - return $q; + elseif ( $chr == '"' && $in_quote && $prev != '\\' ) + { + $word = substr ( $query, $start_term, $i - $start_term + 1 ); + $start_pos = ( $next == ' ' ) ? $i + 2 : $i + 1; + $in_quote = false; + } + + elseif ( $chr == '"' && !$in_quote ) + { + $in_quote = true; + $start_pos = $i; + } + } - function highlight_result($query, $result) + $ticker = 0; + + foreach ( $terms as $element => $__unused ) { - global $db, $session, $paths, $template, $plugins; // Common objects - $search = new Searcher(); - $parsed_query = $search->parseQuery($query); - return $this->highlight_result_inner($query, $result); + $atom =& $terms[$element]; + + $ticker++; + + if ( $ticker == 20 ) + { + $warnings[] = 'Some of your search terms were excluded because searches are limited to 20 terms to prevent excessive server load.'; + break; + } + + if ( substr ( $atom, 0, 2 ) == '+"' && substr ( $atom, ( strlen ( $atom ) - 1 ), 1 ) == '"' ) + { + $word = substr ( $atom, 2, ( strlen( $atom ) - 3 ) ); + if ( strlen ( $word ) < 2 || in_array($word, $stopwords) ) + { + $warnings[] = 'One or more of your search terms was excluded because either it was less than 2 characters in length or is a common word (a stopword) that is typically found on a large number of pages. Examples of stopwords include "the", "this", "which", "with", etc.'; + $ticker--; + continue; + } + if(in_array($word, $ret['req'])) + { + $warnings[] = 'One or more of your search terms was excluded because duplicate terms were encountered.'; + $ticker--; + continue; + } + $ret['req'][] = $word; + } + elseif ( substr ( $atom, 0, 2 ) == '-"' && substr ( $atom, ( strlen ( $atom ) - 1 ), 1 ) == '"' ) + { + $word = substr ( $atom, 2, ( strlen( $atom ) - 3 ) ); + if ( strlen ( $word ) < 4 ) + { + $warnings[] = 'One or more of your search terms was excluded because terms must be at least 4 characters in length.'; + $ticker--; + continue; + } + if(in_array($word, $ret['not'])) + { + $warnings[] = 'One or more of your search terms was excluded because duplicate terms were encountered.'; + $ticker--; + continue; + } + $ret['not'][] = $word; + } + elseif ( substr ( $atom, 0, 1 ) == '+' ) + { + $word = substr ( $atom, 1 ); + if ( strlen ( $word ) < 2 || in_array($word, $stopwords) ) + { + $warnings[] = 'One or more of your search terms was excluded because either it was less than 2 characters in length or is a common word (a stopword) that is typically found on a large number of pages. Examples of stopwords include "the", "this", "which", "with", etc.'; + $ticker--; + continue; + } + if(in_array($word, $ret['req'])) + { + $warnings[] = 'One or more of your search terms was excluded because duplicate terms were encountered.'; + $ticker--; + continue; + } + $ret['req'][] = $word; + } + elseif ( substr ( $atom, 0, 1 ) == '-' ) + { + $word = substr ( $atom, 1 ); + if ( strlen ( $word ) < 2 || in_array($word, $stopwords) ) + { + $warnings[] = 'One or more of your search terms was excluded because either it was less than 2 characters in length or is a common word (a stopword) that is typically found on a large number of pages. Examples of stopwords include "the", "this", "which", "with", etc.'; + $ticker--; + continue; + } + if(in_array($word, $ret['not'])) + { + $warnings[] = 'One or more of your search terms was excluded because duplicate terms were encountered.'; + $ticker--; + continue; + } + $ret['not'][] = $word; + } + elseif ( substr ( $atom, 0, 1 ) == '"' && substr ( $atom, ( strlen($atom) - 1 ), 1 ) == '"' ) + { + $word = substr ( $atom, 1, ( strlen ( $atom ) - 2 ) ); + if ( strlen ( $word ) < 2 || in_array($word, $stopwords) ) + { + $warnings[] = 'One or more of your search terms was excluded because either it was less than 2 characters in length or is a common word (a stopword) that is typically found on a large number of pages. Examples of stopwords include "the", "this", "which", "with", etc.'; + $ticker--; + continue; + } + if(in_array($word, $ret['any'])) + { + $warnings[] = 'One or more of your search terms was excluded because duplicate terms were encountered.'; + $ticker--; + continue; + } + $ret['any'][] = $word; + } + else + { + $word = $atom; + if ( strlen ( $word ) < 2 || in_array($word, $stopwords) ) + { + $warnings[] = 'One or more of your search terms was excluded because either it was less than 2 characters in length or is a common word (a stopword) that is typically found on a large number of pages. Examples of stopwords include "the", "this", "which", "with", etc.'; + $ticker--; + continue; + } + if(in_array($word, $ret['any'])) + { + $warnings[] = 'One or more of your search terms was excluded because duplicate terms were encountered.'; + $ticker--; + continue; + } + $ret['any'][] = $word; + } + } + return $ret; +} + +/** + * Escapes a string for use in a LIKE clause. + * @param string + * @return string + */ + +function escape_string_like($string) +{ + global $db, $session, $paths, $template, $plugins; // Common objects + $string = $db->escape($string); + $string = str_replace(array('%', '_'), array('\%', '\_'), $string); + return $string; +} + +/** + * Wraps tags around all words in both the specified array. Does not perform any clipping. + * @param string Text to process + * @param array Word list + * @param bool If true, searches case-sensitively when highlighting words + * @return string + */ + +function highlight_search_result($pt, $words, $case_sensitive = false) +{ + $words2 = array(); + for ( $i = 0; $i < sizeof($words); $i++) + { + if(!empty($words[$i])) + $words2[] = preg_quote($words[$i]); } - function highlight_result_inner($query, $fulltext, $starttag = '', $endtag = '') + $flag = ( $case_sensitive ) ? '' : 'i'; + $regex = '/(' . implode('|', $words2) . ')/' . $flag; + $pt = preg_replace($regex, '\\1', $pt); + + return $pt; +} + +/** + * Wraps tags around all words in both the specified array and the specified text and clips the text to + * an appropriate length. + * @param string Text to process + * @param array Word list + * @param bool If true, searches case-sensitively when highlighting words + * @return string + */ + +function highlight_and_clip_search_result($pt, $words, $case_sensitive = false) +{ + $cut_off = false; + + $space_chars = Array("\t", "\n", "\r", " "); + + $pt = highlight_search_result($pt, $words, $case_sensitive); + + foreach ( $words as $word ) { - $result = false; - $query['trm'] = array_merge($query['any'], $query['req']); - //die('
'.print_r($query, true).'
'); - foreach($query['trm'] as $q) + // Boldface searched words + $ptlen = strlen($pt); + for ( $i = 0; $i < $ptlen; $i++ ) { - $startplace = 0; - //$this->results[$k] = htmlspecialchars($this->results[$k]); - for($i = 0; $i < strlen($r); $i++) + $len = strlen($word); + if ( strtolower(substr($pt, $i, $len)) == strtolower($word) ) { - $word = substr($r, $i, strlen($q)); - if($this->convertCase($word) == $this->convertCase($q)) + $chunk1 = substr($pt, 0, $i); + $chunk2 = substr($pt, $i, $len); + $chunk3 = substr($pt, ( $i + $len )); + $pt = $chunk1 . $chunk2 . $chunk3; + $ptlen = strlen($pt); + // Cut off text to 150 chars or so + if ( !$cut_off ) { - $word = $starttag . $word . $endtag; - $result = substr($fulltext, 0, $i) . $word . substr($r, $i + strlen($q), strlen($r)+99999999); - $startplace = $i - 75; - if($startplace < 0) $startplace = 0; - $result = '...'.trim(substr($result, $startplace, strlen($word) + 150)).'...'; - continue 2; + $cut_off = true; + if ( $i - 75 > 0 ) + { + // Navigate backwards until a space character is found + $chunk = substr($pt, 0, ( $i - 75 )); + $final_chunk = $chunk; + for ( $j = strlen($chunk); $j > 0; $j = $j - 1 ) + { + if ( in_array($chunk{$j}, $space_chars) ) + { + $final_chunk = substr($chunk, $j + 1); + break; + } + } + $mid_chunk = substr($pt, ( $i - 75 ), 75); + + $clipped = '...' . $final_chunk . $mid_chunk . $chunk2; + + $chunk = substr($pt, ( $i + strlen($chunk2) + 75 )); + $final_chunk = $chunk; + for ( $j = 0; $j < strlen($chunk); $j++ ) + { + if ( in_array($chunk{$j}, $space_chars) ) + { + $final_chunk = substr($chunk, 0, $j); + break; + } + } + + $end_chunk = substr($pt, ( $i + strlen($chunk2) ), 75 ); + + $clipped .= $end_chunk . $final_chunk . '...'; + + $pt = $clipped; + } + else if ( strlen($pt) > 200 ) + { + $mid_chunk = substr($pt, ( $i - 75 ), 75); + + $clipped = $chunk1 . $chunk2; + + $chunk = substr($pt, ( $i + strlen($chunk2) + 75 )); + $final_chunk = $chunk; + for ( $j = 0; $j < strlen($chunk); $j++ ) + { + if ( in_array($chunk{$j}, $space_chars) ) + { + $final_chunk = substr($chunk, 0, $j); + break; + } + } + + $end_chunk = substr($pt, ( $i + strlen($chunk2) ), 75 ); + + $clipped .= $end_chunk . $final_chunk . '...'; + + $pt = $clipped; + + } + break 2; } } } - return $result; + $cut_off = false; } + return $pt; +} + +/** + * Returns a list of words that shouldn't under most circumstances be indexed for searching. Kudos to MySQL. + * @return array + * @see http://dev.mysql.com/doc/refman/5.0/en/fulltext-stopwords.html + */ + +function get_stopwords() +{ + static $stopwords; + if ( is_array($stopwords) ) + return $stopwords; + $stopwords = array('a\'s', 'able', 'after', 'afterwards', 'again', + 'against', 'ain\'t', 'all', 'almost', 'alone', 'along', 'already', 'also', 'although', 'always', + 'am', 'among', 'amongst', 'an', 'and', 'another', 'any', 'anybody', 'anyhow', 'anyone', 'anything', 'anyway', + 'anyways', 'anywhere', 'apart', 'appear', 'appreciate', 'appropriate', 'are', 'aren\'t', 'around', 'as', 'aside', + 'ask', 'asking', 'associated', 'at', 'available', 'away', 'awfully', 'be', 'became', 'because', 'become', 'becomes', + 'becoming', 'been', 'before', 'beforehand', 'behind', 'being', 'believe', 'below', 'beside', 'besides', 'best', + 'better', 'between', 'beyond', 'both', 'brief', 'but', 'by', 'c\'mon', 'c\'s', 'came', 'can', 'can\'t', 'cannot', + 'cant', 'cause', 'causes', 'certain', 'certainly', 'changes', 'clearly', 'co', 'com', 'come', 'comes', 'concerning', + 'consequently', 'consider', 'considering', 'contain', 'containing', 'contains', 'corresponding', 'could', + 'couldn\'t', 'course', 'despite', 'did', 'didn\'t', 'different', 'do', + 'does', 'doesn\'t', 'doing', 'don\'t', 'done', 'down', 'downwards', 'during', 'each', 'edu', 'eg', 'eight', + 'either', 'else', 'elsewhere', 'enough', 'entirely', 'especially', 'et', 'etc', 'even', 'ever', 'every', + 'everybody', 'everyone', 'everything', 'everywhere', 'ex', 'exactly', 'example', 'except', 'far', 'few', 'fifth', + 'first', 'five', 'followed', 'following', 'follows', 'for', 'former', 'formerly', 'forth', 'four', 'from', + 'further', 'get', 'gets', 'getting', 'given', 'gives', 'go', 'goes', 'going', 'gone', 'got', + 'gotten', 'had', 'hadn\'t', 'happens', 'hardly', 'has', 'hasn\'t', 'have', 'haven\'t', 'having', + 'he', 'he\'s', 'hello', 'help', 'hence', 'her', 'here', 'here\'s', 'hereafter', 'hereby', 'herein', 'hereupon', + 'hers', 'herself', 'hi', 'him', 'himself', 'his', 'hither', 'hopefully', 'how', 'howbeit', 'however', 'i\'d', + 'i\'ll', 'i\'m', 'i\'ve', 'ie', 'if', 'ignored', 'immediate', 'in', 'inasmuch', 'inc', 'indeed', 'indicate', + 'indicated', 'indicates', 'inner', 'insofar', 'instead', 'into', 'inward', 'is', 'isn\'t', 'it', 'it\'d', 'it\'ll', + 'it\'s', 'its', 'itself', 'just', 'keep', 'keeps', 'kept', 'know', 'knows', 'known', 'last', 'lately', 'later', + 'latter', 'latterly', 'least', 'less', 'lest', 'let', 'let\'s', 'like', 'liked', 'likely', 'little', 'look', + 'looking', 'looks', 'ltd', 'mainly', 'many', 'may', 'maybe', 'me', 'mean', 'meanwhile', 'merely', 'might', 'more', + 'moreover', 'most', 'mostly', 'much', 'must', 'my', 'myself', 'name', 'namely', 'nd', 'near', 'nearly', 'necessary', + 'need', 'needs', 'neither', 'never', 'nevertheless', 'new', 'next', 'nine', 'no', 'nobody', 'non', 'none', 'noone', + 'nor', 'normally', 'not', 'nothing', 'novel', 'now', 'nowhere', 'obviously', 'of', 'off', 'often', 'oh', 'ok', + 'okay', 'old', 'on', 'once', 'one', 'ones', 'only', 'onto', 'or', 'other', 'others', 'otherwise', 'ought', 'our', + 'ours', 'ourselves', 'out', 'outside', 'over', 'overall', 'own', 'particular', 'particularly', 'per', 'perhaps', + 'placed', 'please', 'plus', 'possible', 'presumably', 'probably', 'provides', 'que', 'quite', 'qv', 'rather', 'rd', + 're', 'really', 'reasonably', 'regarding', 'regardless', 'regards', 'relatively', 'respectively', 'right', 'said', + 'same', 'saw', 'say', 'saying', 'says', 'second', 'secondly', 'see', 'seeing', 'seem', 'seemed', 'seeming', 'seems', + 'seen', 'self', 'selves', 'sensible', 'sent', 'serious', 'seriously', 'seven', 'several', 'shall', 'she', 'should', + 'shouldn\'t', 'since', 'six', 'so', 'some', 'somebody', 'somehow', 'someone', 'something', 'sometime', 'sometimes', + 'somewhat', 'somewhere', 'soon', 'sorry', 'specified', 'specify', 'specifying', 'still', 'sub', 'such', 'sup', + 'sure', 't\'s', 'take', 'taken', 'tell', 'tends', 'th', 'than', 'thank', 'thanks', 'thanx', 'that', 'that\'s', + 'thats', 'the', 'their', 'theirs', 'them', 'themselves', 'then', 'thence', 'there', 'there\'s', 'thereafter', + 'thereby', 'therefore', 'therein', 'theres', 'thereupon', 'these', 'they', 'they\'d', 'they\'ll', 'they\'re', + 'they\'ve', 'think', 'third', 'this', 'thorough', 'thoroughly', 'those', 'though', 'three', 'through', 'throughout', + 'thru', 'thus', 'to', 'together', 'too', 'took', 'toward', 'towards', 'tried', 'tries', 'truly', 'try', 'trying', + 'twice', 'two', 'un', 'under', 'unfortunately', 'unless', 'unlikely', 'until', 'unto', 'up', 'upon', 'us', 'use', + 'used', 'useful', 'uses', 'using', 'usually', 'value', 'various', 'very', 'via', 'viz', 'vs', 'want', 'wants', + 'was', 'wasn\'t', 'way', 'we', 'we\'d', 'we\'ll', 'we\'re', 'we\'ve', 'welcome', 'well', 'went', 'were', 'weren\'t', + 'what', 'what\'s', 'whatever', 'when', 'whence', 'whenever', 'where', 'where\'s', 'whereafter', 'whereas', + 'whereby', 'wherein', 'whereupon', 'wherever', 'whether', 'which', 'while', 'whither', 'who', 'who\'s', 'whoever', + 'whole', 'whom', 'whose', 'why', 'will', 'willing', 'wish', 'with', 'within', 'without', 'won\'t', 'wonder', + 'would', 'would', 'wouldn\'t', 'yes', 'yet', 'you', 'you\'d', 'you\'ll', 'you\'re', 'you\'ve', 'your', 'yours', + 'yourself', 'yourselves', 'zero'); + return $stopwords; } ?> diff -r 96524a56d475 -r 2d4bb97fa75a includes/template.php --- a/includes/template.php Wed Nov 21 15:11:51 2007 -0500 +++ b/includes/template.php Wed Nov 21 15:13:06 2007 -0500 @@ -822,7 +822,7 @@ if(isset($_GET['sqldbg']) && $session->get_permissions('mod_misc')) { echo '

Query list as requested on URI

';
-        echo $db->sql_backtrace();
+        echo htmlspecialchars($db->sql_backtrace());
         echo '
'; } @@ -1803,7 +1803,7 @@ $this->sidebar_extra = ''; $this->sidebar_widgets = ''; $this->toolbar_menu = ''; - $this->additional_headers = ''; + $this->additional_headers = ''; $this->theme_list = Array(Array( 'theme_id'=>'oxygen', @@ -1829,6 +1829,10 @@ if ( $auto_init ) $this->init_vars(); } + function add_header($html) + { + $this->additional_headers .= "\n\n\n " . $html; + } function init_vars() { global $sideinfo; @@ -1875,7 +1879,7 @@ 'ADMIN_SID_QUES'=>$asq, 'ADMIN_SID_AMP'=>$asa, 'ADMIN_SID_AMP_HTML'=>'', - 'ADDITIONAL_HEADERS'=>'', + 'ADDITIONAL_HEADERS'=>$this->additional_headers, 'SIDEBAR_EXTRA'=>'', 'COPYRIGHT'=>'Enano and all of its code, graphics, and more code is copyright © 2006 Dan Fuhry.
This program is Free Software; see the file "GPL" included with this package for details.', 'TOOLBAR_EXTRAS'=>'', @@ -1923,11 +1927,15 @@ $this->tpl_bool['right_sidebar'] = $this->tpl_bool['sidebar_right']; // backward compatibility $this->tpl_bool['stupid_mode'] = true; } - function header() + function header($simple = false) { - if(!$this->no_headers) echo $this->process_template('header.tpl'); + $filename = ( $simple ) ? 'simple-header.tpl' : 'header.tpl'; + if ( !$this->no_headers ) + { + echo $this->process_template($filename); + } } - function footer() + function footer($simple = false) { global $db, $session, $paths, $template, $plugins; // Common objects if(!$this->no_headers) { @@ -1940,7 +1948,8 @@ else $nq = $db->num_queries; if($nq == 0) $nq = 'N/A'; $dbg = 'Time: '.$f.'s | Queries: '.$nq; - $t = $this->process_template('footer.tpl'); + $filename = ( $simple ) ? 'simple-footer.tpl' : 'footer.tpl'; + $t = $this->process_template($filename); $t = str_replace('[[Stats]]', $dbg, $t); if ( is_object($db) ) { diff -r 96524a56d475 -r 2d4bb97fa75a install.php --- a/install.php Wed Nov 21 15:11:51 2007 -0500 +++ b/install.php Wed Nov 21 15:13:06 2007 -0500 @@ -310,7 +310,7 @@ if ( !$conn ) return false; // Our list of tables included in Enano - $tables = Array( 'categories', 'comments', 'config', 'logs', 'page_text', 'session_keys', 'pages', 'users', 'users_extra', 'themes', 'buddies', 'banlist', 'files', 'privmsgs', 'sidebar', 'hits', 'search_index', 'groups', 'group_members', 'acl', 'search_cache', 'tags', 'page_groups', 'page_group_members' ); + $tables = Array( 'categories', 'comments', 'config', 'logs', 'page_text', 'session_keys', 'pages', 'users', 'users_extra', 'themes', 'buddies', 'banlist', 'files', 'privmsgs', 'sidebar', 'hits', 'search_index', 'groups', 'group_members', 'acl', 'tags', 'page_groups', 'page_group_members' ); // Drop each table individually; if it fails, it probably means we're trying to drop a // table that didn't exist in the Enano version we're deleting the database for. @@ -563,6 +563,14 @@ return true; } +function stg_build_index() +{ + global $db, $session, $paths, $template, $plugins; // Common objects; + if ( $paths->rebuild_search_index() ) + return true; + return false; +} + function stg_rename_config() { if ( !@rename('./config.new.php', './config.php') ) @@ -1602,6 +1610,8 @@ properly set cookies due to limitations with PHP. These limitations are exposed primarily when this issue is encountered during installation. If you choose to finish the installation, please be aware that you may be unable to log into your site.'); + run_installer_stage('buildindex', 'Initialize search index', 'stg_build_index', 'Something went wrong while the page manager was attempting to build a search index.'); + /* * HACKERS: * If you're making a custom distribution of Enano, put all your custom plugin-related code here. diff -r 96524a56d475 -r 2d4bb97fa75a plugins/SpecialAdmin.php --- a/plugins/SpecialAdmin.php Wed Nov 21 15:11:51 2007 -0500 +++ b/plugins/SpecialAdmin.php Wed Nov 21 15:13:06 2007 -0500 @@ -2479,12 +2479,12 @@ foreach($tables as $t) { // THE FOLLOWING COMMENT DOES NOT APPLY AS OF 1.0. - // Sorry folks - this script CAN'T backup enano_files, enano_search_index, and enano_search_cache due to the sheer size of the tables. + // Sorry folks - this script CAN'T backup enano_files and enano_search_index due to the sheer size of the tables. // If encryption is enabled the log data will be excluded too. echo export_table( $t, isset($_POST['do_struct']), - ( isset($_POST['do_data']) /* && $t != table_prefix.'files' && $t != table_prefix.'search_index' && $t != table_prefix.'search_cache' && ( !defined('SQL_BACKUP_CRYPT') || ( defined('SQL_BACKUP_CRYPT') && $t != table_prefix.'logs' ) ) */ ), + ( isset($_POST['do_data']) ), false ) . "\n"; } diff -r 96524a56d475 -r 2d4bb97fa75a plugins/SpecialSearch.php --- a/plugins/SpecialSearch.php Wed Nov 21 15:11:51 2007 -0500 +++ b/plugins/SpecialSearch.php Wed Nov 21 15:13:06 2007 -0500 @@ -52,7 +52,12 @@ function page_Special_Search() { global $db, $session, $paths, $template, $plugins; // Common objects - if(!$q = $paths->getParam(0)) $q = ( isset($_GET['q']) ) ? $_GET['q'] : false; + global $aggressive_optimize_html; + $aggressive_optimize_html = false; + + if ( !$q = $paths->getParam(0) ) + $q = ( isset($_GET['q']) ) ? $_GET['q'] : ''; + if(isset($_GET['words_any'])) { $q = ''; @@ -85,184 +90,162 @@ } $q = trim($q); - if ( !empty($q) && !isset($_GET['search']) ) + $template->header(); + + $qin = ( isset($q) ) ? str_replace('"', '\"', htmlspecialchars($q)) : ''; + $search_form = '
+   + ' . ( $session->auth_level > USER_LEVEL_MEMBER ? '' : '' ) . ' +
'; + + if ( !empty($q) ) { - list($pid, $ns) = RenderMan::strToPageID($q); - $pid = sanitize_page_id($pid); - $key = $paths->nslist[$ns] . $pid; - if ( isPage($key) ) - { - redirect(makeUrl($key), 'Results', 'found page', 0); - } - } - - $template->header(); - if(!empty($q)) - { - // See if any pages directly match the title + $search_start = microtime_float(); + + $results = perform_search($q, $warn, ( isset($_GET['match_case']) )); + $warn = array_unique($warn); - if ( strlen($q) >= 4 ) + if ( file_exists( ENANO_ROOT . '/themes/' . $template->theme . '/search-result.tpl' ) ) + { + $parser = $template->makeParser('search-result.tpl'); + } + else { - for ( $i = 0; $i < count ( $paths->pages ) / 2; $i++ ) - { - $pg =& $paths->pages[$i]; - $q_lc = strtolower( str_replace(' ', '_', $q) ); - $q_tl = strtolower( str_replace('_', ' ', $q) ); - $p_lc = strtolower($pg['urlname']); - $p_tl = strtolower($pg['name']); - if ( strstr($p_tl, $q_tl) || strstr($p_lc, $q_lc) && $pg['visible'] == 1 ) - { - echo '
Perhaps you were looking for ' . htmlspecialchars($pg['name']) . '?
'; - break; - } - } + $tpl_code = << + +
+

+

{PAGE_NOTE}{PAGE_TITLE}

+ {PAGE_TEXT} + {PAGE_URL} - + {PAGE_LENGTH} {PAGE_LENGTH_UNIT} - + Relevance: {RELEVANCE_SCORE}% +

+
+ + + +LONGSTRING; + $parser = $template->makeParserText($tpl_code); } - - switch(SEARCH_MODE) + foreach ( $results as $i => $_ ) { + $result =& $results[$i]; + $result['page_text'] = str_replace(array('', ''), array('', ''), $result['page_text']); + if ( !empty($result['page_text']) ) + $result['page_text'] .= '
'; + $result['page_name'] = str_replace(array('', ''), array('', ''), $result['page_name']); + if ( $result['page_length'] >= 1048576 ) + { + $result['page_length'] = round($result['page_length'] / 1048576, 1); + $length_unit = 'MB'; + } + else if ( $result['page_length'] >= 1024 ) + { + $result['page_length'] = round($result['page_length'] / 1024, 1); + $length_unit = 'KB'; + } + else + { + $length_unit = 'bytes'; + } + $url = makeUrlComplete($result['namespace'], $result['page_id']); + $url = preg_replace('/\?.+$/', '', $url); + $parser->assign_vars(array( + 'PAGE_TITLE' => $result['page_name'], + 'PAGE_TEXT' => $result['page_text'], + 'PAGE_LENGTH' => $result['page_length'], + 'RELEVANCE_SCORE' => $result['score'], + 'RESULT_URL' => makeUrlNS($result['namespace'], $result['page_id'], false, true), + 'PAGE_LENGTH_UNIT' => $length_unit, + 'PAGE_URL' => $url, + 'PAGE_NOTE' => ( isset($result['page_note']) ? $result['page_note'] . ' ' : '' ) + )); + $has_content = ( $result['namespace'] == 'Special' ); - case "FULLTEXT": - if ( isset($_GET['offset']) ) - { - $offset = intval($_GET['offset']); - } - else - { - $offset = 0; - } - $sql = $db->sql_query('SELECT search_id FROM '.table_prefix.'search_cache WHERE query=\''.$db->escape($q).'\';'); - if(!$sql) - { - $db->_die('Error scanning search query cache'); - } - if($db->numrows() > 0) - { - $row = $db->fetchrow(); - $db->free_result(); - search_fetch_fulltext_results(intval($row['search_id']), $offset); - } - else - { - // Perform search - - $search = new MySQL_Fulltext_Search(); - - // Parse the query - $parse = new Searcher(); - $query = $parse->parseQuery($q); - unset($parse); + $code = $plugins->setHook('search_global_results'); + foreach ( $code as $cmd ) + { + eval($cmd); + } + + $parser->assign_bool(array( + 'special_page' => $has_content + )); + $result = $parser->run(); + } + unset($result); + + $per_page = 10; + $start = ( isset($_GET['start']) ? intval($_GET['start']) : 0 ); + $start_string = $start + 1; + $per_string = $start_string + $per_page - 1; + $num_results = count($results); + if ( $per_string > $num_results ) + $per_string = $num_results; + + $search_time = microtime_float() - $search_start; + $search_time = round($search_time, 3); + + $q_trim = ( strlen($q) > 30 ) ? substr($q, 0, 27) . '...' : $q; + $q_trim = htmlspecialchars($q_trim); + + $result_string = ( count($results) > 0 ) ? "Results $start_string - $per_string of about $num_results for " . $q_trim . " in {$search_time}s." : 'No results.'; + + echo '
+
+ ' . $result_string . ' +
+ Site search +
+
+ ' . $search_form . ' +
'; - // Send query to MySQL - $sql = $search->search($q); - $results = Array(); - if ( $row = $db->fetchrow($sql) ) - { - do { - $results[] = $row; - } while ( $row = $db->fetchrow($sql) ); - } - else - { - // echo '
No pages that matched your search criteria could be found.
'; - } - $texts = Array(); - foreach ( $results as $result ) - { - $texts[] = render_fulltext_result($result, $query); - } - - // Store the result in the search cache...if someone makes the same query later we can skip searching and rendering - // This cache is cleared when an affected page is saved. - - $results = serialize($texts); - - $sql = $db->sql_query('INSERT INTO '.table_prefix.'search_cache(search_time,query,results) VALUES('.time().', \''.$db->escape($q).'\', \''.$db->escape($results).'\');'); - if($sql) - { - search_render_fulltext_results(unserialize($results), $offset, $q); - } - else - { - $db->_die('Error inserting search into cache'); - } - - } - break; - - case "BUILTIN": - $titles = $paths->makeTitleSearcher(isset($_GET['match_case'])); - if ( isset($_GET['offset']) ) - { - $offset = intval($_GET['offset']); - } - else - { - $offset = 0; - } - $sql = $db->sql_query('SELECT search_id FROM '.table_prefix.'search_cache WHERE query=\''.$db->escape($q).'\';'); - if(!$sql) - { - $db->_die('Error scanning search query cache'); - } - if($db->numrows() > 0) - { - $row = $db->fetchrow(); - $db->free_result(); - search_show_results(intval($row['search_id']), $offset); - } - else - { - $titles->search($q, $paths->get_page_titles()); - $search = $paths->makeSearcher(isset($_GET['match_case'])); - $texts = $paths->fetch_page_search_resource(); - $search->searchMySQL($q, $texts); - - $results = Array(); - $results['text'] = $search->results; - $results['page'] = $titles->results; - $results['warn'] = $search->warnings; - - $results = serialize($results); - - $sql = $db->sql_query('INSERT INTO '.table_prefix.'search_cache(search_time,query,results) VALUES('.time().', \''.$db->escape($q).'\', \''.$db->escape($results).'\');'); - if($sql) - { - search_render_results(unserialize($results), $offset, $q); - } - else - { - $db->_die('Error inserting search into cache'); - } - } - break; + if ( count($warn) > 0 ) + { + echo '
'; + echo 'Some problems were encountered during your search.
+ There was a problem with your search query, and as a result there may be a reduced number of search results.'; + echo '
  • ' . implode('
  • ', $warn) . '
'; + echo '
'; } - $code = $plugins->setHook('search_results'); // , Array('query'=>$q)); - foreach ( $code as $cmd ) + + if ( count($results) > 0 ) { - eval($cmd); + $html = paginate_array( + $results, + count($results), + makeUrlNS('Special', 'Search', 'q=' . str_replace('%', '%%', htmlspecialchars(urlencode($q))) . '&start=%s'), + $start, + $per_page + ); + echo $html; } - ?> -
-

- sid_super ): ?> - - - - - - Advanced Search -

-
- Your search for "' . htmlspecialchars($q) . '" didn\'t turn up any results.'; + echo '

There are a few things you can try:

'; + echo ''; + } } else { - ?> -
+ ?>
- + auth_level > USER_LEVEL_MEMBER ? '' : '' ); + endif; ?>
@@ -298,255 +281,10 @@
Advanced Search
- footer(); } -function search_show_results($search_id, $start = 0) -{ - global $db, $session, $paths, $template, $plugins; // Common objects - $q = $db->sql_query('SELECT query,results,search_time FROM '.table_prefix.'search_cache WHERE search_id='.intval($search_id).';'); - if(!$q) - return $db->get_error('Error selecting cached search results'); - $row = $db->fetchrow(); - $db->free_result(); - $results = unserialize($row['results']); - search_render_results($results, $start, $row['query']); -} - -function search_render_results($results, $start = 0, $q = '') -{ - global $db, $session, $paths, $template, $plugins; // Common objects - $nr1 = sizeof($results['page']); - $nr2 = sizeof($results['text']); - $nr = ( $nr1 > $nr2 ) ? $nr1 : $nr2; - $results['page'] = array_slice($results['page'], $start, SEARCH_RESULTS_PER_PAGE); - $results['text'] = array_slice($results['text'], $start, SEARCH_RESULTS_PER_PAGE); - - // Pagination - $pagination = ''; - if ( $nr1 > SEARCH_RESULTS_PER_PAGE || $nr2 > SEARCH_RESULTS_PER_PAGE ) - { - $pagination .= '
- - - '; - $num_pages = ceil($nr / SEARCH_RESULTS_PER_PAGE); - $j = 0; - for ( $i = 1; $i <= $num_pages; $i++ ) - { - if ($j == $start) - $pagination .= ''; - else - $pagination .= ''; - $j = $j + SEARCH_RESULTS_PER_PAGE; - } - $pagination .= '
Page:' . $i . '' . $i . '
'; - } - - echo $pagination; - - if ( $nr1 >= $start ) - { - echo '

Page title matches

'; - if(count($results['page']) < 1) - { - echo '
No pages with a title that matched your search criteria could be found.
'; - } - else - { - echo '

'; - foreach($results['page'] as $page => $text) - { - echo ''.$paths->pages[$page]['name'].'
'; - } - echo '

'; - } - } - if ( $nr2 >= $start ) - { - echo '

Page text matches

'; - if(count($results['text']) < 1) - { - echo '
No page text that matched your search criteria could be found.
'; - } - else - { - foreach($results['text'] as $kpage => $text) - { - preg_match('#^ns=('.implode('|', array_keys($paths->nslist)).');pid=(.*?)$#i', $kpage, $matches); - $page = $paths->nslist[$matches[1]] . $matches[2]; - echo '

'.$paths->pages[$page]['name'].'
'.$text.'

'; - } - } - } - if(count($results['warn']) > 0) - echo '
Your search may not include all results.
The following errors were encountered during the search:
  • '.implode('
  • ', $results['warn']).'
'; - echo $pagination; -} - -function render_fulltext_result($result, $query) -{ - global $db, $session, $paths, $template, $plugins; // Common objects - preg_match('#^ns=('.implode('|', array_keys($paths->nslist)).');pid=(.*?)$#i', $result['page_identifier'], $matches); - $page = $paths->nslist[$matches[1]] . $matches[2]; - //$score = round($result['score'] * 100, 1); - $score = number_format($result['score'], 2); - $char_length = $result['length']; - $result_template = << -

{TITLE}

-

{TEXT}

-

- {NAMESPACE} - Relevance score: {SCORE} ({LENGTH} bytes) -

- -TPLCODE; - $parser = $template->makeParserText($result_template); - - $pt =& $result['page_text']; - $space_chars = Array("\t", "\n", "\r", " "); - - $words = array_merge($query['any'], $query['req']); - $pt = htmlspecialchars($pt); - $words2 = array(); - - for ( $i = 0; $i < sizeof($words); $i++) - { - if(!empty($words[$i])) - $words2[] = preg_quote($words[$i]); - } - - $regex = '/(' . implode('|', $words2) . ')/i'; - $pt = preg_replace($regex, '\\1', $pt); - - $title = preg_replace($regex, '\\1', htmlspecialchars($paths->pages[$page]['name'])); - - $cut_off = false; - - foreach ( $words as $word ) - { - // Boldface searched words - $ptlen = strlen($pt); - for ( $i = 0; $i < $ptlen; $i++ ) - { - $len = strlen($word); - if ( strtolower(substr($pt, $i, $len)) == strtolower($word) ) - { - $chunk1 = substr($pt, 0, $i); - $chunk2 = substr($pt, $i, $len); - $chunk3 = substr($pt, ( $i + $len )); - $pt = $chunk1 . $chunk2 . $chunk3; - $ptlen = strlen($pt); - // Cut off text to 150 chars or so - if ( !$cut_off ) - { - $cut_off = true; - if ( $i - 75 > 0 ) - { - // Navigate backwards until a space character is found - $chunk = substr($pt, 0, ( $i - 75 )); - $final_chunk = $chunk; - for ( $j = strlen($chunk); $j > 0; $j = $j - 1 ) - { - if ( in_array($chunk{$j}, $space_chars) ) - { - $final_chunk = substr($chunk, $j + 1); - break; - } - } - $mid_chunk = substr($pt, ( $i - 75 ), 75); - - $clipped = '...' . $final_chunk . $mid_chunk . $chunk2; - - $chunk = substr($pt, ( $i + strlen($chunk2) + 75 )); - $final_chunk = $chunk; - for ( $j = 0; $j < strlen($chunk); $j++ ) - { - if ( in_array($chunk{$j}, $space_chars) ) - { - $final_chunk = substr($chunk, 0, $j); - break; - } - } - - $end_chunk = substr($pt, ( $i + strlen($chunk2) ), 75 ); - - $clipped .= $end_chunk . $final_chunk . '...'; - - $pt = $clipped; - } - else if ( strlen($pt) > 200 ) - { - $mid_chunk = substr($pt, ( $i - 75 ), 75); - - $clipped = $chunk1 . $chunk2; - - $chunk = substr($pt, ( $i + strlen($chunk2) + 75 )); - $final_chunk = $chunk; - for ( $j = 0; $j < strlen($chunk); $j++ ) - { - if ( in_array($chunk{$j}, $space_chars) ) - { - $final_chunk = substr($chunk, 0, $j); - break; - } - } - - $end_chunk = substr($pt, ( $i + strlen($chunk2) ), 75 ); - - $clipped .= $end_chunk . $final_chunk . '...'; - - $pt = $clipped; - - } - break 2; - } - } - } - $cut_off = false; - } - - $parser->assign_vars(Array( - 'TITLE' => $title, - 'TEXT' => $pt, - 'NAMESPACE' => $matches[1], - 'SCORE' => $score, - 'LENGTH' => $char_length, - 'HREF' => makeUrl($page) - )); - - return $parser->run(); - -} - -function search_fetch_fulltext_results($search_id, $offset = 0) -{ - global $db, $session, $paths, $template, $plugins; // Common objects - $q = $db->sql_query('SELECT query,results,search_time FROM '.table_prefix.'search_cache WHERE search_id='.intval($search_id).';'); - if(!$q) - return $db->get_error('Error selecting cached search results'); - $row = $db->fetchrow(); - $db->free_result(); - $results = unserialize($row['results']); - search_render_fulltext_results($results, $offset, $row['query']); -} - -function search_render_fulltext_results($results, $offset = 0, $query) -{ - $num_results = sizeof($results); - $slice = array_slice($results, $offset, SEARCH_RESULTS_PER_PAGE); - - if ( $num_results < 1 ) - { - echo '
No page text that matched your search criteria could be found.
'; - return null; - } - - $html = paginate_array($results, sizeof($results), makeUrlNS('Special', 'Search', 'q=' . urlencode($query) . '&offset=%s'), $offset, 10); - echo $html . '
'; - -} - ?> diff -r 96524a56d475 -r 2d4bb97fa75a schema.sql --- a/schema.sql Wed Nov 21 15:11:51 2007 -0500 +++ b/schema.sql Wed Nov 21 15:13:06 2007 -0500 @@ -14,7 +14,7 @@ page_id varchar(64), namespace varchar(64), category_id varchar(64) -) CHARACTER SET `utf8`; +) CHARACTER SET `utf8` COLLATE `utf8_bin`; CREATE TABLE {{TABLE_PREFIX}}comments( comment_id int(12) NOT NULL auto_increment, @@ -27,12 +27,12 @@ user_id mediumint(8) NOT NULL DEFAULT -1, time int(12) NOT NULL DEFAULT 0, PRIMARY KEY ( comment_id ) -) CHARACTER SET `utf8`; +) CHARACTER SET `utf8` COLLATE `utf8_bin`; CREATE TABLE {{TABLE_PREFIX}}config( config_name varchar(63), config_value text -) CHARACTER SET `utf8`; +) CHARACTER SET `utf8` COLLATE `utf8_bin`; CREATE TABLE {{TABLE_PREFIX}}logs( log_type varchar(16), @@ -46,7 +46,7 @@ author varchar(63), edit_summary text, minor_edit tinyint(1) -) CHARACTER SET `utf8`; +) CHARACTER SET `utf8` COLLATE `utf8_bin`; CREATE TABLE {{TABLE_PREFIX}}page_text( page_id varchar(63), @@ -69,7 +69,7 @@ delvotes int(10) NOT NULL default 0, password varchar(40) NOT NULL DEFAULT '', delvote_ips text DEFAULT NULL -) CHARACTER SET `utf8`; +) CHARACTER SET `utf8` COLLATE `utf8_bin`; CREATE TABLE {{TABLE_PREFIX}}session_keys( session_key varchar(32), @@ -78,7 +78,7 @@ auth_level tinyint(1) NOT NULL default '0', source_ip varchar(10) default '0x7f000001', time bigint(15) default '0' -) CHARACTER SET `utf8`; +) CHARACTER SET `utf8` COLLATE `utf8_bin`; CREATE TABLE {{TABLE_PREFIX}}themes( theme_id varchar(63), @@ -86,7 +86,7 @@ theme_order smallint(5) NOT NULL default '1', default_style varchar(63) NOT NULL DEFAULT '', enabled tinyint(1) NOT NULL default '1' -) CHARACTER SET `utf8`; +) CHARACTER SET `utf8` COLLATE `utf8_bin`; CREATE TABLE {{TABLE_PREFIX}}users( user_id mediumint(8) NOT NULL auto_increment, @@ -106,7 +106,7 @@ temp_password_time int(12) NOT NULL DEFAULT 0, user_coppa tinyint(1) NOT NULL DEFAULT 0, PRIMARY KEY (user_id) -) CHARACTER SET `utf8`; +) CHARACTER SET `utf8` COLLATE `utf8_bin`; CREATE TABLE {{TABLE_PREFIX}}users_extra( user_id mediumint(8) NOT NULL, @@ -120,7 +120,7 @@ user_hobbies text, email_public tinyint(1) NOT NULL DEFAULT 0, PRIMARY KEY ( user_id ) -) CHARACTER SET `utf8`; +) CHARACTER SET `utf8` COLLATE `utf8_bin`; CREATE TABLE {{TABLE_PREFIX}}banlist( ban_id mediumint(8) NOT NULL auto_increment, @@ -129,7 +129,7 @@ is_regex tinyint(1) DEFAULT 0, reason text, PRIMARY KEY ( ban_id ) -) CHARACTER SET `utf8`; +) CHARACTER SET `utf8` COLLATE `utf8_bin`; CREATE TABLE {{TABLE_PREFIX}}files( file_id int(12) NOT NULL auto_increment, @@ -141,7 +141,7 @@ file_extension varchar(8) default NULL, file_key varchar(32) NOT NULL, PRIMARY KEY (file_id) -) CHARACTER SET `utf8`; +) CHARACTER SET `utf8` COLLATE `utf8_bin`; CREATE TABLE {{TABLE_PREFIX}}buddies( buddy_id int(15) NOT NULL auto_increment, @@ -149,7 +149,7 @@ buddy_user_id mediumint(8), is_friend tinyint(1) NOT NULL default '1', PRIMARY KEY (buddy_id) -) CHARACTER SET `utf8`; +) CHARACTER SET `utf8` COLLATE `utf8_bin`; CREATE TABLE {{TABLE_PREFIX}}privmsgs( message_id int(15) NOT NULL auto_increment, @@ -161,7 +161,7 @@ folder_name varchar(63), message_read tinyint(1) NOT NULL DEFAULT 0, PRIMARY KEY (message_id) -) CHARACTER SET `utf8`; +) CHARACTER SET `utf8` COLLATE `utf8_bin`; CREATE TABLE {{TABLE_PREFIX}}sidebar( item_id smallint(3) NOT NULL auto_increment, @@ -172,7 +172,7 @@ block_type tinyint(1) NOT NULL DEFAULT 0, block_content text, PRIMARY KEY ( item_id ) -) CHARACTER SET `utf8`; +) CHARACTER SET `utf8` COLLATE `utf8_bin`; CREATE TABLE {{TABLE_PREFIX}}hits( hit_id bigint(20) NOT NULL auto_increment, @@ -181,13 +181,13 @@ page_id varchar(63), namespace varchar(63), PRIMARY KEY ( hit_id ) -) CHARACTER SET `utf8`; +) CHARACTER SET `utf8` COLLATE `utf8_bin`; CREATE TABLE {{TABLE_PREFIX}}search_index( word varbinary(64) NOT NULL, page_names text, PRIMARY KEY ( word ) -) CHARACTER SET `utf8`; +) CHARACTER SET `utf8` COLLATE `utf8_bin`; CREATE TABLE {{TABLE_PREFIX}}groups( group_id mediumint(5) UNSIGNED NOT NULL auto_increment, @@ -195,7 +195,7 @@ group_type tinyint(1) NOT NULL DEFAULT 1, PRIMARY KEY ( group_id ), system_group tinyint(1) NOT NULL DEFAULT 0 -) CHARACTER SET `utf8`; +) CHARACTER SET `utf8` COLLATE `utf8_bin`; CREATE TABLE {{TABLE_PREFIX}}group_members( member_id int(12) UNSIGNED NOT NULL auto_increment, @@ -204,7 +204,7 @@ is_mod tinyint(1) NOT NULL DEFAULT 0, pending tinyint(1) NOT NULL DEFAULT 0, PRIMARY KEY ( member_id ) -) CHARACTER SET `utf8`; +) CHARACTER SET `utf8` COLLATE `utf8_bin`; CREATE TABLE {{TABLE_PREFIX}}acl( rule_id int(12) UNSIGNED NOT NULL auto_increment, @@ -214,15 +214,7 @@ namespace varchar(24), rules text, PRIMARY KEY ( rule_id ) -) CHARACTER SET `utf8`; - -CREATE TABLE {{TABLE_PREFIX}}search_cache( - search_id int(15) NOT NULL auto_increment, - search_time int(11) NOT NULL, - query text, - results longblob, - PRIMARY KEY ( search_id ) -) CHARACTER SET `utf8`; +) CHARACTER SET `utf8` COLLATE `utf8_bin`; -- Added in 1.0.1 @@ -232,7 +224,7 @@ pg_name varchar(255) NOT NULL DEFAULT '', pg_target varchar(255) DEFAULT NULL, PRIMARY KEY ( pg_id ) -) CHARACTER SET `utf8`; +) CHARACTER SET `utf8` COLLATE `utf8_bin`; -- Added in 1.0.1 @@ -242,7 +234,7 @@ page_id varchar(63) NOT NULL, namespace varchar(63) NOT NULL DEFAULT 'Article', PRIMARY KEY ( pg_member_id ) -) CHARACTER SET `utf8`; +) CHARACTER SET `utf8` COLLATE `utf8_bin`; -- Added in 1.0.1 @@ -253,7 +245,7 @@ namespace varchar(255) NOT NULL, user mediumint(8) NOT NULL DEFAULT 1, PRIMARY KEY ( tag_id ) -) CHARACTER SET `utf8`; +) CHARACTER SET `utf8` COLLATE `utf8_bin`; INSERT INTO {{TABLE_PREFIX}}config(config_name, config_value) VALUES ('site_name', '{{SITE_NAME}}'), diff -r 96524a56d475 -r 2d4bb97fa75a upgrade.php --- a/upgrade.php Wed Nov 21 15:11:51 2007 -0500 +++ b/upgrade.php Wed Nov 21 15:13:06 2007 -0500 @@ -734,11 +734,18 @@ // OK, do the loop, baby!!! foreach($schema as $q) { - $r = $db->sql_query($q); - if(!$r) + if ( substr($q, 0, 1) == '@' ) { - echo $db->get_error(); - break 2; + // if the first character is @, don't fail on error + $db->sql_query(substr($q, 1)); + } + else + { + if ( !$db->sql_query($q) ) + { + echo $db->get_error(); + break 2; + } } } diff -r 96524a56d475 -r 2d4bb97fa75a upgrade.sql --- a/upgrade.sql Wed Nov 21 15:11:51 2007 -0500 +++ b/upgrade.sql Wed Nov 21 15:13:06 2007 -0500 @@ -7,6 +7,13 @@ ---BEGIN 1.0.2b1--- -- This is really optional, but could reduce confusion if regex page groups get truncated for no apparent reason. ALTER TABLE {{TABLE_PREFIX}}page_groups MODIFY COLUMN pg_target text DEFAULT NULL; +-- I have no idea how or why, but the f'ing index didn't get created for who-knows-how-many releases. +-- We'll attempt to create it here, but don't die if it fails +@ALTER TABLE {{TABLE_PREFIX}}page_text ENGINE = MYISAM COLLATE = utf8_bin; +@CREATE FULLTEXT INDEX {{TABLE_PREFIX}}page_search_idx ON {{TABLE_PREFIX}}page_text(page_id, namespace, page_text); +ALTER TABLE {{TABLE_PREFIX}}search_index COLLATE = utf8_bin, MODIFY COLUMN word varchar(64) NOT NULL; +-- The search cache is no longer needed because of the new unified search engine +@DROP TABLE {{TABLE_PREFIX}}search_cache; ---END 1.0.2b1--- ---BEGIN 1.0.1.1--- ---END 1.0.1.1--- @@ -16,6 +23,7 @@ -- Fix for obnoxious $_GET issue UPDATE {{TABLE_PREFIX}}sidebar SET block_type=1,block_content='

$INPUT_AUTH$

' WHERE block_name='Search' AND item_id=4; -- Added on advice from Neal +-- Remember that 1 = AUTH_DENY. INSERT INTO {{TABLE_PREFIX}}acl(target_type,target_id,page_id,namespace,rules) VALUES(2,1,'Memberlist','Special','read=1;mod_misc=1;upload_files=1;upload_new_version=1;create_page=1;edit_acl=1;'); -- Bugfix for MySQL 5.0.45, see http://forum.enanocms.org/viewtopic.php?f=5&t=8 ALTER TABLE {{TABLE_PREFIX}}pages MODIFY COLUMN delvote_ips text DEFAULT NULL;