includes/search.php
changeset 461 717e71109645
parent 411 d1a95497b68f
child 499 6b7fdd898ba3
equal deleted inserted replaced
460:3a1c99845ca8 461:717e71109645
   575   // Sort scores array
   575   // Sort scores array
   576   arsort($scores);
   576   arsort($scores);
   577 
   577 
   578   // Divisor for calculating relevance scores
   578   // Divisor for calculating relevance scores
   579   $divisor = ( count($query['any']) + count($query_phrase['any']) + count($query['req']) + count($query['not']) ) * 1.5;
   579   $divisor = ( count($query['any']) + count($query_phrase['any']) + count($query['req']) + count($query['not']) ) * 1.5;
   580 
   580   
   581   foreach ( $scores as $page_id => $score )
   581   foreach ( $scores as $page_id => $score )
   582   {
   582   {
   583     if ( !isset($page_data[$page_id]) )
   583     if ( !isset($page_data[$page_id]) )
   584       // It's possible that $scores contains a score for a page that was later eliminated because it contained a disallowed term
   584       // It's possible that $scores contains a score for a page that was later eliminated because it contained a disallowed term
   585       continue;
   585       continue;
   922   }
   922   }
   923   return $pt;
   923   return $pt;
   924 }
   924 }
   925 
   925 
   926 /**
   926 /**
   927  * Returns a list of words that shouldn't under most circumstances be indexed for searching. Kudos to MySQL.
   927  * Returns a list of words that shouldn't under most circumstances be indexed for searching.
   928  * @return array
   928  * @return array
   929  * @see http://dev.mysql.com/doc/refman/5.0/en/fulltext-stopwords.html
       
   930  */
   929  */
   931 
   930 
   932 function get_stopwords()
   931 function get_stopwords()
   933 {
   932 {
   934   static $stopwords;
   933   static $stopwords;
   935   if ( is_array($stopwords) )
   934   if ( is_array($stopwords) )
   936     return $stopwords;
   935     return $stopwords;
   937 
   936 
   938   $stopwords = array('a\'s', 'able', 'after', 'afterwards', 'again',
   937   $stopwords = array('I', 'a', 'about', 'an', 'are', 'as', 'at', 'be', 'by', 'com', 'de', 'en', 'for', 'from', 'how', 'in', 'is', 'it',
   939                      'against', 'ain\'t', 'all', 'almost', 'alone', 'along', 'already', 'also', 'although', 'always',
   938                      'la', 'of', 'on', 'or', 'that', 'the', 'this', 'to', 'was', 'what', 'when', 'where', 'who', 'will', 'with', 'and',
   940                      'am', 'among', 'amongst', 'an', 'and', 'another', 'any', 'anybody', 'anyhow', 'anyone', 'anything', 'anyway',
   939                      'the');
   941                      'anyways', 'anywhere', 'apart', 'appear', 'appreciate', 'appropriate', 'are', 'aren\'t', 'around', 'as', 'aside',
   940   
   942                      'ask', 'asking', 'associated', 'at', 'available', 'away', 'awfully', 'be', 'became', 'because', 'become', 'becomes',
       
   943                      'becoming', 'been', 'before', 'beforehand', 'behind', 'being', 'believe', 'below', 'beside', 'besides', 'best',
       
   944                      'better', 'between', 'beyond', 'both', 'brief', 'but', 'by', 'c\'mon', 'c\'s', 'came', 'can', 'can\'t', 'cannot',
       
   945                      'cant', 'cause', 'causes', 'certain', 'certainly', 'changes', 'clearly', 'co', 'com', 'come', 'comes', 'concerning',
       
   946                      'consequently', 'consider', 'considering', 'contain', 'containing', 'contains', 'corresponding', 'could',
       
   947                      'couldn\'t', 'course', 'despite', 'did', 'didn\'t', 'different', 'do',
       
   948                      'does', 'doesn\'t', 'doing', 'don\'t', 'done', 'down', 'downwards', 'during', 'each', 'edu', 'eg', 'eight',
       
   949                      'either', 'else', 'elsewhere', 'enough', 'entirely', 'especially', 'et', 'etc', 'even', 'ever', 'every',
       
   950                      'everybody', 'everyone', 'everything', 'everywhere', 'ex', 'exactly', 'example', 'except', 'far', 'few', 'fifth',
       
   951                      'first', 'five', 'followed', 'following', 'follows', 'for', 'former', 'formerly', 'forth', 'four', 'from',
       
   952                      'further', 'get', 'gets', 'getting', 'given', 'gives', 'go', 'goes', 'going', 'gone', 'got',
       
   953                      'gotten', 'had', 'hadn\'t', 'happens', 'hardly', 'has', 'hasn\'t', 'have', 'haven\'t', 'having',
       
   954                      'he', 'he\'s', 'hello', 'help', 'hence', 'her', 'here', 'here\'s', 'hereafter', 'hereby', 'herein', 'hereupon',
       
   955                      'hers', 'herself', 'hi', 'him', 'himself', 'his', 'hither', 'hopefully', 'how', 'howbeit', 'however', 'i\'d',
       
   956                      'i\'ll', 'i\'m', 'i\'ve', 'ie', 'if', 'ignored', 'immediate', 'in', 'inasmuch', 'inc', 'indeed', 'indicate',
       
   957                      'indicated', 'indicates', 'inner', 'insofar', 'instead', 'into', 'inward', 'is', 'isn\'t', 'it', 'it\'d', 'it\'ll',
       
   958                      'it\'s', 'its', 'itself', 'just', 'keep', 'keeps', 'kept', 'know', 'knows', 'known', 'last', 'lately', 'later',
       
   959                      'latter', 'latterly', 'least', 'less', 'lest', 'let', 'let\'s', 'like', 'liked', 'likely', 'little', 'look',
       
   960                      'looking', 'looks', 'ltd', 'mainly', 'many', 'may', 'maybe', 'me', 'mean', 'meanwhile', 'merely', 'might', 'more',
       
   961                      'moreover', 'most', 'mostly', 'much', 'must', 'my', 'myself', 'name', 'namely', 'nd', 'near', 'nearly', 'necessary',
       
   962                      'need', 'needs', 'neither', 'never', 'nevertheless', 'new', 'next', 'nine', 'no', 'nobody', 'non', 'none', 'noone',
       
   963                      'nor', 'normally', 'not', 'nothing', 'novel', 'now', 'nowhere', 'obviously', 'of', 'off', 'often', 'oh', 'ok',
       
   964                      'okay', 'old', 'on', 'once', 'one', 'ones', 'only', 'onto', 'or', 'other', 'others', 'otherwise', 'ought', 'our',
       
   965                      'ours', 'ourselves', 'out', 'outside', 'over', 'overall', 'own', 'particular', 'particularly', 'per', 'perhaps',
       
   966                      'placed', 'please', 'plus', 'possible', 'presumably', 'probably', 'provides', 'que', 'quite', 'qv', 'rather', 'rd',
       
   967                      're', 'really', 'reasonably', 'regarding', 'regardless', 'regards', 'relatively', 'respectively', 'right', 'said',
       
   968                      'same', 'saw', 'say', 'saying', 'says', 'second', 'secondly', 'see', 'seeing', 'seem', 'seemed', 'seeming', 'seems',
       
   969                      'seen', 'self', 'selves', 'sensible', 'sent', 'serious', 'seriously', 'seven', 'several', 'shall', 'she', 'should',
       
   970                      'shouldn\'t', 'since', 'six', 'so', 'some', 'somebody', 'somehow', 'someone', 'something', 'sometime', 'sometimes',
       
   971                      'somewhat', 'somewhere', 'soon', 'sorry', 'specified', 'specify', 'specifying', 'still', 'sub', 'such', 'sup',
       
   972                      'sure', 't\'s', 'take', 'taken', 'tell', 'tends', 'th', 'than', 'thank', 'thanks', 'thanx', 'that', 'that\'s',
       
   973                      'thats', 'the', 'their', 'theirs', 'them', 'then', 'thence', 'there', 'there\'s', 'thereafter',
       
   974                      'thereby', 'therefore', 'therein', 'theres', 'thereupon', 'these', 'they', 'they\'d', 'they\'ll', 'they\'re',
       
   975                      'they\'ve', 'think', 'third', 'this', 'thorough', 'thoroughly', 'those', 'though', 'three', 'through', 'throughout',
       
   976                      'thru', 'thus', 'to', 'together', 'too', 'took', 'toward', 'towards', 'tried', 'tries', 'truly', 'try', 'trying',
       
   977                      'twice', 'two', 'un', 'under', 'unfortunately', 'unless', 'unlikely', 'until', 'unto', 'upon', 'use',
       
   978                      'used', 'useful', 'uses', 'using', 'usually', 'value', 'various', 'very',
       
   979                      'was', 'wasn\'t', 'way', 'we', 'we\'d', 'we\'ll', 'we\'re', 'we\'ve', 'welcome', 'well', 'went', 'were', 'weren\'t',
       
   980                      'what', 'what\'s', 'whatever', 'when', 'whence', 'whenever', 'where', 'where\'s', 'whereafter', 'whereas',
       
   981                      'which', 'while', 'who', 'who\'s', 'whole', 'whom', 'whose', 'why', 'will', 'willing', 'wish', 'with', 'within',
       
   982                      'without', 'won\'t', 'wonder', 'would', 'would', 'wouldn\'t', 'yes', 'yet', 'you', 'you\'d', 'you\'ll', 'you\'re',
       
   983                      'you\'ve', 'your', 'yours', 'zero');
       
   984   return $stopwords;
   941   return $stopwords;
   985 }
   942 }
   986 
   943 
   987 ?>
   944 ?>