author | Dan |
Fri, 30 Nov 2007 22:13:03 -0500 | |
changeset 301 | 7e094a04d84e |
parent 292 | b3cfaf0a505c |
child 320 | 112debff64bd |
permissions | -rw-r--r-- |
1 | 1 |
<?php |
2 |
||
3 |
/* |
|
4 |
* Enano - an open-source CMS capable of wiki functions, Drupal-like sidebar blocks, and everything in between |
|
166
d53cc29308f4
Rebrand as 1.1.1; everything should now be bumped to "unstable" status
Dan
parents:
142
diff
changeset
|
5 |
* Version 1.1.1 |
1 | 6 |
* Copyright (C) 2006-2007 Dan Fuhry |
7 |
* search.php - algorithm used to search pages |
|
8 |
* |
|
9 |
* This program is Free Software; you can redistribute and/or modify it under the terms of the GNU General Public License |
|
10 |
* as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. |
|
11 |
* |
|
12 |
* This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied |
|
13 |
* warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for details. |
|
14 |
*/ |
|
15 |
||
16 |
/**
 * Implementation of array_merge() that preserves key names. $arr2 takes precedence over $arr1.
 *
 * Every key of $arr2 is written onto a copy of $arr1, so integer keys are kept as-is instead of
 * being renumbered. Keys already present in $arr1 keep their original position (only the value
 * changes); keys that exist only in $arr2 are appended in $arr2's order.
 *
 * @param array $arr1 Base array
 * @param array $arr2 Array whose entries override or extend $arr1
 * @return array
 */

function enano_safe_array_merge($arr1, $arr2)
{
  // Work on a copy so the caller's $arr1 is left untouched
  $arr3 = $arr1;
  foreach ( $arr2 as $k => $v )
  {
    $arr3[$k] = $v;
  }
  return $arr3;
}
|
32 |
||
33 |
/**
 * In Enano versions prior to 1.0.2, this class provided a search function that was keyword-based and allowed boolean searches. It was
 * cut from Coblynau and replaced with perform_search(), later in this file, because of speed issues. Now mostly deprecated. The only
 * thing remaining is the buildIndex function, which is still used by the path manager and the new search framework.
 *
 * @package Enano
 * @subpackage Page management frontend
 * @license GNU General Public License <http://enanocms.org/Special:GNU_General_Public_License>
 */

class Searcher
{
  
  // NOTE(review): $results, $warnings and $match_case are not referenced anywhere in this class
  // as shown; presumably leftovers from the removed keyword search - confirm before deleting.
  var $results;
  
  // Filled by buildIndex(): word => comma-separated list of the $texts keys containing that word
  var $index;
  var $warnings;
  var $match_case = false;
  
  /**
   * Builds an inverted word index from a set of texts and stores it in $this->index.
   *
   * Each text is reduced to words made of ASCII letters, digits and apostrophes. Words shorter
   * than two characters and words returned by get_stopwords() are dropped. For every remaining
   * word, $this->index[word] becomes a comma-separated string of the keys of $texts whose text
   * contains that word, in first-seen order and without duplicates. Words are indexed with
   * their original case.
   *
   * @param array $texts Texts to index. The array keys are what gets recorded in the index
   *                     (presumably page identifiers - confirm against callers).
   * @return void The result is stored in $this->index; any previous index is discarded.
   */
  
  function buildIndex($texts)
  {
    $this->index = Array();
    
    // Flip once so stopword checks are a hash lookup instead of a linear scan per word.
    // Assumes get_stopwords() returns a list of strings - TODO confirm.
    $stopword_lookup = array_flip(get_stopwords());
    
    // word => array(text key => text key). Keying the inner array by the text key de-duplicates
    // exactly; a loose in_array() would treat distinct keys such as 1 and "1.0" as the same page.
    $postings = array();
    
    foreach ( $texts as $key => $text )
    {
      // Turn each run of non-word characters and underscores into one space, but leave
      // apostrophes alone so that words like "don't" stay in one piece.
      $text = preg_replace('#(?:(?!\')[\W_])+#', ' ', $text);
      // Strip whatever is left that is not an ASCII letter, digit, apostrophe or space
      // (\w can match locale-specific bytes that must not end up in the index).
      $text = preg_replace('#[^A-Za-z0-9\' ]#', '', $text);
      
      foreach ( explode(' ', $text) as $word )
      {
        if ( strlen($word) < 2 || isset($stopword_lookup[$word]) )
          continue;
        $postings[$word][$key] = $key;
      }
    }
    
    foreach ( $postings as $word => $keys )
    {
      $this->index[$word] = implode(',', $keys);
    }
  }
  
}
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
104 |
|
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
105 |
/** |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
106 |
* Searches the site for the specified string and returns an array with each value being an array filled with the following: |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
107 |
* page_id: string, self-explanatory |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
108 |
* namespace: string, self-explanatory |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
109 |
* page_length: integer, the length of the full page in bytes |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
110 |
* page_text: string, the contents of the page (trimmed to ~150 bytes if necessary) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
111 |
* score: numerical relevance score, 1-100, rounded to 2 digits and calculated based on which terms were present and which were not |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
112 |
* @param string Search query |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
113 |
* @param string Will be filled with any warnings encountered whilst parsing the query |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
114 |
* @param bool Case sensitivity - defaults to false |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
115 |
* @return array |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
116 |
*/ |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
117 |
|
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
118 |
function perform_search($query, &$warnings, $case_sensitive = false) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
119 |
{ |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
120 |
global $db, $session, $paths, $template, $plugins; // Common objects |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
121 |
$warnings = array(); |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
122 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
123 |
$query = parse_search_query($query, $warnings); |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
124 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
125 |
// Segregate search terms containing spaces |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
126 |
$query_phrase = array( |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
127 |
'any' => array(), |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
128 |
'req' => array() |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
129 |
); |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
130 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
131 |
foreach ( $query['any'] as $i => $_ ) |
1 | 132 |
{ |
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
133 |
$term =& $query['any'][$i]; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
134 |
$term = trim($term); |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
135 |
// the indexer only indexes words a-z with apostrophes |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
136 |
if ( preg_match('/[^A-Za-z\']/', $term) ) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
137 |
{ |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
138 |
$query_phrase['any'][] = $term; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
139 |
unset($term, $query['any'][$i]); |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
140 |
} |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
141 |
} |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
142 |
unset($term); |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
143 |
$query['any'] = array_values($query['any']); |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
144 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
145 |
foreach ( $query['req'] as $i => $_ ) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
146 |
{ |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
147 |
$term =& $query['req'][$i]; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
148 |
$term = trim($term); |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
149 |
if ( preg_match('/[^A-Za-z\']/', $term) ) |
1 | 150 |
{ |
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
151 |
$query_phrase['req'][] = $term; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
152 |
unset($term, $query['req'][$i]); |
1 | 153 |
} |
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
154 |
} |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
155 |
unset($term); |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
156 |
$query['req'] = array_values($query['req']); |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
157 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
158 |
$results = array(); |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
159 |
$scores = array(); |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
160 |
$ns_list = '(' . implode('|', array_keys($paths->nslist)) . ')'; |
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
161 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
162 |
// FIXME: Update to use FULLTEXT algo when available. |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
163 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
164 |
// Build an SQL query to load from the index table |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
165 |
if ( count($query['any']) < 1 && count($query['req']) < 1 && count($query_phrase['any']) < 1 && count($query_phrase['req']) < 1 ) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
166 |
{ |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
167 |
// This is both because of technical restrictions and devastation that would occur on shared servers/large sites. |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
168 |
$warnings[] = 'You need to have at least one keyword in your search query. Searching only for pages not containing a term is not allowed.'; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
169 |
return array(); |
1 | 170 |
} |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
171 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
172 |
// |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
173 |
// STAGE 1 |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
174 |
// Get all possible result pages from the search index. Tally which pages have the most words, and later sort them by boolean relevance |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
175 |
// |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
176 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
177 |
// Skip this if no indexable words are included |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
178 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
179 |
if ( count($query['any']) > 0 || count($query['req']) > 0 ) |
1 | 180 |
{ |
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
181 |
$where_any = array(); |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
182 |
foreach ( $query['any'] as $term ) |
1 | 183 |
{ |
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
184 |
$term = escape_string_like($term); |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
185 |
if ( !$case_sensitive ) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
186 |
$term = strtolower($term); |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
187 |
$where_any[] = $term; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
188 |
} |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
189 |
foreach ( $query['req'] as $term ) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
190 |
{ |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
191 |
$term = escape_string_like($term); |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
192 |
if ( !$case_sensitive ) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
193 |
$term = strtolower($term); |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
194 |
$where_any[] = $term; |
1 | 195 |
} |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
196 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
197 |
$col_word = ( $case_sensitive ) ? 'word' : 'lcase(word)'; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
198 |
$where_any = ( count($where_any) > 0 ) ? '( ' . $col_word . ' = \'' . implode('\' OR ' . $col_word . ' = \'', $where_any) . '\' )' : ''; |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
199 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
200 |
// generate query |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
201 |
// using a GROUP BY here ensures that the same word with a different case isn't counted as 2 words - it's all melted back |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
202 |
// into one later in the processing stages |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
203 |
// $group_by = ( $case_sensitive ) ? '' : ' GROUP BY lcase(word);'; |
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
204 |
$sql = "SELECT word, page_names FROM " . table_prefix . "search_index WHERE {$where_any}"; |
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
205 |
if ( !($q = $db->sql_unbuffered_query($sql)) ) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
206 |
$db->_die('Error is in perform_search(), includes/search.php, query 1'); |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
207 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
208 |
$word_tracking = array(); |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
209 |
if ( $row = $db->fetchrow() ) |
1 | 210 |
{ |
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
211 |
do |
1 | 212 |
{ |
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
213 |
// get page list |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
214 |
$pages =& $row['page_names']; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
215 |
if ( strpos($pages, ',') ) |
1 | 216 |
{ |
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
217 |
// the term occurs in more than one page |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
218 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
219 |
// Find page IDs that contain commas |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
220 |
// This should never happen because commas are escaped by sanitize_page_id(). Nevertheless for compatibility with older |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
221 |
// databases, and to alleviate the concerns of hackers, we'll accommodate page IDs with commas here by checking for |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
222 |
// IDs that don't match the pattern for stringified page ID + namespace. If it doesn't match, that means it's a continuation |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
223 |
// of the previous ID and should be concatenated to the previous entry. |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
224 |
$matches = explode(',', $pages); |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
225 |
$prev = false; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
226 |
foreach ( $matches as $i => $_ ) |
1 | 227 |
{ |
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
228 |
$match =& $matches[$i]; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
229 |
if ( !preg_match("/^ns=$ns_list;pid=(.+)$/", $match) && $prev ) |
1 | 230 |
{ |
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
231 |
$matches[$prev] .= ',' . $match; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
232 |
unset($match, $matches[$i]); |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
233 |
continue; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
234 |
} |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
235 |
$prev = $i; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
236 |
} |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
237 |
unset($match); |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
238 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
239 |
// Iterate through each of the results, assigning scores based on how many times the page has shown up. |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
240 |
// This works because this phase of the search is strongly word-based, not page-based. If a page shows up |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
241 |
// multiple times while fetching the result rows from the search_index table, it simply means that page |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
242 |
// contains more than one of the terms the user searched for. |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
243 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
244 |
foreach ( $matches as $match ) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
245 |
{ |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
246 |
$word_cs = (( $case_sensitive ) ? $row['word'] : strtolower($row['word'])); |
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
247 |
if ( isset($word_tracking[$match]) && in_array($word_cs, $word_tracking[$match]) ) |
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
248 |
{ |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
249 |
continue; |
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
250 |
} |
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
251 |
if ( isset($word_tracking[$match]) ) |
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
252 |
{ |
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
253 |
if ( isset($word_tracking[$match]) ) |
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
254 |
{ |
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
255 |
$word_tracking[$match][] = ($word_cs); |
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
256 |
} |
1 | 257 |
} |
258 |
else |
|
259 |
{ |
|
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
260 |
$word_tracking[$match] = array($word_cs); |
1 | 261 |
} |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
262 |
$inc = 1; |
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
263 |
|
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
264 |
// Is this search term present in the page's title? If so, give extra points |
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
265 |
preg_match("/^ns=$ns_list;pid=(.+)$/", $match, $piecesparts); |
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
266 |
$pathskey = $paths->nslist[ $piecesparts[1] ] . sanitize_page_id($piecesparts[2]); |
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
267 |
if ( isset($paths->pages[$pathskey]) ) |
1 | 268 |
{ |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
269 |
$test_func = ( $case_sensitive ) ? 'strstr' : 'stristr'; |
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
270 |
if ( $test_func($paths->pages[$pathskey]['name'], $row['word']) || $test_func($paths->pages[$pathskey]['urlname_nons'], $row['word']) ) |
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
271 |
{ |
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
272 |
$inc = 1.5; |
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
273 |
} |
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
274 |
} |
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
275 |
if ( isset($scores[$match]) ) |
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
276 |
{ |
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
277 |
$scores[$match] = $scores[$match] + $inc; |
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
278 |
} |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
279 |
else |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
280 |
{ |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
281 |
$scores[$match] = $inc; |
1 | 282 |
} |
283 |
} |
|
284 |
} |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
285 |
else |
1 | 286 |
{ |
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
287 |
// the term only occurs in one page |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
288 |
$word_cs = (( $case_sensitive ) ? $row['word'] : strtolower($row['word'])); |
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
289 |
if ( isset($word_tracking[$pages]) && in_array($word_cs, $word_tracking[$pages]) ) |
1 | 290 |
{ |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
291 |
continue; |
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
292 |
} |
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
293 |
if ( isset($word_tracking[$pages]) ) |
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
294 |
{ |
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
295 |
if ( isset($word_tracking[$pages]) ) |
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
296 |
{ |
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
297 |
$word_tracking[$pages][] = ($word_cs); |
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
298 |
} |
1 | 299 |
} |
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
300 |
else |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
301 |
{ |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
302 |
$word_tracking[$pages] = array($word_cs); |
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
303 |
} |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
304 |
$inc = 1; |
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
305 |
|
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
306 |
// Is this search term present in the page's title? If so, give extra points |
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
307 |
preg_match("/^ns=$ns_list;pid=(.+)$/", $pages, $piecesparts); |
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
308 |
$pathskey = $paths->nslist[ $piecesparts[1] ] . sanitize_page_id($piecesparts[2]); |
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
309 |
if ( isset($paths->pages[$pathskey]) ) |
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
310 |
{ |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
311 |
$test_func = ( $case_sensitive ) ? 'strstr' : 'stristr'; |
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
312 |
if ( $test_func($paths->pages[$pathskey]['name'], $row['word']) || $test_func($paths->pages[$pathskey]['urlname_nons'], $row['word']) ) |
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
313 |
{ |
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
314 |
$inc = 1.5; |
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
315 |
} |
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
316 |
} |
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
317 |
if ( isset($scores[$pages]) ) |
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
318 |
{ |
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
319 |
$scores[$pages] = $scores[$pages] + $inc; |
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
320 |
} |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
321 |
else |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
322 |
{ |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
323 |
$scores[$pages] = $inc; |
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
324 |
} |
1 | 325 |
} |
326 |
} |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
327 |
while ( $row = $db->fetchrow() ); |
1 | 328 |
} |
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
329 |
$db->free_result(); |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
330 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
331 |
// |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
332 |
// STAGE 2: FIRST ELIMINATION ROUND |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
333 |
// Iterate through the list of required terms. If a given page is not found to have the required term, eliminate it |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
334 |
// |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
335 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
336 |
foreach ( $query['req'] as $term ) |
1 | 337 |
{ |
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
338 |
foreach ( $word_tracking as $i => $page ) |
1 | 339 |
{ |
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
340 |
if ( !in_array($term, $page) ) |
1 | 341 |
{ |
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
342 |
unset($word_tracking[$i], $scores[$i]); |
1 | 343 |
} |
344 |
} |
|
345 |
} |
|
346 |
} |
|
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
347 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
348 |
// |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
349 |
// STAGE 3: PHRASE SEARCHING |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
350 |
// Use LIKE to find pages with specified phrases. We can do a super-picky single query without another elimination round because |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
351 |
// at this stage we can search the full page_text column instead of relying on a word list. |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
352 |
// |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
353 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
354 |
// We can skip this stage if none of these special terms apply |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
355 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
356 |
$text_col = ( $case_sensitive ) ? 'page_text' : 'lcase(page_text)'; |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
357 |
$name_col = ( $case_sensitive ) ? 'name' : 'lcase(name)'; |
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
358 |
$text_col_join = ( $case_sensitive ) ? 't.page_text' : 'lcase(t.page_text)'; |
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
359 |
$name_col_join = ( $case_sensitive ) ? 'p.name' : 'lcase(p.name)'; |
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
360 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
361 |
if ( count($query_phrase['any']) > 0 || count($query_phrase['req']) > 0 ) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
362 |
{ |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
363 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
364 |
$where_any = array(); |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
365 |
foreach ( $query_phrase['any'] as $term ) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
366 |
{ |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
367 |
$term = escape_string_like($term); |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
368 |
if ( !$case_sensitive ) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
369 |
$term = strtolower($term); |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
370 |
$where_any[] = "( $text_col LIKE '%$term%' OR $name_col LIKE '%$term%' )"; |
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
371 |
} |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
372 |
|
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
373 |
$where_any = ( count($where_any) > 0 ) ? implode(" OR\n ", $where_any) : ''; |
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
374 |
|
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
375 |
// Also do required terms, but use AND to ensure that all required terms are included |
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
376 |
$where_req = array(); |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
377 |
foreach ( $query_phrase['req'] as $term ) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
378 |
{ |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
379 |
$term = escape_string_like($term); |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
380 |
if ( !$case_sensitive ) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
381 |
$term = strtolower($term); |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
382 |
$where_req[] = "( $text_col LIKE '%$term%' OR $name_col LIKE '%$term%' )"; |
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
383 |
} |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
384 |
$and_clause = ( $where_any != '' ) ? 'AND ' : ''; |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
385 |
$where_req = ( count($where_req) > 0 ) ? "{$and_clause}" . implode(" AND\n ", $where_req) : ''; |
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
386 |
|
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
387 |
$sql = 'SELECT CONCAT("ns=",t.namespace,";pid=",t.page_id) AS id, p.name FROM ' . table_prefix . "page_text AS t\n" |
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
388 |
. " LEFT JOIN " . table_prefix . "pages AS p\n" |
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
389 |
. " ON ( p.urlname = t.page_id AND p.namespace = t.namespace )\n" |
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
390 |
. " WHERE\n $where_any\n $where_req;"; |
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
391 |
if ( !($q = $db->sql_unbuffered_query($sql)) ) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
392 |
$db->_die('Error is in perform_search(), includes/search.php, query 2. Parsed query dump follows:<pre>(indexable) ' . htmlspecialchars(print_r($query, true)) . '(non-indexable) ' . htmlspecialchars(print_r($query_phrase, true)) . '</pre>'); |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
393 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
394 |
if ( $row = $db->fetchrow() ) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
395 |
{ |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
396 |
do |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
397 |
{ |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
398 |
$id =& $row['id']; |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
399 |
$inc = 1; |
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
400 |
|
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
401 |
// Is this search term present in the page's title? If so, give extra points |
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
402 |
preg_match("/^ns=$ns_list;pid=(.+)$/", $id, $piecesparts); |
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
403 |
$pathskey = $paths->nslist[ $piecesparts[1] ] . sanitize_page_id($piecesparts[2]); |
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
404 |
if ( isset($paths->pages[$pathskey]) ) |
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
405 |
{ |
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
406 |
$test_func = ( $case_sensitive ) ? 'strstr' : 'stristr'; |
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
407 |
foreach ( array_merge($query_phrase['any'], $query_phrase['req']) as $term ) |
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
408 |
{ |
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
409 |
if ( $test_func($paths->pages[$pathskey]['name'], $term) || $test_func($paths->pages[$pathskey]['urlname_nons'], $term) ) |
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
410 |
{ |
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
411 |
$inc = 1.5; |
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
412 |
break; |
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
413 |
} |
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
414 |
} |
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
415 |
} |
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
416 |
if ( isset($scores[$id]) ) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
417 |
{ |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
418 |
$scores[$id] = $scores[$id] + $inc; |
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
419 |
} |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
420 |
else |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
421 |
{ |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
422 |
$scores[$id] = $inc; |
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
423 |
} |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
424 |
} |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
425 |
while ( $row = $db->fetchrow() ); |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
426 |
} |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
427 |
$db->free_result(); |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
428 |
} |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
429 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
430 |
// |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
431 |
// STAGE 4 - SELECT PAGE TEXT AND ELIMINATE NOTS |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
432 |
// At this point, we have a complete list of all the possible pages. Now we want to obtain the page text, and within the same query |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
433 |
// eliminate any pages containing terms that shouldn't be in there. |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
434 |
// |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
435 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
436 |
// Generate master word list for the highlighter |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
437 |
$word_list = array_values(array_merge($query['any'], $query['req'], $query_phrase['any'], $query_phrase['req'])); |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
438 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
439 |
$text_where = array(); |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
440 |
foreach ( $scores as $page_id => $_ ) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
441 |
{ |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
442 |
$text_where[] = $db->escape($page_id); |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
443 |
} |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
444 |
$text_where = '( CONCAT("ns=",t.namespace,";pid=",t.page_id) = \'' . implode('\' OR CONCAT("ns=",t.namespace,";pid=",t.page_id) = \'', $text_where) . '\' )'; |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
445 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
446 |
if ( count($query['not']) > 0 ) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
447 |
$text_where .= ' AND'; |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
448 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
449 |
$where_not = array(); |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
450 |
foreach ( $query['not'] as $term ) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
451 |
{ |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
452 |
$term = escape_string_like($term); |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
453 |
if ( !$case_sensitive ) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
454 |
$term = strtolower($term); |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
455 |
$where_not[] = $term; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
456 |
} |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
457 |
$where_not = ( count($where_not) > 0 ) ? "$text_col NOT LIKE '%" . implode("%' AND $text_col NOT LIKE '%", $where_not) . "%'" : ''; |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
458 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
459 |
$sql = 'SELECT CONCAT("ns=",t.namespace,";pid=",t.page_id) AS id, t.page_id, t.namespace, CHAR_LENGTH(t.page_text) AS page_length, t.page_text, p.name AS page_name FROM ' . table_prefix . "page_text AS t |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
460 |
LEFT JOIN " . table_prefix . "pages AS p |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
461 |
ON ( p.urlname = t.page_id AND p.namespace = t.namespace ) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
462 |
WHERE $text_where $where_not;"; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
463 |
if ( !($q = $db->sql_unbuffered_query($sql)) ) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
464 |
$db->_die('Error is in perform_search(), includes/search.php, query 3'); |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
465 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
466 |
$page_data = array(); |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
467 |
if ( $row = $db->fetchrow() ) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
468 |
{ |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
469 |
do |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
470 |
{ |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
471 |
$row['page_text'] = htmlspecialchars($row['page_text']); |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
472 |
$row['page_name'] = htmlspecialchars($row['page_name']); |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
473 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
474 |
// Highlight results (this is wonderfully automated) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
475 |
$row['page_text'] = highlight_and_clip_search_result($row['page_text'], $word_list, $case_sensitive); |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
476 |
if ( strlen($row['page_text']) > 250 && !preg_match('/^\.\.\.(.+)\.\.\.$/', $row['page_text']) ) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
477 |
{ |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
478 |
$row['page_text'] = substr($row['page_text'], 0, 150) . '...'; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
479 |
} |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
480 |
$row['page_name'] = highlight_search_result($row['page_name'], $word_list, $case_sensitive); |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
481 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
482 |
$page_data[$row['id']] = $row; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
483 |
} |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
484 |
while ( $row = $db->fetchrow() ); |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
485 |
} |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
486 |
$db->free_result(); |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
487 |
|
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
488 |
// |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
489 |
// STAGE 5 - SPECIAL PAGE TITLE SEARCH |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
490 |
// Iterate through $paths->pages and check the titles for search terms. Score accordingly. |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
491 |
// |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
492 |
|
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
493 |
foreach ( $paths->pages as $id => $page ) |
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
494 |
{ |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
495 |
if ( $page['namespace'] != 'Special' ) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
496 |
continue; |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
497 |
if ( !is_int($id) ) |
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
498 |
continue; |
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
499 |
$idstring = 'ns=' . $page['namespace'] . ';pid=' . $page['urlname_nons']; |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
500 |
$any = array_values(array_unique(array_merge($query['any'], $query_phrase['any']))); |
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
501 |
foreach ( $any as $term ) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
502 |
{ |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
503 |
if ( $case_sensitive ) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
504 |
{ |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
505 |
if ( strstr($page['name'], $term) || strstr($page['urlname_nons'], $term) ) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
506 |
{ |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
507 |
( isset($scores[$idstring]) ) ? $scores[$idstring] = $scores[$idstring] + 1.5 : $scores[$idstring] = 1.5; |
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
508 |
} |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
509 |
} |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
510 |
else |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
511 |
{ |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
512 |
if ( stristr($page['name'], $term) || stristr($page['urlname_nons'], $term) ) |
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
513 |
{ |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
514 |
( isset($scores[$idstring]) ) ? $scores[$idstring] = $scores[$idstring] + 1.5 : $scores[$idstring] = 1.5; |
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
515 |
} |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
516 |
} |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
517 |
} |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
518 |
if ( isset($scores[$idstring]) ) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
519 |
{ |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
520 |
$page_data[$idstring] = array( |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
521 |
'page_name' => $page['name'], |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
522 |
'page_text' => '', |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
523 |
'page_id' => $page['urlname_nons'], |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
524 |
'namespace' => $page['namespace'], |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
525 |
'score' => $scores[$idstring], |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
526 |
'page_length' => 1, |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
527 |
'page_note' => '[Special page]' |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
528 |
); |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
529 |
} |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
530 |
} |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
531 |
|
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
532 |
// |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
533 |
// STAGE 6 - SECOND ELIMINATION ROUND |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
534 |
// Iterate through the list of required terms. If a given page is not found to have the required term, eliminate it |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
535 |
// |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
536 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
537 |
$required = array_merge($query['req'], $query_phrase['req']); |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
538 |
foreach ( $required as $term ) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
539 |
{ |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
540 |
foreach ( $page_data as $id => $page ) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
541 |
{ |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
542 |
if ( ( $page['namespace'] == 'Special' || ( $page['namespace'] != 'Special' && !strstr($page['page_text'], $term) ) ) && !strstr($page['page_id'], $term) && !strstr($page['page_name'], $term) ) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
543 |
{ |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
544 |
unset($page_data[$id]); |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
545 |
} |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
546 |
} |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
547 |
} |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
548 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
549 |
// At this point, all of our normal results are in. However, we can also allow plugins to hook into the system and score their own |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
550 |
// pages and add text, etc. as necessary. |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
551 |
// Plugins are COMPLETELY responsible for using the search terms and handling Boolean logic properly |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
552 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
553 |
$code = $plugins->setHook('search_global_inner'); |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
554 |
foreach ( $code as $cmd ) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
555 |
{ |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
556 |
eval($cmd); |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
557 |
} |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
558 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
559 |
// a marvelous debugging aid :-) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
560 |
// die('<pre>' . htmlspecialchars(print_r($page_data, true)) . '</pre>'); |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
561 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
562 |
// |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
563 |
// STAGE 7 - HIGHLIGHT, TRIM, AND SCORE RESULTS |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
564 |
// We now have the complete results of the search. We need to trim text down to show only portions of the page containing search |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
565 |
// terms, highlight any search terms within the page, and sort the final results array in descending order of score. |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
566 |
// |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
567 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
568 |
// Sort scores array |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
569 |
arsort($scores); |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
570 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
571 |
// Divisor for calculating relevance scores |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
572 |
$divisor = ( count($query['any']) + count($query_phrase['any']) + count($query['req']) + count($query_phrase['not']) ) * 1.5; |
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
573 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
574 |
foreach ( $scores as $page_id => $score ) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
575 |
{ |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
576 |
if ( !isset($page_data[$page_id]) ) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
577 |
// It's possible that $scores contains a score for a page that was later eliminated because it contained a disallowed term |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
578 |
continue; |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
579 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
580 |
// Make a copy of the datum, then delete the original (it frees up a LOT of RAM) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
581 |
$datum = $page_data[$page_id]; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
582 |
unset($page_data[$page_id]); |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
583 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
584 |
// This is an internal value used for sorting - it's no longer needed. |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
585 |
unset($datum['id']); |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
586 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
587 |
// Calculate score |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
588 |
// if ( $score > $divisor ) |
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
589 |
// $score = $divisor; |
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
590 |
$datum['score'] = round($score / $divisor, 2) * 100; |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
591 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
592 |
// Store it in our until-now-unused results array |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
593 |
$results[] = $datum; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
594 |
} |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
595 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
596 |
// Our work here is done. :-D |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
597 |
return $results; |
1 | 598 |
} |
599 |
||
600 |
/** |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
601 |
* Parses a search query into an associative array. The resultant array will be filled with the following values, each an array: |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
602 |
* any: Search terms that can optionally be present |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
603 |
* req: Search terms that must be present |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
604 |
* not: Search terms that should not be present |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
605 |
 * @param string $query Search query |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
606 |
 * @param array $warnings Passed by reference; reset to an empty array, then filled with parser warnings, such as query too short, words too short, etc. |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
607 |
* @return array |
1 | 608 |
*/ |
609 |
||
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
610 |
function parse_search_query($query, &$warnings) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
611 |
{ |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
612 |
$stopwords = get_stopwords(); |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
613 |
$ret = array( |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
614 |
'any' => array(), |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
615 |
'req' => array(), |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
616 |
'not' => array() |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
617 |
); |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
618 |
$warnings = array(); |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
619 |
$terms = array(); |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
620 |
$in_quote = false; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
621 |
$start_term = 0; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
622 |
$just_finished = false; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
623 |
for ( $i = 0; $i < strlen($query); $i++ ) |
1 | 624 |
{ |
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
625 |
$chr = $query{$i}; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
626 |
$prev = ( $i > 0 ) ? $query{ $i - 1 } : ''; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
627 |
$next = ( ( $i + 1 ) < strlen($query) ) ? $query{ $i + 1 } : ''; |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
628 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
629 |
if ( ( $chr == ' ' && !$in_quote ) || ( $i + 1 == strlen ( $query ) ) ) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
630 |
{ |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
631 |
$len = ( $next == '' ) ? $i + 1 : $i - $start_term; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
632 |
$word = substr ( $query, $start_term, $len ); |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
633 |
$terms[] = $word; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
634 |
$start_term = $i + 1; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
635 |
} |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
636 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
637 |
elseif ( $chr == '"' && $in_quote && $prev != '\\' ) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
638 |
{ |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
639 |
$word = substr ( $query, $start_term, $i - $start_term + 1 ); |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
640 |
$start_pos = ( $next == ' ' ) ? $i + 2 : $i + 1; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
641 |
$in_quote = false; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
642 |
} |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
643 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
644 |
elseif ( $chr == '"' && !$in_quote ) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
645 |
{ |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
646 |
$in_quote = true; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
647 |
$start_pos = $i; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
648 |
} |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
649 |
|
1 | 650 |
} |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
651 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
652 |
$ticker = 0; |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
653 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
654 |
foreach ( $terms as $element => $__unused ) |
1 | 655 |
{ |
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
656 |
$atom =& $terms[$element]; |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
657 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
658 |
$ticker++; |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
659 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
660 |
if ( $ticker == 20 ) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
661 |
{ |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
662 |
$warnings[] = 'Some of your search terms were excluded because searches are limited to 20 terms to prevent excessive server load.'; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
663 |
break; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
664 |
} |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
665 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
666 |
if ( substr ( $atom, 0, 2 ) == '+"' && substr ( $atom, ( strlen ( $atom ) - 1 ), 1 ) == '"' ) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
667 |
{ |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
668 |
$word = substr ( $atom, 2, ( strlen( $atom ) - 3 ) ); |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
669 |
if ( strlen ( $word ) < 2 || in_array($word, $stopwords) ) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
670 |
{ |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
671 |
$warnings[] = 'One or more of your search terms was excluded because either it was less than 2 characters in length or is a common word (a stopword) that is typically found on a large number of pages. Examples of stopwords include "the", "this", "which", "with", etc.'; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
672 |
$ticker--; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
673 |
continue; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
674 |
} |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
675 |
if(in_array($word, $ret['req'])) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
676 |
{ |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
677 |
$warnings[] = 'One or more of your search terms was excluded because duplicate terms were encountered.'; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
678 |
$ticker--; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
679 |
continue; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
680 |
} |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
681 |
$ret['req'][] = $word; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
682 |
} |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
683 |
elseif ( substr ( $atom, 0, 2 ) == '-"' && substr ( $atom, ( strlen ( $atom ) - 1 ), 1 ) == '"' ) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
684 |
{ |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
685 |
$word = substr ( $atom, 2, ( strlen( $atom ) - 3 ) ); |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
686 |
if ( strlen ( $word ) < 4 ) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
687 |
{ |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
688 |
$warnings[] = 'One or more of your search terms was excluded because terms must be at least 4 characters in length.'; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
689 |
$ticker--; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
690 |
continue; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
691 |
} |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
692 |
if(in_array($word, $ret['not'])) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
693 |
{ |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
694 |
$warnings[] = 'One or more of your search terms was excluded because duplicate terms were encountered.'; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
695 |
$ticker--; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
696 |
continue; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
697 |
} |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
698 |
$ret['not'][] = $word; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
699 |
} |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
700 |
elseif ( substr ( $atom, 0, 1 ) == '+' ) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
701 |
{ |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
702 |
$word = substr ( $atom, 1 ); |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
703 |
if ( strlen ( $word ) < 2 || in_array($word, $stopwords) ) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
704 |
{ |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
705 |
$warnings[] = 'One or more of your search terms was excluded because either it was less than 2 characters in length or is a common word (a stopword) that is typically found on a large number of pages. Examples of stopwords include "the", "this", "which", "with", etc.'; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
706 |
$ticker--; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
707 |
continue; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
708 |
} |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
709 |
if(in_array($word, $ret['req'])) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
710 |
{ |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
711 |
$warnings[] = 'One or more of your search terms was excluded because duplicate terms were encountered.'; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
712 |
$ticker--; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
713 |
continue; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
714 |
} |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
715 |
$ret['req'][] = $word; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
716 |
} |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
717 |
elseif ( substr ( $atom, 0, 1 ) == '-' ) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
718 |
{ |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
719 |
$word = substr ( $atom, 1 ); |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
720 |
if ( strlen ( $word ) < 2 || in_array($word, $stopwords) ) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
721 |
{ |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
722 |
$warnings[] = 'One or more of your search terms was excluded because either it was less than 2 characters in length or is a common word (a stopword) that is typically found on a large number of pages. Examples of stopwords include "the", "this", "which", "with", etc.'; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
723 |
$ticker--; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
724 |
continue; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
725 |
} |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
726 |
if(in_array($word, $ret['not'])) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
727 |
{ |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
728 |
$warnings[] = 'One or more of your search terms was excluded because duplicate terms were encountered.'; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
729 |
$ticker--; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
730 |
continue; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
731 |
} |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
732 |
$ret['not'][] = $word; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
733 |
} |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
734 |
elseif ( substr ( $atom, 0, 1 ) == '"' && substr ( $atom, ( strlen($atom) - 1 ), 1 ) == '"' ) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
735 |
{ |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
736 |
$word = substr ( $atom, 1, ( strlen ( $atom ) - 2 ) ); |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
737 |
if ( strlen ( $word ) < 2 || in_array($word, $stopwords) ) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
738 |
{ |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
739 |
$warnings[] = 'One or more of your search terms was excluded because either it was less than 2 characters in length or is a common word (a stopword) that is typically found on a large number of pages. Examples of stopwords include "the", "this", "which", "with", etc.'; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
740 |
$ticker--; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
741 |
continue; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
742 |
} |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
743 |
if(in_array($word, $ret['any'])) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
744 |
{ |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
745 |
$warnings[] = 'One or more of your search terms was excluded because duplicate terms were encountered.'; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
746 |
$ticker--; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
747 |
continue; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
748 |
} |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
749 |
$ret['any'][] = $word; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
750 |
} |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
751 |
else |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
752 |
{ |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
753 |
$word = $atom; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
754 |
if ( strlen ( $word ) < 2 || in_array($word, $stopwords) ) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
755 |
{ |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
756 |
$warnings[] = 'One or more of your search terms was excluded because either it was less than 2 characters in length or is a common word (a stopword) that is typically found on a large number of pages. Examples of stopwords include "the", "this", "which", "with", etc.'; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
757 |
$ticker--; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
758 |
continue; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
759 |
} |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
760 |
if(in_array($word, $ret['any'])) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
761 |
{ |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
762 |
$warnings[] = 'One or more of your search terms was excluded because duplicate terms were encountered.'; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
763 |
$ticker--; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
764 |
continue; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
765 |
} |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
766 |
$ret['any'][] = $word; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
767 |
} |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
768 |
} |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
769 |
return $ret; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
770 |
} |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
771 |
|
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
772 |
/**
 * Escapes a string so that it is matched literally when embedded in a SQL LIKE clause.
 *
 * Escaping happens in two layers, innermost first:
 *   1. Pattern level: the LIKE metacharacters - backslash (the escape character itself), "%" and "_" - are each
 *      prefixed with a backslash so the pattern matcher treats them as ordinary characters.
 *   2. Literal level: the result is passed through $db->escape() so it can be placed inside a quoted SQL string.
 *
 * The pattern-level pass has to run first. If the backslashes for "%" and "_" are added after $db->escape(), they
 * are never escaped for the string literal, and a backslash typed by the user is never escaped for the pattern.
 *
 * NOTE(review): assumes $db->escape() is the DBAL's string-literal escaper and that the backend uses backslash as
 * the default LIKE escape character - confirm against the database layer.
 *
 * @param string $string Raw text that should be matched literally
 * @return string Text that is safe to concatenate between the quotes (and any surrounding % wildcards) of a LIKE operand
 */

function escape_string_like($string)
{
  global $db; // Only the database object is used here

  // str_replace() applies the pairs in array order, so the backslash is doubled before any new backslashes are
  // introduced for % and _ (otherwise those new backslashes would themselves get doubled).
  $string = str_replace(array('\\', '%', '_'), array('\\\\', '\\%', '\\_'), $string);

  // Now make the pattern safe to embed in a quoted SQL string literal.
  return $db->escape($string);
}
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
785 |
|
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
786 |
/**
 * Wraps <highlight></highlight> tags around every occurrence, in the given text, of any word from the specified
 * array. Does not perform any clipping.
 *
 * Blank entries in the word list are ignored. If no usable words remain, the text is returned unchanged.
 * @param string Text to process
 * @param array Word list
 * @param bool If true, searches case-sensitively when highlighting words
 * @return string
 */

function highlight_search_result($pt, $words, $case_sensitive = false)
{
  $quoted = array();
  // foreach instead of an index loop so that word lists with non-sequential or string keys work too
  foreach ( $words as $word )
  {
    // Skip blanks with an explicit comparison; empty() would also discard the valid search term "0"
    if ( !is_scalar($word) || strval($word) === '' )
      continue;
    // Pass the delimiter so that a "/" inside a word cannot terminate the pattern early
    $quoted[] = preg_quote(strval($word), '/');
  }

  // With nothing to search for, the pattern would be /()/, which matches the empty string at every offset
  // and would scatter empty highlight tags through the text
  if ( empty($quoted) )
    return $pt;

  $flag = ( $case_sensitive ) ? '' : 'i';
  $regex = '/(' . implode('|', $quoted) . ')/' . $flag;
  $result = preg_replace($regex, '<highlight>\\1</highlight>', $pt);

  // preg_replace() returns null on failure; fall back to the unhighlighted text rather than losing it
  return ( $result === null ) ? $pt : $result;
}
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
809 |
|
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
810 |
/** |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
811 |
* Wraps <highlight></highlight> tags around all words in both the specified array and the specified text and clips the text to |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
812 |
* an appropriate length. |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
813 |
* @param string Text to process |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
814 |
* @param array Word list |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
815 |
* @param bool If true, searches case-sensitively when highlighting words |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
816 |
* @return string |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
817 |
*/ |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
818 |
|
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
819 |
function highlight_and_clip_search_result($pt, $words, $case_sensitive = false) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
820 |
{ |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
821 |
$cut_off = false; |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
822 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
823 |
$space_chars = Array("\t", "\n", "\r", " "); |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
824 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
825 |
$pt = highlight_search_result($pt, $words, $case_sensitive); |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
826 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
827 |
foreach ( $words as $word ) |
1 | 828 |
{ |
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
829 |
// Boldface searched words |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
830 |
$ptlen = strlen($pt); |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
831 |
for ( $i = 0; $i < $ptlen; $i++ ) |
1 | 832 |
{ |
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
833 |
$len = strlen($word); |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
834 |
if ( strtolower(substr($pt, $i, $len)) == strtolower($word) ) |
1 | 835 |
{ |
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
836 |
$chunk1 = substr($pt, 0, $i); |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
837 |
$chunk2 = substr($pt, $i, $len); |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
838 |
$chunk3 = substr($pt, ( $i + $len )); |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
839 |
$pt = $chunk1 . $chunk2 . $chunk3; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
840 |
$ptlen = strlen($pt); |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
841 |
// Cut off text to 150 chars or so |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
842 |
if ( !$cut_off ) |
1 | 843 |
{ |
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
844 |
$cut_off = true; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
845 |
if ( $i - 75 > 0 ) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
846 |
{ |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
847 |
// Navigate backwards until a space character is found |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
848 |
$chunk = substr($pt, 0, ( $i - 75 )); |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
849 |
$final_chunk = $chunk; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
850 |
for ( $j = strlen($chunk); $j > 0; $j = $j - 1 ) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
851 |
{ |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
852 |
if ( in_array($chunk{$j}, $space_chars) ) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
853 |
{ |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
854 |
$final_chunk = substr($chunk, $j + 1); |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
855 |
break; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
856 |
} |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
857 |
} |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
858 |
$mid_chunk = substr($pt, ( $i - 75 ), 75); |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
859 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
860 |
$clipped = '...' . $final_chunk . $mid_chunk . $chunk2; |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
861 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
862 |
$chunk = substr($pt, ( $i + strlen($chunk2) + 75 )); |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
863 |
$final_chunk = $chunk; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
864 |
for ( $j = 0; $j < strlen($chunk); $j++ ) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
865 |
{ |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
866 |
if ( in_array($chunk{$j}, $space_chars) ) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
867 |
{ |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
868 |
$final_chunk = substr($chunk, 0, $j); |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
869 |
break; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
870 |
} |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
871 |
} |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
872 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
873 |
$end_chunk = substr($pt, ( $i + strlen($chunk2) ), 75 ); |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
874 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
875 |
$clipped .= $end_chunk . $final_chunk . '...'; |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
876 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
877 |
$pt = $clipped; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
878 |
} |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
879 |
else if ( strlen($pt) > 200 ) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
880 |
{ |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
881 |
$mid_chunk = substr($pt, ( $i - 75 ), 75); |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
882 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
883 |
$clipped = $chunk1 . $chunk2; |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
884 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
885 |
$chunk = substr($pt, ( $i + strlen($chunk2) + 75 )); |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
886 |
$final_chunk = $chunk; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
887 |
for ( $j = 0; $j < strlen($chunk); $j++ ) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
888 |
{ |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
889 |
if ( in_array($chunk{$j}, $space_chars) ) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
890 |
{ |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
891 |
$final_chunk = substr($chunk, 0, $j); |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
892 |
break; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
893 |
} |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
894 |
} |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
895 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
896 |
$end_chunk = substr($pt, ( $i + strlen($chunk2) ), 75 ); |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
897 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
898 |
$clipped .= $end_chunk . $final_chunk . '...'; |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
899 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
900 |
$pt = $clipped; |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
901 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
902 |
} |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
903 |
break 2; |
1 | 904 |
} |
905 |
} |
|
906 |
} |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
907 |
$cut_off = false; |
1 | 908 |
} |
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
909 |
return $pt; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
910 |
} |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
911 |
|
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
912 |
/**
 * Returns the list of stopwords: common English words that should not, under most circumstances, be indexed for
 * searching. The list is adapted from MySQL's default full-text stopword list (kudos to MySQL).
 *
 * The array is built once per request and kept in a function-level static variable, so repeated calls return the
 * already-built list. Every word is lowercase and appears exactly once.
 *
 * @return array List of lowercase stopwords (strings), numerically indexed
 * @see http://dev.mysql.com/doc/refman/5.0/en/fulltext-stopwords.html
 */

function get_stopwords()
{
  // Built on the first call only; later calls take the early return below.
  static $stopwords;
  if ( is_array($stopwords) )
  {
    return $stopwords;
  }
  
  $stopwords = array('a\'s', 'able', 'after', 'afterwards', 'again',
    'against', 'ain\'t', 'all', 'almost', 'alone', 'along', 'already', 'also', 'although', 'always',
    'am', 'among', 'amongst', 'an', 'and', 'another', 'any', 'anybody', 'anyhow', 'anyone', 'anything', 'anyway',
    'anyways', 'anywhere', 'apart', 'appear', 'appreciate', 'appropriate', 'are', 'aren\'t', 'around', 'as', 'aside',
    'ask', 'asking', 'associated', 'at', 'available', 'away', 'awfully', 'be', 'became', 'because', 'become', 'becomes',
    'becoming', 'been', 'before', 'beforehand', 'behind', 'being', 'believe', 'below', 'beside', 'besides', 'best',
    'better', 'between', 'beyond', 'both', 'brief', 'but', 'by', 'c\'mon', 'c\'s', 'came', 'can', 'can\'t', 'cannot',
    'cant', 'cause', 'causes', 'certain', 'certainly', 'changes', 'clearly', 'co', 'com', 'come', 'comes', 'concerning',
    'consequently', 'consider', 'considering', 'contain', 'containing', 'contains', 'corresponding', 'could',
    'couldn\'t', 'course', 'despite', 'did', 'didn\'t', 'different', 'do',
    'does', 'doesn\'t', 'doing', 'don\'t', 'done', 'down', 'downwards', 'during', 'each', 'edu', 'eg', 'eight',
    'either', 'else', 'elsewhere', 'enough', 'entirely', 'especially', 'et', 'etc', 'even', 'ever', 'every',
    'everybody', 'everyone', 'everything', 'everywhere', 'ex', 'exactly', 'example', 'except', 'far', 'few', 'fifth',
    'first', 'five', 'followed', 'following', 'follows', 'for', 'former', 'formerly', 'forth', 'four', 'from',
    'further', 'get', 'gets', 'getting', 'given', 'gives', 'go', 'goes', 'going', 'gone', 'got',
    'gotten', 'had', 'hadn\'t', 'happens', 'hardly', 'has', 'hasn\'t', 'have', 'haven\'t', 'having',
    'he', 'he\'s', 'hello', 'help', 'hence', 'her', 'here', 'here\'s', 'hereafter', 'hereby', 'herein', 'hereupon',
    'hers', 'herself', 'hi', 'him', 'himself', 'his', 'hither', 'hopefully', 'how', 'howbeit', 'however', 'i\'d',
    'i\'ll', 'i\'m', 'i\'ve', 'ie', 'if', 'ignored', 'immediate', 'in', 'inasmuch', 'inc', 'indeed', 'indicate',
    'indicated', 'indicates', 'inner', 'insofar', 'instead', 'into', 'inward', 'is', 'isn\'t', 'it', 'it\'d', 'it\'ll',
    'it\'s', 'its', 'itself', 'just', 'keep', 'keeps', 'kept', 'know', 'knows', 'known', 'last', 'lately', 'later',
    'latter', 'latterly', 'least', 'less', 'lest', 'let', 'let\'s', 'like', 'liked', 'likely', 'little', 'look',
    'looking', 'looks', 'ltd', 'mainly', 'many', 'may', 'maybe', 'me', 'mean', 'meanwhile', 'merely', 'might', 'more',
    'moreover', 'most', 'mostly', 'much', 'must', 'my', 'myself', 'name', 'namely', 'nd', 'near', 'nearly', 'necessary',
    'need', 'needs', 'neither', 'never', 'nevertheless', 'new', 'next', 'nine', 'no', 'nobody', 'non', 'none', 'noone',
    'nor', 'normally', 'not', 'nothing', 'novel', 'now', 'nowhere', 'obviously', 'of', 'off', 'often', 'oh', 'ok',
    'okay', 'old', 'on', 'once', 'one', 'ones', 'only', 'onto', 'or', 'other', 'others', 'otherwise', 'ought', 'our',
    'ours', 'ourselves', 'out', 'outside', 'over', 'overall', 'own', 'particular', 'particularly', 'per', 'perhaps',
    'placed', 'please', 'plus', 'possible', 'presumably', 'probably', 'provides', 'que', 'quite', 'qv', 'rather', 'rd',
    're', 'really', 'reasonably', 'regarding', 'regardless', 'regards', 'relatively', 'respectively', 'right', 'said',
    'same', 'saw', 'say', 'saying', 'says', 'second', 'secondly', 'see', 'seeing', 'seem', 'seemed', 'seeming', 'seems',
    'seen', 'self', 'selves', 'sensible', 'sent', 'serious', 'seriously', 'seven', 'several', 'shall', 'she', 'should',
    'shouldn\'t', 'since', 'six', 'so', 'some', 'somebody', 'somehow', 'someone', 'something', 'sometime', 'sometimes',
    'somewhat', 'somewhere', 'soon', 'sorry', 'specified', 'specify', 'specifying', 'still', 'sub', 'such', 'sup',
    'sure', 't\'s', 'take', 'taken', 'tell', 'tends', 'th', 'than', 'thank', 'thanks', 'thanx', 'that', 'that\'s',
    'thats', 'the', 'their', 'theirs', 'them', 'then', 'thence', 'there', 'there\'s', 'thereafter',
    'thereby', 'therefore', 'therein', 'theres', 'thereupon', 'these', 'they', 'they\'d', 'they\'ll', 'they\'re',
    'they\'ve', 'think', 'third', 'this', 'thorough', 'thoroughly', 'those', 'though', 'three', 'through', 'throughout',
    'thru', 'thus', 'to', 'together', 'too', 'took', 'toward', 'towards', 'tried', 'tries', 'truly', 'try', 'trying',
    'twice', 'two', 'un', 'under', 'unfortunately', 'unless', 'unlikely', 'until', 'unto', 'upon', 'use',
    'used', 'useful', 'uses', 'using', 'usually', 'value', 'various', 'very',
    'was', 'wasn\'t', 'way', 'we', 'we\'d', 'we\'ll', 'we\'re', 'we\'ve', 'welcome', 'well', 'went', 'were', 'weren\'t',
    'what', 'what\'s', 'whatever', 'when', 'whence', 'whenever', 'where', 'where\'s', 'whereafter', 'whereas',
    'which', 'while', 'who', 'who\'s', 'whole', 'whom', 'whose', 'why', 'will', 'willing', 'wish', 'with', 'within',
    // 'would' was previously listed twice in a row; the redundant entry has been dropped.
    'without', 'won\'t', 'wonder', 'would', 'wouldn\'t', 'yes', 'yet', 'you', 'you\'d', 'you\'ll', 'you\'re',
    'you\'ve', 'your', 'yours', 'zero');
  return $stopwords;
}
972 |
||
973 |
?> |