|
1 <?php |
|
2 /*********************************************************************** |
|
3 |
|
4 Copyright (C) 2002-2008 PunBB.org |
|
5 |
|
6 This file is part of PunBB. |
|
7 |
|
8 PunBB is free software; you can redistribute it and/or modify it |
|
9 under the terms of the GNU General Public License as published |
|
10 by the Free Software Foundation; either version 2 of the License, |
|
11 or (at your option) any later version. |
|
12 |
|
13 PunBB is distributed in the hope that it will be useful, but |
|
14 WITHOUT ANY WARRANTY; without even the implied warranty of |
|
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
|
16 GNU General Public License for more details. |
|
17 |
|
18 You should have received a copy of the GNU General Public License |
|
19 along with this program; if not, write to the Free Software |
|
20 Foundation, Inc., 59 Temple Place, Suite 330, Boston, |
|
21 MA 02111-1307 USA |
|
22 |
|
23 ************************************************************************/ |
|
24 |
|
25 |
|
26 // The contents of this file are very much inspired by the file functions_search.php |
|
27 // from the phpBB Group forum software phpBB2 (http://www.phpbb.com). |
|
28 |
|
29 |
|
// Refuse to do anything when this file is requested on its own: the including
// script is expected to have defined the PUN constant beforehand
if (!defined('PUN'))
{
	exit;
}
|
33 |
|
34 |
|
//
// "Cleans up" a text string and returns an array of unique words
// This function depends on the current locale setting
//
// $text is lowercased, stripped of HTML entities, URLs, tag-like bracket
// sequences and punctuation, and then split on whitespace. Leading/trailing
// dots are trimmed from every word. Words shorter than 3 or longer than 20
// characters (as counted by pun_strlen(), measured after trimming) and words
// listed in the current user's stopwords.txt are dropped.
//
// Returns an array of unique words. Keys are the positions the words had
// after splitting and are NOT re-indexed, so the array may have gaps.
//
function split_words($text)
{
	global $pun_user;
	static $noise_match, $noise_replace, $stopwords;

	// The lookup tables are built once per request and cached in the statics
	if (empty($noise_match))
	{
		// $noise_match[n] is replaced by $noise_replace[n]; both lists must stay the same length
		$noise_match = array('[quote', '[code', '[url', '[img', '[email', '[color', '[colour', 'quote]', 'code]', 'url]', 'img]', 'email]', 'color]', 'colour]', '^', '$', '&', '(', ')', '<', '>', '`', '\'', '"', '|', ',', '@', '_', '?', '%', '~', '+', '[', ']', '{', '}', ':', '\\', '/', '=', '#', ';', '!', '*');
		$noise_replace = array('', '', '', '', '', '', '', '', '', '', '', '', '', '', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '', '', ' ', ' ', ' ', ' ', '', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '' , ' ', ' ', ' ', ' ', ' ', ' ');

		// A missing or unreadable stopword file simply means "no stopwords"
		$stopword_lines = @file(PUN_ROOT.'lang/'.$pun_user['language'].'/stopwords.txt');
		$stopwords = ($stopword_lines === false) ? array() : array_map('trim', $stopword_lines);
	}

	// Clean up
	$patterns = array(
		// HTML entities such as &amp; or &#160;
		'#&[\#a-z0-9]+?;#i',
		// scheme://host URLs with an optional path
		'#\b[\w]+:\/\/[a-z0-9\.\-]+(\/[a-z0-9\?\.%_\-\+=&\/~]+)?#',
		// Bracketed tags that carry a ':' followed by 10 or more alphanumerics
		'#\[\/?[a-z\*=\+\-]+(\:?[0-9a-z]+)?:[a-z0-9]{10,}(\:[a-z0-9]+)?=?.*?\]#'
	);
	$text = preg_replace($patterns, ' ', ' '.strtolower($text).' ');

	// Filter out junk
	$text = str_replace($noise_match, $noise_replace, $text);

	// Strip out extra whitespace between words
	$text = trim(preg_replace('#\s+#', ' ', $text));

	// Fill an array with all the words
	$words = explode(' ', $text);

	// foreach iterates over a copy, so unsetting entries of $words inside the loop is safe
	foreach ($words as $i => $word)
	{
		// Measure the word as it will be stored, i.e. after trimming dots;
		// otherwise "ab." or "...." would slip past the minimum length check
		$word = trim($word, '.');
		$num_chars = pun_strlen($word);

		if ($num_chars < 3 || $num_chars > 20 || in_array($word, $stopwords, true))
			unset($words[$i]);
		else
			$words[$i] = $word;
	}

	return array_unique($words);
}
|
82 |
|
83 |
|
//
// Updates the search index with the contents of $post_id (and $subject)
//
// $mode     'edit' compares the new words with those already indexed for the
//           post and only adds/removes the difference; any other value indexes
//           all words as new matches
// $post_id  ID of the post being indexed
// $message  Post body to index
// $subject  Optional topic subject to index; when empty no subject words are
//           indexed (and in 'edit' mode any subject matches stored for the
//           post are removed)
//
// Words are spliced into the SQL unescaped. They come from split_words(),
// whose noise filter removes quotes and backslashes.
//
function update_search_index($mode, $post_id, $message, $subject = null)
{
	global $db_type, $pun_db;

	// The ID is concatenated into several queries below, so force it to be an integer
	$post_id = intval($post_id);

	// Split the new post/subject to obtain arrays of 'words'
	$words_message = split_words($message);
	$words_subject = ($subject) ? split_words($subject) : array();

	if ($mode == 'edit')
	{
		$result = $pun_db->query('SELECT w.id, w.word, m.subject_match FROM '.$pun_db->prefix.'search_words AS w INNER JOIN '.$pun_db->prefix.'search_matches AS m ON w.id=m.word_id WHERE m.post_id='.$post_id, true) or error(__FILE__, __LINE__);

		// Declare here to stop array_keys() and array_diff() from complaining if not set
		// Layout: $cur_words['post'|'subject'][word] = word id
		$cur_words = array('post' => array(), 'subject' => array());

		while ($row = $pun_db->fetch_row($result))
		{
			$match_in = ($row[2]) ? 'subject' : 'post';
			$cur_words[$match_in][$row[1]] = $row[0];
		}

		$pun_db->free_result($result);

		$words['add']['post'] = array_diff($words_message, array_keys($cur_words['post']));
		$words['add']['subject'] = array_diff($words_subject, array_keys($cur_words['subject']));
		$words['del']['post'] = array_diff(array_keys($cur_words['post']), $words_message);
		$words['del']['subject'] = array_diff(array_keys($cur_words['subject']), $words_subject);
	}
	else
	{
		$words['add']['post'] = $words_message;
		$words['add']['subject'] = $words_subject;
		$words['del']['post'] = array();
		$words['del']['subject'] = array();
	}

	unset($words_message);
	unset($words_subject);

	// Get unique words from the above arrays
	$unique_words = array_unique(array_merge($words['add']['post'], $words['add']['subject']));

	if (!empty($unique_words))
	{
		// Find out which of the words already exist in the word table
		$result = $pun_db->query('SELECT id, word FROM '.$pun_db->prefix.'search_words WHERE word IN('.implode(',', preg_replace('#^(.*)$#', '\'\1\'', $unique_words)).')', true) or error(__FILE__, __LINE__);

		$word_ids = array();
		while ($row = $pun_db->fetch_row($result))
			$word_ids[$row[1]] = $row[0];

		$pun_db->free_result($result);

		$new_words = array_diff($unique_words, array_keys($word_ids));
		unset($unique_words);

		if (!empty($new_words))
		{
			switch ($db_type)
			{
				// One multi-row INSERT for the MySQL drivers
				case 'mysql':
				case 'mysqli':
					$pun_db->query('INSERT INTO '.$pun_db->prefix.'search_words (word) VALUES'.implode(',', preg_replace('#^(.*)$#', '(\'\1\')', $new_words))) or error(__FILE__, __LINE__);
					break;

				// One INSERT per word for every other database type
				default:
					foreach ($new_words as $word)
						$pun_db->query('INSERT INTO '.$pun_db->prefix.'search_words (word) VALUES(\''.$word.'\')') or error(__FILE__, __LINE__);
					break;
			}
		}

		unset($new_words);
	}

	// Delete matches (only if editing a post)
	foreach ($words['del'] as $match_in => $wordlist)
	{
		if (empty($wordlist))
			continue;

		$subject_match = ($match_in == 'subject') ? 1 : 0;

		// Translate the obsolete words back into the IDs fetched above
		$obsolete_ids = array();
		foreach ($wordlist as $word)
			$obsolete_ids[] = $cur_words[$match_in][$word];

		$pun_db->query('DELETE FROM '.$pun_db->prefix.'search_matches WHERE word_id IN('.implode(',', $obsolete_ids).') AND post_id='.$post_id.' AND subject_match='.$subject_match) or error(__FILE__, __LINE__);
	}

	// Add new matches
	foreach ($words['add'] as $match_in => $wordlist)
	{
		if (empty($wordlist))
			continue;

		$subject_match = ($match_in == 'subject') ? 1 : 0;

		$pun_db->query('INSERT INTO '.$pun_db->prefix.'search_matches (post_id, word_id, subject_match) SELECT '.$post_id.', id, '.$subject_match.' FROM '.$pun_db->prefix.'search_words WHERE word IN('.implode(',', preg_replace('#^(.*)$#', '\'\1\'', $wordlist)).')') or error(__FILE__, __LINE__);
	}

	unset($words);
}
|
188 |
|
189 |
|
//
// Strip search index of indexed words in $post_ids
//
// $post_ids is placed verbatim inside SQL IN(...) lists, so it has to be a
// ready-made comma separated list of post IDs.
//
// Words used by the given posts that have exactly one row in the matches
// table are deleted from the word table; afterwards every match row that
// belongs to the given posts is deleted.
//
function strip_search_index($post_ids)
{
	global $db_type, $pun_db;

	$matches_table = $pun_db->prefix.'search_matches';
	$words_table = $pun_db->prefix.'search_words';

	if ($db_type == 'mysql' || $db_type == 'mysqli')
	{
		// The MySQL drivers get the nested SELECTs of the generic branch as separate queries

		// Step 1: every word referenced by the posts
		$result = $pun_db->query('SELECT word_id FROM '.$matches_table.' WHERE post_id IN('.$post_ids.') GROUP BY word_id') or error(__FILE__, __LINE__);

		if ($pun_db->num_rows($result))
		{
			$candidate_ids = array();
			while ($row = $pun_db->fetch_row($result))
				$candidate_ids[] = $row[0];

			// Step 2: of those, the words that have a single match row in total
			$result = $pun_db->query('SELECT word_id FROM '.$matches_table.' WHERE word_id IN('.implode(',', $candidate_ids).') GROUP BY word_id HAVING COUNT(word_id)=1') or error(__FILE__, __LINE__);

			if ($pun_db->num_rows($result))
			{
				$single_use_ids = array();
				while ($row = $pun_db->fetch_row($result))
					$single_use_ids[] = $row[0];

				// Step 3: remove those words from the word table
				$pun_db->query('DELETE FROM '.$words_table.' WHERE id IN('.implode(',', $single_use_ids).')') or error(__FILE__, __LINE__);
			}
		}
	}
	else
	{
		// All other database types do the same work in one statement
		$pun_db->query('DELETE FROM '.$words_table.' WHERE id IN(SELECT word_id FROM '.$matches_table.' WHERE word_id IN(SELECT word_id FROM '.$matches_table.' WHERE post_id IN('.$post_ids.') GROUP BY word_id) GROUP BY word_id HAVING COUNT(word_id)=1)') or error(__FILE__, __LINE__);
	}

	// Finally drop the match rows of the posts themselves
	$pun_db->query('DELETE FROM '.$matches_table.' WHERE post_id IN('.$post_ids.')') or error(__FILE__, __LINE__);
}