includes/paths.php
changeset 1266 f3933b355229
parent 1264 28c82f292a52
child 1274 673a1b6712fa
equal deleted inserted replaced
1265:7091cff2ca01 1266:f3933b355229
   721  														ON ( t.page_id=p.urlname AND t.namespace=p.namespace )
   721  														ON ( t.page_id=p.urlname AND t.namespace=p.namespace )
   722  													WHERE p.namespace=t.namespace
   722  													WHERE p.namespace=t.namespace
   723  														AND ( p.password=\'\' OR p.password=\'da39a3ee5e6b4b0d3255bfef95601890afd80709\' )
   723  														AND ( p.password=\'\' OR p.password=\'da39a3ee5e6b4b0d3255bfef95601890afd80709\' )
   724  														AND p.visible=1;'; // Only indexes "visible" pages
   724  														AND p.visible=1;'; // Only indexes "visible" pages
   725 		return $texts;
   725 		return $texts;
       
   726 	}
       
   727 	
       
   728 	/**
       
   729 	 * Get the unique words on a page. Returns an array listing all items in small array $arr1 that are not in very large array $arr2.
       
   730 	 * @param array
       
   731 	 * @param array
       
   732 	 * @return array
       
   733 	 */
       
   734 	
       
   735 	function get_unique_words($arr1, $arr2)
       
   736 	{
       
   737 		$no = array();
       
   738 		foreach ( $arr2 as $w )
       
   739 		{
       
   740 			if ( ($k = array_search($w, $arr1, true)) !== false )
       
   741 			{
       
   742 				$no[$k] = true;
       
   743 			}
       
   744 		}
       
   745 		$ret = array();
       
   746 		foreach ( $arr1 as $k => $w )
       
   747 		{
       
   748 			if ( !isset($no[$k]) )
       
   749 				$ret[] = $w;
       
   750 		}
       
   751 		return $ret;
   726 	}
   752 	}
   727 	
   753 	
   728 	/**
   754 	/**
   729  	* Builds a word list for search indexing.
   755  	* Builds a word list for search indexing.
   730  	* @param string Text to index
   756  	* @param string Text to index
   861 					// Indexing identifier for the page in the DB
   887 					// Indexing identifier for the page in the DB
   862 					$page_uniqid = "ns={$row['namespace']};pid=" . sanitize_page_id($row['page_id']);
   888 					$page_uniqid = "ns={$row['namespace']};pid=" . sanitize_page_id($row['page_id']);
   863 					$page_uniqid = $db->escape($page_uniqid);
   889 					$page_uniqid = $db->escape($page_uniqid);
   864 					
   890 					
   865 					// List of words on the page
   891 					// List of words on the page
       
   892 					if ( $debug )
       
   893 						echo "wordlist...";
   866 					$wordlist = $this->calculate_word_list($row['page_text'], $row['page_id'], $row['name']);
   894 					$wordlist = $this->calculate_word_list($row['page_text'], $row['page_id'], $row['name']);
   867 					
   895 					
   868 					// Index calculation complete -- run inserts
   896 					// Index calculation complete -- run inserts
   869 					$inserts = array();
   897 					$inserts = array();
       
   898 					$qt = array();
       
   899 					$unique_words = $this->get_unique_words($wordlist, $master_word_list);
   870 					foreach ( $wordlist as $word )
   900 					foreach ( $wordlist as $word )
   871 					{
   901 					{
       
   902 						$qs = microtime_float();
   872 						if ( in_array($word, $stopwords) || strval(intval($word)) === $word || strlen($word) < 3 )
   903 						if ( in_array($word, $stopwords) || strval(intval($word)) === $word || strlen($word) < 3 )
   873 							continue;
   904 							continue;
   874 						$word_db = $db->escape($word);
   905 						$word_db = $db->escape($word);
   875 						$word_db_lc = $db->escape(strtolower($word));
   906 						$word_db_lc = $db->escape(strtolower($word));
   876 						if ( !in_array($word, $master_word_list) )
   907 						if ( in_array($word, $unique_words) )
   877 						{
   908 						{
   878 							$inserts[] = "( '$word_db', '$word_db_lc', '$page_uniqid' )";
   909 							$inserts[] = "( '$word_db', '$word_db_lc', '$page_uniqid' )";
   879 						}
   910 						}
   880 						else
   911 						else
   881 						{
   912 						{
   886 													"page_names || ',$page_uniqid'";
   917 													"page_names || ',$page_uniqid'";
   887 							$q = $db->sql_query('UPDATE ' . table_prefix . "search_index SET page_names = $pid_col WHERE word = '$word_db';", false);
   918 							$q = $db->sql_query('UPDATE ' . table_prefix . "search_index SET page_names = $pid_col WHERE word = '$word_db';", false);
   888 							if ( !$q )
   919 							if ( !$q )
   889 								$db->_die();
   920 								$db->_die();
   890 						}
   921 						}
       
   922 						$qt[] = microtime_float() - $qs;
   891 					}
   923 					}
       
   924 					if ( $debug && count($qt) > 0 )
       
   925 						echo "QT: " . number_format(array_sum($qt) / count($qt), 4) . " * " . count($qt) . '; wl_len: ' . count($master_word_list) .' ';
   892 					if ( count($inserts) > 0 )
   926 					if ( count($inserts) > 0 )
   893 					{
   927 					{
   894 						if ( $verbose && $debug )
   928 						if ( $verbose && $debug )
   895 							echo 'i';
   929 							echo 'i';
   896 						$inserts = implode(",\n  ", $inserts);
   930 						$inserts = implode(",\n  ", $inserts);
   897 						$q = $db->sql_query('INSERT INTO ' . table_prefix . "search_index(word, word_lcase, page_names) VALUES\n  $inserts;", false);
   931 						$q = $db->sql_query('INSERT INTO ' . table_prefix . "search_index(word, word_lcase, page_names) VALUES\n  $inserts;", false);
   898 						if ( !$q )
   932 						if ( !$q )
   899 							$db->_die();
   933 							$db->_die();
   900 					}
   934 					}
   901 					
   935 					
   902 					$master_word_list = array_unique(array_merge($master_word_list, $wordlist));
   936 					$master_word_list = array_merge($master_word_list, $unique_words);
   903 					if ( $verbose )
   937 					if ( $verbose )
   904 					{
   938 					{
   905 						if ( isset($_SERVER['REQUEST_URI']) )
   939 						if ( isset($_SERVER['REQUEST_URI']) )
   906 							echo '<br />';
   940 							echo '<br />';
   907 						echo "\n";
   941 						echo "\n";
   908 					}
   942 					}
   909 					unset($inserts, $wordlist, $page_uniqid, $word_db, $q, $word, $row);
   943 					unset($inserts, $wordlist, $page_uniqid, $word_db, $q, $word, $row, $unique_words);
   910 				}
   944 				}
   911 				while ( $row = $db->fetchrow($texts) );
   945 				while ( $row = $db->fetchrow($texts) );
   912 			}
   946 			}
   913 			$db->free_result($texts);
   947 			$db->free_result($texts);
   914 		}
   948 		}