search_functions.php
Go to the documentation of this file.
1 <script language="PHP">
2 require_once "search_config.php";
3 
/**
 * Prints the closing part of the search form and, when the tree view is
 * enabled, the translated split bar markup.
 *
 * Reads the global $config array (keys DISABLE_INDEX and GENERATE_TREEVIEW)
 * and the global $translator array (key split_bar).
 *
 * @param string $value Text inserted verbatim into the value attribute of the
 *                      search field. It is NOT escaped here, so the caller is
 *                      responsible for passing an attribute-safe string.
 */
function end_form($value)
{
    global $config;
    global $translator;

    // The search field and the closing tags are only emitted when the index
    // is not disabled.
    if (!$config['DISABLE_INDEX']) {
        $markup = ' <input type="text" id="MSearchField" name="query" value="' . $value . '"'
            . ' size="20" accesskey="S"'
            . ' onfocus="searchBox.OnSearchFieldFocus(true)"'
            . ' onblur="searchBox.OnSearchFieldFocus(false)"/>' . "\n"
            . " </form>\n"
            . " </div><div class=\"right\"></div>\n"
            . " </div>\n"
            . " </li>\n"
            . " </ul>\n"
            . " </div>\n"
            . "</div>\n";
        echo $markup;
    }

    if ($config['GENERATE_TREEVIEW']) {
        echo $translator['split_bar'];
    }
}
17 
/**
 * Prints the closing body and html tags of the page.
 */
function end_page()
{
    $closingTags = "</body></html>";
    echo $closingTags;
}
22 
/**
 * Returns the translated title shown above the search results.
 *
 * @return mixed The 'search_results_title' entry of the global $translator.
 */
function search_results()
{
    global $translator;

    $title = $translator['search_results_title'];
    return $title;
}
28 
/**
 * Returns the translated "N documents found" message for a match count.
 *
 * The global $translator['search_results'] array is indexed by the count;
 * every count above 2 uses entry 2.
 *
 * @param int $num Number of matching documents.
 * @return string The message with variables such as $num expanded.
 */
function matches_text($num)
{
    global $translator;

    $slot = $num;
    if ($num > 2) {
        $slot = 2;
    }
    $string = $translator['search_results'][$slot];

    // The template is evaluated as a double-quoted PHP string so that it can
    // refer to $num.
    // NOTE(review): eval() executes whatever the translator table contains;
    // this is only safe while that table comes from trusted, generated
    // configuration. The local names $string, $result and $num must keep
    // their names because the evaluated code runs in this scope.
    eval("\$result = \"$string\";");
    return $result;
}
37 
/**
 * Returns the translated label that precedes the list of matched words.
 *
 * @return mixed The 'search_matches' entry of the global $translator.
 */
function report_matches()
{
    global $translator;

    $label = $translator['search_matches'];
    return $label;
}
43 
/**
 * Reads four bytes from the stream and combines them into one integer,
 * most significant byte first.
 *
 * @param resource $file Open stream positioned at the first byte.
 * @return int The combined value.
 */
function readInt($file)
{
    $value = 0;
    // Shift the accumulated value up one byte and append the next one.
    for ($i = 0; $i < 4; $i++) {
        $value = ($value << 8) | ord(fgetc($file));
    }
    return $value;
}
50 
/**
 * Reads characters from the stream until a zero byte or the end of the
 * stream is reached.
 *
 * @param resource $file Open stream positioned at the start of the string.
 * @return string The characters read, without the terminating zero byte.
 */
function readString($file)
{
    $result = "";
    for (;;) {
        $c = fgetc($file);
        // ord() yields 0 both for a NUL byte and for the false returned at
        // end of stream, so either one ends the string.
        if (ord($c) == 0) {
            break;
        }
        $result .= $c;
    }
    return $result;
}
57 
/**
 * Reads the next four characters of the stream and returns them as a string.
 *
 * @param resource $file Open stream positioned at the header.
 * @return string Up to four characters; shorter when the stream ends early.
 */
function readHeader($file)
{
    $header = "";
    for ($i = 0; $i < 4; $i++) {
        $header .= fgetc($file);
    }
    return $header;
}
64 
/**
 * Computes the hash-table slot of a word from its first two bytes.
 *
 * Hashing only the two-byte prefix is what allows searching for words by
 * their leading substring.
 *
 * @param string $word Word to hash.
 * @return int First byte * 256 + second byte, or -1 when the word is shorter
 *             than two bytes or one of its first two bytes is zero.
 */
function computeIndex($word)
{
    if (strlen($word) < 2) return -1;

    // Square brackets are used for string offsets; the curly-brace form is a
    // parse error on PHP 8.
    $hi = ord($word[0]); // high char of the index
    if ($hi == 0) return -1;

    $lo = ord($word[1]); // low char of the index
    if ($lo == 0) return -1;

    return $hi * 256 + $lo;
}
78 
/**
 * Looks up all indexed words that start with $word and appends one entry per
 * matching word to $statsList.
 *
 * Each appended entry is an array with the keys:
 *   "word"  - the search word,
 *   "match" - the indexed word that starts with it,
 *   "index" - file offset of the statistics of the indexed word,
 *   "full"  - whether the indexed word equals the search word,
 *   "docs"  - list of documents, each with "idx", "freq", "rank", "hi",
 *             "name" and "url".
 *
 * @param resource $file      Open index file.
 * @param string   $word      Word to look up.
 * @param array    $statsList Result list; new entries are appended.
 * @return array The updated $statsList.
 */
function search($file,$word,&$statsList)
{
    $index = computeIndex($word);
    if ($index == -1) // no valid index for this word
    {
        return $statsList;
    }

    fseek($file, $index*4+4); // 4 bytes per entry, skip header
    $listOffset = readInt($file);
    if (!$listOffset) // no words for this hash key
    {
        return $statsList;
    }

    $start = sizeof($statsList);
    $count = $start;
    $wordLen = strlen($word);

    // Walk the list of words stored under the hash key; an empty string
    // terminates the list.
    fseek($file, $listOffset);
    $w = readString($file);
    while ($w !== "")
    {
        $statIdx = readInt($file);
        // Byte-wise prefix test. A loose == comparison would treat
        // numeric-looking strings such as "0e1" and "0e2" as equal.
        if (strncmp($w, $word, $wordLen) == 0)
        { // found word that matches (as substring)
            $statsList[$count++] = array(
                "word"  => $word,
                "match" => $w,
                "index" => $statIdx,
                "full"  => strlen($w) == $wordLen,
                "docs"  => array()
            );
        }
        $w = readString($file);
    }

    $totalHi = 0;
    $totalFreqHi = 0;
    $totalFreqLo = 0;
    $end = sizeof($statsList);

    // First pass: load the documents of every matching word and accumulate
    // the frequency totals.
    for ($count = $start; $count < $end; $count++)
    {
        // whole word matches have a double weight
        $multiplier = $statsList[$count]["full"] ? 2 : 1;
        fseek($file, $statsList[$count]["index"]);
        $numDocs = readInt($file);
        $docInfo = array();
        // read docs info + occurrence frequency of the word
        for ($i = 0; $i < $numDocs; $i++)
        {
            $idx  = readInt($file);
            $freq = readInt($file);
            // The lowest bit of the stored value is the priority flag, the
            // remaining bits are the frequency.
            $docInfo[$i] = array(
                "idx"  => $idx,
                "freq" => $freq >> 1,
                "rank" => 0.0,
                "hi"   => $freq & 1
            );
            if ($freq & 1) // word occurs in high priority doc
            {
                $totalHi++;
                $totalFreqHi += $freq * $multiplier;
            }
            else // word occurs in low priority doc
            {
                $totalFreqLo += $freq * $multiplier;
            }
        }
        // read name and url info for the doc
        for ($i = 0; $i < $numDocs; $i++)
        {
            fseek($file, $docInfo[$i]["idx"]);
            $docInfo[$i]["name"] = readString($file);
            $docInfo[$i]["url"]  = readString($file);
        }
        $statsList[$count]["docs"] = $docInfo;
    }

    $totalFreq = ($totalHi + 1) * $totalFreqLo + $totalFreqHi;

    // Second pass: compute the frequency rank of the word in each doc.
    for ($count = $start; $count < $end; $count++)
    {
        $multiplier = $statsList[$count]["full"] ? 2 : 1;
        $numDocs = sizeof($statsList[$count]["docs"]);
        for ($i = 0; $i < $numDocs; $i++)
        {
            $weight = $statsList[$count]["docs"][$i]["freq"] * $multiplier;
            if ($statsList[$count]["docs"][$i]["hi"])
            {
                $weight += $totalFreqLo;
            }
            // A zero total (only zero frequencies in the index) would
            // otherwise divide by zero; such documents get rank 0.
            $statsList[$count]["docs"][$i]["rank"] =
                ($totalFreq != 0) ? (float)$weight / $totalFreq : 0.0;
        }
    }
    return $statsList;
}
176 
/**
 * Merges the per-word search results into a per-document table keyed by URL.
 *
 * For every document of every word entry the rank is added to the document's
 * total and a {"word", "match", "freq"} record is appended to the document's
 * "words" list.
 *
 * @param array $results Word entries, each with "word", "match" and "docs".
 * @param array $docs    Document table that is updated in place.
 * @return array The updated $docs.
 */
function combine_results($results,&$docs)
{
    foreach ($results as $wordInfo)
    {
        foreach ($wordInfo["docs"] as $di)
        {
            $key = $di["url"];
            if (!isset($docs[$key]))
            {
                // first time this document is seen
                $docs[$key] = array(
                    "url"  => $key,
                    "name" => $di["name"],
                    "rank" => $di["rank"]
                );
            }
            else
            {
                $docs[$key]["rank"] += $di["rank"];
            }
            $docs[$key]["words"][] = array(
                "word"  => $wordInfo["word"],
                "match" => $wordInfo["match"],
                "freq"  => $di["freq"]
            );
        }
    }
    return $docs;
}
206 
/**
 * Returns the documents that contain every required word and none of the
 * forbidden words.
 *
 * A document "contains" a word when one of the records in its "words" list
 * has that word as its "word" value.
 *
 * @param array $docs           Document table keyed by URL.
 * @param array $requiredWords  Words that must all be present.
 * @param array $forbiddenWords Words of which none may be present.
 * @return array The accepted documents, with their original keys.
 */
function filter_results($docs,&$requiredWords,&$forbiddenWords)
{
    $filteredDocs = array();
    // foreach replaces the each()/list() loop, which no longer exists on
    // PHP 8.
    foreach ($docs as $key => $doc)
    {
        // collect the search words that matched this document
        $docWords = array();
        if (isset($doc["words"]))
        {
            foreach ($doc["words"] as $wordInfo)
            {
                $docWords[] = $wordInfo["word"];
            }
        }

        $copy = true; // copy entry by default

        foreach ($requiredWords as $reqWord)
        {
            if (!in_array($reqWord, $docWords, true))
            {
                $copy = false; // document lacks one of the required words
                break;
            }
        }

        if ($copy)
        {
            foreach ($forbiddenWords as $badWord)
            {
                if (in_array($badWord, $docWords, true))
                {
                    $copy = false; // document contains a forbidden word
                    break;
                }
            }
        }

        if ($copy) $filteredDocs[$key] = $doc;
    }
    return $filteredDocs;
}
246 
/**
 * Comparison callback that orders entries by descending "rank".
 *
 * @param array $a First entry, with a "rank" key.
 * @param array $b Second entry, with a "rank" key.
 * @return int -1 when $a ranks higher, 0 when the ranks are equal, 1 otherwise.
 */
function compare_rank($a,$b)
{
    $left  = $a["rank"];
    $right = $b["rank"];

    if ($left == $right) return 0;
    if ($left > $right) return -1;
    return 1;
}
255 
/**
 * Sorts the documents with the compare_rank callback.
 *
 * The sorted list is re-indexed from 0, so the keys of $docs are lost.
 *
 * @param array $docs   Documents to sort.
 * @param array $sorted Receives the sorted list.
 * @return array The same sorted list.
 */
function sort_results($docs,&$sorted)
{
    $ranked = $docs;
    usort($ranked, "compare_rank");
    $sorted = $ranked;
    return $sorted;
}
262 
/**
 * Prints the search results as an HTML table.
 *
 * Output consists of a header with the results title, a row with the
 * translated match-count message and, per document, one row with the
 * numbered link and one row listing the matched words with their
 * frequencies.
 *
 * NOTE(review): URLs, names and words are written without HTML escaping;
 * confirm that the index content is already safe for HTML output.
 *
 * @param array $docs Documents to report, each with "url", "name" and "words".
 */
function report_results(&$docs)
{
    echo "<div class=\"header\">";
    echo " <div class=\"headertitle\">\n";
    echo " <h1>".search_results()."</h1>\n";
    echo " </div>\n";
    echo "</div>\n";
    echo "<div class=\"searchresults\">\n";
    echo "<table cellspacing=\"2\">\n";
    $numDocs = sizeof($docs);
    if ($numDocs==0)
    {
        echo " <tr>\n";
        echo " <td colspan=\"2\">".matches_text(0)."</td>\n";
        echo " </tr>\n";
    }
    else
    {
        echo " <tr>\n";
        echo " <td colspan=\"2\">".matches_text($numDocs);
        echo "\n";
        echo " </td>\n";
        echo " </tr>\n";
        $num=1;
        foreach ($docs as $doc)
        {
            echo " <tr>\n";
            echo " <td align=\"right\">$num.</td>";
            echo "<td><a class=\"el\" href=\"".$doc["url"]."\">".$doc["name"]."</a></td>\n";
            // Close the link row before opening the row with the matched
            // words; previously a second <tr> was opened here and the first
            // one was never closed.
            echo " </tr>\n";
            echo " <tr>\n";
            echo " <td></td><td class=\"tiny\">".report_matches()." ";
            foreach ($doc["words"] as $wordInfo)
            {
                $word = $wordInfo["word"];
                // part of the indexed word that follows the search word
                $matchRight = substr($wordInfo["match"],strlen($word));
                echo "<b>$word</b>$matchRight(".$wordInfo["freq"].") ";
            }
            echo " </td>\n";
            echo " </tr>\n";
            $num++;
        }
    }
    echo "</table>\n";
    echo "</div>\n";
}
308 
/**
 * Runs a search query against the index file "search/search.idx".
 *
 * The query is split on spaces. A word prefixed with '+' is required, a word
 * prefixed with '-' is forbidden. Every distinct word is looked up, the
 * results are combined per document, filtered on the required and forbidden
 * words and sorted by rank.
 *
 * The script is terminated with an error message when the PHP version is too
 * old, the index file cannot be opened or its header is not "DOXS".
 *
 * @param string $query Raw query string.
 * @return array Matching documents, best rank first.
 */
function run_query($query)
{
    // version_compare orders version numbers numerically; a plain string
    // comparison would rank "10.0.0" below "4.1.0".
    if (version_compare(phpversion(), '4.1.0', '<'))
    {
        die("Error: PHP version 4.1.0 or above required!");
    }
    if (!($file=fopen("search/search.idx","rb")))
    {
        die("Error: Search index file could NOT be opened!");
    }
    if (readHeader($file)!="DOXS")
    {
        fclose($file);
        die("Error: Header of index file is invalid!");
    }
    $results = array();
    $requiredWords = array();
    $forbiddenWords = array();
    $foundWords = array();

    // strtok() returns false when there are no more tokens; comparing against
    // false keeps a "0" token from ending the loop early.
    for ($word = strtok($query, " "); $word !== false; $word = strtok(" "))
    {
        // Matches are recorded under the lowercased word, so the required and
        // forbidden lists must use the same form to be comparable later on.
        $word = strtolower($word);

        if ($word !== '' && $word[0] == '+')
        {
            $word = (string)substr($word, 1);
            if ($word !== '') $requiredWords[] = $word;
        }
        if ($word !== '' && $word[0] == '-')
        {
            $word = (string)substr($word, 1);
            if ($word !== '') $forbiddenWords[] = $word;
        }
        // A bare "+" or "-" leaves nothing to search for.
        if ($word === '') continue;

        if (!in_array($word, $foundWords))
        {
            $foundWords[] = $word;
            search($file, $word, $results);
        }
    }
    fclose($file);
    $docs = array();
    combine_results($results,$docs);
    // filter out documents with forbidden word or that do not contain
    // required words
    $filteredDocs = filter_results($docs,$requiredWords,$forbiddenWords);
    // sort the results based on rank
    $sorted = array();
    sort_results($filteredDocs,$sorted);
    return $sorted;
}
350 
/**
 * Entry point: runs the query from the "query" GET parameter and prints the
 * rest of the page (end of the search form, results table, closing tags).
 */
function main()
{
    $query = "";
    // Only accept a plain string; a request such as ?query[]=x delivers an
    // array, which the string functions used further on cannot handle.
    if (isset($_GET["query"]) && is_string($_GET["query"]))
    {
        $query = $_GET["query"];
    }
    $sorted = run_query($query);
    // Now output the HTML stuff...
    // End the HTML form; every character outside the allowed set is replaced
    // by a space before the query is echoed back into the search field.
    end_form(preg_replace("/[^a-zA-Z0-9\-\_\.\x80-\xFF]/i", " ", $query ));
    // report results to the user
    report_results($sorted);
    end_page();
}
366 </script>
static SrcLangExt language
Definition: scanner.cpp:10895
static constexpr double as
Definition: Units.h:101
Definition: search.py:1
auto array(Array const &a)
Returns a manipulator which will print the specified array.
Definition: DumpUtils.h:228
int strcmp(const String &s1, const String &s2)
Definition: relates.cpp:14