doxysearch.cpp
Go to the documentation of this file.
1 /******************************************************************************
2  *
3  * Copyright (C) 1997-2015 by Dimitri van Heesch.
4  *
5  * Permission to use, copy, modify, and distribute this software and its
6  * documentation under the terms of the GNU General Public License is hereby
7  * granted. No representations are made about the suitability of this software
8  * for any purpose. It is provided "as is" without express or implied warranty.
9  * See the GNU General Public License for more details.
10  *
11  * Documents produced by Doxygen are derivative works derived from the
12  * input used in their production; they are not affected by this license.
13  *
14  */
15 
// STL includes
#include <cctype>
#include <cstdio>
#include <cstdlib>
#include <string>
#include <vector>
#include <sstream>
#include <iostream>
#include <fstream>
#include <algorithm>

// Xapian includes
#include <xapian.h>

#ifdef _WIN32
#include <windows.h>
#else
#include <sys/stat.h>
#endif
35 
// Value slot numbers that main() passes to Xapian::Document::get_value()
// when it writes out a search result.
#define FIELD_TYPE 1 // written as the "type" member
#define FIELD_NAME 2 // written as the first part of the "name" member
#define FIELD_ARGS 3 // appended to the name inside the "name" member
#define FIELD_TAG  4 // written as the "tag" member
#define FIELD_URL  5 // written as the "url" member
#define FIELD_KEYW 6 // not read by main()
#define FIELD_DOC  7 // text handed to highlighter() to build the fragments
43 
/** Yields the numeric value (0-15) of the hexadecimal digit \a x, or -1 when
 *  \a x is not one of 0-9, a-f or A-F.
 *  The argument is evaluated several times, so it must not have side effects.
 */
#define HEX2DEC(x) (((x)>='0' && (x)<='9')?((x)-'0'):\
                    ((x)>='a' && (x)<='f')?((x)-'a'+10):\
                    ((x)>='A' && (x)<='F')?((x)-'A'+10):-1)
47 
48 
/** Returns true if \a dirName refers to an existing directory, and false if
 *  the path cannot be queried or refers to something else.
 */
bool dirExists(const std::string& dirName)
{
#ifdef _WIN32
  const DWORD attributes = GetFileAttributesA(dirName.c_str());
  // INVALID_FILE_ATTRIBUTES means the path could not be queried at all
  return attributes != INVALID_FILE_ATTRIBUTES &&
         (attributes & FILE_ATTRIBUTE_DIRECTORY) != 0;
#else
  struct stat info;
  if (stat(dirName.c_str(), &info) != 0)
  {
    return false; // path does not exist or is not accessible
  }
  return S_ISDIR(info.st_mode) != 0;
#endif
}
69 
70 
/** Returns the value (0-15) of the hexadecimal digit \a c, or -1 if \a c is
 *  not a hexadecimal digit.
 */
static int uriHexValue(unsigned char c)
{
  if (c>='0' && c<='9') return c-'0';
  if (c>='a' && c<='f') return c-'a'+10;
  if (c>='A' && c<='F') return c-'A'+10;
  return -1;
}

/** Decodes a URI encoded string into a normal string.
 *
 *  Each "%XY" sequence with two hexadecimal digits is replaced by the byte
 *  with that value and each '+' is replaced by a space. A '%' that is not
 *  followed by two hexadecimal digits is copied unchanged (RFC1630 reserves
 *  such sequences for future extension).
 *
 *  @param sSrc the encoded string.
 *  @return the decoded string.
 */
static std::string uriDecode(const std::string & sSrc)
{
  const std::string::size_type len = sSrc.length();
  std::string result;
  result.reserve(len); // decoding never makes the string longer

  std::string::size_type i = 0;
  while (i<len)
  {
    const char ch = sSrc[i];
    if (ch=='%' && len-i>2) // two more characters are available
    {
      // the digit values are kept as int so that the -1 "invalid" marker
      // also works on platforms where plain char is unsigned
      const int hi = uriHexValue(static_cast<unsigned char>(sSrc[i+1]));
      const int lo = uriHexValue(static_cast<unsigned char>(sSrc[i+2]));
      if (hi!=-1 && lo!=-1)
      {
        result += static_cast<char>((hi<<4)+lo);
        i += 3;
        continue;
      }
    }
    else if (ch=='+') // '+' encodes a space, also at the very end of the input
    {
      result += ' ';
      ++i;
      continue;
    }
    result += ch; // ordinary character or undecodable '%'
    ++i;
  }
  return result;
}
117 
/** Returns the list of strings that result when splitting \a s using
 *  delimiter \a delim.
 *
 *  Empty items between two adjacent delimiters are kept; a single trailing
 *  delimiter does not produce an extra empty item and an empty input
 *  yields an empty list.
 */
static std::vector<std::string> split(const std::string &s, char delim)
{
  std::vector<std::string> elems;
  std::stringstream ss(s);
  std::string item;
  while (std::getline(ss, item, delim))
  {
    elems.push_back(item);
  }
  return elems;
}
129 
/** Reads a value of type T from string \a s using stream extraction.
 *
 *  @return the extracted value, or a value-initialized T (0 for the
 *          arithmetic types) when nothing could be extracted.
 */
template<class T>
T fromString(const std::string &s)
{
  std::istringstream stream (s);
  T t = T(); // well-defined result even if the extraction below fails
  stream >> t;
  return t;
}
139 
/** Class that holds the starting position of a word */
struct WordPosition
{
  WordPosition(int s,int i) : start(s), index(i) {}
  int start; //!< offset at which the word was found in the searched text
  int index; //!< index of the word in the list of search words
};
147 
148 /** Class representing the '<' operator for WordPosition objects based on position. */
150 {
151  bool operator()(const WordPosition &p1,const WordPosition &p2)
152  {
153  return p1.start<p2.start;
154  }
155 };
156 
/** Class that holds a text fragment */
struct Fragment
{
  Fragment(const std::string &t,int occ) : text(t), occurrences(occ) {}
  std::string text; //!< the fragment, including any highlighting markup
  int occurrences;  //!< number of search word occurrences in the fragment
};
164 
165 /** Class representing the '>' operator for Fragment objects based on occurrence. */
167 {
168  bool operator()(const Fragment &p1,const Fragment &p2)
169  {
170  return p1.occurrences>p2.occurrences;
171  }
172 };
173 
/** Class representing a range within a string.
 *  highlighter() stores ranges with \a start being the first offset in the
 *  range and \a end the offset just past its last character.
 */
struct Range
{
  Range(int s,int e) : start(s), end(e) {}
  int start; //!< first offset of the range
  int end;   //!< offset just past the range
};
181 
182 /** Returns true if [start..start+len] is inside one of the \a ranges. */
183 static bool insideRange(const std::vector<Range> &ranges,int start,int len)
184 {
185  for (std::vector<Range>::const_iterator it = ranges.begin();
186  it!=ranges.end(); ++it
187  )
188  {
189  Range r = *it;
190  if (start>=r.start && start+len<r.end)
191  {
192  return true;
193  }
194  }
195  return false;
196 }
197 
198 /** Returns a list of text \a fragments from \a s containing one or
199  * more \a words. The list is sorted according to the
200  * number of occurrences of words within the fragment.
201  */
202 static void highlighter(const std::string &s,
203  const std::vector<std::string> &words,
204  std::vector<Fragment> &fragments)
205 {
206  const std::string spanStart="<span class=\"hl\">";
207  const std::string spanEnd="</span>";
208  const std::string dots="...";
209  const int fragLen = 60;
210  int sl=s.length();
211 
212  // find positions of words in s
213  size_t j=0;
214  std::vector<WordPosition> positions;
215  for (std::vector<std::string>::const_iterator it=words.begin();
216  it!=words.end();
217  ++it,++j
218  )
219  {
220  int pos=0;
221  size_t i;
222  std::string word = *it;
223  while ((i=s.find(word,pos))!=std::string::npos)
224  {
225  positions.push_back(WordPosition(i,j));
226  pos=i+word.length();
227  }
228  }
229  // sort on position
230  std::sort(positions.begin(),positions.end(),WordPosition_less());
231  // get fragments around words
232  std::vector<Range> ranges;
233  for (std::vector<WordPosition>::const_iterator it=positions.begin();
234  it!=positions.end();
235  ++it)
236  {
237  WordPosition wp = *it;
238  std::string w = words[wp.index];
239  int i=wp.start;
240  int wl=w.length();
241  if (!insideRange(ranges,i,wl))
242  {
243  if (wl>fragLen)
244  {
245  fragments.push_back(Fragment(spanStart+w+spanEnd,1));
246  ranges.push_back(Range(i,i+wl));
247  }
248  else
249  {
250  std::string startFragment,endFragment;
251  int bi=i-(fragLen-wl)/2;
252  int ei=i+wl+(fragLen-wl)/2;
253  int occ=0;
254  if (bi<0) { ei-=bi; bi=0; } else startFragment=dots;
255  if (ei>sl) { ei=sl; } else endFragment=dots;
256  while (bi>0 && !isspace(s[bi])) bi--; // round to start of the word
257  while (ei<sl && !isspace(s[ei])) ei++; // round to end of the word
258  // highlight any word in s between indexes bi and ei
259  std::string fragment=startFragment;
260  int pos=bi;
261  for (std::vector<WordPosition>::const_iterator it2=positions.begin();
262  it2!=positions.end();
263  ++it2)
264  {
265  WordPosition wp2 = *it2;
266  std::string w2 = words[wp2.index];
267  int wl2 = w2.length();
268  if (wp2.start>=bi && wp2.start+wl2<=ei) // word is inside the range!
269  {
270  fragment+=s.substr(pos,wp2.start-pos)+
271  spanStart+
272  s.substr(wp2.start,wl2)+
273  spanEnd;
274  pos=wp2.start+wl2;
275  occ++;
276  }
277  }
278  fragment+=s.substr(pos,ei-pos)+endFragment;
279  fragments.push_back(Fragment(fragment,occ));
280  ranges.push_back(Range(bi,ei));
281  }
282  }
283  }
284  std::sort(fragments.begin(),fragments.end(),Fragment_greater());
285 }
286 
/** Escapes a string such that it can be included in a JSON structure.
 *
 *  Double quotes and backslashes are prefixed with a backslash, the control
 *  characters with a short JSON form (\\b, \\f, \\n, \\r, \\t) use that form
 *  and any other character below 0x20 is written as \\u00XX. All other
 *  bytes are copied unchanged.
 */
static std::string escapeString(const std::string &s)
{
  static const char hexDigits[] = "0123456789abcdef";
  std::string dst;
  dst.reserve(s.length());
  for (std::string::size_type i=0;i<s.length();i++)
  {
    const unsigned char ch = static_cast<unsigned char>(s[i]);
    switch (ch)
    {
      case '\"': dst += "\\\""; break;
      case '\\': dst += "\\\\"; break;
      case '\b': dst += "\\b";  break;
      case '\f': dst += "\\f";  break;
      case '\n': dst += "\\n";  break;
      case '\r': dst += "\\r";  break;
      case '\t': dst += "\\t";  break;
      default:
        if (ch<0x20) // remaining control characters are not allowed unescaped
        {
          dst += "\\u00";
          dst += hexDigits[ch>>4];
          dst += hexDigits[ch&0x0f];
        }
        else
        {
          dst += static_cast<char>(ch);
        }
        break;
    }
  }
  return dst;
}
302 
303 static void showError(const std::string &callback,const std::string &error)
304 {
305  std::cout << callback << "({\"error\":\"" << error << "\"})";
306  exit(0);
307 }
308 
309 /** Main routine */
310 int main(int argc,char **argv)
311 {
312  // process inputs that were passed to us via QUERY_STRING
313  std::cout << "Content-Type:application/javascript;charset=utf-8\r\n\n";
314  std::string callback;
315  try
316  {
317  // get input parameters
318  const char *queryEnv = getenv("QUERY_STRING");
319  std::string queryString;
320  if (queryEnv)
321  {
322  queryString = queryEnv;
323  }
324  else if (argc>=2)
325  {
326  queryString = argv[1];
327  }
328  else
329  {
330  std::cout << "No input!\n";
331  exit(1);
332  }
333 
334  // parse query string
335  std::vector<std::string> parts = split(queryString,'&');
336  std::string searchFor,callback;
337  int num=1,page=0;
338  for (std::vector<std::string>::const_iterator it=parts.begin();it!=parts.end();++it)
339  {
340  std::vector<std::string> kv = split(*it,'=');
341  if (kv.size()==2)
342  {
343  std::string val = uriDecode(kv[1]);
344  if (kv[0]=="q") searchFor = val;
345  else if (kv[0]=="n") num = fromString<int>(val);
346  else if (kv[0]=="p") page = fromString<int>(val);
347  else if (kv[0]=="cb") callback = val;
348  }
349  }
350 
351  std::string indexDir = "doxysearch.db";
352 
353  if (queryString=="test") // user test
354  {
355  bool dbOk = dirExists(indexDir);
356  if (dbOk)
357  {
358  std::cout << "Test successful.";
359  }
360  else
361  {
362  std::cout << "Test failed: cannot find search index " << indexDir;
363  }
364  exit(0);
365  }
366 
367  // create query
368  Xapian::Database db(indexDir);
369  Xapian::Enquire enquire(db);
370  Xapian::Query query;
371  std::vector<std::string> words = split(searchFor,' ');
372  for (std::vector<std::string>::const_iterator it=words.begin();it!=words.end();++it)
373  {
374  query = Xapian::Query(Xapian::Query::OP_OR,query,Xapian::Query(*it));
375  }
376  enquire.set_query(query);
377 
378  // get results
379  Xapian::MSet matches = enquire.get_mset(page*num,num);
380  unsigned int hits = matches.get_matches_estimated();
381  unsigned int offset = page*num;
382  unsigned int pages = num>0 ? (hits+num-1)/num : 0;
383  if (offset>hits) offset=hits;
384  if (offset+num>hits) num=hits-offset;
385 
386  // write results as JSONP
387  std::cout << callback.c_str() << "(";
388  std::cout << "{" << std::endl
389  << " \"hits\":" << hits << "," << std::endl
390  << " \"first\":" << offset << "," << std::endl
391  << " \"count\":" << num << "," << std::endl
392  << " \"page\":" << page << "," << std::endl
393  << " \"pages\":" << pages << "," << std::endl
394  << " \"query\": \"" << escapeString(searchFor) << "\"," << std::endl
395  << " \"items\":[" << std::endl;
396  // foreach search result
397  unsigned int o = offset;
398  for (Xapian::MSetIterator i = matches.begin(); i != matches.end(); ++i,++o)
399  {
400  std::vector<Fragment> hl;
401  Xapian::Document doc = i.get_document();
402  highlighter(doc.get_value(FIELD_DOC),words,hl);
403  std::cout << " {\"type\": \"" << doc.get_value(FIELD_TYPE) << "\"," << std::endl
404  << " \"name\": \"" << doc.get_value(FIELD_NAME) << escapeString(doc.get_value(FIELD_ARGS)) << "\"," << std::endl
405  << " \"tag\": \"" << doc.get_value(FIELD_TAG) << "\"," << std::endl
406  << " \"url\": \"" << doc.get_value(FIELD_URL) << "\"," << std::endl;
407  std::cout << " \"fragments\":[" << std::endl;
408  int c=0;
409  bool first=true;
410  for (std::vector<Fragment>::const_iterator it = hl.begin();it!=hl.end() && c<3;++it,++c)
411  {
412  if (!first) std::cout << "," << std::endl;
413  std::cout << " \"" << escapeString((*it).text) << "\"";
414  first=false;
415  }
416  if (!first) std::cout << std::endl;
417  std::cout << " ]" << std::endl;
418  std::cout << " }";
419  if (o<offset+num-1) std::cout << ",";
420  std::cout << std::endl;
421  }
422  std::cout << " ]" << std::endl << "})" << std::endl;
423  }
424  catch (const Xapian::Error &e) // Xapian exception
425  {
426  showError(callback,e.get_description());
427  }
428  catch (...) // Any other exception
429  {
430  showError(callback,"Unknown Exception!");
431  exit(1);
432  }
433  return 0;
434 }
static std::string escapeString(const std::string &s)
Definition: doxysearch.cpp:288
end
while True: pbar.update(maxval-len(onlies[E][S])) #print iS, "/", len(onlies[E][S]) found = False for...
Range(int s, int e)
Definition: doxysearch.cpp:177
std::string text
Definition: doxysearch.cpp:161
string delim()
Definition: fcldump.cxx:40
#define FIELD_URL
Definition: doxysearch.cpp:40
#define FIELD_TAG
Definition: doxysearch.cpp:39
std::string string
Definition: nybbler.cc:12
error
Definition: include.cc:26
#define FIELD_TYPE
Definition: doxysearch.cpp:36
intermediate_table::const_iterator const_iterator
#define HEX2DEC(x)
Definition: doxysearch.cpp:44
#define FIELD_ARGS
Definition: doxysearch.cpp:38
static void highlighter(const std::string &s, const std::vector< std::string > &words, std::vector< Fragment > &fragments)
Definition: doxysearch.cpp:202
static std::string uriDecode(const std::string &sSrc)
Definition: doxysearch.cpp:72
const double e
T fromString(const std::string &s)
Definition: doxysearch.cpp:132
int start
Definition: doxysearch.cpp:178
nvidia::inferenceserver::client::Error Error
Definition: triton_utils.h:15
int occurrences
Definition: doxysearch.cpp:162
std::string getenv(std::string const &name)
Definition: getenv.cc:15
static std::vector< std::string > split(const std::string &s, char delim)
Definition: doxysearch.cpp:121
Fragment(const std::string &t, int occ)
Definition: doxysearch.cpp:160
static void showError(const std::string &callback, const std::string &error)
Definition: doxysearch.cpp:303
bool dirExists(const std::string &dirName)
Definition: doxysearch.cpp:49
QCString doc
bool operator()(const Fragment &p1, const Fragment &p2)
Definition: doxysearch.cpp:168
static bool insideRange(const std::vector< Range > &ranges, int start, int len)
Definition: doxysearch.cpp:183
int main(int argc, char **argv)
Definition: doxysearch.cpp:310
int end
Definition: doxysearch.cpp:179
#define FIELD_NAME
Definition: doxysearch.cpp:37
query_result< Args... > query(sqlite3 *db, std::string const &ddl)
Definition: select.h:75
WordPosition(int s, int i)
Definition: doxysearch.cpp:143
bool operator()(const WordPosition &p1, const WordPosition &p2)
Definition: doxysearch.cpp:151
#define FIELD_DOC
Definition: doxysearch.cpp:42
static QCString * s
Definition: config.cpp:1042
union ptb::content::word::word word
QTextStream & endl(QTextStream &s)