Classes | Public Types | Public Member Functions | Private Types | Private Member Functions | Static Private Member Functions | Private Attributes | Static Private Attributes | Friends | List of all members
internal::GenericRegex< Encoding, Allocator > Class Template Reference

Regular expression engine with a subset of ECMAScript grammar. More...

#include <regex.h>

Classes

struct  Frag
 
struct  Range
 
struct  State
 

Public Types

typedef Encoding EncodingType
 
typedef Encoding::Ch Ch
 

Public Member Functions

 GenericRegex (const Ch *source, Allocator *allocator=0)
 
 ~GenericRegex ()
 
bool IsValid () const
 

Private Types

enum  Operator {
  kZeroOrOne, kZeroOrMore, kOneOrMore, kConcatenation,
  kAlternation, kLeftParenthesis
}
 

Private Member Functions

State & GetState (SizeType index)
 
const State & GetState (SizeType index) const
 
Range & GetRange (SizeType index)
 
const Range & GetRange (SizeType index) const
 
template<typename InputStream >
void Parse (DecodedStream< InputStream, Encoding > &ds)
 
SizeType NewState (SizeType out, SizeType out1, unsigned codepoint)
 
void PushOperand (Stack< Allocator > &operandStack, unsigned codepoint)
 
void ImplicitConcatenation (Stack< Allocator > &atomCountStack, Stack< Allocator > &operatorStack)
 
SizeType Append (SizeType l1, SizeType l2)
 
void Patch (SizeType l, SizeType s)
 
bool Eval (Stack< Allocator > &operandStack, Operator op)
 
bool EvalQuantifier (Stack< Allocator > &operandStack, unsigned n, unsigned m)
 
void CloneTopOperand (Stack< Allocator > &operandStack)
 
template<typename InputStream >
bool ParseUnsigned (DecodedStream< InputStream, Encoding > &ds, unsigned *u)
 
template<typename InputStream >
bool ParseRange (DecodedStream< InputStream, Encoding > &ds, SizeType *range)
 
SizeType NewRange (unsigned codepoint)
 
template<typename InputStream >
bool CharacterEscape (DecodedStream< InputStream, Encoding > &ds, unsigned *escapedCodepoint)
 

Static Private Member Functions

static SizeType Min (SizeType a, SizeType b)
 

Private Attributes

Stack< Allocator > states_
 
Stack< Allocator > ranges_
 
SizeType root_
 
SizeType stateCount_
 
SizeType rangeCount_
 
bool anchorBegin_
 
bool anchorEnd_
 

Static Private Attributes

static const unsigned kAnyCharacterClass = 0xFFFFFFFF
 For '.'. More...
 
static const unsigned kRangeCharacterClass = 0xFFFFFFFE
 
static const unsigned kRangeNegationFlag = 0x80000000
 
static const unsigned kInfinityQuantifier = ~0u
 

Friends

template<typename , typename >
class GenericRegexSearch
 

Detailed Description

template<typename Encoding, typename Allocator = CrtAllocator>
class internal::GenericRegex< Encoding, Allocator >

Regular expression engine with a subset of ECMAScript grammar.

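Supported regular expression syntax:

ab        Concatenation
a|b       Alternation
a?        Zero or one
a*        Zero or more
a+        One or more
a{3}      Exactly 3 times
a{3,}     At least 3 times
a{3,5}    3 to 5 times
(ab)      Grouping
^a        Anchored at the beginning
a$        Anchored at the end
.         Any character
[abc]     Character class
[a-c]     Character class range
[^abc]    Negated character class
[\b]      Backspace (U+0008), inside a character class only
\^ \$ \| \( \) \? \* \+ \. \[ \] \{ \} \\    Escaped literal characters
\f        Form feed (U+000C)
\n        Line feed (U+000A)
\r        Carriage return (U+000D)
\t        Tab (U+0009)
\v        Vertical tab (U+000B)

A zero-repetition quantifier such as a{0} is not supported, and any other escape sequence is rejected as an invalid pattern.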

Note
This is a Thompson NFA engine, implemented with reference to: Cox, Russ. "Regular Expression Matching Can Be Simple And Fast (but is slow in Java, Perl, PHP, Python, Ruby, ...)", https://swtch.com/~rsc/regexp/regexp1.html

Definition at line 114 of file regex.h.

Member Typedef Documentation

template<typename Encoding , typename Allocator = CrtAllocator>
typedef Encoding::Ch internal::GenericRegex< Encoding, Allocator >::Ch

Definition at line 117 of file regex.h.

template<typename Encoding , typename Allocator = CrtAllocator>
typedef Encoding internal::GenericRegex< Encoding, Allocator >::EncodingType

Definition at line 116 of file regex.h.

Member Enumeration Documentation

template<typename Encoding , typename Allocator = CrtAllocator>
enum internal::GenericRegex::Operator
private
Enumerator
kZeroOrOne 
kZeroOrMore 
kOneOrMore 
kConcatenation 
kAlternation 
kLeftParenthesis 

Definition at line 136 of file regex.h.

Constructor & Destructor Documentation

template<typename Encoding , typename Allocator = CrtAllocator>
internal::GenericRegex< Encoding, Allocator >::GenericRegex ( const Ch *  source,
Allocator *  allocator = 0 
)
inline

Definition at line 120 of file regex.h.

120  :
121  states_(allocator, 256), ranges_(allocator, 256), root_(kRegexInvalidState), stateCount_(), rangeCount_(),
123  {
125  DecodedStream<GenericStringStream<Encoding>, Encoding> ds(ss);
126  Parse(ds);
127  }
SizeType stateCount_
Definition: regex.h:588
Read-only string stream.
Definition: fwd.h:47
Stack< Allocator > ranges_
Definition: regex.h:586
SizeType rangeCount_
Definition: regex.h:589
static const SizeType kRegexInvalidState
Represents an invalid index in GenericRegex::State::out, out1.
Definition: regex.h:75
void Parse(DecodedStream< InputStream, Encoding > &ds)
Definition: regex.h:190
Stack< Allocator > states_
Definition: regex.h:585
template<typename Encoding , typename Allocator = CrtAllocator>
internal::GenericRegex< Encoding, Allocator >::~GenericRegex ( )
inline

Definition at line 129 of file regex.h.

129 {}

Member Function Documentation

template<typename Encoding , typename Allocator = CrtAllocator>
SizeType internal::GenericRegex< Encoding, Allocator >::Append ( SizeType  l1,
SizeType  l2 
)
inlineprivate

Definition at line 339 of file regex.h.

339  {
340  SizeType old = l1;
341  while (GetState(l1).out != kRegexInvalidState)
342  l1 = GetState(l1).out;
343  GetState(l1).out = l2;
344  return old;
345  }
RAPIDJSON_NAMESPACE_BEGIN typedef unsigned SizeType
Size type (for string lengths, array sizes, etc.)
Definition: rapidjson.h:384
SizeType out
Equal to kRegexInvalidState for the matching state.
Definition: regex.h:156
static const SizeType kRegexInvalidState
Represents an invalid index in GenericRegex::State::out, out1.
Definition: regex.h:75
State & GetState(SizeType index)
Definition: regex.h:169
template<typename Encoding , typename Allocator = CrtAllocator>
template<typename InputStream >
bool internal::GenericRegex< Encoding, Allocator >::CharacterEscape ( DecodedStream< InputStream, Encoding > &  ds,
unsigned *  escapedCodepoint 
)
inlineprivate

Definition at line 557 of file regex.h.

557  {
558  unsigned codepoint;
559  switch (codepoint = ds.Take()) {
560  case '^':
561  case '$':
562  case '|':
563  case '(':
564  case ')':
565  case '?':
566  case '*':
567  case '+':
568  case '.':
569  case '[':
570  case ']':
571  case '{':
572  case '}':
573  case '\\':
574  *escapedCodepoint = codepoint; return true;
575  case 'f': *escapedCodepoint = 0x000C; return true;
576  case 'n': *escapedCodepoint = 0x000A; return true;
577  case 'r': *escapedCodepoint = 0x000D; return true;
578  case 't': *escapedCodepoint = 0x0009; return true;
579  case 'v': *escapedCodepoint = 0x000B; return true;
580  default:
581  return false; // Unsupported escape character
582  }
583  }
template<typename Encoding , typename Allocator = CrtAllocator>
void internal::GenericRegex< Encoding, Allocator >::CloneTopOperand ( Stack< Allocator > &  operandStack)
inlineprivate

Definition at line 449 of file regex.h.

449  {
450  const Frag src = *operandStack.template Top<Frag>(); // Copy constructor to prevent invalidation
451  SizeType count = stateCount_ - src.minIndex; // Assumes top operand contains states in [src->minIndex, stateCount_)
452  State* s = states_.template Push<State>(count);
453  memcpy(s, &GetState(src.minIndex), count * sizeof(State));
454  for (SizeType j = 0; j < count; j++) {
455  if (s[j].out != kRegexInvalidState)
456  s[j].out += count;
457  if (s[j].out1 != kRegexInvalidState)
458  s[j].out1 += count;
459  }
460  *operandStack.template Push<Frag>() = Frag(src.start + count, src.out + count, src.minIndex + count);
461  stateCount_ += count;
462  }
SizeType stateCount_
Definition: regex.h:588
RAPIDJSON_NAMESPACE_BEGIN typedef unsigned SizeType
Size type (for string lengths, array sizes, etc.)
Definition: rapidjson.h:384
static const SizeType kRegexInvalidState
Represents an invalid index in GenericRegex::State::out, out1.
Definition: regex.h:75
State & GetState(SizeType index)
Definition: regex.h:169
static QCString * s
Definition: config.cpp:1042
Stack< Allocator > states_
Definition: regex.h:585
template<typename Encoding , typename Allocator = CrtAllocator>
bool internal::GenericRegex< Encoding, Allocator >::Eval ( Stack< Allocator > &  operandStack,
Operator  op 
)
inlineprivate

Definition at line 354 of file regex.h.

354  {
355  switch (op) {
356  case kConcatenation:
357  RAPIDJSON_ASSERT(operandStack.GetSize() >= sizeof(Frag) * 2);
358  {
359  Frag e2 = *operandStack.template Pop<Frag>(1);
360  Frag e1 = *operandStack.template Pop<Frag>(1);
361  Patch(e1.out, e2.start);
362  *operandStack.template Push<Frag>() = Frag(e1.start, e2.out, Min(e1.minIndex, e2.minIndex));
363  }
364  return true;
365 
366  case kAlternation:
367  if (operandStack.GetSize() >= sizeof(Frag) * 2) {
368  Frag e2 = *operandStack.template Pop<Frag>(1);
369  Frag e1 = *operandStack.template Pop<Frag>(1);
370  SizeType s = NewState(e1.start, e2.start, 0);
371  *operandStack.template Push<Frag>() = Frag(s, Append(e1.out, e2.out), Min(e1.minIndex, e2.minIndex));
372  return true;
373  }
374  return false;
375 
376  case kZeroOrOne:
377  if (operandStack.GetSize() >= sizeof(Frag)) {
378  Frag e = *operandStack.template Pop<Frag>(1);
379  SizeType s = NewState(kRegexInvalidState, e.start, 0);
380  *operandStack.template Push<Frag>() = Frag(s, Append(e.out, s), e.minIndex);
381  return true;
382  }
383  return false;
384 
385  case kZeroOrMore:
386  if (operandStack.GetSize() >= sizeof(Frag)) {
387  Frag e = *operandStack.template Pop<Frag>(1);
388  SizeType s = NewState(kRegexInvalidState, e.start, 0);
389  Patch(e.out, s);
390  *operandStack.template Push<Frag>() = Frag(s, s, e.minIndex);
391  return true;
392  }
393  return false;
394 
395  default:
397  if (operandStack.GetSize() >= sizeof(Frag)) {
398  Frag e = *operandStack.template Pop<Frag>(1);
399  SizeType s = NewState(kRegexInvalidState, e.start, 0);
400  Patch(e.out, s);
401  *operandStack.template Push<Frag>() = Frag(e.start, s, e.minIndex);
402  return true;
403  }
404  return false;
405  }
406  }
RAPIDJSON_NAMESPACE_BEGIN typedef unsigned SizeType
Size type (for string lengths, array sizes, etc.)
Definition: rapidjson.h:384
#define RAPIDJSON_ASSERT(x)
Assertion.
Definition: rapidjson.h:406
const double e
SizeType NewState(SizeType out, SizeType out1, unsigned codepoint)
Definition: regex.h:319
static const SizeType kRegexInvalidState
Represents an invalid index in GenericRegex::State::out, out1.
Definition: regex.h:75
static SizeType Min(SizeType a, SizeType b)
Definition: regex.h:447
void Patch(SizeType l, SizeType s)
Definition: regex.h:347
SizeType Append(SizeType l1, SizeType l2)
Definition: regex.h:339
static QCString * s
Definition: config.cpp:1042
template<typename Encoding , typename Allocator = CrtAllocator>
bool internal::GenericRegex< Encoding, Allocator >::EvalQuantifier ( Stack< Allocator > &  operandStack,
unsigned  n,
unsigned  m 
)
inlineprivate

Definition at line 408 of file regex.h.

408  {
409  RAPIDJSON_ASSERT(n <= m);
410  RAPIDJSON_ASSERT(operandStack.GetSize() >= sizeof(Frag));
411 
412  if (n == 0) {
413  if (m == 0) // a{0} not support
414  return false;
415  else if (m == kInfinityQuantifier)
416  Eval(operandStack, kZeroOrMore); // a{0,} -> a*
417  else {
418  Eval(operandStack, kZeroOrOne); // a{0,5} -> a?
419  for (unsigned i = 0; i < m - 1; i++)
420  CloneTopOperand(operandStack); // a{0,5} -> a? a? a? a? a?
421  for (unsigned i = 0; i < m - 1; i++)
422  Eval(operandStack, kConcatenation); // a{0,5} -> a?a?a?a?a?
423  }
424  return true;
425  }
426 
427  for (unsigned i = 0; i < n - 1; i++) // a{3} -> a a a
428  CloneTopOperand(operandStack);
429 
430  if (m == kInfinityQuantifier)
431  Eval(operandStack, kOneOrMore); // a{3,} -> a a a+
432  else if (m > n) {
433  CloneTopOperand(operandStack); // a{3,5} -> a a a a
434  Eval(operandStack, kZeroOrOne); // a{3,5} -> a a a a?
435  for (unsigned i = n; i < m - 1; i++)
436  CloneTopOperand(operandStack); // a{3,5} -> a a a a? a?
437  for (unsigned i = n; i < m; i++)
438  Eval(operandStack, kConcatenation); // a{3,5} -> a a aa?a?
439  }
440 
441  for (unsigned i = 0; i < n - 1; i++)
442  Eval(operandStack, kConcatenation); // a{3} -> aaa, a{3,} -> aaa+, a{3.5} -> aaaa?a?
443 
444  return true;
445  }
bool Eval(Stack< Allocator > &operandStack, Operator op)
Definition: regex.h:354
#define RAPIDJSON_ASSERT(x)
Assertion.
Definition: rapidjson.h:406
static const unsigned kInfinityQuantifier
Definition: regex.h:591
void CloneTopOperand(Stack< Allocator > &operandStack)
Definition: regex.h:449
std::void_t< T > n
template<typename Encoding , typename Allocator = CrtAllocator>
Range& internal::GenericRegex< Encoding, Allocator >::GetRange ( SizeType  index)
inlineprivate

Definition at line 179 of file regex.h.

179  {
181  return ranges_.template Bottom<Range>()[index];
182  }
#define RAPIDJSON_ASSERT(x)
Assertion.
Definition: rapidjson.h:406
Stack< Allocator > ranges_
Definition: regex.h:586
SizeType rangeCount_
Definition: regex.h:589
template<typename Encoding , typename Allocator = CrtAllocator>
const Range& internal::GenericRegex< Encoding, Allocator >::GetRange ( SizeType  index) const
inlineprivate

Definition at line 184 of file regex.h.

184  {
186  return ranges_.template Bottom<Range>()[index];
187  }
#define RAPIDJSON_ASSERT(x)
Assertion.
Definition: rapidjson.h:406
Stack< Allocator > ranges_
Definition: regex.h:586
SizeType rangeCount_
Definition: regex.h:589
template<typename Encoding , typename Allocator = CrtAllocator>
State& internal::GenericRegex< Encoding, Allocator >::GetState ( SizeType  index)
inlineprivate

Definition at line 169 of file regex.h.

169  {
171  return states_.template Bottom<State>()[index];
172  }
SizeType stateCount_
Definition: regex.h:588
#define RAPIDJSON_ASSERT(x)
Assertion.
Definition: rapidjson.h:406
Stack< Allocator > states_
Definition: regex.h:585
template<typename Encoding , typename Allocator = CrtAllocator>
const State& internal::GenericRegex< Encoding, Allocator >::GetState ( SizeType  index) const
inlineprivate

Definition at line 174 of file regex.h.

174  {
176  return states_.template Bottom<State>()[index];
177  }
SizeType stateCount_
Definition: regex.h:588
#define RAPIDJSON_ASSERT(x)
Assertion.
Definition: rapidjson.h:406
Stack< Allocator > states_
Definition: regex.h:585
template<typename Encoding , typename Allocator = CrtAllocator>
void internal::GenericRegex< Encoding, Allocator >::ImplicitConcatenation ( Stack< Allocator > &  atomCountStack,
Stack< Allocator > &  operatorStack 
)
inlineprivate

Definition at line 333 of file regex.h.

333  {
334  if (*atomCountStack.template Top<unsigned>())
335  *operatorStack.template Push<Operator>() = kConcatenation;
336  (*atomCountStack.template Top<unsigned>())++;
337  }
template<typename Encoding , typename Allocator = CrtAllocator>
bool internal::GenericRegex< Encoding, Allocator >::IsValid ( ) const
inline

Definition at line 131 of file regex.h.

131  {
132  return root_ != kRegexInvalidState;
133  }
static const SizeType kRegexInvalidState
Represents an invalid index in GenericRegex::State::out, out1.
Definition: regex.h:75
template<typename Encoding , typename Allocator = CrtAllocator>
static SizeType internal::GenericRegex< Encoding, Allocator >::Min ( SizeType  a,
SizeType  b 
)
inlinestaticprivate

Definition at line 447 of file regex.h.

447 { return a < b ? a : b; }
const GenericPointer< typename T::ValueType > T2 T::AllocatorType & a
Definition: pointer.h:1124
static bool * b
Definition: config.cpp:1043
template<typename Encoding , typename Allocator = CrtAllocator>
SizeType internal::GenericRegex< Encoding, Allocator >::NewRange ( unsigned  codepoint)
inlineprivate

Definition at line 549 of file regex.h.

549  {
550  Range* r = ranges_.template Push<Range>();
551  r->start = r->end = codepoint;
552  r->next = kRegexInvalidRange;
553  return rangeCount_++;
554  }
Stack< Allocator > ranges_
Definition: regex.h:586
SizeType rangeCount_
Definition: regex.h:589
int start
Definition: doxysearch.cpp:178
int end
Definition: doxysearch.cpp:179
static const SizeType kRegexInvalidRange
Definition: regex.h:76
template<typename Encoding , typename Allocator = CrtAllocator>
SizeType internal::GenericRegex< Encoding, Allocator >::NewState ( SizeType  out,
SizeType  out1,
unsigned  codepoint 
)
inlineprivate

Definition at line 319 of file regex.h.

319  {
320  State* s = states_.template Push<State>();
321  s->out = out;
322  s->out1 = out1;
323  s->codepoint = codepoint;
324  s->rangeStart = kRegexInvalidRange;
325  return stateCount_++;
326  }
SizeType stateCount_
Definition: regex.h:588
static QCString * s
Definition: config.cpp:1042
static const SizeType kRegexInvalidRange
Definition: regex.h:76
Stack< Allocator > states_
Definition: regex.h:585
template<typename Encoding , typename Allocator = CrtAllocator>
template<typename InputStream >
void internal::GenericRegex< Encoding, Allocator >::Parse ( DecodedStream< InputStream, Encoding > &  ds)
inlineprivate

Definition at line 190 of file regex.h.

190  {
191  Allocator allocator;
192  Stack<Allocator> operandStack(&allocator, 256); // Frag
193  Stack<Allocator> operatorStack(&allocator, 256); // Operator
194  Stack<Allocator> atomCountStack(&allocator, 256); // unsigned (Atom per parenthesis)
195 
196  *atomCountStack.template Push<unsigned>() = 0;
197 
198  unsigned codepoint;
199  while (ds.Peek() != 0) {
200  switch (codepoint = ds.Take()) {
201  case '^':
202  anchorBegin_ = true;
203  break;
204 
205  case '$':
206  anchorEnd_ = true;
207  break;
208 
209  case '|':
210  while (!operatorStack.Empty() && *operatorStack.template Top<Operator>() < kAlternation)
211  if (!Eval(operandStack, *operatorStack.template Pop<Operator>(1)))
212  return;
213  *operatorStack.template Push<Operator>() = kAlternation;
214  *atomCountStack.template Top<unsigned>() = 0;
215  break;
216 
217  case '(':
218  *operatorStack.template Push<Operator>() = kLeftParenthesis;
219  *atomCountStack.template Push<unsigned>() = 0;
220  break;
221 
222  case ')':
223  while (!operatorStack.Empty() && *operatorStack.template Top<Operator>() != kLeftParenthesis)
224  if (!Eval(operandStack, *operatorStack.template Pop<Operator>(1)))
225  return;
226  if (operatorStack.Empty())
227  return;
228  operatorStack.template Pop<Operator>(1);
229  atomCountStack.template Pop<unsigned>(1);
230  ImplicitConcatenation(atomCountStack, operatorStack);
231  break;
232 
233  case '?':
234  if (!Eval(operandStack, kZeroOrOne))
235  return;
236  break;
237 
238  case '*':
239  if (!Eval(operandStack, kZeroOrMore))
240  return;
241  break;
242 
243  case '+':
244  if (!Eval(operandStack, kOneOrMore))
245  return;
246  break;
247 
248  case '{':
249  {
250  unsigned n, m;
251  if (!ParseUnsigned(ds, &n))
252  return;
253 
254  if (ds.Peek() == ',') {
255  ds.Take();
256  if (ds.Peek() == '}')
258  else if (!ParseUnsigned(ds, &m) || m < n)
259  return;
260  }
261  else
262  m = n;
263 
264  if (!EvalQuantifier(operandStack, n, m) || ds.Peek() != '}')
265  return;
266  ds.Take();
267  }
268  break;
269 
270  case '.':
271  PushOperand(operandStack, kAnyCharacterClass);
272  ImplicitConcatenation(atomCountStack, operatorStack);
273  break;
274 
275  case '[':
276  {
277  SizeType range;
278  if (!ParseRange(ds, &range))
279  return;
281  GetState(s).rangeStart = range;
282  *operandStack.template Push<Frag>() = Frag(s, s, s);
283  }
284  ImplicitConcatenation(atomCountStack, operatorStack);
285  break;
286 
287  case '\\': // Escape character
288  if (!CharacterEscape(ds, &codepoint))
289  return; // Unsupported escape character
290  // fall through to default
291 
292  default: // Pattern character
293  PushOperand(operandStack, codepoint);
294  ImplicitConcatenation(atomCountStack, operatorStack);
295  }
296  }
297 
298  while (!operatorStack.Empty())
299  if (!Eval(operandStack, *operatorStack.template Pop<Operator>(1)))
300  return;
301 
302  // Link the operand to matching state.
303  if (operandStack.GetSize() == sizeof(Frag)) {
304  Frag* e = operandStack.template Pop<Frag>(1);
306  root_ = e->start;
307 
308 #if RAPIDJSON_REGEX_VERBOSE
309  printf("root: %d\n", root_);
310  for (SizeType i = 0; i < stateCount_ ; i++) {
311  State& s = GetState(i);
312  printf("[%2d] out: %2d out1: %2d c: '%c'\n", i, s.out, s.out1, (char)s.codepoint);
313  }
314  printf("\n");
315 #endif
316  }
317  }
SizeType stateCount_
Definition: regex.h:588
bool Eval(Stack< Allocator > &operandStack, Operator op)
Definition: regex.h:354
RAPIDJSON_NAMESPACE_BEGIN typedef unsigned SizeType
Size type (for string lengths, array sizes, etc.)
Definition: rapidjson.h:384
static const unsigned kInfinityQuantifier
Definition: regex.h:591
const double e
SizeType NewState(SizeType out, SizeType out1, unsigned codepoint)
Definition: regex.h:319
void PushOperand(Stack< Allocator > &operandStack, unsigned codepoint)
Definition: regex.h:328
std::void_t< T > n
static const SizeType kRegexInvalidState
Represents an invalid index in GenericRegex::State::out, out1.
Definition: regex.h:75
bool ParseRange(DecodedStream< InputStream, Encoding > &ds, SizeType *range)
Definition: regex.h:479
State & GetState(SizeType index)
Definition: regex.h:169
static const unsigned kRangeCharacterClass
Definition: regex.h:146
static const unsigned kAnyCharacterClass
For '.'.
Definition: regex.h:145
bool EvalQuantifier(Stack< Allocator > &operandStack, unsigned n, unsigned m)
Definition: regex.h:408
void Patch(SizeType l, SizeType s)
Definition: regex.h:347
bool ParseUnsigned(DecodedStream< InputStream, Encoding > &ds, unsigned *u)
Definition: regex.h:465
bool CharacterEscape(DecodedStream< InputStream, Encoding > &ds, unsigned *escapedCodepoint)
Definition: regex.h:557
void ImplicitConcatenation(Stack< Allocator > &atomCountStack, Stack< Allocator > &operatorStack)
Definition: regex.h:333
static QCString * s
Definition: config.cpp:1042
template<typename Encoding , typename Allocator = CrtAllocator>
template<typename InputStream >
bool internal::GenericRegex< Encoding, Allocator >::ParseRange ( DecodedStream< InputStream, Encoding > &  ds,
SizeType *  range 
)
inlineprivate

Definition at line 479 of file regex.h.

479  {
480  bool isBegin = true;
481  bool negate = false;
482  int step = 0;
485  unsigned codepoint;
486  while ((codepoint = ds.Take()) != 0) {
487  if (isBegin) {
488  isBegin = false;
489  if (codepoint == '^') {
490  negate = true;
491  continue;
492  }
493  }
494 
495  switch (codepoint) {
496  case ']':
497  if (start == kRegexInvalidRange)
498  return false; // Error: nothing inside []
499  if (step == 2) { // Add trailing '-'
500  SizeType r = NewRange('-');
502  GetRange(current).next = r;
503  }
504  if (negate)
506  *range = start;
507  return true;
508 
509  case '\\':
510  if (ds.Peek() == 'b') {
511  ds.Take();
512  codepoint = 0x0008; // Escape backspace character
513  }
514  else if (!CharacterEscape(ds, &codepoint))
515  return false;
516  // fall through to default
517 
518  default:
519  switch (step) {
520  case 1:
521  if (codepoint == '-') {
522  step++;
523  break;
524  }
525  // fall through to step 0 for other characters
526 
527  case 0:
528  {
529  SizeType r = NewRange(codepoint);
530  if (current != kRegexInvalidRange)
531  GetRange(current).next = r;
532  if (start == kRegexInvalidRange)
533  start = r;
534  current = r;
535  }
536  step = 1;
537  break;
538 
539  default:
540  RAPIDJSON_ASSERT(step == 2);
541  GetRange(current).end = codepoint;
542  step = 0;
543  }
544  }
545  }
546  return false;
547  }
SizeType NewRange(unsigned codepoint)
Definition: regex.h:549
RAPIDJSON_NAMESPACE_BEGIN typedef unsigned SizeType
Size type (for string lengths, array sizes, etc.)
Definition: rapidjson.h:384
#define RAPIDJSON_ASSERT(x)
Assertion.
Definition: rapidjson.h:406
static const unsigned kRangeNegationFlag
Definition: regex.h:147
static Entry * current
Range & GetRange(SizeType index)
Definition: regex.h:179
bool CharacterEscape(DecodedStream< InputStream, Encoding > &ds, unsigned *escapedCodepoint)
Definition: regex.h:557
static const SizeType kRegexInvalidRange
Definition: regex.h:76
template<typename Encoding , typename Allocator = CrtAllocator>
template<typename InputStream >
bool internal::GenericRegex< Encoding, Allocator >::ParseUnsigned ( DecodedStream< InputStream, Encoding > &  ds,
unsigned *  u 
)
inlineprivate

Definition at line 465 of file regex.h.

465  {
466  unsigned r = 0;
467  if (ds.Peek() < '0' || ds.Peek() > '9')
468  return false;
469  while (ds.Peek() >= '0' && ds.Peek() <= '9') {
470  if (r >= 429496729 && ds.Peek() > '5') // 2^32 - 1 = 4294967295
471  return false; // overflow
472  r = r * 10 + (ds.Take() - '0');
473  }
474  *u = r;
475  return true;
476  }
template<typename Encoding , typename Allocator = CrtAllocator>
void internal::GenericRegex< Encoding, Allocator >::Patch ( SizeType  l,
SizeType  s 
)
inlineprivate

Definition at line 347 of file regex.h.

347  {
348  for (SizeType next; l != kRegexInvalidState; l = next) {
349  next = GetState(l).out;
350  GetState(l).out = s;
351  }
352  }
RAPIDJSON_NAMESPACE_BEGIN typedef unsigned SizeType
Size type (for string lengths, array sizes, etc.)
Definition: rapidjson.h:384
static QStrList * l
Definition: config.cpp:1044
SizeType out
Equal to kRegexInvalidState for the matching state.
Definition: regex.h:156
static const SizeType kRegexInvalidState
Represents an invalid index in GenericRegex::State::out, out1.
Definition: regex.h:75
State & GetState(SizeType index)
Definition: regex.h:169
static QCString * s
Definition: config.cpp:1042
template<typename Encoding , typename Allocator = CrtAllocator>
void internal::GenericRegex< Encoding, Allocator >::PushOperand ( Stack< Allocator > &  operandStack,
unsigned  codepoint 
)
inlineprivate

Definition at line 328 of file regex.h.

328  {
330  *operandStack.template Push<Frag>() = Frag(s, s, s);
331  }
RAPIDJSON_NAMESPACE_BEGIN typedef unsigned SizeType
Size type (for string lengths, array sizes, etc.)
Definition: rapidjson.h:384
SizeType NewState(SizeType out, SizeType out1, unsigned codepoint)
Definition: regex.h:319
static const SizeType kRegexInvalidState
Represents an invalid index in GenericRegex::State::out, out1.
Definition: regex.h:75
static QCString * s
Definition: config.cpp:1042

Friends And Related Function Documentation

template<typename Encoding , typename Allocator = CrtAllocator>
template<typename , typename >
friend class GenericRegexSearch
friend

Definition at line 118 of file regex.h.

Member Data Documentation

template<typename Encoding , typename Allocator = CrtAllocator>
bool internal::GenericRegex< Encoding, Allocator >::anchorBegin_
private

Definition at line 594 of file regex.h.

template<typename Encoding , typename Allocator = CrtAllocator>
bool internal::GenericRegex< Encoding, Allocator >::anchorEnd_
private

Definition at line 595 of file regex.h.

template<typename Encoding , typename Allocator = CrtAllocator>
const unsigned internal::GenericRegex< Encoding, Allocator >::kAnyCharacterClass = 0xFFFFFFFF
staticprivate

For '.'.

Definition at line 145 of file regex.h.

template<typename Encoding , typename Allocator = CrtAllocator>
const unsigned internal::GenericRegex< Encoding, Allocator >::kInfinityQuantifier = ~0u
staticprivate

Definition at line 591 of file regex.h.

template<typename Encoding , typename Allocator = CrtAllocator>
const unsigned internal::GenericRegex< Encoding, Allocator >::kRangeCharacterClass = 0xFFFFFFFE
staticprivate

Definition at line 146 of file regex.h.

template<typename Encoding , typename Allocator = CrtAllocator>
const unsigned internal::GenericRegex< Encoding, Allocator >::kRangeNegationFlag = 0x80000000
staticprivate

Definition at line 147 of file regex.h.

template<typename Encoding , typename Allocator = CrtAllocator>
SizeType internal::GenericRegex< Encoding, Allocator >::rangeCount_
private

Definition at line 589 of file regex.h.

template<typename Encoding , typename Allocator = CrtAllocator>
Stack<Allocator> internal::GenericRegex< Encoding, Allocator >::ranges_
private

Definition at line 586 of file regex.h.

template<typename Encoding , typename Allocator = CrtAllocator>
SizeType internal::GenericRegex< Encoding, Allocator >::root_
private

Definition at line 587 of file regex.h.

template<typename Encoding , typename Allocator = CrtAllocator>
SizeType internal::GenericRegex< Encoding, Allocator >::stateCount_
private

Definition at line 588 of file regex.h.

template<typename Encoding , typename Allocator = CrtAllocator>
Stack<Allocator> internal::GenericRegex< Encoding, Allocator >::states_
private

Definition at line 585 of file regex.h.


The documentation for this class was generated from the following file: regex.h