Public Types | Static Public Member Functions | List of all members
UTF8< CharType > Struct Template Reference

UTF-8 encoding. More...

#include <encodings.h>

Public Types

enum  { supportUnicode = 1 }
 
typedef CharType Ch
 

Static Public Member Functions

template<typename OutputStream >
static void Encode (OutputStream &os, unsigned codepoint)
 
template<typename OutputStream >
static void EncodeUnsafe (OutputStream &os, unsigned codepoint)
 
template<typename InputStream >
static bool Decode (InputStream &is, unsigned *codepoint)
 
template<typename InputStream , typename OutputStream >
static bool Validate (InputStream &is, OutputStream &os)
 
static unsigned char GetRange (unsigned char c)
 
template<typename InputByteStream >
static CharType TakeBOM (InputByteStream &is)
 
template<typename InputByteStream >
static Ch Take (InputByteStream &is)
 
template<typename OutputByteStream >
static void PutBOM (OutputByteStream &os)
 
template<typename OutputByteStream >
static void Put (OutputByteStream &os, Ch c)
 

Detailed Description

template<typename CharType = char>
struct UTF8< CharType >

UTF-8 encoding.

See http://en.wikipedia.org/wiki/UTF-8 and http://tools.ietf.org/html/rfc3629

Template Parameters
CharType: Code unit for storing 8-bit UTF-8 data. Default is char.
Note
Implements the Encoding concept.

Definition at line 96 of file encodings.h.

Member Typedef Documentation

template<typename CharType = char>
typedef CharType UTF8< CharType >::Ch

Definition at line 97 of file encodings.h.

Member Enumeration Documentation

template<typename CharType = char>
anonymous enum
Enumerator
supportUnicode 

Definition at line 99 of file encodings.h.

Member Function Documentation

template<typename CharType = char>
template<typename InputStream >
static bool UTF8< CharType >::Decode ( InputStream &  is,
unsigned *  codepoint 
)
inline static

Definition at line 146 of file encodings.h.

146  {
147 #define RAPIDJSON_COPY() c = is.Take(); *codepoint = (*codepoint << 6) | (static_cast<unsigned char>(c) & 0x3Fu)
148 #define RAPIDJSON_TRANS(mask) result &= ((GetRange(static_cast<unsigned char>(c)) & mask) != 0)
149 #define RAPIDJSON_TAIL() RAPIDJSON_COPY(); RAPIDJSON_TRANS(0x70)
150  typename InputStream::Ch c = is.Take();
151  if (!(c & 0x80)) {
152  *codepoint = static_cast<unsigned char>(c);
153  return true;
154  }
155 
156  unsigned char type = GetRange(static_cast<unsigned char>(c));
157  if (type >= 32) {
158  *codepoint = 0;
159  } else {
160  *codepoint = (0xFFu >> type) & static_cast<unsigned char>(c);
161  }
162  bool result = true;
163  switch (type) {
164  case 2: RAPIDJSON_TAIL(); return result;
165  case 3: RAPIDJSON_TAIL(); RAPIDJSON_TAIL(); return result;
166  case 4: RAPIDJSON_COPY(); RAPIDJSON_TRANS(0x50); RAPIDJSON_TAIL(); return result;
167  case 5: RAPIDJSON_COPY(); RAPIDJSON_TRANS(0x10); RAPIDJSON_TAIL(); RAPIDJSON_TAIL(); return result;
168  case 6: RAPIDJSON_TAIL(); RAPIDJSON_TAIL(); RAPIDJSON_TAIL(); return result;
169  case 10: RAPIDJSON_COPY(); RAPIDJSON_TRANS(0x20); RAPIDJSON_TAIL(); return result;
170  case 11: RAPIDJSON_COPY(); RAPIDJSON_TRANS(0x60); RAPIDJSON_TAIL(); RAPIDJSON_TAIL(); return result;
171  default: return false;
172  }
173 #undef RAPIDJSON_COPY
174 #undef RAPIDJSON_TRANS
175 #undef RAPIDJSON_TAIL
176  }
static QCString result
static unsigned char GetRange(unsigned char c)
Definition: encodings.h:204
#define RAPIDJSON_TAIL()
#define RAPIDJSON_COPY()
static QCString type
Definition: declinfo.cpp:672
#define RAPIDJSON_TRANS(mask)
template<typename CharType = char>
template<typename OutputStream >
static void UTF8< CharType >::Encode ( OutputStream &  os,
unsigned  codepoint 
)
inline static

Definition at line 102 of file encodings.h.

102  {
103  if (codepoint <= 0x7F)
104  os.Put(static_cast<Ch>(codepoint & 0xFF));
105  else if (codepoint <= 0x7FF) {
106  os.Put(static_cast<Ch>(0xC0 | ((codepoint >> 6) & 0xFF)));
107  os.Put(static_cast<Ch>(0x80 | ((codepoint & 0x3F))));
108  }
109  else if (codepoint <= 0xFFFF) {
110  os.Put(static_cast<Ch>(0xE0 | ((codepoint >> 12) & 0xFF)));
111  os.Put(static_cast<Ch>(0x80 | ((codepoint >> 6) & 0x3F)));
112  os.Put(static_cast<Ch>(0x80 | (codepoint & 0x3F)));
113  }
114  else {
115  RAPIDJSON_ASSERT(codepoint <= 0x10FFFF);
116  os.Put(static_cast<Ch>(0xF0 | ((codepoint >> 18) & 0xFF)));
117  os.Put(static_cast<Ch>(0x80 | ((codepoint >> 12) & 0x3F)));
118  os.Put(static_cast<Ch>(0x80 | ((codepoint >> 6) & 0x3F)));
119  os.Put(static_cast<Ch>(0x80 | (codepoint & 0x3F)));
120  }
121  }
#define RAPIDJSON_ASSERT(x)
Assertion.
Definition: rapidjson.h:406
template<typename CharType = char>
template<typename OutputStream >
static void UTF8< CharType >::EncodeUnsafe ( OutputStream &  os,
unsigned  codepoint 
)
inline static

Definition at line 124 of file encodings.h.

124  {
125  if (codepoint <= 0x7F)
126  PutUnsafe(os, static_cast<Ch>(codepoint & 0xFF));
127  else if (codepoint <= 0x7FF) {
128  PutUnsafe(os, static_cast<Ch>(0xC0 | ((codepoint >> 6) & 0xFF)));
129  PutUnsafe(os, static_cast<Ch>(0x80 | ((codepoint & 0x3F))));
130  }
131  else if (codepoint <= 0xFFFF) {
132  PutUnsafe(os, static_cast<Ch>(0xE0 | ((codepoint >> 12) & 0xFF)));
133  PutUnsafe(os, static_cast<Ch>(0x80 | ((codepoint >> 6) & 0x3F)));
134  PutUnsafe(os, static_cast<Ch>(0x80 | (codepoint & 0x3F)));
135  }
136  else {
137  RAPIDJSON_ASSERT(codepoint <= 0x10FFFF);
138  PutUnsafe(os, static_cast<Ch>(0xF0 | ((codepoint >> 18) & 0xFF)));
139  PutUnsafe(os, static_cast<Ch>(0x80 | ((codepoint >> 12) & 0x3F)));
140  PutUnsafe(os, static_cast<Ch>(0x80 | ((codepoint >> 6) & 0x3F)));
141  PutUnsafe(os, static_cast<Ch>(0x80 | (codepoint & 0x3F)));
142  }
143  }
#define RAPIDJSON_ASSERT(x)
Assertion.
Definition: rapidjson.h:406
void PutUnsafe(Stream &stream, typename Stream::Ch c)
Write character to a stream, presuming buffer is reserved.
Definition: stream.h:91
template<typename CharType = char>
static unsigned char UTF8< CharType >::GetRange ( unsigned char  c)
inline static

Definition at line 204 of file encodings.h.

204  {
205  // Referring to DFA of http://bjoern.hoehrmann.de/utf-8/decoder/dfa/
206  // With new mapping 1 -> 0x10, 7 -> 0x20, 9 -> 0x40, such that AND operation can test multiple types.
207  static const unsigned char type[] = {
208  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
209  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
210  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
211  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
212  0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,
213  0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,
214  0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,
215  0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,
216  8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
217  10,3,3,3,3,3,3,3,3,3,3,3,3,4,3,3, 11,6,6,6,5,8,8,8,8,8,8,8,8,8,8,8,
218  };
219  return type[c];
220  }
template<typename CharType = char>
template<typename OutputByteStream >
static void UTF8< CharType >::Put ( OutputByteStream &  os,
Ch  c 
)
inline static

Definition at line 250 of file encodings.h.

250  {
251  RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1);
252  os.Put(static_cast<typename OutputByteStream::Ch>(c));
253  }
#define RAPIDJSON_STATIC_ASSERT(x)
(Internal) macro to check for conditions at compile-time
Definition: rapidjson.h:445
template<typename CharType = char>
template<typename OutputByteStream >
static void UTF8< CharType >::PutBOM ( OutputByteStream &  os)
inline static

Definition at line 242 of file encodings.h.

242  {
243  RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1);
244  os.Put(static_cast<typename OutputByteStream::Ch>(0xEFu));
245  os.Put(static_cast<typename OutputByteStream::Ch>(0xBBu));
246  os.Put(static_cast<typename OutputByteStream::Ch>(0xBFu));
247  }
#define RAPIDJSON_STATIC_ASSERT(x)
(Internal) macro to check for conditions at compile-time
Definition: rapidjson.h:445
template<typename CharType = char>
template<typename InputByteStream >
static Ch UTF8< CharType >::Take ( InputByteStream &  is)
inline static

Definition at line 236 of file encodings.h.

236  {
237  RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1);
238  return static_cast<Ch>(is.Take());
239  }
CharType Ch
Definition: encodings.h:97
#define RAPIDJSON_STATIC_ASSERT(x)
(Internal) macro to check for conditions at compile-time
Definition: rapidjson.h:445
template<typename CharType = char>
template<typename InputByteStream >
static CharType UTF8< CharType >::TakeBOM ( InputByteStream &  is)
inline static

Definition at line 223 of file encodings.h.

223  {
224  RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1);
225  typename InputByteStream::Ch c = Take(is);
226  if (static_cast<unsigned char>(c) != 0xEFu) return c;
227  c = is.Take();
228  if (static_cast<unsigned char>(c) != 0xBBu) return c;
229  c = is.Take();
230  if (static_cast<unsigned char>(c) != 0xBFu) return c;
231  c = is.Take();
232  return c;
233  }
#define RAPIDJSON_STATIC_ASSERT(x)
(Internal) macro to check for conditions at compile-time
Definition: rapidjson.h:445
static Ch Take(InputByteStream &is)
Definition: encodings.h:236
template<typename CharType = char>
template<typename InputStream , typename OutputStream >
static bool UTF8< CharType >::Validate ( InputStream &  is,
OutputStream &  os 
)
inline static

Definition at line 179 of file encodings.h.

179  {
180 #define RAPIDJSON_COPY() os.Put(c = is.Take())
181 #define RAPIDJSON_TRANS(mask) result &= ((GetRange(static_cast<unsigned char>(c)) & mask) != 0)
182 #define RAPIDJSON_TAIL() RAPIDJSON_COPY(); RAPIDJSON_TRANS(0x70)
183  Ch c;
184  RAPIDJSON_COPY();
185  if (!(c & 0x80))
186  return true;
187 
188  bool result = true;
189  switch (GetRange(static_cast<unsigned char>(c))) {
190  case 2: RAPIDJSON_TAIL(); return result;
191  case 3: RAPIDJSON_TAIL(); RAPIDJSON_TAIL(); return result;
192  case 4: RAPIDJSON_COPY(); RAPIDJSON_TRANS(0x50); RAPIDJSON_TAIL(); return result;
193  case 5: RAPIDJSON_COPY(); RAPIDJSON_TRANS(0x10); RAPIDJSON_TAIL(); RAPIDJSON_TAIL(); return result;
194  case 6: RAPIDJSON_TAIL(); RAPIDJSON_TAIL(); RAPIDJSON_TAIL(); return result;
195  case 10: RAPIDJSON_COPY(); RAPIDJSON_TRANS(0x20); RAPIDJSON_TAIL(); return result;
196  case 11: RAPIDJSON_COPY(); RAPIDJSON_TRANS(0x60); RAPIDJSON_TAIL(); RAPIDJSON_TAIL(); return result;
197  default: return false;
198  }
199 #undef RAPIDJSON_COPY
200 #undef RAPIDJSON_TRANS
201 #undef RAPIDJSON_TAIL
202  }
static QCString result
CharType Ch
Definition: encodings.h:97
static unsigned char GetRange(unsigned char c)
Definition: encodings.h:204
#define RAPIDJSON_TAIL()
#define RAPIDJSON_COPY()
#define RAPIDJSON_TRANS(mask)

The documentation for this struct was generated from the following file: