All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
Public Types | Static Public Member Functions | List of all members
rapidjson::UTF8< CharType > Struct Template Reference

UTF-8 encoding. More...

#include <encodings.h>

Public Types

enum  { supportUnicode = 1 }
 
typedef CharType Ch
 

Static Public Member Functions

template<typename OutputStream >
static void Encode (OutputStream &os, unsigned codepoint)
 
template<typename InputStream >
static bool Decode (InputStream &is, unsigned *codepoint)
 
template<typename InputStream , typename OutputStream >
static bool Validate (InputStream &is, OutputStream &os)
 
static unsigned char GetRange (unsigned char c)
 
template<typename InputByteStream >
static CharType TakeBOM (InputByteStream &is)
 
template<typename InputByteStream >
static Ch Take (InputByteStream &is)
 
template<typename OutputByteStream >
static void PutBOM (OutputByteStream &os)
 
template<typename OutputByteStream >
static void Put (OutputByteStream &os, Ch c)
 

Detailed Description

template<typename CharType = char>
struct rapidjson::UTF8< CharType >

UTF-8 encoding.

http://en.wikipedia.org/wiki/UTF-8 http://tools.ietf.org/html/rfc3629

Template Parameters
CharType: Code unit for storing 8-bit UTF-8 data. Default is char.
Note
Implements the Encoding concept.

Definition at line 101 of file encodings.h.

Member Typedef Documentation

template<typename CharType = char>
typedef CharType rapidjson::UTF8< CharType >::Ch

Definition at line 102 of file encodings.h.

Member Enumeration Documentation

template<typename CharType = char>
anonymous enum
Enumerator
supportUnicode 

Definition at line 104 of file encodings.h.

Member Function Documentation

template<typename CharType = char>
template<typename InputStream >
static bool rapidjson::UTF8< CharType >::Decode ( InputStream &  is,
unsigned *  codepoint 
)
inline static

Definition at line 129 of file encodings.h.

129  {
130 #define COPY() c = is.Take(); *codepoint = (*codepoint << 6) | ((unsigned char)c & 0x3Fu)
131 #define TRANS(mask) result &= ((GetRange((unsigned char)c) & mask) != 0)
132 #define TAIL() COPY(); TRANS(0x70)
133  Ch c = is.Take();
134  if (!(c & 0x80)) {
135  *codepoint = (unsigned char)c;
136  return true;
137  }
138 
139  unsigned char type = GetRange((unsigned char)c);
140  *codepoint = (0xFF >> type) & (unsigned char)c;
141  bool result = true;
142  switch (type) {
143  case 2: TAIL(); return result;
144  case 3: TAIL(); TAIL(); return result;
145  case 4: COPY(); TRANS(0x50); TAIL(); return result;
146  case 5: COPY(); TRANS(0x10); TAIL(); TAIL(); return result;
147  case 6: TAIL(); TAIL(); TAIL(); return result;
148  case 10: COPY(); TRANS(0x20); TAIL(); return result;
149  case 11: COPY(); TRANS(0x60); TAIL(); TAIL(); return result;
150  default: return false;
151  }
152 #undef COPY
153 #undef TRANS
154 #undef TAIL
155  }
#define COPY()
#define TAIL()
#define TRANS(mask)
CharType Ch
Definition: encodings.h:102
static unsigned char GetRange(unsigned char c)
Definition: encodings.h:183
template<typename CharType = char>
template<typename OutputStream >
static void rapidjson::UTF8< CharType >::Encode ( OutputStream &  os,
unsigned  codepoint 
)
inline static

Definition at line 107 of file encodings.h.

107  {
108  if (codepoint <= 0x7F)
109  os.Put(static_cast<Ch>(codepoint & 0xFF));
110  else if (codepoint <= 0x7FF) {
111  os.Put(static_cast<Ch>(0xC0 | ((codepoint >> 6) & 0xFF)));
112  os.Put(static_cast<Ch>(0x80 | ((codepoint & 0x3F))));
113  }
114  else if (codepoint <= 0xFFFF) {
115  os.Put(static_cast<Ch>(0xE0 | ((codepoint >> 12) & 0xFF)));
116  os.Put(static_cast<Ch>(0x80 | ((codepoint >> 6) & 0x3F)));
117  os.Put(static_cast<Ch>(0x80 | (codepoint & 0x3F)));
118  }
119  else {
120  RAPIDJSON_ASSERT(codepoint <= 0x10FFFF);
121  os.Put(static_cast<Ch>(0xF0 | ((codepoint >> 18) & 0xFF)));
122  os.Put(static_cast<Ch>(0x80 | ((codepoint >> 12) & 0x3F)));
123  os.Put(static_cast<Ch>(0x80 | ((codepoint >> 6) & 0x3F)));
124  os.Put(static_cast<Ch>(0x80 | (codepoint & 0x3F)));
125  }
126  }
#define RAPIDJSON_ASSERT(x)
Assertion.
Definition: rapidjson.h:269
template<typename CharType = char>
static unsigned char rapidjson::UTF8< CharType >::GetRange ( unsigned char  c)
inline static

Definition at line 183 of file encodings.h.

183  {
184  // Referring to DFA of http://bjoern.hoehrmann.de/utf-8/decoder/dfa/
185  // With new mapping 1 -> 0x10, 7 -> 0x20, 9 -> 0x40, such that AND operation can test multiple types.
186  static const unsigned char type[] = {
187  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
188  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
189  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
190  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
191  0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,
192  0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,
193  0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,
194  0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,
195  8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
196  10,3,3,3,3,3,3,3,3,3,3,3,3,4,3,3, 11,6,6,6,5,8,8,8,8,8,8,8,8,8,8,8,
197  };
198  return type[c];
199  }
template<typename CharType = char>
template<typename OutputByteStream >
static void rapidjson::UTF8< CharType >::Put ( OutputByteStream &  os,
Ch  c 
)
inline static

Definition at line 227 of file encodings.h.

227  {
228  RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1);
229  os.Put(static_cast<typename OutputByteStream::Ch>(c));
230  }
#define RAPIDJSON_STATIC_ASSERT(x)
(Internal) macro to check for conditions at compile-time
Definition: rapidjson.h:301
template<typename CharType = char>
template<typename OutputByteStream >
static void rapidjson::UTF8< CharType >::PutBOM ( OutputByteStream &  os)
inline static

Definition at line 221 of file encodings.h.

221  {
222  RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1);
223  os.Put(0xEFu); os.Put(0xBBu); os.Put(0xBFu);
224  }
#define RAPIDJSON_STATIC_ASSERT(x)
(Internal) macro to check for conditions at compile-time
Definition: rapidjson.h:301
template<typename CharType = char>
template<typename InputByteStream >
static Ch rapidjson::UTF8< CharType >::Take ( InputByteStream &  is)
inline static

Definition at line 215 of file encodings.h.

215  {
216  RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1);
217  return is.Take();
218  }
#define RAPIDJSON_STATIC_ASSERT(x)
(Internal) macro to check for conditions at compile-time
Definition: rapidjson.h:301
template<typename CharType = char>
template<typename InputByteStream >
static CharType rapidjson::UTF8< CharType >::TakeBOM ( InputByteStream &  is)
inline static

Definition at line 202 of file encodings.h.

202  {
203  RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1);
204  Ch c = Take(is);
205  if ((unsigned char)c != 0xEFu) return c;
206  c = is.Take();
207  if ((unsigned char)c != 0xBBu) return c;
208  c = is.Take();
209  if ((unsigned char)c != 0xBFu) return c;
210  c = is.Take();
211  return c;
212  }
#define RAPIDJSON_STATIC_ASSERT(x)
(Internal) macro to check for conditions at compile-time
Definition: rapidjson.h:301
static Ch Take(InputByteStream &is)
Definition: encodings.h:215
CharType Ch
Definition: encodings.h:102
template<typename CharType = char>
template<typename InputStream , typename OutputStream >
static bool rapidjson::UTF8< CharType >::Validate ( InputStream &  is,
OutputStream &  os 
)
inline static

Definition at line 158 of file encodings.h.

158  {
159 #define COPY() os.Put(c = is.Take())
160 #define TRANS(mask) result &= ((GetRange((unsigned char)c) & mask) != 0)
161 #define TAIL() COPY(); TRANS(0x70)
162  Ch c;
163  COPY();
164  if (!(c & 0x80))
165  return true;
166 
167  bool result = true;
168  switch (GetRange((unsigned char)c)) {
169  case 2: TAIL(); return result;
170  case 3: TAIL(); TAIL(); return result;
171  case 4: COPY(); TRANS(0x50); TAIL(); return result;
172  case 5: COPY(); TRANS(0x10); TAIL(); TAIL(); return result;
173  case 6: TAIL(); TAIL(); TAIL(); return result;
174  case 10: COPY(); TRANS(0x20); TAIL(); return result;
175  case 11: COPY(); TRANS(0x60); TAIL(); TAIL(); return result;
176  default: return false;
177  }
178 #undef COPY
179 #undef TRANS
180 #undef TAIL
181  }
#define COPY()
#define TAIL()
#define TRANS(mask)
CharType Ch
Definition: encodings.h:102
static unsigned char GetRange(unsigned char c)
Definition: encodings.h:183

The documentation for this struct was generated from the following file: