15 #ifndef RAPIDJSON_ENCODINGS_H_ 16 #define RAPIDJSON_ENCODINGS_H_ 20 #if defined(_MSC_VER) && !defined(__clang__) 22 RAPIDJSON_DIAG_OFF(4244)
23 RAPIDJSON_DIAG_OFF(4702)
24 #elif defined(__GNUC__) 26 RAPIDJSON_DIAG_OFF(effc++)
27 RAPIDJSON_DIAG_OFF(overflow)
47 template<typename OutputStream>
48 static void Encode(OutputStream& os, unsigned codepoint);
54 template <typename InputStream>
55 static bool Decode(InputStream& is, unsigned* codepoint);
62 template <typename InputStream, typename OutputStream>
63 static bool Validate(InputStream& is, OutputStream& os);
65 // The following functions deal with byte streams.
68 template <typename InputByteStream>
69 static CharType TakeBOM(InputByteStream& is);
72 template <typename InputByteStream>
73 static Ch Take(InputByteStream& is);
76 template <typename OutputByteStream>
77 static void PutBOM(OutputByteStream& os);
80 template <typename OutputByteStream>
81 static void Put(OutputByteStream& os, Ch c);
95 template<
typename CharType =
char>
// Writes `codepoint` to `os` as a UTF-8 byte sequence, one os.Put() call per byte.
// The sequence length is chosen by the magnitude of `codepoint`.
101 template<
typename OutputStream>
102 static void Encode(OutputStream& os,
unsigned codepoint) {
// Up to 0x7F: emitted as a single byte.
103 if (codepoint <= 0x7F)
104 os.Put(static_cast<Ch>(codepoint & 0xFF));
// Up to 0x7FF: lead byte 0xC0 | high bits, then one 0x80 | 6-bit continuation byte.
105 else if (codepoint <= 0x7FF) {
106 os.Put(static_cast<Ch>(0xC0 | ((codepoint >> 6) & 0xFF)));
107 os.Put(static_cast<Ch>(0x80 | ((codepoint & 0x3F))));
// Up to 0xFFFF: lead byte 0xE0 | high bits, then two continuation bytes.
109 else if (codepoint <= 0xFFFF) {
110 os.Put(static_cast<Ch>(0xE0 | ((codepoint >> 12) & 0xFF)));
111 os.Put(static_cast<Ch>(0x80 | ((codepoint >> 6) & 0x3F)));
112 os.Put(static_cast<Ch>(0x80 | (codepoint & 0x3F)));
// Four-byte form: lead byte 0xF0 | high bits, then three continuation bytes.
// NOTE(review): the branch condition guarding these lines is not visible in this listing.
116 os.Put(static_cast<Ch>(0xF0 | ((codepoint >> 18) & 0xFF)));
117 os.Put(static_cast<Ch>(0x80 | ((codepoint >> 12) & 0x3F)));
118 os.Put(static_cast<Ch>(0x80 | ((codepoint >> 6) & 0x3F)));
119 os.Put(static_cast<Ch>(0x80 | (codepoint & 0x3F)));
123 template<
typename OutputStream>
125 if (codepoint <= 0x7F)
126 PutUnsafe(os, static_cast<Ch>(codepoint & 0xFF));
127 else if (codepoint <= 0x7FF) {
128 PutUnsafe(os, static_cast<Ch>(0xC0 | ((codepoint >> 6) & 0xFF)));
129 PutUnsafe(os, static_cast<Ch>(0x80 | ((codepoint & 0x3F))));
131 else if (codepoint <= 0xFFFF) {
132 PutUnsafe(os, static_cast<Ch>(0xE0 | ((codepoint >> 12) & 0xFF)));
133 PutUnsafe(os, static_cast<Ch>(0x80 | ((codepoint >> 6) & 0x3F)));
134 PutUnsafe(os, static_cast<Ch>(0x80 | (codepoint & 0x3F)));
138 PutUnsafe(os, static_cast<Ch>(0xF0 | ((codepoint >> 18) & 0xFF)));
139 PutUnsafe(os, static_cast<Ch>(0x80 | ((codepoint >> 12) & 0x3F)));
140 PutUnsafe(os, static_cast<Ch>(0x80 | ((codepoint >> 6) & 0x3F)));
141 PutUnsafe(os, static_cast<Ch>(0x80 | (codepoint & 0x3F)));
// Reads a UTF-8 sequence from `is` and stores the decoded value in *codepoint.
// Returns false when the first byte falls in a class no case handles.
// NOTE(review): the per-class cases and the success return are not visible in this listing.
145 template <
typename InputStream>
146 static bool Decode(InputStream& is,
unsigned* codepoint) {
// Helper macros used by the decoder:
//   RAPIDJSON_COPY()      takes the next character into c and appends its low 6 bits to *codepoint.
//   RAPIDJSON_TRANS(mask) clears `result` unless GetRange(c) has a bit in common with `mask`.
//   RAPIDJSON_TAIL()      is RAPIDJSON_COPY() followed by RAPIDJSON_TRANS(0x70).
147 #define RAPIDJSON_COPY() c = is.Take(); *codepoint = (*codepoint << 6) | (static_cast<unsigned char>(c) & 0x3Fu) 148 #define RAPIDJSON_TRANS(mask) result &= ((GetRange(static_cast<unsigned char>(c)) & mask) != 0) 149 #define RAPIDJSON_TAIL() RAPIDJSON_COPY(); RAPIDJSON_TRANS(0x70) 150 typename InputStream::Ch
c = is.Take();
// Store the first byte itself as the code point.
// NOTE(review): presumably the single-byte path; its guarding condition is not visible here.
152 *codepoint =
static_cast<unsigned char>(
c);
// Classify the first byte through the GetRange() lookup.
156 unsigned char type =
GetRange(static_cast<unsigned char>(c));
// Keep only the payload bits of the first byte: 0xFF shifted right by `type` forms the mask.
160 *codepoint = (0xFFu >>
type) & static_cast<unsigned char>(c);
// Any class without a dedicated case is rejected.
171 default:
return false;
173 #undef RAPIDJSON_COPY 174 #undef RAPIDJSON_TRANS 175 #undef RAPIDJSON_TAIL 178 template <
typename InputStream,
typename OutputStream>
179 static bool Validate(InputStream& is, OutputStream& os) {
180 #define RAPIDJSON_COPY() os.Put(c = is.Take()) 181 #define RAPIDJSON_TRANS(mask) result &= ((GetRange(static_cast<unsigned char>(c)) & mask) != 0) 182 #define RAPIDJSON_TAIL() RAPIDJSON_COPY(); RAPIDJSON_TRANS(0x70) 189 switch (
GetRange(static_cast<unsigned char>(c))) {
197 default:
return false;
199 #undef RAPIDJSON_COPY 200 #undef RAPIDJSON_TRANS 201 #undef RAPIDJSON_TAIL 207 static const unsigned char type[] = {
208 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
209 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
210 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
211 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
212 0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,
213 0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,
214 0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,
215 0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,
216 8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
217 10,3,3,3,3,3,3,3,3,3,3,3,3,4,3,3, 11,6,6,6,5,8,8,8,8,8,8,8,8,8,8,8,
// Reads from `is` and compares against the byte values 0xEF, 0xBB, 0xBF in turn
// (the same three bytes PutBOM writes). On the first mismatch the character
// currently held in c is returned.
// NOTE(review): the statements that refresh c between comparisons, and the final
// return, are not visible in this listing.
222 template <
typename InputByteStream>
223 static CharType
TakeBOM(InputByteStream& is) {
225 typename InputByteStream::Ch
c =
Take(is);
226 if (static_cast<unsigned char>(c) != 0xEFu)
return c;
228 if (static_cast<unsigned char>(c) != 0xBBu)
return c;
230 if (static_cast<unsigned char>(c) != 0xBFu)
return c;
// Takes one element from the byte stream `is` and returns it converted to Ch.
235 template <
typename InputByteStream>
236 static Ch
Take(InputByteStream& is) {
238 return static_cast<Ch
>(is.Take());
// Writes the three bytes 0xEF, 0xBB, 0xBF to `os`, in that order.
241 template <
typename OutputByteStream>
242 static void PutBOM(OutputByteStream& os) {
244 os.Put(static_cast<typename OutputByteStream::Ch>(0xEFu));
245 os.Put(static_cast<typename OutputByteStream::Ch>(0xBBu));
246 os.Put(static_cast<typename OutputByteStream::Ch>(0xBFu));
// Writes the single character `c` to `os`, converted to the stream's own Ch type.
249 template <
typename OutputByteStream>
250 static void Put(OutputByteStream& os, Ch
c) {
252 os.Put(static_cast<typename OutputByteStream::Ch>(c));
268 template<
typename CharType =
wchar_t>
// Writes `codepoint` to `os` as UTF-16 code units: one unit for values up to
// 0xFFFF, otherwise a pair built from (codepoint - 0x10000).
275 template<
typename OutputStream>
276 static void Encode(OutputStream& os,
unsigned codepoint) {
// Values up to 0xFFFF are written unchanged as a single unit.
278 if (codepoint <= 0xFFFF) {
280 os.Put(static_cast<typename OutputStream::Ch>(codepoint));
// Larger values: subtract 0x10000, then emit the upper bits tagged with 0xD800
// followed by the lower 10 bits tagged with 0xDC00.
// NOTE(review): the `else` line introducing this branch is not visible in this listing.
284 unsigned v = codepoint - 0x10000;
285 os.Put(static_cast<typename OutputStream::Ch>((v >> 10) | 0xD800));
286 os.Put(static_cast<typename OutputStream::Ch>((v & 0x3FF) | 0xDC00));
291 template<
typename OutputStream>
294 if (codepoint <= 0xFFFF) {
296 PutUnsafe(os, static_cast<typename OutputStream::Ch>(codepoint));
300 unsigned v = codepoint - 0x10000;
301 PutUnsafe(os, static_cast<typename OutputStream::Ch>((v >> 10) | 0xD800));
302 PutUnsafe(os, static_cast<typename OutputStream::Ch>((v & 0x3FF) | 0xDC00));
// Reads UTF-16 code unit(s) from `is` and stores the decoded value in *codepoint.
// NOTE(review): several lines (intermediate returns, the read of the second unit,
// the final fallback) are not visible in this listing.
306 template <
typename InputStream>
307 static bool Decode(InputStream& is,
unsigned* codepoint) {
309 typename InputStream::Ch
c = is.Take();
// Units outside 0xD800..0xDFFF are the code point itself.
310 if (c < 0xD800 || c > 0xDFFF) {
311 *codepoint =
static_cast<unsigned>(
c);
// 0xD800..0xDBFF: the low 10 bits of this unit become bits 10..19 of the result.
314 else if (c <= 0xDBFF) {
315 *codepoint = (
static_cast<unsigned>(
c) & 0x3FF) << 10;
// Merge the low 10 bits of c into the result and add the 0x10000 offset.
// NOTE(review): c is presumably re-read from `is` before this point; that line is elided.
317 *codepoint |= (
static_cast<unsigned>(
c) & 0x3FF);
318 *codepoint += 0x10000;
// Succeeds only if c lies in 0xDC00..0xDFFF.
319 return c >= 0xDC00 && c <= 0xDFFF;
// Copies UTF-16 code unit(s) from `is` to `os` while checking surrogate ranges.
// NOTE(review): the return statements for the first branch and the final fallback
// are not visible in this listing.
324 template <
typename InputStream,
typename OutputStream>
325 static bool Validate(InputStream& is, OutputStream& os) {
328 typename InputStream::Ch
c;
// The first unit is always forwarded to the output before it is inspected.
329 os.Put(static_cast<typename OutputStream::Ch>(c = is.Take()));
// Units outside 0xD800..0xDFFF need no further unit.
330 if (c < 0xD800 || c > 0xDFFF)
// 0xD800..0xDBFF: forward one more unit and require it to be in 0xDC00..0xDFFF.
332 else if (c <= 0xDBFF) {
333 os.Put(c = is.Take());
334 return c >= 0xDC00 && c <= 0xDFFF;
341 template<
typename CharType =
wchar_t>
343 template <
typename InputByteStream>
344 static CharType
TakeBOM(InputByteStream& is) {
346 CharType
c =
Take(is);
// Assembles one 16-bit value from two bytes of `is`, least significant byte first,
// and returns it as CharType.
350 template <
typename InputByteStream>
351 static CharType
Take(InputByteStream& is) {
// First byte supplies bits 0..7.
353 unsigned c =
static_cast<uint8_t>(is.Take());
// Second byte supplies bits 8..15.
354 c |=
static_cast<unsigned>(
static_cast<uint8_t>(is.Take())) << 8;
355 return static_cast<CharType
>(
c);
// Writes the two bytes 0xFF, 0xFE to `os`, in that order.
358 template <
typename OutputByteStream>
359 static void PutBOM(OutputByteStream& os) {
361 os.Put(static_cast<typename OutputByteStream::Ch>(0xFFu));
362 os.Put(static_cast<typename OutputByteStream::Ch>(0xFEu));
// Writes `c` to `os` as two bytes: bits 0..7 first, then bits 8..15.
365 template <
typename OutputByteStream>
366 static void Put(OutputByteStream& os, CharType
c) {
368 os.Put(static_cast<typename OutputByteStream::Ch>(static_cast<unsigned>(c) & 0xFFu));
369 os.Put(static_cast<typename OutputByteStream::Ch>((static_cast<unsigned>(c) >> 8) & 0xFFu));
374 template<
typename CharType =
wchar_t>
376 template <
typename InputByteStream>
377 static CharType
TakeBOM(InputByteStream& is) {
379 CharType
c =
Take(is);
// Assembles one 16-bit value from two bytes of `is`, most significant byte first,
// and returns it as CharType.
383 template <
typename InputByteStream>
384 static CharType
Take(InputByteStream& is) {
// First byte supplies bits 8..15.
386 unsigned c =
static_cast<unsigned>(
static_cast<uint8_t>(is.Take())) << 8;
// Second byte supplies bits 0..7.
387 c |=
static_cast<unsigned>(
static_cast<uint8_t>(is.Take()));
388 return static_cast<CharType
>(
c);
// Writes the two bytes 0xFE, 0xFF to `os`, in that order.
391 template <
typename OutputByteStream>
392 static void PutBOM(OutputByteStream& os) {
394 os.Put(static_cast<typename OutputByteStream::Ch>(0xFEu));
395 os.Put(static_cast<typename OutputByteStream::Ch>(0xFFu));
// Writes `c` to `os` as two bytes: bits 8..15 first, then bits 0..7.
398 template <
typename OutputByteStream>
399 static void Put(OutputByteStream& os, CharType
c) {
401 os.Put(static_cast<typename OutputByteStream::Ch>((static_cast<unsigned>(c) >> 8) & 0xFFu));
402 os.Put(static_cast<typename OutputByteStream::Ch>(static_cast<unsigned>(c) & 0xFFu));
417 template<
typename CharType =
unsigned>
424 template<
typename OutputStream>
425 static void Encode(OutputStream& os,
unsigned codepoint) {
431 template<
typename OutputStream>
438 template <
typename InputStream>
439 static bool Decode(InputStream& is,
unsigned* codepoint) {
443 return c <= 0x10FFFF;
// Copies one unit from `is` to `os` and reports whether its value is at most 0x10FFFF.
// NOTE(review): the declaration of c is not visible in this listing.
446 template <
typename InputStream,
typename OutputStream>
447 static bool Validate(InputStream& is, OutputStream& os) {
// The unit is forwarded to the output before the range check.
450 os.Put(c = is.Take());
451 return c <= 0x10FFFF;
456 template<
typename CharType =
unsigned>
458 template <
typename InputByteStream>
459 static CharType
TakeBOM(InputByteStream& is) {
461 CharType
c =
Take(is);
// Assembles one 32-bit value from four bytes of `is`, least significant byte first,
// and returns it as CharType.
465 template <
typename InputByteStream>
466 static CharType
Take(InputByteStream& is) {
// Byte 1 -> bits 0..7.
468 unsigned c =
static_cast<uint8_t>(is.Take());
// Byte 2 -> bits 8..15.
469 c |=
static_cast<unsigned>(
static_cast<uint8_t>(is.Take())) << 8;
// Byte 3 -> bits 16..23.
470 c |=
static_cast<unsigned>(
static_cast<uint8_t>(is.Take())) << 16;
// Byte 4 -> bits 24..31.
471 c |=
static_cast<unsigned>(
static_cast<uint8_t>(is.Take())) << 24;
472 return static_cast<CharType
>(
c);
// Writes the four bytes 0xFF, 0xFE, 0x00, 0x00 to `os`, in that order.
475 template <
typename OutputByteStream>
476 static void PutBOM(OutputByteStream& os) {
478 os.Put(static_cast<typename OutputByteStream::Ch>(0xFFu));
479 os.Put(static_cast<typename OutputByteStream::Ch>(0xFEu));
480 os.Put(static_cast<typename OutputByteStream::Ch>(0x00u));
481 os.Put(static_cast<typename OutputByteStream::Ch>(0x00u));
// Writes `c` to `os` as four bytes, starting with bits 0..7 and ending with bits 24..31.
484 template <
typename OutputByteStream>
485 static void Put(OutputByteStream& os, CharType
c) {
487 os.Put(static_cast<typename OutputByteStream::Ch>(c & 0xFFu));
488 os.Put(static_cast<typename OutputByteStream::Ch>((c >> 8) & 0xFFu));
489 os.Put(static_cast<typename OutputByteStream::Ch>((c >> 16) & 0xFFu));
490 os.Put(static_cast<typename OutputByteStream::Ch>((c >> 24) & 0xFFu));
495 template<
typename CharType =
unsigned>
497 template <
typename InputByteStream>
498 static CharType
TakeBOM(InputByteStream& is) {
500 CharType
c =
Take(is);
// Assembles one 32-bit value from four bytes of `is`, most significant byte first,
// and returns it as CharType.
504 template <
typename InputByteStream>
505 static CharType
Take(InputByteStream& is) {
// Byte 1 -> bits 24..31.
507 unsigned c =
static_cast<unsigned>(
static_cast<uint8_t>(is.Take())) << 24;
// Byte 2 -> bits 16..23.
508 c |=
static_cast<unsigned>(
static_cast<uint8_t>(is.Take())) << 16;
// Byte 3 -> bits 8..15.
509 c |=
static_cast<unsigned>(
static_cast<uint8_t>(is.Take())) << 8;
// Byte 4 -> bits 0..7.
510 c |=
static_cast<unsigned>(
static_cast<uint8_t>(is.Take()));
511 return static_cast<CharType
>(
c);
// Writes the four bytes 0x00, 0x00, 0xFE, 0xFF to `os`, in that order.
514 template <
typename OutputByteStream>
515 static void PutBOM(OutputByteStream& os) {
517 os.Put(static_cast<typename OutputByteStream::Ch>(0x00u));
518 os.Put(static_cast<typename OutputByteStream::Ch>(0x00u));
519 os.Put(static_cast<typename OutputByteStream::Ch>(0xFEu));
520 os.Put(static_cast<typename OutputByteStream::Ch>(0xFFu));
// Writes `c` to `os` as four bytes, starting with bits 24..31 and ending with bits 0..7.
523 template <
typename OutputByteStream>
524 static void Put(OutputByteStream& os, CharType
c) {
526 os.Put(static_cast<typename OutputByteStream::Ch>((c >> 24) & 0xFFu));
527 os.Put(static_cast<typename OutputByteStream::Ch>((c >> 16) & 0xFFu));
528 os.Put(static_cast<typename OutputByteStream::Ch>((c >> 8) & 0xFFu));
529 os.Put(static_cast<typename OutputByteStream::Ch>(c & 0xFFu));
541 template<
typename CharType =
char>
547 template<
typename OutputStream>
548 static void Encode(OutputStream& os,
unsigned codepoint) {
550 os.Put(static_cast<Ch>(codepoint & 0xFF));
553 template<
typename OutputStream>
556 PutUnsafe(os, static_cast<Ch>(codepoint & 0xFF));
559 template <
typename InputStream>
560 static bool Decode(InputStream& is,
unsigned* codepoint) {
566 template <
typename InputStream,
typename OutputStream>
567 static bool Validate(InputStream& is, OutputStream& os) {
569 os.Put(static_cast<typename OutputStream::Ch>(c));
573 template <
typename InputByteStream>
574 static CharType
TakeBOM(InputByteStream& is) {
577 return static_cast<Ch
>(
c);
580 template <
typename InputByteStream>
581 static Ch
Take(InputByteStream& is) {
583 return static_cast<Ch
>(is.Take());
586 template <
typename OutputByteStream>
587 static void PutBOM(OutputByteStream& os) {
592 template <
typename OutputByteStream>
593 static void Put(OutputByteStream& os, Ch
c) {
595 os.Put(static_cast<typename OutputByteStream::Ch>(c));
614 template<
typename CharType>
// RAPIDJSON_ENCODINGS_FUNC(x) expands to member x of UTF8, UTF16LE, UTF16BE,
// UTF32LE and UTF32BE (all instantiated with Ch), as a comma-separated list.
// Encode() forwards to the function-pointer table entry selected by os.GetType().
// NOTE(review): the definition of the table `f` is not visible in this listing;
// presumably it is initialised from RAPIDJSON_ENCODINGS_FUNC(Encode).
620 #define RAPIDJSON_ENCODINGS_FUNC(x) UTF8<Ch>::x, UTF16LE<Ch>::x, UTF16BE<Ch>::x, UTF32LE<Ch>::x, UTF32BE<Ch>::x 622 template<
typename OutputStream>
623 static RAPIDJSON_FORCEINLINE
void Encode(OutputStream& os,
unsigned codepoint) {
// Signature shared by every entry in the dispatch table.
624 typedef void (*EncodeFunc)(OutputStream&, unsigned);
// Index the table by the stream's runtime type and call the selected encoder.
626 (*f[os.GetType()])(os, codepoint);
629 template<
typename OutputStream>
630 static RAPIDJSON_FORCEINLINE
void EncodeUnsafe(OutputStream& os,
unsigned codepoint) {
631 typedef void (*EncodeFunc)(OutputStream&, unsigned);
633 (*f[os.GetType()])(os, codepoint);
636 template <
typename InputStream>
637 static RAPIDJSON_FORCEINLINE
bool Decode(InputStream& is,
unsigned* codepoint) {
638 typedef bool (*DecodeFunc)(InputStream&,
unsigned*);
640 return (*f[is.GetType()])(is, codepoint);
643 template <
typename InputStream,
typename OutputStream>
644 static RAPIDJSON_FORCEINLINE
bool Validate(InputStream& is, OutputStream& os) {
645 typedef bool (*ValidateFunc)(InputStream&, OutputStream&);
647 return (*f[is.GetType()])(is, os);
650 #undef RAPIDJSON_ENCODINGS_FUNC 657 template<
typename SourceEncoding,
typename TargetEncoding>
// Decodes one code point from `is` with SourceEncoding and, if decoding succeeds,
// writes it to `os` with TargetEncoding.
// NOTE(review): the declaration of `codepoint` and both return statements are not
// visible in this listing.
660 template<
typename InputStream,
typename OutputStream>
661 static RAPIDJSON_FORCEINLINE
bool Transcode(InputStream& is, OutputStream& os) {
// Decode failure takes the early-exit branch; nothing is encoded in that case.
663 if (!SourceEncoding::Decode(is, &codepoint))
665 TargetEncoding::Encode(os, codepoint);
669 template<
typename InputStream,
typename OutputStream>
670 static RAPIDJSON_FORCEINLINE
bool TranscodeUnsafe(InputStream& is, OutputStream& os) {
672 if (!SourceEncoding::Decode(is, &codepoint))
674 TargetEncoding::EncodeUnsafe(os, codepoint);
// Validation here is simply a transcode: the call and its result are forwarded
// to Transcode(is, os) unchanged.
679 template<
typename InputStream,
typename OutputStream>
680 static RAPIDJSON_FORCEINLINE
bool Validate(InputStream& is, OutputStream& os) {
681 return Transcode(is, os);
686 template<
typename Stream>
690 template<
typename Encoding>
692 template<
typename InputStream,
typename OutputStream>
693 static RAPIDJSON_FORCEINLINE
bool Transcode(InputStream& is, OutputStream& os) {
698 template<
typename InputStream,
typename OutputStream>
699 static RAPIDJSON_FORCEINLINE
bool TranscodeUnsafe(InputStream& is, OutputStream& os) {
704 template<
typename InputStream,
typename OutputStream>
705 static RAPIDJSON_FORCEINLINE
bool Validate(InputStream& is, OutputStream& os) {
706 return Encoding::Validate(is, os);
712 #if defined(__GNUC__) || (defined(_MSC_VER) && !defined(__clang__)) 716 #endif // RAPIDJSON_ENCODINGS_H_ static bool Decode(InputStream &is, unsigned *codepoint)
static void Put(OutputByteStream &os, Ch c)
static CharType TakeBOM(InputByteStream &is)
UTFType
Runtime-specified UTF encoding type of a stream.
#define RAPIDJSON_ASSERT(x)
Assertion.
static void Encode(OutputStream &os, unsigned codepoint)
#define RAPIDJSON_NAMESPACE_END
provide custom rapidjson namespace (closing expression)
static CharType TakeBOM(InputByteStream &is)
static RAPIDJSON_FORCEINLINE void EncodeUnsafe(OutputStream &os, unsigned codepoint)
static bool Validate(InputStream &is, OutputStream &os)
#define RAPIDJSON_STATIC_ASSERT(x)
(Internal) macro to check for conditions at compile-time
static CharType TakeBOM(InputByteStream &is)
static void Put(OutputByteStream &os, Ch c)
static bool Validate(InputStream &is, OutputStream &os)
static bool Validate(InputStream &is, OutputStream &os)
static void PutBOM(OutputByteStream &os)
#define RAPIDJSON_NAMESPACE_BEGIN
provide custom rapidjson namespace (opening expression)
static void PutBOM(OutputByteStream &os)
#define RAPIDJSON_ENCODINGS_FUNC(x)
static void Put(OutputByteStream &os, CharType c)
static Ch Take(InputByteStream &is)
static CharType Take(InputByteStream &is)
static void Put(OutputByteStream &os, CharType c)
static CharType TakeBOM(InputByteStream &is)
static CharType Take(InputByteStream &is)
static CharType Take(InputByteStream &is)
static bool Decode(InputStream &is, unsigned *codepoint)
static CharType Take(InputByteStream &is)
static Ch Take(InputByteStream &is)
Dynamically select encoding according to stream's runtime-specified UTF encoding type.
static RAPIDJSON_FORCEINLINE bool Transcode(InputStream &is, OutputStream &os)
Take one Unicode codepoint from source encoding, convert it to target encoding and put it to the outp...
static void Put(OutputByteStream &os, CharType c)
static unsigned char GetRange(unsigned char c)
static void PutBOM(OutputByteStream &os)
static void EncodeUnsafe(OutputStream &os, unsigned codepoint)
UTF-16 big endian encoding.
static RAPIDJSON_FORCEINLINE bool Transcode(InputStream &is, OutputStream &os)
static void Encode(OutputStream &os, unsigned codepoint)
UTF-32 big endian encoding.
static void Encode(OutputStream &os, unsigned codepoint)
static RAPIDJSON_FORCEINLINE bool Decode(InputStream &is, unsigned *codepoint)
static bool Decode(InputStream &is, unsigned *codepoint)
common definitions and configuration
static RAPIDJSON_FORCEINLINE void Encode(OutputStream &os, unsigned codepoint)
static RAPIDJSON_FORCEINLINE bool Validate(InputStream &is, OutputStream &os)
Validate one Unicode codepoint from an encoded stream.
static CharType TakeBOM(InputByteStream &is)
static void Put(OutputByteStream &os, CharType c)
static RAPIDJSON_FORCEINLINE bool Validate(InputStream &is, OutputStream &os)
void PutUnsafe(Stream &stream, typename Stream::Ch c)
Write character to a stream, presuming buffer is reserved.
static void PutBOM(OutputByteStream &os)
static RAPIDJSON_FORCEINLINE bool TranscodeUnsafe(InputStream &is, OutputStream &os)
static void EncodeUnsafe(OutputStream &os, unsigned codepoint)
static bool Decode(InputStream &is, unsigned *codepoint)
static void PutBOM(OutputByteStream &os)
static void Encode(OutputStream &os, unsigned codepoint)
static void EncodeUnsafe(OutputStream &os, unsigned codepoint)
static void PutBOM(OutputByteStream &os)
#define RAPIDJSON_TRANS(mask)
UTF-16 little endian encoding.
static bool Validate(InputStream &is, OutputStream &os)
static CharType TakeBOM(InputByteStream &is)
UTF-32 little endian encoding.
static RAPIDJSON_FORCEINLINE bool TranscodeUnsafe(InputStream &is, OutputStream &os)
static RAPIDJSON_FORCEINLINE bool Validate(InputStream &is, OutputStream &os)
static void EncodeUnsafe(OutputStream &os, unsigned codepoint)