Public Types | Public Member Functions | Private Types | Private Member Functions | Private Attributes | List of all members
AutoUTFInputStream< CharType, InputByteStream > Class Template Reference

Input stream wrapper with dynamically bound encoding and automatic encoding detection. More...

#include <encodedstream.h>

Public Types

typedef CharType Ch
 

Public Member Functions

 AutoUTFInputStream (InputByteStream &is, UTFType type=kUTF8)
 Constructor. More...
 
UTFType GetType () const
 
bool HasBOM () const
 
Ch Peek () const
 
Ch Take ()
 
size_t Tell () const
 
void Put (Ch)
 
void Flush ()
 
Ch * PutBegin ()
 
size_t PutEnd (Ch *)
 

Private Types

typedef Ch(* TakeFunc) (InputByteStream &is)
 

Private Member Functions

 RAPIDJSON_STATIC_ASSERT (sizeof(typename InputByteStream::Ch)==1)
 
 AutoUTFInputStream (const AutoUTFInputStream &)
 
AutoUTFInputStream & operator= (const AutoUTFInputStream &)
 
void DetectType ()
 

Private Attributes

InputByteStream * is_
 
UTFType type_
 
Ch current_
 
TakeFunc takeFunc_
 
bool hasBOM_
 

Detailed Description

template<typename CharType, typename InputByteStream>
class AutoUTFInputStream< CharType, InputByteStream >

Input stream wrapper with dynamically bound encoding and automatic encoding detection.

Template Parameters
CharType — Type of character for reading.
InputByteStream — Type of input byte stream to be wrapped.

Definition at line 135 of file encodedstream.h.

Member Typedef Documentation

template<typename CharType , typename InputByteStream >
typedef CharType AutoUTFInputStream< CharType, InputByteStream >::Ch

Definition at line 138 of file encodedstream.h.

template<typename CharType , typename InputByteStream >
typedef Ch(* AutoUTFInputStream< CharType, InputByteStream >::TakeFunc) (InputByteStream &is)
private

Definition at line 219 of file encodedstream.h.

Constructor & Destructor Documentation

template<typename CharType , typename InputByteStream >
AutoUTFInputStream< CharType, InputByteStream >::AutoUTFInputStream ( InputByteStream &  is,
UTFType  type = kUTF8 
)
inline

Constructor.

Parameters
is — input stream to be wrapped.
type — UTF encoding type if it is not detected from the stream.

Definition at line 145 of file encodedstream.h.

145  : is_(&is), type_(type), hasBOM_(false) {
147  DetectType();
148  static const TakeFunc f[] = { RAPIDJSON_ENCODINGS_FUNC(Take) };
149  takeFunc_ = f[type_];
150  current_ = takeFunc_(*is_);
151  }
#define RAPIDJSON_ENCODINGS_FUNC(x)
#define RAPIDJSON_ASSERT(x)
Assertion.
Definition: rapidjson.h:406
UTF-32 big endian.
Definition: encodings.h:608
UTF-8.
Definition: encodings.h:604
InputByteStream * is_
Ch(* TakeFunc)(InputByteStream &is)
template<typename CharType , typename InputByteStream >
AutoUTFInputStream< CharType, InputByteStream >::AutoUTFInputStream ( const AutoUTFInputStream< CharType, InputByteStream > &  )
private

Member Function Documentation

template<typename CharType , typename InputByteStream >
void AutoUTFInputStream< CharType, InputByteStream >::DetectType ( )
inline private

Definition at line 171 of file encodedstream.h.

171  {
172  // BOM (Byte Order Mark):
173  // 00 00 FE FF UTF-32BE
174  // FF FE 00 00 UTF-32LE
175  // FE FF UTF-16BE
176  // FF FE UTF-16LE
177  // EF BB BF UTF-8
178 
179  const unsigned char* c = reinterpret_cast<const unsigned char *>(is_->Peek4());
180  if (!c)
181  return;
182 
183  unsigned bom = static_cast<unsigned>(c[0] | (c[1] << 8) | (c[2] << 16) | (c[3] << 24));
184  hasBOM_ = false;
185  if (bom == 0xFFFE0000) { type_ = kUTF32BE; hasBOM_ = true; is_->Take(); is_->Take(); is_->Take(); is_->Take(); }
186  else if (bom == 0x0000FEFF) { type_ = kUTF32LE; hasBOM_ = true; is_->Take(); is_->Take(); is_->Take(); is_->Take(); }
187  else if ((bom & 0xFFFF) == 0xFFFE) { type_ = kUTF16BE; hasBOM_ = true; is_->Take(); is_->Take(); }
188  else if ((bom & 0xFFFF) == 0xFEFF) { type_ = kUTF16LE; hasBOM_ = true; is_->Take(); is_->Take(); }
189  else if ((bom & 0xFFFFFF) == 0xBFBBEF) { type_ = kUTF8; hasBOM_ = true; is_->Take(); is_->Take(); is_->Take(); }
190 
191  // RFC 4627: Section 3
192  // "Since the first two characters of a JSON text will always be ASCII
193  // characters [RFC0020], it is possible to determine whether an octet
194  // stream is UTF-8, UTF-16 (BE or LE), or UTF-32 (BE or LE) by looking
195  // at the pattern of nulls in the first four octets."
196  // 00 00 00 xx UTF-32BE
197  // 00 xx 00 xx UTF-16BE
198  // xx 00 00 00 UTF-32LE
199  // xx 00 xx 00 UTF-16LE
200  // xx xx xx xx UTF-8
201 
202  if (!hasBOM_) {
203  int pattern = (c[0] ? 1 : 0) | (c[1] ? 2 : 0) | (c[2] ? 4 : 0) | (c[3] ? 8 : 0);
204  switch (pattern) {
205  case 0x08: type_ = kUTF32BE; break;
206  case 0x0A: type_ = kUTF16BE; break;
207  case 0x01: type_ = kUTF32LE; break;
208  case 0x05: type_ = kUTF16LE; break;
209  case 0x0F: type_ = kUTF8; break;
210  default: break; // Use type defined by user.
211  }
212  }
213 
214  // Runtime check whether the size of character type is sufficient. It only performs the check via assertion.
215  if (type_ == kUTF16LE || type_ == kUTF16BE) RAPIDJSON_ASSERT(sizeof(Ch) >= 2);
216  if (type_ == kUTF32LE || type_ == kUTF32BE) RAPIDJSON_ASSERT(sizeof(Ch) >= 4);
217  }
#define RAPIDJSON_ASSERT(x)
Assertion.
Definition: rapidjson.h:406
UTF-32 big endian.
Definition: encodings.h:608
UTF-16 little endian.
Definition: encodings.h:605
UTF-8.
Definition: encodings.h:604
UTF-16 big endian.
Definition: encodings.h:606
InputByteStream * is_
std::string pattern
Definition: regex_t.cc:33
UTF-32 little endian.
Definition: encodings.h:607
template<typename CharType , typename InputByteStream >
void AutoUTFInputStream< CharType, InputByteStream >::Flush ( )
inline

Definition at line 162 of file encodedstream.h.

162 { RAPIDJSON_ASSERT(false); }
#define RAPIDJSON_ASSERT(x)
Assertion.
Definition: rapidjson.h:406
template<typename CharType , typename InputByteStream >
UTFType AutoUTFInputStream< CharType, InputByteStream >::GetType ( ) const
inline

Definition at line 153 of file encodedstream.h.

153 { return type_; }
template<typename CharType , typename InputByteStream >
bool AutoUTFInputStream< CharType, InputByteStream >::HasBOM ( ) const
inline

Definition at line 154 of file encodedstream.h.

154 { return hasBOM_; }
template<typename CharType , typename InputByteStream >
AutoUTFInputStream& AutoUTFInputStream< CharType, InputByteStream >::operator= ( const AutoUTFInputStream< CharType, InputByteStream > &  )
private
template<typename CharType , typename InputByteStream >
Ch AutoUTFInputStream< CharType, InputByteStream >::Peek ( ) const
inline

Definition at line 156 of file encodedstream.h.

156 { return current_; }
template<typename CharType , typename InputByteStream >
void AutoUTFInputStream< CharType, InputByteStream >::Put ( Ch  )
inline

Definition at line 161 of file encodedstream.h.

161 { RAPIDJSON_ASSERT(false); }
#define RAPIDJSON_ASSERT(x)
Assertion.
Definition: rapidjson.h:406
template<typename CharType , typename InputByteStream >
Ch* AutoUTFInputStream< CharType, InputByteStream >::PutBegin ( )
inline

Definition at line 163 of file encodedstream.h.

163 { RAPIDJSON_ASSERT(false); return 0; }
#define RAPIDJSON_ASSERT(x)
Assertion.
Definition: rapidjson.h:406
template<typename CharType , typename InputByteStream >
size_t AutoUTFInputStream< CharType, InputByteStream >::PutEnd ( Ch * )
inline

Definition at line 164 of file encodedstream.h.

164 { RAPIDJSON_ASSERT(false); return 0; }
#define RAPIDJSON_ASSERT(x)
Assertion.
Definition: rapidjson.h:406
template<typename CharType , typename InputByteStream >
AutoUTFInputStream< CharType, InputByteStream >::RAPIDJSON_STATIC_ASSERT ( sizeof(typename InputByteStream::Ch)==1 )
private
template<typename CharType , typename InputByteStream >
Ch AutoUTFInputStream< CharType, InputByteStream >::Take ( )
inline

Definition at line 157 of file encodedstream.h.

template<typename CharType , typename InputByteStream >
size_t AutoUTFInputStream< CharType, InputByteStream >::Tell ( ) const
inline

Definition at line 158 of file encodedstream.h.

158 { return is_->Tell(); }
InputByteStream * is_

Member Data Documentation

template<typename CharType , typename InputByteStream >
Ch AutoUTFInputStream< CharType, InputByteStream >::current_
private

Definition at line 222 of file encodedstream.h.

template<typename CharType , typename InputByteStream >
bool AutoUTFInputStream< CharType, InputByteStream >::hasBOM_
private

Definition at line 224 of file encodedstream.h.

template<typename CharType , typename InputByteStream >
InputByteStream* AutoUTFInputStream< CharType, InputByteStream >::is_
private

Definition at line 220 of file encodedstream.h.

template<typename CharType , typename InputByteStream >
TakeFunc AutoUTFInputStream< CharType, InputByteStream >::takeFunc_
private

Definition at line 223 of file encodedstream.h.

template<typename CharType , typename InputByteStream >
UTFType AutoUTFInputStream< CharType, InputByteStream >::type_
private

Definition at line 221 of file encodedstream.h.


The documentation for this class was generated from the following file: