qtextcodec.cpp
Go to the documentation of this file.
1 /****************************************************************************
2 **
3 **
4 ** Implementation of QTextCodec class
5 **
6 ** Created : 981015
7 **
8 ** Copyright (C)1998-2000 Trolltech AS. All rights reserved.
9 **
10 ** This file is part of the tools module of the Qt GUI Toolkit.
11 **
12 ** This file may be distributed under the terms of the Q Public License
13 ** as defined by Trolltech AS of Norway and appearing in the file
14 ** LICENSE.QPL included in the packaging of this file.
15 **
16 ** This file may be distributed and/or modified under the terms of the
17 ** GNU General Public License version 2 as published by the Free Software
18 ** Foundation and appearing in the file LICENSE.GPL included in the
19 ** packaging of this file.
20 **
21 ** Licensees holding valid Qt Enterprise Edition or Qt Professional Edition
22 ** licenses may use this file in accordance with the Qt Commercial License
23 ** Agreement provided with the Software.
24 **
25 ** This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
26 ** WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
27 **
28 ** See http://www.trolltech.com/pricing.html or email sales@trolltech.com for
29 ** information about Qt Commercial License Agreements.
30 ** See http://www.trolltech.com/qpl/ for QPL licensing information.
31 ** See http://www.trolltech.com/gpl/ for GPL licensing information.
32 **
33 ** Contact info@trolltech.com if any conditions of this licensing are
34 ** not clear to you.
35 **
36 **********************************************************************/
37 
38 #include "qtextcodec.h"
39 #ifndef QT_NO_TEXTCODEC
40 
41 #include "qinternallist.h"
42 #ifndef QT_NO_CODECS
43 #include "qutfcodec.h"
44 //#include "qgbkcodec.h"
45 //#include "qeucjpcodec.h"
46 //#include "qjiscodec.h"
47 //#include "qsjiscodec.h"
48 //#include "qeuckrcodec.h"
49 //#include "qbig5codec.h"
50 //#include "qrtlcodec.h"
51 //#include "qtsciicodec.h"
52 #endif
53 
54 #include "qfile.h"
55 #include "qstrlist.h"
56 #include "qstring.h"
57 
58 #include <stdlib.h>
59 #include <ctype.h>
60 #include <locale.h>
61 
62 
64 static bool destroying_is_ok; // starts out as 0
65 
66 /*! Deletes all the created codecs.
67 
68  \warning Do not call this function.
69 
70  QApplication calls this just before exiting, to delete any
71  QTextCodec objects that may be lying around. Since various other
72  classes hold pointers to QTextCodec objects, it is not safe to call
73  this function earlier.
74 
75  If you are using the utility classes (like QString) but not using
76  QApplication, calling this function at the very end of your
77  application can be helpful to chasing down memory leaks, as
78  QTextCodec objects will not show up.
79 */
80 
82 {
83  if ( !all )
84  return;
85 
88  all = 0;
89  ball->clear();
90  delete ball;
92 }
93 
94 
95 static void setupBuiltinCodecs();
96 
97 
98 static void realSetup()
99 {
100 #if defined(CHECK_STATE)
101  if ( destroying_is_ok )
102  qWarning( "creating new codec during codec cleanup" );
103 #endif
104  all = new QInternalList<QTextCodec>;
105  all->setAutoDelete( TRUE );
107 }
108 
109 
110 static inline void setup()
111 {
112  if ( !all )
113  realSetup();
114 }
115 
116 
119 public:
121  QCString fromUnicode(const QString& uc, int& lenInOut);
122 };
123 
124 
127 public:
129  QString toUnicode(const char* chars, int len);
130 };
131 
133  codec(c)
134 {
135 }
136 
137 
139 {
140  return codec->fromUnicode(uc,lenInOut);
141 }
142 
143 
145  codec(c)
146 {
147 }
148 
149 
150 QString QTextStatelessDecoder::toUnicode(const char* chars, int len)
151 {
152  return codec->toUnicode(chars,len);
153 }
154 
155 
156 
157 // NOT REVISED
158 /*!
159  \class QTextCodec qtextcodec.h
160  \brief Provides conversion between text encodings.
161 
162  By making objects of subclasses of QTextCodec, support for
163  new text encodings can be added to Qt.
164 
165  The abstract virtual functions describe the encoder to the
166  system and the coder is used as required in the different
167 text file formats supported by QTextStream and, under X11, for the
168  locale-specific character input and output (under Windows NT
169  codecs are not needed for GUI I/O since the system works
170  with Unicode already, and Windows 95/98 has built-in convertors
171  for the 8-bit local encoding).
172 
173  More recently created QTextCodec objects take precedence
174  over earlier ones.
175 
176  To add support for another 8-bit encoding to Qt, make a subclass
177 of QTextCodec and implement at least the following methods:
178  <dl>
179  <dt>\c const char* name() const
180  <dd>Return the official name for the encoding.
181  <dt>\c int mibEnum() const
182  <dd>Return the MIB enum for the encoding if it is listed in the
183  <a href=ftp://ftp.isi.edu/in-notes/iana/assignments/character-sets>
184  IANA character-sets encoding file</a>.
185  </dl>
186  If the encoding is multi-byte then it will have "state"; that is,
187  the interpretation of some bytes will be dependent on some preceding
188  bytes. For such an encoding, you will need to implement
189  <dl>
190  <dt> \c QTextDecoder* makeDecoder() const
191  <dd>Return a QTextDecoder that remembers incomplete multibyte
192  sequence prefixes or other required state.
193  </dl>
194  If the encoding does \e not require state, you should implement:
195  <dl>
196  <dt> \c QString toUnicode(const char* chars, int len) const
197  <dd>Converts \e len characters from \e chars to Unicode.
198  </dl>
199  The base QTextCodec class has default implementations of the above
200  two functions, <i>but they are mutually recursive</i>, so you must
201  re-implement at least one of them, or both for improved efficiency.
202 
203  For conversion from Unicode to 8-bit encodings, it is rarely necessary
204  to maintain state. However, two functions similar to the two above
205  are used for encoding:
206  <dl>
207  <dt> \c QTextEncoder* makeEncoder() const
208 <dd>Return a QTextEncoder.
209  <dt> \c QCString fromUnicode(const QString& uc, int& lenInOut ) const;
210  <dd>Converts \e lenInOut characters (of type QChar) from the start
211  of the string \a uc, returning a QCString result, and also returning
212  the \link QCString::length() length\endlink
213  of the result in lenInOut.
214  </dl>
215  Again, these are mutually recursive so only one needs to be implemented,
216  or both if better efficiency is possible.
217 
218  Finally, you must implement:
219  <dl>
220  <dt> \c int heuristicContentMatch(const char* chars, int len) const
221  <dd>Gives a value indicating how likely it is that \e len characters
222  from \e chars are in the encoding.
223  </dl>
224  A good model for this function is the
225  QWindowsLocalCodec::heuristicContentMatch function found in the Qt sources.
226 
227  A QTextCodec subclass might have improved performance if you also
228  re-implement:
229  <dl>
230  <dt> \c bool canEncode( QChar ) const
231  <dd>Test if a Unicode character can be encoded.
232  <dt> \c bool canEncode( const QString& ) const
233  <dd>Test if a string of Unicode characters can be encoded.
234  <dt> \c int heuristicNameMatch(const char* hint) const
235  <dd>Test if a possibly non-standard name is referring to the codec.
236  </dl>
237 */
238 
239 
240 /*!
241  Constructs a QTextCodec, making it of highest precedence.
242  The QTextCodec should always be constructed on the heap
243  (with new), and once constructed it becomes the responsibility
244  of Qt to delete it (which is done at QApplication destruction).
245 */
247 {
248  setup();
249  all->insert(0,this);
250 }
251 
252 
253 /*!
254  Destructs the QTextCodec. Note that you should not delete
255  codecs yourself - once created they become the responsibility
256  of Qt to delete.
257 */
259 {
260  if ( !destroying_is_ok )
261  qWarning("QTextCodec::~QTextCodec() called by application");
262  if ( all )
263  all->remove( this );
264 }
265 
266 
267 /*!
268  Returns a value indicating how likely this decoder is
269  for decoding some format that has the given name.
270 
271  A good match returns a positive number around
272  the length of the string. A bad match is negative.
273 
274  The default implementation calls simpleHeuristicNameMatch()
275  with the name of the codec.
276 */
277 int QTextCodec::heuristicNameMatch(const char* hint) const
278 {
279  return simpleHeuristicNameMatch(name(),hint);
280 }
281 
282 
283 // returns a string cotnaining the letters and numbers from input,
284 // with a space separating run of a character class. e.g. "iso8859-1"
285 // becomes "iso 8859 1"
286 static QString lettersAndNumbers( const char * input )
287 {
288  QString result;
289  QChar c;
290 
291  while( input && *input ) {
292  c = *input;
293  if ( c.isLetter() || c.isNumber() )
294  result += c.lower();
295  if ( input[1] ) {
296  // add space at character class transition, except
297  // transition from upper-case to lower-case letter
298  QChar n( input[1] );
299  if ( c.isLetter() && n.isLetter() ) {
300  if ( c == c.lower() && n == n.upper() )
301  result += ' ';
302  } else if ( c.category() != n.category() ) {
303  result += ' ';
304  }
305  }
306  input++;
307  }
308  return result.simplifyWhiteSpace();
309 }
310 
311 /*!
312  A simple utility function for heuristicNameMatch() - it
313  does some very minor character-skipping
314  so that almost-exact matches score high.
315 */
316 int QTextCodec::simpleHeuristicNameMatch(const char* name, const char* hint)
317 {
318  // if they're the same, return a perfect score.
319  if ( name && hint && qstrcmp( name, hint ) == 0 )
320  return qstrlen( hint );
321 
322  // if the letters and numbers are the same, we have an "almost"
323  // perfect match.
324  QString h( lettersAndNumbers( hint ) );
325  QString n( lettersAndNumbers( name ) );
326  if ( h == n )
327  return qstrlen( hint )-1;
328 
329  if ( h.stripWhiteSpace() == n.stripWhiteSpace() )
330  return qstrlen( hint )-2;
331 
332  // could do some more here, but I don't think it's worth it
333 
334  return 0;
335 }
336 
337 
338 /*!
339 Returns the QTextCodec \a i places from the most recently
340  inserted, or NULL if there is no such QTextCodec. Thus,
341  codecForIndex(0) returns the most recently created QTextCodec.
342 */
344 {
345  setup();
346  return (uint)i >= all->count() ? 0 : all->at(i);
347 }
348 
349 
350 /*!
351  Returns the QTextCodec which matches the
352  \link QTextCodec::mibEnum() MIBenum\endlink \a mib.
353 */
355 {
356  setup();
359  for ( ; (result=i); ++i ) {
360  if ( result->mibEnum()==mib )
361  break;
362  }
363  return result;
364 }
365 
366 
367 
368 
369 
370 #ifdef _OS_WIN32_
371 class QWindowsLocalCodec: public QTextCodec
372 {
373 public:
374  QWindowsLocalCodec();
375  ~QWindowsLocalCodec();
376 
377  QString toUnicode(const char* chars, int len) const;
378  QCString fromUnicode(const QString& uc, int& lenInOut ) const;
379 
380  const char* name() const;
381  int mibEnum() const;
382 
383  int heuristicContentMatch(const char* chars, int len) const;
384 };
385 
386 QWindowsLocalCodec::QWindowsLocalCodec()
387 {
388 }
389 
390 QWindowsLocalCodec::~QWindowsLocalCodec()
391 {
392 }
393 
394 
395 QString QWindowsLocalCodec::toUnicode(const char* chars, int len) const
396 {
397  if ( len == 1 && chars ) { // Optimization; avoids allocation
398  char c[2];
399  c[0] = *chars;
400  c[1] = 0;
401  return qt_winMB2QString( c, 2 );
402  }
403  if ( len < 0 )
404  return qt_winMB2QString( chars );
405  QCString s(chars,len+1);
406  return qt_winMB2QString(s);
407 }
408 
409 QCString QWindowsLocalCodec::fromUnicode(const QString& uc, int& lenInOut ) const
410 {
411  QCString r = qt_winQString2MB( uc, lenInOut );
412  lenInOut = r.length();
413  return r;
414 }
415 
416 
417 const char* QWindowsLocalCodec::name() const
418 {
419  return "System";
420 }
421 
422 int QWindowsLocalCodec::mibEnum() const
423 {
424  return 0;
425 }
426 
427 
428 int QWindowsLocalCodec::heuristicContentMatch(const char* chars, int len) const
429 {
430  // ### Not a bad default implementation?
431  QString t = toUnicode(chars,len);
432  int l = t.length();
433  QCString mb = fromUnicode(t,l);
434  int i=0;
435  while ( i < len )
436  if ( chars[i] == mb[i] )
437  i++;
438  return i;
439 }
440 
441 #else
442 
/*
  Locale aliases, mostly copied from XFree86, grouped by the 8-bit
  encoding they map to.  Every table ends with a 0 sentinel so it can be
  scanned without a separate length.
*/

/* ISO 8859-2 */
static const char * const iso8859_2locales[] = {
    "croatian", "cs", "cs_CS", "cs_CZ","cz", "cz_CZ",
    "czech", "hr", "hr_HR", "hu", "hu_HU", "hungarian",
    "pl", "pl_PL", "polish", "ro", "ro_RO", "rumanian",
    "serbocroatian", "sh", "sh_SP", "sh_YU", "sk", "sk_SK",
    "sl", "sl_CS", "sl_SI", "slovak", "slovene", "sr_SP",
    0
};

/* ISO 8859-3 */
static const char * const iso8859_3locales[] = {
    "eo",
    0
};

/* ISO 8859-4 */
static const char * const iso8859_4locales[] = {
    "ee", "ee_EE", "lt", "lt_LT", "lv", "lv_LV",
    0
};

/* ISO 8859-5 */
static const char * const iso8859_5locales[] = {
    "bg", "bg_BG", "bulgarian", "mk", "mk_MK", "sp", "sp_YU",
    0
};

/* ISO 8859-6 */
static const char * const iso8859_6locales[] = {
    "ar_AA", "ar_SA", "arabic",
    0
};

/* ISO 8859-7 */
static const char * const iso8859_7locales[] = {
    "el", "el_GR", "greek",
    0
};

/* ISO 8859-8 */
static const char * const iso8859_8locales[] = {
    "hebrew", "he", "he_IL", "iw", "iw_IL",
    0
};

/* ISO 8859-9 */
static const char * const iso8859_9locales[] = {
    "tr", "tr_TR", "turkish",
    0
};

/* ISO 8859-15 */
static const char * const iso8859_15locales[] = {
    "fr", "fi", "french", "finnish", "et", "et_EE",
    0
};

/* KOI8-U */
static const char * const koi8_ulocales[] = {
    "uk", "uk_UA", "ru_UA", "ukrainian",
    0
};

/* TIS-620 */
static const char * const tis_620locales[] = {
    "th", "th_TH", "thai",
    0
};
480 
481 
482 static bool try_locale_list( const char * const locale[], const char * lang )
483 {
484  int i;
485  for( i=0; locale[i] && qstrcmp(locale[i], lang); i++ )
486  { }
487  return locale[i] != 0;
488 }
489 
490 // For the probably_koi8_locales we have to look. The standard says
491 // these are 8859-5, but almost all Russian users use KOI8-R and
492 // incorrectly set $LANG to ru_RU. We'll check tolower() to see what
493 // tolower() thinks ru_RU means.
494 
495 // If you read the history, it seems that many Russians blame ISO and
496 // Perestroika for the confusion.
497 //
498 // The real bug is that some programs break if the user specifies
499 // ru_RU.KOI8-R.
500 
501 static const char * const probably_koi8_rlocales[] = {
502  "ru", "ru_SU", "ru_RU", "russian", 0 };
503 
504 // this means ANY of these locale aliases. if they're aliases for
505 // different locales, the code breaks.
506 static QTextCodec * ru_RU_codec = 0;
507 
508 static QTextCodec * ru_RU_hack( const char * i ) {
509  if ( ! ru_RU_codec ) {
510  QCString origlocale = setlocale( LC_CTYPE, i );
511  // unicode koi8r latin5 name
512  // 0x044E 0xC0 0xEE CYRILLIC SMALL LETTER YU
513  // 0x042E 0xE0 0xCE CYRILLIC CAPITAL LETTER YU
514  int latin5 = tolower( 0xCE );
515  int koi8r = tolower( 0xE0 );
516  if ( koi8r == 0xC0 && latin5 != 0xEE ) {
517  ru_RU_codec = QTextCodec::codecForName( "KOI8-R" );
518  } else if ( koi8r != 0xC0 && latin5 == 0xEE ) {
519  ru_RU_codec = QTextCodec::codecForName( "ISO 8859-5" );
520  } else {
521  // something else again... let's assume... *throws dice*
522  ru_RU_codec = QTextCodec::codecForName( "KOI8-R" );
523  qWarning( "QTextCodec: using KOI8-R, probe failed (%02x %02x %s)",
524  koi8r, latin5, i );
525  }
526  setlocale( LC_CTYPE, origlocale.data() );
527  }
528  return ru_RU_codec;
529 }
530 
531 #endif
532 
534 
536 {
537  localeMapper = codec;
538 }
539 
540 /*! Returns a pointer to the codec most suitable for this locale. */
541 
543 {
544  if ( localeMapper )
545  return localeMapper;
546 
547  setup();
548 
549 #ifdef _OS_WIN32_
550  localeMapper = new QWindowsLocalCodec;
551 #else
552  // Very poorly defined and followed standards causes lots of code
553  // to try to get all the cases...
554 
555  char * lang = qstrdup( getenv("LANG") );
556 
557  char * p = lang ? strchr( lang, '.' ) : 0;
558  if ( !p || *p != '.' ) {
559  // Some versions of setlocale return encoding, others not.
560  char *ctype = qstrdup( setlocale( LC_CTYPE, 0 ) );
561  // Some Linux distributions have broken locales which will return
562  // "C" for LC_CTYPE
563  if ( qstrcmp( ctype, "C" ) == 0 ) {
564  delete [] ctype;
565  } else {
566  if ( lang )
567  delete [] lang;
568  lang = ctype;
569  p = lang ? strchr( lang, '.' ) : 0;
570  }
571  }
572 
573  if( p && *p == '.' ) {
574  // if there is an encoding and we don't know it, we return 0
575  // User knows what they are doing. Codecs will believe them.
576  localeMapper = codecForName( lang );
577  if ( !localeMapper ) {
578  // Use or codec disagree.
579  localeMapper = codecForName( p+1 );
580  }
581  }
582  if ( !localeMapper || !(p && *p == '.') ) {
583  // if there is none, we default to 8859-1
584  // We could perhaps default to 8859-15.
585  if ( try_locale_list( iso8859_2locales, lang ) )
586  localeMapper = codecForName( "ISO 8859-2" );
587  else if ( try_locale_list( iso8859_3locales, lang ) )
588  localeMapper = codecForName( "ISO 8859-3" );
589  else if ( try_locale_list( iso8859_4locales, lang ) )
590  localeMapper = codecForName( "ISO 8859-4" );
591  else if ( try_locale_list( iso8859_5locales, lang ) )
592  localeMapper = codecForName( "ISO 8859-5" );
593  else if ( try_locale_list( iso8859_6locales, lang ) )
594  localeMapper = codecForName( "ISO 8859-6-I" );
595  else if ( try_locale_list( iso8859_7locales, lang ) )
596  localeMapper = codecForName( "ISO 8859-7" );
597  else if ( try_locale_list( iso8859_8locales, lang ) )
598  localeMapper = codecForName( "ISO 8859-8-I" );
599  else if ( try_locale_list( iso8859_9locales, lang ) )
600  localeMapper = codecForName( "ISO 8859-9" );
601  else if ( try_locale_list( iso8859_15locales, lang ) )
602  localeMapper = codecForName( "ISO 8859-15" );
603  else if ( try_locale_list( tis_620locales, lang ) )
604  localeMapper = codecForName( "ISO 8859-11" );
605  else if ( try_locale_list( koi8_ulocales, lang ) )
606  localeMapper = codecForName( "KOI8-U" );
607  else if ( try_locale_list( probably_koi8_rlocales, lang ) )
608  localeMapper = ru_RU_hack( lang );
609  else if (!lang || !(localeMapper = codecForName(lang) ))
610  localeMapper = codecForName( "ISO 8859-1" );
611  }
612  delete[] lang;
613 #endif
614 
615  return localeMapper;
616 }
617 
618 
619 /*!
620  Searches all installed QTextCodec objects, returning the one
621  which best matches given name. Returns NULL if no codec has
622  a match closeness above \a accuracy.
623 
624  \sa heuristicNameMatch()
625 */
626 QTextCodec* QTextCodec::codecForName(const char* hint, int accuracy)
627 {
628  setup();
630  QTextCodec* result = 0;
631  int best=accuracy;
632  for ( QTextCodec* cursor; (cursor=i); ++i ) {
633  int s = cursor->heuristicNameMatch(hint);
634  if ( s > best ) {
635  best = s;
636  result = cursor;
637  }
638  }
639  return result;
640 }
641 
642 
643 /*!
644  Searches all installed QTextCodec objects, returning the one
645  which most recognizes the given content. May return 0.
646 
647  Note that this is often a poor choice, since character
648  encodings often use most of the available character sequences,
649  and so only by linguistic analysis could a true match be made.
650 
651  \sa heuristicContentMatch()
652 */
653 QTextCodec* QTextCodec::codecForContent(const char* chars, int len)
654 {
655  setup();
657  QTextCodec* result = 0;
658  int best=0;
659  for ( QTextCodec* cursor; (cursor=i); ++i ) {
660  int s = cursor->heuristicContentMatch(chars,len);
661  if ( s > best ) {
662  best = s;
663  result = cursor;
664  }
665  }
666  return result;
667 }
668 
669 
670 /*!
671  \fn const char* QTextCodec::name() const
672  Subclasses of QTextCodec must reimplement this function. It returns
673  the name of the encoding supported by the subclass. When choosing
674  a name for an encoding, consider these points:
675  <ul>
676  <li>On X11, heuristicNameMatch( const char * hint )
677 is used to test if the QTextCodec
678  can convert between Unicode and the encoding of a font
679  with encoding \e hint, such as "iso8859-1" for Latin-1 fonts,
680  "koi8-r" for Russian KOI8 fonts.
681  The default algorithm of heuristicNameMatch() uses name().
682  <li>Some applications may use this function to present
683  encodings to the end user.
684  </ul>
685 */
686 
687 /*!
688  \fn int QTextCodec::mibEnum() const
689 
690  Subclasses of QTextCodec must reimplement this function. It returns the
691  MIBenum (see
692  <a href="ftp://ftp.isi.edu/in-notes/iana/assignments/character-sets">
693  the IANA character-sets encoding file</a> for more information).
694  It is important that each QTextCodec subclass return the correct unique
695  value for this function.
696 */
697 
698 
699 /*!
700  \fn int QTextCodec::heuristicContentMatch(const char* chars, int len) const
701 
702  Subclasses of QTextCodec must reimplement this function. It examines
703  the first \a len bytes of \a chars and returns a value indicating how
704  likely it is that the string is a prefix of text encoded in the
705  encoding of the subclass. Any negative return value indicates that the text
706  is detectably not in the encoding (eg. it contains undefined characters).
707  A return value of 0 indicates that the text should be decoded with this
708  codec rather than as ASCII, but there
709  is no particular evidence. The value should range up to \a len. Thus,
710  most decoders will return -1, 0, or -\a len.
711 
712  The characters are not null terminated.
713 
714  \sa codecForContent().
715 */
716 
717 
718 /*!
719  Creates a QTextDecoder which stores enough state to decode chunks
720  of char* data to create chunks of Unicode data. The default implementation
721  creates a stateless decoder, which is sufficient for only the simplest
722  encodings where each byte corresponds to exactly one Unicode character.
723 
724  The caller is responsible for deleting the returned object.
725 */
727 {
728  return new QTextStatelessDecoder(this);
729 }
730 
731 
732 /*!
733  Creates a QTextEncoder which stores enough state to encode chunks
734  of Unicode data as char* data. The default implementation
735  creates a stateless encoder, which is sufficient for only the simplest
736  encodings where each Unicode character corresponds to exactly one char.
737 
738  The caller is responsible for deleting the returned object.
739 */
741 {
742  return new QTextStatelessEncoder(this);
743 }
744 
745 
746 /*!
747  Subclasses of QTextCodec must reimplement this function or
748  makeDecoder(). It converts the first \a len characters of \a chars
749  to Unicode.
750 
751  The default implementation makes a decoder with makeDecoder() and
752  converts the input with that. Note that the default makeDecoder()
753  implementation makes a decoder that simply calls
754  this function, hence subclasses \e must reimplement one function or
755  the other to avoid infinite recursion.
756 */
757 QString QTextCodec::toUnicode(const char* chars, int len) const
758 {
759  QTextDecoder* i = makeDecoder();
760  QString result = i->toUnicode(chars,len);
761  delete i;
762  return result;
763 }
764 
765 
766 /*!
767  Subclasses of QTextCodec must reimplement either this function or
768  makeEncoder(). It converts the first \a lenInOut characters of \a
769  uc from Unicode to the encoding of the subclass. If \a lenInOut
770  is negative or too large, the length of \a uc is used instead.
771 
772 The encoded text is returned by value as a QCString, so there
773 is nothing for the caller to delete. The length of the
774 resulting encoded byte sequence is returned in \a lenInOut.
775 
776  The default implementation makes an encoder with makeEncoder() and
777  converts the input with that. Note that the default makeEncoder()
778  implementation makes an encoder that simply calls
779  this function, hence subclasses \e must reimplement one function or
780  the other to avoid infinite recursion.
781 */
782 
783 QCString QTextCodec::fromUnicode(const QString& uc, int& lenInOut) const
784 {
785  QTextEncoder* i = makeEncoder();
786  QCString result = i->fromUnicode(uc, lenInOut);
787  delete i;
788  return result;
789 }
790 
791 /*!
792  \overload QCString QTextCodec::fromUnicode(const QString& uc) const
793 */
795 {
796  int l = uc.length();
797  return fromUnicode(uc,l);
798 }
799 
800 /*!
801  \overload QString QTextCodec::toUnicode(const QByteArray& a, int len) const
802 */
804 {
805  int l = a.size();
806  if( l > 0 && a.data()[l - 1] == '\0' ) l--;
807  l = QMIN( l, len );
808  return toUnicode( a.data(), l );
809 }
810 
811 /*!
812  \overload QString QTextCodec::toUnicode(const QByteArray& a) const
813 */
815 {
816  int l = a.size();
817  if( l > 0 && a.data()[l - 1] == '\0' ) l--;
818  return toUnicode( a.data(), l );
819 }
820 
821 /*!
822  \overload QString QTextCodec::toUnicode(const char* chars) const
823 */
824 QString QTextCodec::toUnicode(const char* chars) const
825 {
826  return toUnicode(chars,qstrlen(chars));
827 }
828 
829 /*!
830  Returns TRUE if the unicode character \a ch can be fully encoded
831  with this codec. The default implementation tests if the result of
832  toUnicode(fromUnicode(ch)) is the original \a ch. Subclasses may be
833  able to improve the efficiency.
834 */
835 bool QTextCodec::canEncode( QChar ch ) const
836 {
837  return toUnicode(fromUnicode(ch)) == ch;
838 }
839 
840 /*!
841  Returns TRUE if the unicode string \a s can be fully encoded
842  with this codec. The default implementation tests if the result of
843  toUnicode(fromUnicode(s)) is the original \a s. Subclasses may be
844  able to improve the efficiency.
845 */
846 bool QTextCodec::canEncode( const QString& s ) const
847 {
848  return toUnicode(fromUnicode(s)) == s;
849 }
850 
851 
852 
853 /*!
854  \class QTextEncoder qtextcodec.h
855  \brief State-based encoder
856 
857  A QTextEncoder converts Unicode into another format, remembering
858  any state that is required between calls.
859 
860  \sa QTextCodec::makeEncoder()
861 */
862 
863 /*!
864  Destructs the encoder.
865 */
867 {
868 }
869 /*!
870  \fn QCString QTextEncoder::fromUnicode(const QString& uc, int& lenInOut)
871 
872  Converts \a lenInOut characters (not bytes) from \a uc, producing
873  a QCString. \a lenInOut will also be set to the
874  \link QCString::length() length\endlink of the result (in bytes).
875 
876  The encoder is free to record state to use when subsequent calls are
877  made to this function (for example, it might change modes with escape
878  sequences if needed during the encoding of one string, then assume that
879  mode applies when a subsequent call begins).
880 */
881 
882 /*!
883  \class QTextDecoder qtextcodec.h
884  \brief State-based decoder
885 
886 A QTextDecoder converts a text format into Unicode, remembering
887 any state that is required between calls.
888 
889 \sa QTextCodec::makeDecoder()
890 */
891 
892 
893 /*!
894  Destructs the decoder.
895 */
897 {
898 }
899 
900 /*!
901  \fn QString QTextDecoder::toUnicode(const char* chars, int len)
902 
903  Converts the first \a len bytes at \a chars to Unicode, returning the
904  result.
905 
906  If not all characters are used (eg. only part of a multi-byte
907  encoding is at the end of the characters), the decoder remembers
908  enough state to continue with the next call to this function.
909 */
910 
911 #define CHAINED 0xffff
912 
914  // If multibyte, ignore unicode and index into multibyte
915  // with the next character.
916  QMultiByteUnicodeTable() : unicode(0xfffd), multibyte(0) { }
917 
919  {
920  if ( multibyte )
921  delete [] multibyte;
922  }
923 
926 };
927 
928 #ifndef QT_NO_CODECS
/*
  Parses one escaped byte constant of a charmap line.

  On entry cursor points at the escape character.  The constant that
  follows is hexadecimal when introduced by 'x', decimal when introduced
  by 'd', and octal otherwise (the octal digits follow the escape
  character directly).  On return cursor points just past the digits
  that were consumed.

  Returns the value masked to 0..255, or 0 if cursor is already at the
  end of the string or no digits could be parsed.
*/
static int getByte(char* &cursor)
{
    int byte = 0;
    if ( *cursor ) {
        if ( cursor[1] == 'x' )
            byte = (int)strtol(cursor+2,&cursor,16);
        else if ( cursor[1] == 'd' )
            byte = (int)strtol(cursor+2,&cursor,10);
        else
            // Octal has no introducer letter: start right after the
            // escape character.  Starting at cursor+2 dropped the first
            // digit and, for an escape at the very end of the line,
            // read past the terminating NUL.
            byte = (int)strtol(cursor+1,&cursor,8);
    }
    return byte&0xff;
}
942 
943 class QTextCodecFromIOD;
944 
948 public:
950  QString toUnicode(const char* chars, int len);
951 };
952 
955 
957 
958  // If from_unicode_page[row()][cell()] is 0 and from_unicode_page_multibyte,
959  // use from_unicode_page_multibyte[row()][cell()] as string.
962  char unkn;
963 
964  // Only one of these is used
969 
970  bool stateless() const { return !to_unicode_multibyte; }
971 
972 public:
974  {
975  from_unicode_page = 0;
976  to_unicode_multibyte = 0;
977  to_unicode = 0;
978  from_unicode_page_multibyte = 0;
979  max_bytes_per_char = 1;
980 
981  const int maxlen=100;
982  char line[maxlen];
983  char esc='\\';
984  char comm='%';
985  bool incmap = FALSE;
986  while (iod->readLine(line,maxlen) > 0) {
987  if (0==qstrnicmp(line,"<code_set_name>",15))
988  n = line+15;
989  else if (0==qstrnicmp(line,"<escape_char> ",14))
990  esc = line[14];
991  else if (0==qstrnicmp(line,"<comment_char> ",15))
992  comm = line[15];
993  else if (line[0]==comm && 0==qstrnicmp(line+1," alias ",7)) {
994  aliases.append(line+8);
995  } else if (0==qstrnicmp(line,"CHARMAP",7)) {
996  if (!from_unicode_page) {
997  from_unicode_page = new char*[256];
998  for (int i=0; i<256; i++)
999  from_unicode_page[i]=0;
1000  }
1001  if (!to_unicode) {
1002  to_unicode = new ushort[256];
1003  }
1004  incmap = TRUE;
1005  } else if (0==qstrnicmp(line,"END CHARMAP",11))
1006  break;
1007  else if (incmap) {
1008  char* cursor = line;
1009  int byte=0,unicode=-1;
1010  ushort* mb_unicode=0;
1011  const int maxmb=8; // more -> we'll need to improve datastructures
1012  char mb[maxmb+1];
1013  int nmb=0;
1014 
1015  while (*cursor) {
1016  if (cursor[0]=='<' && cursor[1]=='U' &&
1017  cursor[2]>='0' && cursor[2]<='9' &&
1018  cursor[3]>='0' && cursor[3]<='9') {
1019 
1020  unicode = (int)strtol(cursor+2,&cursor,16);
1021 
1022  } else if (*cursor==esc) {
1023 
1024  byte = getByte(cursor);
1025 
1026  if ( *cursor == esc ) {
1027  if ( !to_unicode_multibyte ) {
1028  to_unicode_multibyte =
1029  new QMultiByteUnicodeTable[256];
1030  for (int i=0; i<256; i++) {
1031  to_unicode_multibyte[i].unicode =
1032  to_unicode[i];
1033  to_unicode_multibyte[i].multibyte = 0;
1034  }
1035  delete [] to_unicode;
1036  to_unicode = 0;
1037  }
1038  QMultiByteUnicodeTable* mbut =
1039  to_unicode_multibyte+byte;
1040  mb[nmb++] = byte;
1041  while ( nmb < maxmb && *cursor == esc ) {
1042  // Always at least once
1043 
1044  mbut->unicode = CHAINED;
1045  byte = getByte(cursor);
1046  mb[nmb++] = byte;
1047  if (!mbut->multibyte) {
1048  mbut->multibyte =
1049  new QMultiByteUnicodeTable[256];
1050  }
1051  mbut = mbut->multibyte+byte;
1052  mb_unicode = & mbut->unicode;
1053  }
1054 
1055  if ( nmb > max_bytes_per_char )
1056  max_bytes_per_char = nmb;
1057  }
1058  } else {
1059  cursor++;
1060  }
1061  }
1062 
1063  if (unicode >= 0 && unicode <= 0xffff)
1064  {
1065  QChar ch((ushort)unicode);
1066  if (!from_unicode_page[ch.row()]) {
1067  from_unicode_page[ch.row()] = new char[256];
1068  for (int i=0; i<256; i++)
1069  from_unicode_page[ch.row()][i]=0;
1070  }
1071  if ( mb_unicode ) {
1072  from_unicode_page[ch.row()][ch.cell()] = 0;
1073  if (!from_unicode_page_multibyte) {
1074  from_unicode_page_multibyte = new char**[256];
1075  for (int i=0; i<256; i++)
1076  from_unicode_page_multibyte[i]=0;
1077  }
1078  if (!from_unicode_page_multibyte[ch.row()]) {
1079  from_unicode_page_multibyte[ch.row()] = new char*[256];
1080  for (int i=0; i<256; i++)
1081  from_unicode_page_multibyte[ch.row()][i] = 0;
1082  }
1083  mb[nmb++] = 0;
1084  from_unicode_page_multibyte[ch.row()][ch.cell()]
1085  = qstrdup(mb);
1086  *mb_unicode = unicode;
1087  } else {
1088  from_unicode_page[ch.row()][ch.cell()] = (char)byte;
1089  if ( to_unicode )
1090  to_unicode[byte] = unicode;
1091  else
1092  to_unicode_multibyte[byte].unicode = unicode;
1093  }
1094  } else {
1095  }
1096  }
1097  }
1098  n = n.stripWhiteSpace();
1099 
1100  unkn = '?'; // ##### Might be a bad choice.
1101  }
1102 
1104  {
1105  if ( from_unicode_page ) {
1106  for (int i=0; i<256; i++)
1107  if (from_unicode_page[i])
1108  delete [] from_unicode_page[i];
1109  }
1110  if ( from_unicode_page_multibyte ) {
1111  for (int i=0; i<256; i++)
1112  if (from_unicode_page_multibyte[i])
1113  for (int j=0; j<256; j++)
1114  if (from_unicode_page_multibyte[i][j])
1115  delete [] from_unicode_page_multibyte[i][j];
1116  }
1117  if ( to_unicode )
1118  delete [] to_unicode;
1119  if ( to_unicode_multibyte )
1120  delete [] to_unicode_multibyte;
1121  }
1122 
1123  bool ok() const
1124  {
1125  return !!from_unicode_page;
1126  }
1127 
1129  {
1130  if ( stateless() )
1131  return QTextCodec::makeDecoder();
1132  else
1133  return new QTextCodecFromIODDecoder(this);
1134  }
1135 
1136  const char* name() const
1137  {
1138  return n;
1139  }
1140 
1141  int mibEnum() const
1142  {
1143  return 0; // #### Unknown.
1144  }
1145 
1146  int heuristicContentMatch(const char*, int) const
1147  {
1148  return 0;
1149  }
1150 
1151  int heuristicNameMatch(const char* hint) const
1152  {
1153  int bestr = QTextCodec::heuristicNameMatch(hint);
1154  QStrListIterator it(aliases);
1155  char* a;
1156  while ((a=it.current())) {
1157  ++it;
1158  int r = simpleHeuristicNameMatch(a,hint);
1159  if (r > bestr)
1160  bestr = r;
1161  }
1162  return bestr;
1163  }
1164 
1165  QString toUnicode(const char* chars, int len) const
1166  {
1167  const uchar* uchars = (const uchar*)chars;
1168  QString result;
1169  QMultiByteUnicodeTable* multibyte=to_unicode_multibyte;
1170  if ( multibyte ) {
1171  while (len--) {
1172  QMultiByteUnicodeTable& mb = multibyte[*uchars];
1173  if ( mb.multibyte ) {
1174  // Chained multi-byte
1175  multibyte = mb.multibyte;
1176  } else {
1177  result += QChar(mb.unicode);
1178  multibyte=to_unicode_multibyte;
1179  }
1180  uchars++;
1181  }
1182  } else {
1183  while (len--)
1184  result += QChar(to_unicode[*uchars++]);
1185  }
1186  return result;
1187  }
1188 
1189  QCString fromUnicode(const QString& uc, int& lenInOut) const
1190  {
1191  if (lenInOut > (int)uc.length())
1192  lenInOut = uc.length();
1193  int rlen = lenInOut*max_bytes_per_char;
1194  QCString rstr(rlen);
1195  char* cursor = rstr.rawData();
1196  char* s=0;
1197  int l = lenInOut;
1198  int lout = 0;
1199  for (int i=0; i<l; i++) {
1200  QChar ch = uc[i];
1201  if ( ch == QChar::null ) {
1202  // special
1203  *cursor++ = 0;
1204  } else if ( from_unicode_page[ch.row()] &&
1205  from_unicode_page[ch.row()][ch.cell()] )
1206  {
1207  *cursor++ = from_unicode_page[ch.row()][ch.cell()];
1208  lout++;
1209  } else if ( from_unicode_page_multibyte &&
1210  from_unicode_page_multibyte[ch.row()] &&
1211  (s=from_unicode_page_multibyte[ch.row()][ch.cell()]) )
1212  {
1213  while (*s) {
1214  *cursor++ = *s++;
1215  lout++;
1216  }
1217  } else {
1218  *cursor++ = unkn;
1219  lout++;
1220  }
1221  }
1222  *cursor = 0;
1223  lenInOut = lout;
1224  return rstr;
1225  }
1226 };
1227 
1229  codec(c)
1230 {
1232 }
1233 
1235 {
1236  const uchar* uchars = (const uchar*)chars;
1237  QString result;
1238  while (len--) {
1239  QMultiByteUnicodeTable& t = mb[*uchars];
1240  if ( t.multibyte ) {
1241  // Chained multi-byte
1242  mb = t.multibyte;
1243  } else {
1244  if ( t.unicode )
1245  result += QChar(t.unicode);
1247  }
1248  uchars++;
1249  }
1250  return result;
1251 }
1252 
1253 /*!
1254  Reads a POSIX2 charmap definition from \a iod.
1255  The parser recognizes the following lines:
1256 <pre>
1257  &lt;code_set_name&gt; <i>name</i>
1258  &lt;escape_char&gt; <i>character</i>
1259  % alias <i>alias</i>
1260  CHARMAP
1261  &lt;<i>token</i>&gt; /x<i>hexbyte</i> &lt;U<i>unicode</i>&gt; ...
1262  &lt;<i>token</i>&gt; /d<i>decbyte</i> &lt;U<i>unicode</i>&gt; ...
1263  &lt;<i>token</i>&gt; /<i>octbyte</i> &lt;U<i>unicode</i>&gt; ...
1264  &lt;<i>token</i>&gt; /<i>any</i>/<i>any</i>... &lt;U<i>unicode</i>&gt; ...
1265  END CHARMAP
1266 </pre>
1267 
1268  The resulting QTextCodec is returned (and also added to the
1269  global list of codecs). The name() of the result is taken
1270  from the code_set_name.
1271 
1272  Note that a codec constructed in this way uses much more memory
1273  and is slower than a hand-written QTextCodec subclass, since
1274  tables compiled into code live in memory that is shared by all
1275  applications using Qt simultaneously.
1276 
1277  \sa loadCharmapFile()
1278 */
1280 {
1282  if ( !r->ok() ) {
1283  delete r;
1284  r = 0;
1285  }
1286  return r;
1287 }
1288 
1289 /*!
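1290  A convenience function for loadCharmap() that reads the charmap definition from the file \a filename. Returns 0 if the file cannot be opened or the resulting codec is not ok().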
1291 */
1293 {
1294  QFile f(filename);
1295  if (f.open(IO_ReadOnly)) {
1297  if ( !r->ok() )
1298  delete r;
1299  else
1300  return r;
1301  }
1302  return 0;
1303 }
1304 #endif //QT_NO_CODECS
1305 
1306 
1307 /*!
1308  Returns a string representing the current language.
1309 */
1310 
1311 const char* QTextCodec::locale()
1312 {
1313  static QCString lang;
1314  if ( lang.isEmpty() ) {
1315  lang = getenv( "LANG" ); //########Windows??
1316  if ( lang.isEmpty() )
1317  lang = "C";
1318  }
1319  return lang;
1320 }
1321 
1322 
1323 
1324 #ifndef QT_NO_CODECS
1325 
1327 {
1328 public:
1329  QSimpleTextCodec( int );
1330  ~QSimpleTextCodec();
1331 
1332  QString toUnicode(const char* chars, int len) const;
1333  QCString fromUnicode(const QString& uc, int& lenInOut ) const;
1334 
1335  const char* name() const;
1336  int mibEnum() const;
1337 
1338  int heuristicContentMatch(const char* chars, int len) const;
1339 
1340  int heuristicNameMatch(const char* hint) const;
1341 
1342 private:
1344 };
1345 
1346 
1347 #define LAST_MIB 2259
1348 
1349 static struct {
1350  const char * cs;
1351  int mib;
1353 } unicodevalues[] = {
1354  // from RFC 1489, ftp://ftp.isi.edu/in-notes/rfc1489.txt
1355  { "KOI8-R", 2084,
1356  { 0x2500, 0x2502, 0x250C, 0x2510, 0x2514, 0x2518, 0x251C, 0x2524,
1357  0x252C, 0x2534, 0x253C, 0x2580, 0x2584, 0x2588, 0x258C, 0x2590,
1358  0x2591, 0x2592, 0x2593, 0x2320, 0x25A0, 0x2219/**/, 0x221A, 0x2248,
1359  0x2264, 0x2265, 0x00A0, 0x2321, 0x00B0, 0x00B2, 0x00B7, 0x00F7,
1360  0x2550, 0x2551, 0x2552, 0x0451, 0x2553, 0x2554, 0x2555, 0x2556,
1361  0x2557, 0x2558, 0x2559, 0x255A, 0x255B, 0x255C, 0x255D, 0x255E,
1362  0x255F, 0x2560, 0x2561, 0x0401, 0x2562, 0x2563, 0x2564, 0x2565,
1363  0x2566, 0x2567, 0x2568, 0x2569, 0x256A, 0x256B, 0x256C, 0x00A9,
1364  0x044E, 0x0430, 0x0431, 0x0446, 0x0434, 0x0435, 0x0444, 0x0433,
1365  0x0445, 0x0438, 0x0439, 0x043A, 0x043B, 0x043C, 0x043D, 0x043E,
1366  0x043F, 0x044F, 0x0440, 0x0441, 0x0442, 0x0443, 0x0436, 0x0432,
1367  0x044C, 0x044B, 0x0437, 0x0448, 0x044D, 0x0449, 0x0447, 0x044A,
1368  0x042E, 0x0410, 0x0411, 0x0426, 0x0414, 0x0415, 0x0424, 0x0413,
1369  0x0425, 0x0418, 0x0419, 0x041A, 0x041B, 0x041C, 0x041D, 0x041E,
1370  0x041F, 0x042F, 0x0420, 0x0421, 0x0422, 0x0423, 0x0416, 0x0412,
1371  0x042C, 0x042B, 0x0417, 0x0428, 0x042D, 0x0429, 0x0427, 0x042A } },
1372  // /**/ - The BULLET OPERATOR is confused. Some people think
1373  // it should be 0x2022 (BULLET).
1374 
1375  // from RFC 2319, ftp://ftp.isi.edu/in-notes/rfc2319.txt
1376  { "KOI8-U", 2088,
1377  { 0x2500, 0x2502, 0x250C, 0x2510, 0x2514, 0x2518, 0x251C, 0x2524,
1378  0x252C, 0x2534, 0x253C, 0x2580, 0x2584, 0x2588, 0x258C, 0x2590,
1379  0x2591, 0x2592, 0x2593, 0x2320, 0x25A0, 0x2219, 0x221A, 0x2248,
1380  0x2264, 0x2265, 0x00A0, 0x2321, 0x00B0, 0x00B2, 0x00B7, 0x00F7,
1381  0x2550, 0x2551, 0x2552, 0x0451, 0x0454, 0x2554, 0x0456, 0x0457,
1382  0x2557, 0x2558, 0x2559, 0x255A, 0x255B, 0x0491, 0x255D, 0x255E,
1383  0x255F, 0x2560, 0x2561, 0x0401, 0x0404, 0x2563, 0x0406, 0x0407,
1384  0x2566, 0x2567, 0x2568, 0x2569, 0x256A, 0x0490, 0x256C, 0x00A9,
1385  0x044E, 0x0430, 0x0431, 0x0446, 0x0434, 0x0435, 0x0444, 0x0433,
1386  0x0445, 0x0438, 0x0439, 0x043A, 0x043B, 0x043C, 0x043D, 0x043E,
1387  0x043F, 0x044F, 0x0440, 0x0441, 0x0442, 0x0443, 0x0436, 0x0432,
1388  0x044C, 0x044B, 0x0437, 0x0448, 0x044D, 0x0449, 0x0447, 0x044A,
1389  0x042E, 0x0410, 0x0411, 0x0426, 0x0414, 0x0415, 0x0424, 0x0413,
1390  0x0425, 0x0418, 0x0419, 0x041A, 0x041B, 0x041C, 0x041D, 0x041E,
1391  0x041F, 0x042F, 0x0420, 0x0421, 0x0422, 0x0423, 0x0416, 0x0412,
1392  0x042C, 0x042B, 0x0417, 0x0428, 0x042D, 0x0429, 0x0427, 0x042A } },
1393 
1394  // next bits generated from tables on the Unicode 2.0 CD. we can
1395  // use these tables since this is part of the transition to using
1396  // unicode everywhere in qt.
1397 
1398  // $ for A in 8 9 A B C D E F ; do for B in 0 1 2 3 4 5 6 7 8 9 A B C D E F ; do echo 0x${A}${B} 0xFFFD ; done ; done > /tmp/digits ; for a in 8859-* ; do ( awk '/^0x[89ABCDEF]/{ print $1, $2 }' < $a ; cat /tmp/digits ) | sort | uniq -w4 | cut -c6- | paste '-d ' - - - - - - - - | sed -e 's/ /, /g' -e 's/$/,/' -e '$ s/,$/} },/' -e '1 s/^/{ /' > ~/tmp/$a ; done
1399 
1400  // then I inserted the files manually.
1401  { "ISO 8859-2", 5,
1402  { 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
1403  0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F,
1404  0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
1405  0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F,
1406  0x00A0, 0x0104, 0x02D8, 0x0141, 0x00A4, 0x013D, 0x015A, 0x00A7,
1407  0x00A8, 0x0160, 0x015E, 0x0164, 0x0179, 0x00AD, 0x017D, 0x017B,
1408  0x00B0, 0x0105, 0x02DB, 0x0142, 0x00B4, 0x013E, 0x015B, 0x02C7,
1409  0x00B8, 0x0161, 0x015F, 0x0165, 0x017A, 0x02DD, 0x017E, 0x017C,
1410  0x0154, 0x00C1, 0x00C2, 0x0102, 0x00C4, 0x0139, 0x0106, 0x00C7,
1411  0x010C, 0x00C9, 0x0118, 0x00CB, 0x011A, 0x00CD, 0x00CE, 0x010E,
1412  0x0110, 0x0143, 0x0147, 0x00D3, 0x00D4, 0x0150, 0x00D6, 0x00D7,
1413  0x0158, 0x016E, 0x00DA, 0x0170, 0x00DC, 0x00DD, 0x0162, 0x00DF,
1414  0x0155, 0x00E1, 0x00E2, 0x0103, 0x00E4, 0x013A, 0x0107, 0x00E7,
1415  0x010D, 0x00E9, 0x0119, 0x00EB, 0x011B, 0x00ED, 0x00EE, 0x010F,
1416  0x0111, 0x0144, 0x0148, 0x00F3, 0x00F4, 0x0151, 0x00F6, 0x00F7,
1417  0x0159, 0x016F, 0x00FA, 0x0171, 0x00FC, 0x00FD, 0x0163, 0x02D9} },
1418  { "ISO 8859-3", 6,
1419  { 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
1420  0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F,
1421  0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
1422  0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F,
1423  0x00A0, 0x0126, 0x02D8, 0x00A3, 0x00A4, 0xFFFD, 0x0124, 0x00A7,
1424  0x00A8, 0x0130, 0x015E, 0x011E, 0x0134, 0x00AD, 0xFFFD, 0x017B,
1425  0x00B0, 0x0127, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x0125, 0x00B7,
1426  0x00B8, 0x0131, 0x015F, 0x011F, 0x0135, 0x00BD, 0xFFFD, 0x017C,
1427  0x00C0, 0x00C1, 0x00C2, 0xFFFD, 0x00C4, 0x010A, 0x0108, 0x00C7,
1428  0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF,
1429  0xFFFD, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x0120, 0x00D6, 0x00D7,
1430  0x011C, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x016C, 0x015C, 0x00DF,
1431  0x00E0, 0x00E1, 0x00E2, 0xFFFD, 0x00E4, 0x010B, 0x0109, 0x00E7,
1432  0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF,
1433  0xFFFD, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x0121, 0x00F6, 0x00F7,
1434  0x011D, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x016D, 0x015D, 0x02D9} },
1435  { "ISO 8859-4", 7,
1436  { 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
1437  0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F,
1438  0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
1439  0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F,
1440  0x00A0, 0x0104, 0x0138, 0x0156, 0x00A4, 0x0128, 0x013B, 0x00A7,
1441  0x00A8, 0x0160, 0x0112, 0x0122, 0x0166, 0x00AD, 0x017D, 0x00AF,
1442  0x00B0, 0x0105, 0x02DB, 0x0157, 0x00B4, 0x0129, 0x013C, 0x02C7,
1443  0x00B8, 0x0161, 0x0113, 0x0123, 0x0167, 0x014A, 0x017E, 0x014B,
1444  0x0100, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x012E,
1445  0x010C, 0x00C9, 0x0118, 0x00CB, 0x0116, 0x00CD, 0x00CE, 0x012A,
1446  0x0110, 0x0145, 0x014C, 0x0136, 0x00D4, 0x00D5, 0x00D6, 0x00D7,
1447  0x00D8, 0x0172, 0x00DA, 0x00DB, 0x00DC, 0x0168, 0x016A, 0x00DF,
1448  0x0101, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x012F,
1449  0x010D, 0x00E9, 0x0119, 0x00EB, 0x0117, 0x00ED, 0x00EE, 0x012B,
1450  0x0111, 0x0146, 0x014D, 0x0137, 0x00F4, 0x00F5, 0x00F6, 0x00F7,
1451  0x00F8, 0x0173, 0x00FA, 0x00FB, 0x00FC, 0x0169, 0x016B, 0x02D9} },
1452  { "ISO 8859-5", 8,
1453  { 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
1454  0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F,
1455  0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
1456  0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F,
1457  0x00A0, 0x0401, 0x0402, 0x0403, 0x0404, 0x0405, 0x0406, 0x0407,
1458  0x0408, 0x0409, 0x040A, 0x040B, 0x040C, 0x00AD, 0x040E, 0x040F,
1459  0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417,
1460  0x0418, 0x0419, 0x041A, 0x041B, 0x041C, 0x041D, 0x041E, 0x041F,
1461  0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427,
1462  0x0428, 0x0429, 0x042A, 0x042B, 0x042C, 0x042D, 0x042E, 0x042F,
1463  0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437,
1464  0x0438, 0x0439, 0x043A, 0x043B, 0x043C, 0x043D, 0x043E, 0x043F,
1465  0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447,
1466  0x0448, 0x0449, 0x044A, 0x044B, 0x044C, 0x044D, 0x044E, 0x044F,
1467  0x2116, 0x0451, 0x0452, 0x0453, 0x0454, 0x0455, 0x0456, 0x0457,
1468  0x0458, 0x0459, 0x045A, 0x045B, 0x045C, 0x00A7, 0x045E, 0x045F} },
1469  { "ISO 8859-6-I", 82,
1470  { 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
1471  0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F,
1472  0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
1473  0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F,
1474  0x00A0, 0xFFFD, 0xFFFD, 0xFFFD, 0x00A4, 0xFFFD, 0xFFFD, 0xFFFD,
1475  0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0x060C, 0x00AD, 0xFFFD, 0xFFFD,
1476  0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
1477  0xFFFD, 0xFFFD, 0xFFFD, 0x061B, 0xFFFD, 0xFFFD, 0xFFFD, 0x061F,
1478  0xFFFD, 0x0621, 0x0622, 0x0623, 0x0624, 0x0625, 0x0626, 0x0627,
1479  0x0628, 0x0629, 0x062A, 0x062B, 0x062C, 0x062D, 0x062E, 0x062F,
1480  0x0630, 0x0631, 0x0632, 0x0633, 0x0634, 0x0635, 0x0636, 0x0637,
1481  0x0638, 0x0639, 0x063A, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
1482  0x0640, 0x0641, 0x0642, 0x0643, 0x0644, 0x0645, 0x0646, 0x0647,
1483  0x0648, 0x0649, 0x064A, 0x064B, 0x064C, 0x064D, 0x064E, 0x064F,
1484  0x0650, 0x0651, 0x0652, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
1485  0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD} },
1486  { "ISO 8859-7", 10,
1487  { 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
1488  0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F,
1489  0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
1490  0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F,
1491  0x00A0, 0x2018, 0x2019, 0x00A3, 0xFFFD, 0xFFFD, 0x00A6, 0x00A7,
1492  0x00A8, 0x00A9, 0xFFFD, 0x00AB, 0x00AC, 0x00AD, 0xFFFD, 0x2015,
1493  0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x0384, 0x0385, 0x0386, 0x00B7,
1494  0x0388, 0x0389, 0x038A, 0x00BB, 0x038C, 0x00BD, 0x038E, 0x038F,
1495  0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397,
1496  0x0398, 0x0399, 0x039A, 0x039B, 0x039C, 0x039D, 0x039E, 0x039F,
1497  0x03A0, 0x03A1, 0xFFFD, 0x03A3, 0x03A4, 0x03A5, 0x03A6, 0x03A7,
1498  0x03A8, 0x03A9, 0x03AA, 0x03AB, 0x03AC, 0x03AD, 0x03AE, 0x03AF,
1499  0x03B0, 0x03B1, 0x03B2, 0x03B3, 0x03B4, 0x03B5, 0x03B6, 0x03B7,
1500  0x03B8, 0x03B9, 0x03BA, 0x03BB, 0x03BC, 0x03BD, 0x03BE, 0x03BF,
1501  0x03C0, 0x03C1, 0x03C2, 0x03C3, 0x03C4, 0x03C5, 0x03C6, 0x03C7,
1502  0x03C8, 0x03C9, 0x03CA, 0x03CB, 0x03CC, 0x03CD, 0x03CE, 0xFFFD} },
1503  { "ISO 8859-8-I", 85,
1504  { 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
1505  0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F,
1506  0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
1507  0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F,
1508  0x00A0, 0xFFFD, 0x00A2, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7,
1509  0x00A8, 0x00A9, 0x00D7, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x203E,
1510  0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7,
1511  0x00B8, 0x00B9, 0x00F7, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0xFFFD,
1512  0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
1513  0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
1514  0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
1515  0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0x2017,
1516  0x05D0, 0x05D1, 0x05D2, 0x05D3, 0x05D4, 0x05D5, 0x05D6, 0x05D7,
1517  0x05D8, 0x05D9, 0x05DA, 0x05DB, 0x05DC, 0x05DD, 0x05DE, 0x05DF,
1518  0x05E0, 0x05E1, 0x05E2, 0x05E3, 0x05E4, 0x05E5, 0x05E6, 0x05E7,
1519  0x05E8, 0x05E9, 0x05EA, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD} },
1520  { "ISO 8859-9", 12,
1521  { 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
1522  0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F,
1523  0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
1524  0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F,
1525  0x00A0, 0x00A1, 0x00A2, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7,
1526  0x00A8, 0x00A9, 0x00AA, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF,
1527  0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7,
1528  0x00B8, 0x00B9, 0x00BA, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00BF,
1529  0x00C0, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x00C7,
1530  0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF,
1531  0x011E, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x00D7,
1532  0x00D8, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x0130, 0x015E, 0x00DF,
1533  0x00E0, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x00E7,
1534  0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF,
1535  0x011F, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x00F7,
1536  0x00F8, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x0131, 0x015F, 0x00FF} },
1537  { "ISO 8859-10", 13,
1538  { 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
1539  0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F,
1540  0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
1541  0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F,
1542  0x00A0, 0x0104, 0x0112, 0x0122, 0x012A, 0x0128, 0x0136, 0x00A7,
1543  0x013B, 0x0110, 0x0160, 0x0166, 0x017D, 0x00AD, 0x016A, 0x014A,
1544  0x00B0, 0x0105, 0x0113, 0x0123, 0x012B, 0x0129, 0x0137, 0x00B7,
1545  0x013C, 0x0111, 0x0161, 0x0167, 0x017E, 0x2015, 0x016B, 0x014B,
1546  0x0100, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x012E,
1547  0x010C, 0x00C9, 0x0118, 0x00CB, 0x0116, 0x00CD, 0x00CE, 0x00CF,
1548  0x00D0, 0x0145, 0x014C, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x0168,
1549  0x00D8, 0x0172, 0x00DA, 0x00DB, 0x00DC, 0x00DD, 0x00DE, 0x00DF,
1550  0x0101, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x012F,
1551  0x010D, 0x00E9, 0x0119, 0x00EB, 0x0117, 0x00ED, 0x00EE, 0x00EF,
1552  0x00F0, 0x0146, 0x014D, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x0169,
1553  0x00F8, 0x0173, 0x00FA, 0x00FB, 0x00FC, 0x00FD, 0x00FE, 0x0138} },
1554  { "ISO 8859-13", 109,
1555  { 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
1556  0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F,
1557  0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
1558  0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F,
1559  0x00A0, 0x201D, 0x00A2, 0x00A3, 0x00A4, 0x201E, 0x00A6, 0x00A7,
1560  0x00D8, 0x00A9, 0x0156, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00C6,
1561  0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x201C, 0x00B5, 0x00B6, 0x00B7,
1562  0x00F8, 0x00B9, 0x0157, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00E6,
1563  0x0104, 0x012E, 0x0100, 0x0106, 0x00C4, 0x00C5, 0x0118, 0x0112,
1564  0x010C, 0x00C9, 0x0179, 0x0116, 0x0122, 0x0136, 0x012A, 0x013B,
1565  0x0160, 0x0143, 0x0145, 0x00D3, 0x014C, 0x00D5, 0x00D6, 0x00D7,
1566  0x0172, 0x0141, 0x015A, 0x016A, 0x00DC, 0x017B, 0x017D, 0x00DF,
1567  0x0105, 0x012F, 0x0101, 0x0107, 0x00E4, 0x00E5, 0x0119, 0x0113,
1568  0x010D, 0x00E9, 0x017A, 0x0117, 0x0123, 0x0137, 0x012B, 0x013C,
1569  0x0161, 0x0144, 0x0146, 0x00F3, 0x014D, 0x00F5, 0x00F6, 0x00F7,
1570  0x0173, 0x0142, 0x015B, 0x016B, 0x00FC, 0x017C, 0x017E, 0x2019} },
1571  { "ISO 8859-14", 110,
1572  { 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
1573  0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F,
1574  0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
1575  0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F,
1576  0x00A0, 0x1E02, 0x1E03, 0x00A3, 0x010A, 0x010B, 0x1E0A, 0x00A7,
1577  0x1E80, 0x00A9, 0x1E82, 0x1E0B, 0x1EF2, 0x00AD, 0x00AE, 0x0178,
1578  0x1E1E, 0x1E1F, 0x0120, 0x0121, 0x1E40, 0x1E41, 0x00B6, 0x1E56,
1579  0x1E81, 0x1E57, 0x1E83, 0x1E60, 0x1EF3, 0x1E84, 0x1E85, 0x1E61,
1580  0x00C0, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x00C7,
1581  0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF,
1582  0x0174, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x1E6A,
1583  0x00D8, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x00DD, 0x0176, 0x00DF,
1584  0x00E0, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x00E7,
1585  0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF,
1586  0x0175, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x1E6B,
1587  0x00F8, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x00FD, 0x0177, 0x00FF} },
1588  { "ISO 8859-15", 111,
1589  { 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
1590  0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F,
1591  0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
1592  0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F,
1593  0x00A0, 0x00A1, 0x00A2, 0x00A3, 0x20AC, 0x00A5, 0x0160, 0x00A7,
1594  0x0161, 0x00A9, 0x00AA, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF,
1595  0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x017D, 0x00B5, 0x00B6, 0x00B7,
1596  0x017E, 0x00B9, 0x00BA, 0x00BB, 0x0152, 0x0153, 0x0178, 0x00BF,
1597  0x00C0, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x00C7,
1598  0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF,
1599  0x00D0, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x00D7,
1600  0x00D8, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x00DD, 0x00DE, 0x00DF,
1601  0x00E0, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x00E7,
1602  0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF,
1603  0x00F0, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x00F7,
1604  0x00F8, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x00FD, 0x00FE, 0x00FF} },
1605 
1606  // next bits generated again from tables on the Unicode 3.0 CD.
1607 
1608  // $ for a in CP* ; do ( awk '/^0x[89ABCDEF]/{ print $1, $2 }' < $a ) | sort | sed -e 's/#UNDEF.*$/0xFFFD/' | cut -c6- | paste '-d ' - - - - - - - - | sed -e 's/ /, /g' -e 's/$/,/' -e '$ s/,$/} },/' -e '1 s/^/{ /' > ~/tmp/$a ; done
1609 
1610  { "CP 874", 0, //### what is the mib?
1611  { 0x20AC, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0x2026, 0xFFFD, 0xFFFD,
1612  0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
1613  0xFFFD, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,
1614  0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
1615  0x00A0, 0x0E01, 0x0E02, 0x0E03, 0x0E04, 0x0E05, 0x0E06, 0x0E07,
1616  0x0E08, 0x0E09, 0x0E0A, 0x0E0B, 0x0E0C, 0x0E0D, 0x0E0E, 0x0E0F,
1617  0x0E10, 0x0E11, 0x0E12, 0x0E13, 0x0E14, 0x0E15, 0x0E16, 0x0E17,
1618  0x0E18, 0x0E19, 0x0E1A, 0x0E1B, 0x0E1C, 0x0E1D, 0x0E1E, 0x0E1F,
1619  0x0E20, 0x0E21, 0x0E22, 0x0E23, 0x0E24, 0x0E25, 0x0E26, 0x0E27,
1620  0x0E28, 0x0E29, 0x0E2A, 0x0E2B, 0x0E2C, 0x0E2D, 0x0E2E, 0x0E2F,
1621  0x0E30, 0x0E31, 0x0E32, 0x0E33, 0x0E34, 0x0E35, 0x0E36, 0x0E37,
1622  0x0E38, 0x0E39, 0x0E3A, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0x0E3F,
1623  0x0E40, 0x0E41, 0x0E42, 0x0E43, 0x0E44, 0x0E45, 0x0E46, 0x0E47,
1624  0x0E48, 0x0E49, 0x0E4A, 0x0E4B, 0x0E4C, 0x0E4D, 0x0E4E, 0x0E4F,
1625  0x0E50, 0x0E51, 0x0E52, 0x0E53, 0x0E54, 0x0E55, 0x0E56, 0x0E57,
1626  0x0E58, 0x0E59, 0x0E5A, 0x0E5B, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD} },
1627  { "CP 1250", 2250,
1628  { 0x20AC, 0xFFFD, 0x201A, 0xFFFD, 0x201E, 0x2026, 0x2020, 0x2021,
1629  0xFFFD, 0x2030, 0x0160, 0x2039, 0x015A, 0x0164, 0x017D, 0x0179,
1630  0xFFFD, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,
1631  0xFFFD, 0x2122, 0x0161, 0x203A, 0x015B, 0x0165, 0x017E, 0x017A,
1632  0x00A0, 0x02C7, 0x02D8, 0x0141, 0x00A4, 0x0104, 0x00A6, 0x00A7,
1633  0x00A8, 0x00A9, 0x015E, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x017B,
1634  0x00B0, 0x00B1, 0x02DB, 0x0142, 0x00B4, 0x00B5, 0x00B6, 0x00B7,
1635  0x00B8, 0x0105, 0x015F, 0x00BB, 0x013D, 0x02DD, 0x013E, 0x017C,
1636  0x0154, 0x00C1, 0x00C2, 0x0102, 0x00C4, 0x0139, 0x0106, 0x00C7,
1637  0x010C, 0x00C9, 0x0118, 0x00CB, 0x011A, 0x00CD, 0x00CE, 0x010E,
1638  0x0110, 0x0143, 0x0147, 0x00D3, 0x00D4, 0x0150, 0x00D6, 0x00D7,
1639  0x0158, 0x016E, 0x00DA, 0x0170, 0x00DC, 0x00DD, 0x0162, 0x00DF,
1640  0x0155, 0x00E1, 0x00E2, 0x0103, 0x00E4, 0x013A, 0x0107, 0x00E7,
1641  0x010D, 0x00E9, 0x0119, 0x00EB, 0x011B, 0x00ED, 0x00EE, 0x010F,
1642  0x0111, 0x0144, 0x0148, 0x00F3, 0x00F4, 0x0151, 0x00F6, 0x00F7,
1643  0x0159, 0x016F, 0x00FA, 0x0171, 0x00FC, 0x00FD, 0x0163, 0x02D9} },
1644  { "CP 1251", 2251,
1645  { 0x0402, 0x0403, 0x201A, 0x0453, 0x201E, 0x2026, 0x2020, 0x2021,
1646  0x20AC, 0x2030, 0x0409, 0x2039, 0x040A, 0x040C, 0x040B, 0x040F,
1647  0x0452, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,
1648  0xFFFD, 0x2122, 0x0459, 0x203A, 0x045A, 0x045C, 0x045B, 0x045F,
1649  0x00A0, 0x040E, 0x045E, 0x0408, 0x00A4, 0x0490, 0x00A6, 0x00A7,
1650  0x0401, 0x00A9, 0x0404, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x0407,
1651  0x00B0, 0x00B1, 0x0406, 0x0456, 0x0491, 0x00B5, 0x00B6, 0x00B7,
1652  0x0451, 0x2116, 0x0454, 0x00BB, 0x0458, 0x0405, 0x0455, 0x0457,
1653  0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417,
1654  0x0418, 0x0419, 0x041A, 0x041B, 0x041C, 0x041D, 0x041E, 0x041F,
1655  0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427,
1656  0x0428, 0x0429, 0x042A, 0x042B, 0x042C, 0x042D, 0x042E, 0x042F,
1657  0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437,
1658  0x0438, 0x0439, 0x043A, 0x043B, 0x043C, 0x043D, 0x043E, 0x043F,
1659  0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447,
1660  0x0448, 0x0449, 0x044A, 0x044B, 0x044C, 0x044D, 0x044E, 0x044F} },
1661  { "CP 1252", 2252,
1662  { 0x20AC, 0xFFFD, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021,
1663  0x02C6, 0x2030, 0x0160, 0x2039, 0x0152, 0xFFFD, 0x017D, 0xFFFD,
1664  0xFFFD, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,
1665  0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, 0xFFFD, 0x017E, 0x0178,
1666  0x00A0, 0x00A1, 0x00A2, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7,
1667  0x00A8, 0x00A9, 0x00AA, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF,
1668  0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7,
1669  0x00B8, 0x00B9, 0x00BA, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00BF,
1670  0x00C0, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x00C7,
1671  0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF,
1672  0x00D0, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x00D7,
1673  0x00D8, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x00DD, 0x00DE, 0x00DF,
1674  0x00E0, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x00E7,
1675  0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF,
1676  0x00F0, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x00F7,
1677  0x00F8, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x00FD, 0x00FE, 0x00FF} },
1678  { "CP 1253", 2253,
1679  { 0x20AC, 0xFFFD, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021,
1680  0xFFFD, 0x2030, 0xFFFD, 0x2039, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
1681  0xFFFD, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,
1682  0xFFFD, 0x2122, 0xFFFD, 0x203A, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
1683  0x00A0, 0x0385, 0x0386, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7,
1684  0x00A8, 0x00A9, 0xFFFD, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x2015,
1685  0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x0384, 0x00B5, 0x00B6, 0x00B7,
1686  0x0388, 0x0389, 0x038A, 0x00BB, 0x038C, 0x00BD, 0x038E, 0x038F,
1687  0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397,
1688  0x0398, 0x0399, 0x039A, 0x039B, 0x039C, 0x039D, 0x039E, 0x039F,
1689  0x03A0, 0x03A1, 0xFFFD, 0x03A3, 0x03A4, 0x03A5, 0x03A6, 0x03A7,
1690  0x03A8, 0x03A9, 0x03AA, 0x03AB, 0x03AC, 0x03AD, 0x03AE, 0x03AF,
1691  0x03B0, 0x03B1, 0x03B2, 0x03B3, 0x03B4, 0x03B5, 0x03B6, 0x03B7,
1692  0x03B8, 0x03B9, 0x03BA, 0x03BB, 0x03BC, 0x03BD, 0x03BE, 0x03BF,
1693  0x03C0, 0x03C1, 0x03C2, 0x03C3, 0x03C4, 0x03C5, 0x03C6, 0x03C7,
1694  0x03C8, 0x03C9, 0x03CA, 0x03CB, 0x03CC, 0x03CD, 0x03CE, 0xFFFD} },
1695  { "CP 1254", 2254,
1696  { 0x20AC, 0xFFFD, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021,
1697  0x02C6, 0x2030, 0x0160, 0x2039, 0x0152, 0xFFFD, 0xFFFD, 0xFFFD,
1698  0xFFFD, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,
1699  0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, 0xFFFD, 0xFFFD, 0x0178,
1700  0x00A0, 0x00A1, 0x00A2, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7,
1701  0x00A8, 0x00A9, 0x00AA, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF,
1702  0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7,
1703  0x00B8, 0x00B9, 0x00BA, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00BF,
1704  0x00C0, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x00C7,
1705  0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF,
1706  0x011E, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x00D7,
1707  0x00D8, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x0130, 0x015E, 0x00DF,
1708  0x00E0, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x00E7,
1709  0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF,
1710  0x011F, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x00F7,
1711  0x00F8, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x0131, 0x015F, 0x00FF} },
1712  { "CP 1255", 2255,
1713  { 0x20AC, 0xFFFD, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021,
1714  0x02C6, 0x2030, 0xFFFD, 0x2039, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
1715  0xFFFD, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,
1716  0x02DC, 0x2122, 0xFFFD, 0x203A, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
1717  0x00A0, 0x00A1, 0x00A2, 0x00A3, 0x20AA, 0x00A5, 0x00A6, 0x00A7,
1718  0x00A8, 0x00A9, 0x00D7, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF,
1719  0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7,
1720  0x00B8, 0x00B9, 0x00F7, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00BF,
1721  0x05B0, 0x05B1, 0x05B2, 0x05B3, 0x05B4, 0x05B5, 0x05B6, 0x05B7,
1722  0x05B8, 0x05B9, 0xFFFD, 0x05BB, 0x05BC, 0x05BD, 0x05BE, 0x05BF,
1723  0x05C0, 0x05C1, 0x05C2, 0x05C3, 0x05F0, 0x05F1, 0x05F2, 0x05F3,
1724  0x05F4, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
1725  0x05D0, 0x05D1, 0x05D2, 0x05D3, 0x05D4, 0x05D5, 0x05D6, 0x05D7,
1726  0x05D8, 0x05D9, 0x05DA, 0x05DB, 0x05DC, 0x05DD, 0x05DE, 0x05DF,
1727  0x05E0, 0x05E1, 0x05E2, 0x05E3, 0x05E4, 0x05E5, 0x05E6, 0x05E7,
1728  0x05E8, 0x05E9, 0x05EA, 0xFFFD, 0xFFFD, 0x200E, 0x200F, 0xFFFD} },
1729  { "CP 1256", 2256,
1730  { 0x20AC, 0x067E, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021,
1731  0x02C6, 0x2030, 0x0679, 0x2039, 0x0152, 0x0686, 0x0698, 0x0688,
1732  0x06AF, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,
1733  0x06A9, 0x2122, 0x0691, 0x203A, 0x0153, 0x200C, 0x200D, 0x06BA,
1734  0x00A0, 0x060C, 0x00A2, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7,
1735  0x00A8, 0x00A9, 0x06BE, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF,
1736  0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7,
1737  0x00B8, 0x00B9, 0x061B, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x061F,
1738  0x06C1, 0x0621, 0x0622, 0x0623, 0x0624, 0x0625, 0x0626, 0x0627,
1739  0x0628, 0x0629, 0x062A, 0x062B, 0x062C, 0x062D, 0x062E, 0x062F,
1740  0x0630, 0x0631, 0x0632, 0x0633, 0x0634, 0x0635, 0x0636, 0x00D7,
1741  0x0637, 0x0638, 0x0639, 0x063A, 0x0640, 0x0641, 0x0642, 0x0643,
1742  0x00E0, 0x0644, 0x00E2, 0x0645, 0x0646, 0x0647, 0x0648, 0x00E7,
1743  0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x0649, 0x064A, 0x00EE, 0x00EF,
1744  0x064B, 0x064C, 0x064D, 0x064E, 0x00F4, 0x064F, 0x0650, 0x00F7,
1745  0x0651, 0x00F9, 0x0652, 0x00FB, 0x00FC, 0x200E, 0x200F, 0x06D2} },
1746  { "CP 1257", 2257,
1747  { 0x20AC, 0xFFFD, 0x201A, 0xFFFD, 0x201E, 0x2026, 0x2020, 0x2021,
1748  0xFFFD, 0x2030, 0xFFFD, 0x2039, 0xFFFD, 0x00A8, 0x02C7, 0x00B8,
1749  0xFFFD, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,
1750  0xFFFD, 0x2122, 0xFFFD, 0x203A, 0xFFFD, 0x00AF, 0x02DB, 0xFFFD,
1751  0x00A0, 0xFFFD, 0x00A2, 0x00A3, 0x00A4, 0xFFFD, 0x00A6, 0x00A7,
1752  0x00D8, 0x00A9, 0x0156, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00C6,
1753  0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7,
1754  0x00F8, 0x00B9, 0x0157, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00E6,
1755  0x0104, 0x012E, 0x0100, 0x0106, 0x00C4, 0x00C5, 0x0118, 0x0112,
1756  0x010C, 0x00C9, 0x0179, 0x0116, 0x0122, 0x0136, 0x012A, 0x013B,
1757  0x0160, 0x0143, 0x0145, 0x00D3, 0x014C, 0x00D5, 0x00D6, 0x00D7,
1758  0x0172, 0x0141, 0x015A, 0x016A, 0x00DC, 0x017B, 0x017D, 0x00DF,
1759  0x0105, 0x012F, 0x0101, 0x0107, 0x00E4, 0x00E5, 0x0119, 0x0113,
1760  0x010D, 0x00E9, 0x017A, 0x0117, 0x0123, 0x0137, 0x012B, 0x013C,
1761  0x0161, 0x0144, 0x0146, 0x00F3, 0x014D, 0x00F5, 0x00F6, 0x00F7,
1762  0x0173, 0x0142, 0x015B, 0x016B, 0x00FC, 0x017C, 0x017E, 0x02D9} },
1763  { "CP 1258", 2258,
1764  { 0x20AC, 0xFFFD, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021,
1765  0x02C6, 0x2030, 0xFFFD, 0x2039, 0x0152, 0xFFFD, 0xFFFD, 0xFFFD,
1766  0xFFFD, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,
1767  0x02DC, 0x2122, 0xFFFD, 0x203A, 0x0153, 0xFFFD, 0xFFFD, 0x0178,
1768  0x00A0, 0x00A1, 0x00A2, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7,
1769  0x00A8, 0x00A9, 0x00AA, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF,
1770  0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7,
1771  0x00B8, 0x00B9, 0x00BA, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00BF,
1772  0x00C0, 0x00C1, 0x00C2, 0x0102, 0x00C4, 0x00C5, 0x00C6, 0x00C7,
1773  0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x0300, 0x00CD, 0x00CE, 0x00CF,
1774  0x0110, 0x00D1, 0x0309, 0x00D3, 0x00D4, 0x01A0, 0x00D6, 0x00D7,
1775  0x00D8, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x01AF, 0x0303, 0x00DF,
1776  0x00E0, 0x00E1, 0x00E2, 0x0103, 0x00E4, 0x00E5, 0x00E6, 0x00E7,
1777  0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x0301, 0x00ED, 0x00EE, 0x00EF,
1778  0x0111, 0x00F1, 0x0323, 0x00F3, 0x00F4, 0x01A1, 0x00F6, 0x00F7,
1779  0x00F8, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x01B0, 0x20AB, 0x00FF} },
1780 
1781  // this one is generated from the charmap file located in /usr/share/i18n/charmaps
1782  // on most Linux distributions. The thai character set tis620 is byte by byte equivalent
1783  // to iso8859-11, so we name it 8859-11 here, but recognise the name tis620 too.
1784 
1785  // $ for A in 8 9 A B C D E F ; do for B in 0 1 2 3 4 5 6 7 8 9 A B C D E F ; do echo x${A}${B} 0xFFFD ; done ; done > /tmp/digits ; ( cut -c25- < TIS-620 ; cat /tmp/digits ) | awk '/^x[89ABCDEF]/{ print $1, $2 }' | sed -e 's/<U/0x/' -e 's/>//' | sort | uniq -w4 | cut -c5- | paste '-d ' - - - - - - - - | sed -e 's/ /, /g' -e 's/$/,/' -e '$ s/,$/} },/' -e '1 s/^/{ /' > ~/tmp/tis-620
1786  { "ISO 8859-11", 2259, // Thai character set mib enum taken from tis620 (which is byte by byte equivalent)
1787  { 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
1788  0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
1789  0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
1790  0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
1791  0xFFFD, 0x0E01, 0x0E02, 0x0E03, 0x0E04, 0x0E05, 0x0E06, 0x0E07,
1792  0x0E08, 0x0E09, 0x0E0A, 0x0E0B, 0x0E0C, 0x0E0D, 0x0E0E, 0x0E0F,
1793  0x0E10, 0x0E11, 0x0E12, 0x0E13, 0x0E14, 0x0E15, 0x0E16, 0x0E17,
1794  0x0E18, 0x0E19, 0x0E1A, 0x0E1B, 0x0E1C, 0x0E1D, 0x0E1E, 0x0E1F,
1795  0x0E20, 0x0E21, 0x0E22, 0x0E23, 0x0E24, 0x0E25, 0x0E26, 0x0E27,
1796  0x0E28, 0x0E29, 0x0E2A, 0x0E2B, 0x0E2C, 0x0E2D, 0x0E2E, 0x0E2F,
1797  0x0E30, 0x0E31, 0x0E32, 0x0E33, 0x0E34, 0x0E35, 0x0E36, 0x0E37,
1798  0x0E38, 0x0E39, 0x0E3A, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0x0E3F,
1799  0x0E40, 0x0E41, 0x0E42, 0x0E43, 0x0E44, 0x0E45, 0x0E46, 0x0E47,
1800  0x0E48, 0x0E49, 0x0E4A, 0x0E4B, 0x0E4C, 0x0E4D, 0x0E4E, 0x0E4F,
1801  0x0E50, 0x0E51, 0x0E52, 0x0E53, 0x0E54, 0x0E55, 0x0E56, 0x0E57,
1802  0x0E58, 0x0E59, 0x0E5A, 0x0E5B, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD} },
1803 
1804  // change LAST_MIB if you add more, and edit unicodevalues in
1805  // kernel/qpsprinter.cpp too.
1806 };
1807 
1808 
// The QSimpleTextCodec instance whose table is currently cached in the
// shared reverseMap (0 when none is). fromUnicode() rebuilds the map and
// sets this pointer when a different codec encodes; the destructor deletes
// the map and clears this pointer if the owning codec is destroyed.
1809 static const QSimpleTextCodec * reverseOwner = 0;
1811 
1812 
1814  : QTextCodec(), forwardIndex( i )
1815 {
1816 }
1817 
1818 
1820 {
1821  if ( reverseOwner == this ) {
1822  delete reverseMap;
1823  reverseMap = 0;
1824  reverseOwner = 0;
1825  }
1826 }
1827 
1828 // what happens if strlen(chars)<len? what happens if !chars? if len<1?
1829 QString QSimpleTextCodec::toUnicode(const char* chars, int len) const
1830 {
1831  if(len <= 0)
1832  return QString::null;
1833 
1834  int clen = qstrlen(chars);
1835  len = QMIN(len, clen); // Note: NUL ends string
1836 
1837  QString r;
1838  r.setUnicode(0, len);
1839  QChar* uc = (QChar*)r.unicode(); // const_cast
1840  const unsigned char * c = (const unsigned char *)chars;
1841  for( int i=0; i<len; i++ ) {
1842  if ( c[i] > 127 )
1843  uc[i] = unicodevalues[forwardIndex].values[c[i]-128];
1844  else
1845  uc[i] = c[i];
1846  }
1847  return r;
1848 }
1849 
1850 
1852 {
1853  if ( reverseOwner != this ) {
1854  int m = 0;
1855  int i = 0;
1856  while( i < 128 ) {
1857  if ( unicodevalues[forwardIndex].values[i] > m &&
1858  unicodevalues[forwardIndex].values[i] < 0xfffd )
1859  m = unicodevalues[forwardIndex].values[i];
1860  i++;
1861  }
1862  m++;
1863  if ( !reverseMap )
1864  reverseMap = new QArray<char>( m );
1865  if ( m > (int)(reverseMap->size()) )
1866  reverseMap->resize( m );
1867  for( i = 0; i < 128 && i < m; i++ )
1868  (*reverseMap)[i] = (char)i;
1869  for( ;i < m; i++ )
1870  (*reverseMap)[i] = '?';
1871  for( i=128; i<256; i++ ) {
1872  int u = unicodevalues[forwardIndex].values[i-128];
1873  if ( u < m )
1874  (*reverseMap)[u] = (char)(unsigned char)(i);
1875  }
1876  reverseOwner = this;
1877  }
1878  if ( len <0 || len > (int)uc.length() )
1879  len = uc.length();
1880  QCString r( len+1 );
1881  int i = len;
1882  int u;
1883  const QChar* ucp = uc.unicode();
1884  char* rp = r.rawData();
1885  char* rmp = reverseMap->data();
1886  int rmsize = (int) reverseMap->size();
1887  while( i-- )
1888  {
1889  u = ucp->unicode();
1890  *rp++ = u < 128 ? u : (( u < rmsize ) ? (*(rmp+u)) : '?' );
1891  ucp++;
1892  }
1893  r[len] = 0;
1894  return r;
1895 }
1896 
1897 
1898 const char* QSimpleTextCodec::name() const
1899 {
1900  return unicodevalues[forwardIndex].cs;
1901 }
1902 
1903 
1905 {
1906  return unicodevalues[forwardIndex].mib;
1907 }
1908 
1909 int QSimpleTextCodec::heuristicNameMatch(const char* hint) const
1910 {
1911  if ( hint[0]=='k' ) {
1912  // Help people with messy fonts
1913  if ( QCString(hint) == "koi8-1" )
1914  return QTextCodec::heuristicNameMatch("koi8-r")-1;
1915  if ( QCString(hint) == "koi8-ru" )
1916  return QTextCodec::heuristicNameMatch("koi8-r")-1;
1917  } else if ( hint[0] == 't' && QCString(name()) == "ISO 8859-11" ) {
1918  // 8859-11 and tis620 are byte by bute equivalent
1919  int i = simpleHeuristicNameMatch("tis620-0", hint);
1920  if( !i )
1921  i = simpleHeuristicNameMatch("tis-620", hint);
1922  if( i ) return i;
1923  }
1924  return QTextCodec::heuristicNameMatch(hint);
1925 }
1926 
1927 int QSimpleTextCodec::heuristicContentMatch(const char* chars, int len) const
1928 {
1929  if ( len<1 || !chars )
1930  return -1;
1931  int i = 0;
1932  const uchar * c = (const unsigned char *)chars;
1933  int r = 0;
1934  while( i<len && c && *c ) {
1935  if ( *c >= 128 ) {
1936  if ( unicodevalues[forwardIndex].values[(*c)-128] == 0xfffd )
1937  return -1;
1938  }
1939  if ( (*c >= ' ' && *c < 127) ||
1940  *c == '\n' || *c == '\t' || *c == '\r' )
1941  r++;
1942  i++;
1943  c++;
1944  }
1945  if ( mibEnum()==4 )
1946  r+=1;
1947  return r;
1948 }
1949 
1950 
1951 #endif // QT_NO_CODECS
1952 
1954 {
1955 public:
1956  QLatin1Codec();
1957  ~QLatin1Codec();
1958 
1959  QString toUnicode(const char* chars, int len) const;
1960  QCString fromUnicode(const QString& uc, int& lenInOut ) const;
1961 
1962  const char* name() const;
1963  int mibEnum() const;
1964 
1965  int heuristicContentMatch(const char* chars, int len) const;
1966 
1967  int heuristicNameMatch(const char* hint) const;
1968 
1969 private:
1970  //int forwardIndex;
1971 };
1972 
1973 
1975  : QTextCodec()
1976 {
1977 }
1978 
1979 
1981 {
1982 }
1983 
1984 // what happens if strlen(chars)<len? what happens if !chars? if len<1?
1985 QString QLatin1Codec::toUnicode(const char* chars, int len) const
1986 {
1987  if(len <= 0)
1988  return QString::null;
1989 
1990  return QString::fromLatin1(chars, len);
1991 }
1992 
1993 
1994 QCString QLatin1Codec::fromUnicode(const QString& uc, int& len ) const
1995 {
1996  if ( len <0 || len > (int)uc.length() )
1997  len = uc.length();
1998  QCString r( len+1 );
1999  int i = 0;
2000  const QChar *ch = uc.unicode();
2001  while ( i < len ) {
2002  r[i] = ch->row() ? '?' : ch->cell();
2003  i++;
2004  ch++;
2005  }
2006  r[len] = 0;
2007  return r;
2008 }
2009 
2010 
2011 const char* QLatin1Codec::name() const
2012 {
2013  return "ISO 8859-1";
2014 }
2015 
2016 
2018 {
2019  return 4;
2020 }
2021 
2022 int QLatin1Codec::heuristicNameMatch(const char* hint) const
2023 {
2024  return QTextCodec::heuristicNameMatch(hint);
2025 }
2026 
2027 int QLatin1Codec::heuristicContentMatch(const char* chars, int len) const
2028 {
2029  if ( len<1 || !chars )
2030  return -1;
2031  int i = 0;
2032  const uchar * c = (const unsigned char *)chars;
2033  int r = 0;
2034  while( i<len && c && *c ) {
2035  if ( *c >= 0x80 && *c < 0xa0 )
2036  return -1;
2037  if ( (*c >= ' ' && *c < 127) ||
2038  *c == '\n' || *c == '\t' || *c == '\r' )
2039  r++;
2040  i++;
2041  c++;
2042  }
2043  return r;
2044 }
2045 
2046 
2047 static void setupBuiltinCodecs()
2048 {
2049  (void)new QLatin1Codec;
2050 
2051 #ifndef QT_NO_CODECS
2052  int i = 0;
2053  do {
2054  (void)new QSimpleTextCodec( i );
2055  } while( unicodevalues[i++].mib != LAST_MIB );
2056 
2057  //(void)new QEucJpCodec;
2058  //(void)new QSjisCodec;
2059  //(void)new QJisCodec;
2060  //(void)new QEucKrCodec;
2061  //(void)new QGbkCodec;
2062  //(void)new QBig5Codec;
2063  (void)new QUtf8Codec;
2064  (void)new QUtf16Codec;
2065  //(void)new QHebrewCodec;
2066  //(void)new QArabicCodec;
2067  //(void)new QTsciiCodec;
2068 #endif // QT_NO_CODECS
2069 }
2070 
2071 #endif // QT_NO_TEXTCODEC
static QCString name
Definition: declinfo.cpp:673
static QTextCodec * codecForLocale()
Definition: qtextcodec.cpp:542
char * rawData() const
Definition: qcstring.h:216
QMultiByteUnicodeTable * multibyte
Definition: qtextcodec.cpp:925
virtual QTextDecoder * makeDecoder() const
Definition: qtextcodec.cpp:726
bool isNumber() const
Definition: qstring.cpp:11046
static const char * locale()
const QChar * unicode() const
Definition: qstring.h:508
int mibEnum() const
int heuristicContentMatch(const char *chars, int len) const
QCString stripWhiteSpace() const
Definition: qcstring.cpp:295
static const char *const koi8_ulocales[]
Definition: qtextcodec.cpp:475
QMultiByteUnicodeTable * mb
Definition: qtextcodec.cpp:947
static QTextCodec * codecForContent(const char *chars, int len)
Definition: qtextcodec.cpp:653
int heuristicNameMatch(const char *hint) const
static const char *const iso8859_8locales[]
Definition: qtextcodec.cpp:466
static QCString result
bool isEmpty() const
Definition: qcstring.h:189
QMultiByteUnicodeTable * to_unicode_multibyte
Definition: qtextcodec.cpp:966
static const int maxlen
Definition: qregexp.cpp:904
uint length() const
Definition: qcstring.h:195
void qt_set_locale_codec(QTextCodec *codec)
Definition: qtextcodec.cpp:535
QCString fromUnicode(const QString &uc, int &lenInOut) const
virtual QCString fromUnicode(const QString &uc, int &lenInOut) const
Definition: qtextcodec.cpp:783
int heuristicNameMatch(const char *hint) const
bool resize(uint size)
Definition: qarray.h:69
static int getByte(char *&cursor)
Definition: qtextcodec.cpp:929
#define LAST_MIB
int heuristicNameMatch(const char *hint) const
static const char *const iso8859_5locales[]
Definition: qtextcodec.cpp:456
State-based decoder.
Definition: qtextcodec.h:56
ushort unicode() const
Definition: qstring.h:151
bool isLetter() const
Definition: qstring.cpp:11035
const bool FALSE
Definition: qglobal.h:370
int mibEnum() const
static const char *const iso8859_2locales[]
Definition: qtextcodec.cpp:444
static QString lettersAndNumbers(const char *input)
Definition: qtextcodec.cpp:286
virtual ~QTextCodec()
Definition: qtextcodec.cpp:258
static int simpleHeuristicNameMatch(const char *name, const char *hint)
Definition: qtextcodec.cpp:316
virtual QTextEncoder * makeEncoder() const
Definition: qtextcodec.cpp:740
The QString class provides an abstraction of Unicode text and the classic C null-terminated char arra...
Definition: qstring.h:350
void qWarning(const char *msg,...)
Definition: qglobal.cpp:409
QTextCodecFromIOD(QIODevice *iod)
Definition: qtextcodec.cpp:973
bool ok() const
virtual ~QTextDecoder()
Definition: qtextcodec.cpp:896
QString toUnicode(const char *chars, int len)
Definition: qtextcodec.cpp:150
static QString fromLatin1(const char *, int len=-1)
Definition: qstring.cpp:14539
virtual ~QTextEncoder()
Definition: qtextcodec.cpp:866
string filename
Definition: train.py:213
The QChar class provides a light-weight Unicode character.
Definition: qstring.h:56
virtual int heuristicNameMatch(const char *hint) const
Definition: qtextcodec.cpp:277
static QStrList * l
Definition: config.cpp:1044
static bool try_locale_list(const char *const locale[], const char *lang)
Definition: qtextcodec.cpp:482
Category category() const
Definition: qstring.cpp:11096
const char * name() const
unsigned char uchar
Definition: nybbler.cc:11
static struct @4 unicodevalues[]
QString simplifyWhiteSpace() const
Definition: qstring.cpp:13482
char *** from_unicode_page_multibyte
Definition: qtextcodec.cpp:961
#define IO_ReadOnly
Definition: qiodevice.h:61
uchar & cell()
Definition: qstring.h:167
#define QMIN(a, b)
Definition: qglobal.h:391
type * data() const
Definition: qarray.h:63
QTextStatelessDecoder(const QTextCodec *)
Definition: qtextcodec.cpp:144
QCString fromUnicode(const QString &uc, int &lenInOut) const
static void deleteAllCodecs()
Definition: qtextcodec.cpp:81
static const char *const probably_koi8_rlocales[]
Definition: qtextcodec.cpp:501
Q_EXPORT uint qstrlen(const char *str)
Definition: qcstring.h:81
virtual QString toUnicode(const char *chars, int len) const
Definition: qtextcodec.cpp:757
static const char *const iso8859_3locales[]
Definition: qtextcodec.cpp:450
type * current() const
static QTextCodec * loadCharmapFile(QString filename)
static QTextCodec * ru_RU_codec
Definition: qtextcodec.cpp:506
static bool destroying_is_ok
Definition: qtextcodec.cpp:64
static int input(void)
Definition: code.cpp:15695
bool stateless() const
Definition: qtextcodec.cpp:970
State-based encoder.
Definition: qtextcodec.h:50
uint length() const
Definition: qstring.h:679
std::void_t< T > n
static const char *const iso8859_9locales[]
Definition: qtextcodec.cpp:469
const double a
bool open(int)
Definition: qfile_unix.cpp:134
static constexpr double mb
Definition: Units.h:79
std::string getenv(std::string const &name)
Definition: getenv.cc:15
bool insert(uint i, const type *d)
Definition: qinternallist.h:58
static QTextCodec * codecForMib(int mib)
Definition: qtextcodec.cpp:354
static void setupBuiltinCodecs()
const char * name() const
p
Definition: test.py:223
void append(const type *d)
Definition: qinternallist.h:61
QCString fromUnicode(const QString &uc, int &lenInOut) const
const char * data() const
Definition: qcstring.h:207
virtual int readLine(char *data, uint maxlen)
Definition: qiodevice.cpp:581
Q_UINT16 values[128]
static const char *const tis_620locales[]
Definition: qtextcodec.cpp:478
int mib
QTextCodecFromIODDecoder(const QTextCodecFromIOD *c)
char ** from_unicode_page
Definition: qtextcodec.cpp:960
QChar lower() const
Definition: qstring.cpp:11229
#define CHAINED
Definition: qtextcodec.cpp:911
QString stripWhiteSpace() const
Definition: qstring.cpp:13438
The QFile class is an I/O device that operates on files.
Definition: qfile.h:50
void setAutoDelete(bool enable)
Definition: qcollection.h:55
virtual bool canEncode(QChar) const
Definition: qtextcodec.cpp:835
unsigned short Q_UINT16
Definition: qglobal.h:418
static const char *const iso8859_4locales[]
Definition: qtextcodec.cpp:453
static QTextCodec * ru_RU_hack(const char *i)
Definition: qtextcodec.cpp:508
static QTextCodec * codecForName(const char *hint, int accuracy=0)
Definition: qtextcodec.cpp:626
static QTextCodec * loadCharmap(QIODevice *)
static const char *const iso8859_6locales[]
Definition: qtextcodec.cpp:460
unsigned short ushort
Definition: qglobal.h:350
QString toUnicode(const char *chars, int len) const
static QInternalList< QTextCodec > * all
Definition: qtextcodec.cpp:63
QString toUnicode(const char *chars, int len) const
int heuristicContentMatch(const char *, int) const
int heuristicContentMatch(const char *chars, int len) const
virtual int mibEnum() const =0
QTextStatelessEncoder(const QTextCodec *)
Definition: qtextcodec.cpp:132
const char * name() const
int mibEnum() const
static const QSimpleTextCodec * reverseOwner
const QTextCodecFromIOD * codec
Definition: qtextcodec.cpp:946
byte_as<> byte
Type of data size stored in bytes, in long long precision.
Definition: datasize.h:98
void line(double t, double *p, double &x, double &y, double &z)
char * qstrdup(const char *str)
Definition: qcstring.cpp:548
QT_STATIC_CONST QChar null
Definition: qstring.h:68
uchar & row()
Definition: qstring.h:168
QString toUnicode(const char *chars, int len) const
virtual QCString fromUnicode(const QString &uc, int &lenInOut)=0
QString toUnicode(const char *chars, int len)
static const char *const iso8859_7locales[]
Definition: qtextcodec.cpp:463
type * at(uint i)
Definition: qinternallist.h:81
int qstrnicmp(const char *str1, const char *str2, uint len)
Definition: qcstring.cpp:581
static const Null null
Definition: qstring.h:376
QCString fromUnicode(const QString &uc, int &lenInOut)
Definition: qtextcodec.cpp:138
static QTextCodec * localeMapper
Definition: qtextcodec.cpp:533
const char * cs
Provides conversion between text encodings.
Definition: qtextcodec.h:62
const QTextCodec * codec
Definition: qtextcodec.cpp:118
static QArray< char > * reverseMap
virtual QString toUnicode(const char *chars, int len)=0
The QIODevice class is the base class of I/O devices.
Definition: qiodevice.h:88
uint count() const
Definition: qinternallist.h:56
Q_EXPORT int qstrcmp(const char *str1, const char *str2)
Definition: qcstring.h:95
const QTextCodec * codec
Definition: qtextcodec.cpp:126
uint size() const
Definition: qarray.h:65
unsigned uint
Definition: qglobal.h:351
static void realSetup()
Definition: qtextcodec.cpp:98
QString & setUnicode(const QChar *unicode, uint len)
Definition: qstring.cpp:14736
static QCString * s
Definition: config.cpp:1042
const bool TRUE
Definition: qglobal.h:371
bool remove(uint i)
Definition: qinternallist.h:62
QChar upper() const
Definition: qstring.cpp:11248
QTextDecoder * makeDecoder() const
static void setup()
Definition: qtextcodec.cpp:110
static const char *const iso8859_15locales[]
Definition: qtextcodec.cpp:472
static QTextCodec * codecForIndex(int i)
Definition: qtextcodec.cpp:343