diff options
Diffstat (limited to 'src/codecs/qtextcodec.cpp')
-rw-r--r-- | src/codecs/qtextcodec.cpp | 3121 |
1 files changed, 3121 insertions, 0 deletions
diff --git a/src/codecs/qtextcodec.cpp b/src/codecs/qtextcodec.cpp new file mode 100644 index 000000000..e982f4b56 --- /dev/null +++ b/src/codecs/qtextcodec.cpp @@ -0,0 +1,3121 @@ +/**************************************************************************** +** +** Implementation of TQTextCodec class +** +** Created : 981015 +** +** Copyright (C) 1998-2008 Trolltech ASA. All rights reserved. +** +** This file is part of the tools module of the TQt GUI Toolkit. +** +** This file may be used under the terms of the GNU General +** Public License versions 2.0 or 3.0 as published by the Free +** Software Foundation and appearing in the files LICENSE.GPL2 +** and LICENSE.GPL3 included in the packaging of this file. +** Alternatively you may (at your option) use any later version +** of the GNU General Public License if such license has been +** publicly approved by Trolltech ASA (or its successors, if any) +** and the KDE Free TQt Foundation. +** +** Please review the following information to ensure GNU General +** Public Licensing retquirements will be met: +** http://trolltech.com/products/qt/licenses/licensing/opensource/. +** If you are unsure which license is appropriate for your use, please +** review the following information: +** http://trolltech.com/products/qt/licenses/licensing/licensingoverview +** or contact the sales department at [email protected]. +** +** This file may be used under the terms of the Q Public License as +** defined by Trolltech ASA and appearing in the file LICENSE.TQPL +** included in the packaging of this file. Licensees holding valid TQt +** Commercial licenses may use this file in accordance with the TQt +** Commercial License Agreement provided with the Software. +** +** This file is provided "AS IS" with NO WARRANTY OF ANY KIND, +** INCLUDING THE WARRANTIES OF DESIGN, MERCHANTABILITY AND FITNESS FOR +** A PARTICULAR PURPOSE. Trolltech reserves all rights not granted +** herein. +** +**********************************************************************/ + +#include "qplatformdefs.h" + +// UNIX Large File Support redefines open -> open64 +#if defined(open) +# undef open +#endif + +#include "qtextcodec.h" +#ifndef QT_NO_TEXTCODEC + +#include "qvaluelist.h" +#include "qtextcodecfactory.h" +#include "qutfcodec.h" +#include "qnamespace.h" +#ifndef QT_NO_CODECS +#include "qrtlcodec.h" +#include "qtsciicodec.h" +#include "qisciicodec_p.h" +#endif // QT_NO_CODECS +#ifndef QT_NO_BIG_CODECS +#include "qbig5codec.h" +#include "qeucjpcodec.h" +#include "qeuckrcodec.h" +#include "qgb18030codec.h" +#include "qjiscodec.h" +#include "qjpunicode.h" +#include "qsjiscodec.h" +#endif // QT_NO_BIG_CODECS +#include "qfile.h" +#include "qstrlist.h" +#include "qstring.h" +#include "../tools/qlocale_p.h" + +#if !defined(QT_NO_CODECS) && !defined(QT_NO_BIG_CODECS) && defined(Q_WS_X11) +# include "qfontcodecs_p.h" +#endif + +#ifdef QT_THREAD_SUPPORT +# include <private/qmutexpool_p.h> +#endif // QT_THREAD_SUPPORT + +#include <stdlib.h> +#include <ctype.h> +#ifndef Q_OS_TEMP +#include <locale.h> +#endif +#if defined(_XOPEN_UNIX) && !defined(Q_OS_QNX6) +#include <langinfo.h> +#endif + +static TQValueList<TQTextCodec*> *all = 0; +static bool destroying_is_ok; // starts out as 0 +static TQTextCodec * localeMapper = 0; + +class TQTextCodecCleanup { +public: + ~TQTextCodecCleanup() { + TQTextCodec::deleteAllCodecs(); + } +}; +static TQTextCodecCleanup qtextcodec_cleanup; + +/*! + Deletes all the created codecs. + + \warning Do not call this function. + + TQApplication calls this function just before exiting to delete + any TQTextCodec objects that may be lying around. Since various + other classes hold pointers to TQTextCodec objects, it is not safe + to call this function earlier. + + If you are using the utility classes (like TQString) but not using + TQApplication, calling this function at the very end of your + application may be helpful for chasing down memory leaks by + eliminating any TQTextCodec objects. +*/ + +void TQTextCodec::deleteAllCodecs() +{ + if ( !all ) + return; + +#ifdef QT_THREAD_SUPPORT + TQMutexLocker locker( qt_global_mutexpool ? + qt_global_mutexpool->get( &all ) : 0 ); + if ( !all ) + return; +#endif // QT_THREAD_SUPPORT + + destroying_is_ok = TRUE; + + TQValueList<TQTextCodec*> *ball = all; + all = 0; + TQValueList<TQTextCodec*>::Iterator it; + for ( it = ball->begin(); it != ball->end(); ++it ) { + delete *it; + *it = 0; + } + ball->clear(); + delete ball; + + destroying_is_ok = FALSE; +} + + +static void realSetup(); + + +static inline void setup() +{ + if ( all ) return; + +#ifdef QT_THREAD_SUPPORT + TQMutexLocker locker( qt_global_mutexpool ? + qt_global_mutexpool->get( &all ) : 0 ); + if ( all ) return; +#endif // QT_THREAD_SUPPORT + + realSetup(); +} + + +class TQTextStatelessEncoder: public TQTextEncoder { + const TQTextCodec* codec; +public: + TQTextStatelessEncoder(const TQTextCodec*); + TQCString fromUnicode(const TQString& uc, int& lenInOut); +}; + + +class TQTextStatelessDecoder : public TQTextDecoder { + const TQTextCodec* codec; +public: + TQTextStatelessDecoder(const TQTextCodec*); + TQString toUnicode(const char* chars, int len); +}; + +TQTextStatelessEncoder::TQTextStatelessEncoder(const TQTextCodec* c) : + codec(c) +{ +} + + +TQCString TQTextStatelessEncoder::fromUnicode(const TQString& uc, int& lenInOut) +{ + return codec->fromUnicode(uc,lenInOut); +} + + +TQTextStatelessDecoder::TQTextStatelessDecoder(const TQTextCodec* c) : + codec(c) +{ +} + + +TQString TQTextStatelessDecoder::toUnicode(const char* chars, int len) +{ + return codec->toUnicode(chars,len); +} + + + +/*! + \class TQTextCodec qtextcodec.h + \brief The TQTextCodec class provides conversion between text encodings. + \reentrant + \ingroup i18n + + TQt uses Unicode to store, draw and manipulate strings. In many + situations you may wish to deal with data that uses a different + encoding. For example, most Japanese documents are still stored in + Shift-JIS or ISO2022, while Russian users often have their + documents in KOI8-R or CP1251. + + TQt provides a set of TQTextCodec classes to help with converting + non-Unicode formats to and from Unicode. You can also create your + own codec classes (\link #subclassing see later\endlink). + + The supported encodings are: + \list + \i Latin1 + \i Big5 -- Chinese + \i Big5-HKSCS -- Chinese + \i eucJP -- Japanese + \i eucKR -- Korean + \i GB2312 -- Chinese + \i GBK -- Chinese + \i GB18030 -- Chinese + \i JIS7 -- Japanese + \i Shift-JIS -- Japanese + \i TSCII -- Tamil + \i utf8 -- Unicode, 8-bit + \i utf16 -- Unicode + \i KOI8-R -- Russian + \i KOI8-U -- Ukrainian + \i ISO8859-1 -- Western + \i ISO8859-2 -- Central European + \i ISO8859-3 -- Central European + \i ISO8859-4 -- Baltic + \i ISO8859-5 -- Cyrillic + \i ISO8859-6 -- Arabic + \i ISO8859-7 -- Greek + \i ISO8859-8 -- Hebrew, visually ordered + \i ISO8859-8-i -- Hebrew, logically ordered + \i ISO8859-9 -- Turkish + \i ISO8859-10 + \i ISO8859-13 + \i ISO8859-14 + \i ISO8859-15 -- Western + \i IBM 850 + \i IBM 866 + \i CP874 + \i CP1250 -- Central European + \i CP1251 -- Cyrillic + \i CP1252 -- Western + \i CP1253 -- Greek + \i CP1254 -- Turkish + \i CP1255 -- Hebrew + \i CP1256 -- Arabic + \i CP1257 -- Baltic + \i CP1258 + \i Apple Roman + \i TIS-620 -- Thai + \endlist + + TQTextCodecs can be used as follows to convert some locally encoded + string to Unicode. Suppose you have some string encoded in Russian + KOI8-R encoding, and want to convert it to Unicode. The simple way + to do this is: + + \code + TQCString locallyEncoded = "..."; // text to convert + TQTextCodec *codec = TQTextCodec::codecForName("KOI8-R"); // get the codec for KOI8-R + TQString unicodeString = codec->toUnicode( locallyEncoded ); + \endcode + + After this, \c{unicodeString} holds the text converted to Unicode. + Converting a string from Unicode to the local encoding is just as + easy: + + \code + TQString unicodeString = "..."; // any Unicode text + TQTextCodec *codec = TQTextCodec::codecForName("KOI8-R"); // get the codec for KOI8-R + TQCString locallyEncoded = codec->fromUnicode( unicodeString ); + \endcode + + Some care must be taken when trying to convert the data in chunks, + for example, when receiving it over a network. In such cases it is + possible that a multi-byte character will be split over two + chunks. At best this might result in the loss of a character and + at worst cause the entire conversion to fail. + + The approach to use in these situations is to create a TQTextDecoder + object for the codec and use this TQTextDecoder for the whole + decoding process, as shown below: + + \code + TQTextCodec *codec = TQTextCodec::codecForName( "Shift-JIS" ); + TQTextDecoder *decoder = codec->makeDecoder(); + + TQString unicodeString; + while( receiving_data ) { + TQByteArray chunk = new_data; + unicodeString += decoder->toUnicode( chunk.data(), chunk.length() ); + } + \endcode + + The TQTextDecoder object maintains state between chunks and therefore + works correctly even if a multi-byte character is split between + chunks. + + \target subclassing + \section1 Creating your own Codec class + + Support for new text encodings can be added to TQt by creating + TQTextCodec subclasses. + + Built-in codecs can be overridden by custom codecs since more + recently created TQTextCodec objects take precedence over earlier + ones. + + You may find it more convenient to make your codec class available + as a plugin; see the \link plugins-howto.html plugin + documentation\endlink for more details. + + The abstract virtual functions describe the encoder to the + system and the coder is used as retquired in the different + text file formats supported by TQTextStream, and under X11, for the + locale-specific character input and output. + + To add support for another 8-bit encoding to TQt, make a subclass + of TQTextCodec and implement at least the following methods: + + \code + const char* name() const + \endcode + Return the official name for the encoding. + + \code + int mibEnum() const + \endcode + Return the MIB enum for the encoding if it is listed in the + \link http://www.iana.org/assignments/character-sets + IANA character-sets encoding file\endlink. + + If the encoding is multi-byte then it will have "state"; that is, + the interpretation of some bytes will be dependent on some preceding + bytes. For such encodings, you must implement: + + \code + TQTextDecoder* makeDecoder() const + \endcode + Return a TQTextDecoder that remembers incomplete multi-byte sequence + prefixes or other retquired state. + + If the encoding does \e not retquire state, you should implement: + + \code + TQString toUnicode(const char* chars, int len) const + \endcode + Converts \e len characters from \e chars to Unicode. + + The base TQTextCodec class has default implementations of the above + two functions, \e{but they are mutually recursive}, so you must + re-implement at least one of them, or both for improved efficiency. + + For conversion from Unicode to 8-bit encodings, it is rarely necessary + to maintain state. However, two functions similar to the two above + are used for encoding: + + \code + TQTextEncoder* makeEncoder() const + \endcode + Return a TQTextEncoder. + + \code + TQCString fromUnicode(const TQString& uc, int& lenInOut ) const + \endcode + Converts \e lenInOut characters (of type TQChar) from the start of + the string \e uc, returning a TQCString result, and also returning + the \link TQCString::length() length\endlink of the result in + \e lenInOut. + + Again, these are mutually recursive so only one needs to be implemented, + or both if greater efficiency is possible. + + Finally, you must implement: + + \code + int heuristicContentMatch(const char* chars, int len) const + \endcode + Gives a value indicating how likely it is that \e len characters + from \e chars are in the encoding. + + A good model for this function is the + TQWindowsLocalCodec::heuristicContentMatch function found in the TQt + sources. + + A TQTextCodec subclass might have improved performance if you also + re-implement: + + \code + bool canEncode( TQChar ) const + \endcode + Test if a Unicode character can be encoded. + + \code + bool canEncode( const TQString& ) const + \endcode + Test if a string of Unicode characters can be encoded. + + \code + int heuristicNameMatch(const char* hint) const + \endcode + Test if a possibly non-standard name is referring to the codec. + + Codecs can also be created as \link plugins-howto.html plugins\endlink. +*/ + + +/*! + \nonreentrant + + Constructs a TQTextCodec, and gives it the highest precedence. The + TQTextCodec should always be constructed on the heap (i.e. with \c + new). TQt takes ownership and will delete it when the application + terminates. +*/ +TQTextCodec::TQTextCodec() +{ + setup(); + all->insert( all->begin(), this ); +} + + +/*! + \nonreentrant + + Destroys the TQTextCodec. Note that you should not delete codecs + yourself: once created they become TQt's responsibility. +*/ +TQTextCodec::~TQTextCodec() +{ + if ( !destroying_is_ok ) + qWarning("TQTextCodec::~TQTextCodec() called by application"); + if ( all ) + all->remove( this ); +} + + +/*! + Returns a value indicating how likely it is that this decoder is + appropriate for decoding some format that has the given name. The + name is compared with the \a hint. + + A good match returns a positive number around the length of the + string. A bad match is negative. + + The default implementation calls simpleHeuristicNameMatch() with + the name of the codec. +*/ +int TQTextCodec::heuristicNameMatch(const char* hint) const +{ + return simpleHeuristicNameMatch(name(),hint); +} + + +// returns a string containing the letters and numbers from input, +// with a space separating run of a character class. e.g. "iso8859-1" +// becomes "iso 8859 1" +static TQString lettersAndNumbers( const char * input ) +{ + TQString result; + TQChar c; + + while( input && *input ) { + c = *input; + if ( c.isLetter() || c.isNumber() ) + result += c.lower(); + if ( input[1] ) { + // add space at character class transition, except + // transition from upper-case to lower-case letter + TQChar n( input[1] ); + if ( c.isLetter() && n.isLetter() ) { + if ( c == c.lower() && n == n.upper() ) + result += ' '; + } else if ( c.category() != n.category() ) { + result += ' '; + } + } + input++; + } + return result.simplifyWhiteSpace(); +} + +/*! + A simple utility function for heuristicNameMatch(): it does some + very minor character-skipping so that almost-exact matches score + high. \a name is the text we're matching and \a hint is used for + the comparison. +*/ +int TQTextCodec::simpleHeuristicNameMatch(const char* name, const char* hint) +{ + // if they're the same, return a perfect score. + if ( name && hint && *name && *hint && qstricmp( name, hint ) == 0 ) + return qstrlen( hint ); + + // if the letters and numbers are the same, we have an "almost" + // perfect match. + TQString h( lettersAndNumbers( hint ) ); + TQString n( lettersAndNumbers( name ) ); + if ( h == n ) + return qstrlen( hint )-1; + + if ( h.stripWhiteSpace() == n.stripWhiteSpace() ) + return qstrlen( hint )-2; + + // could do some more here, but I don't think it's worth it + + return 0; +} + + +/*! + Returns the TQTextCodec \a i positions from the most recently + inserted codec, or 0 if there is no such TQTextCodec. Thus, + codecForIndex(0) returns the most recently created TQTextCodec. +*/ +TQTextCodec* TQTextCodec::codecForIndex(int i) +{ + setup(); + return (uint)i >= all->count() ? 0 : *all->at(i); +} + + +/*! + Returns the TQTextCodec which matches the \link + TQTextCodec::mibEnum() MIBenum\endlink \a mib. +*/ +TQTextCodec* TQTextCodec::codecForMib(int mib) +{ + setup(); + TQValueList<TQTextCodec*>::ConstIterator i; + TQTextCodec* result=0; + for ( i = all->begin(); i != all->end(); ++i ) { + result = *i; + if ( result->mibEnum()==mib ) + return result; + } + +#if !defined(QT_NO_COMPONENT) && !defined(QT_LITE_COMPONENT) + if ( !result || (result && result->mibEnum() != mib) ) { + TQTextCodec *codec = TQTextCodecFactory::createForMib(mib); + if (codec) + result = codec; + } +#endif // !QT_NO_COMPONENT !QT_LITE_COMPONENT + + return result; +} + + + + + +#ifdef Q_OS_WIN32 +class TQWindowsLocalCodec: public TQTextCodec +{ +public: + TQWindowsLocalCodec(); + ~TQWindowsLocalCodec(); + + TQString toUnicode(const char* chars, int len) const; + TQCString fromUnicode(const TQString& uc, int& lenInOut ) const; + + const char* name() const; + int mibEnum() const; + + int heuristicContentMatch(const char* chars, int len) const; + + TQTextDecoder* makeDecoder() const; +}; + +TQWindowsLocalCodec::TQWindowsLocalCodec() +{ +} + +TQWindowsLocalCodec::~TQWindowsLocalCodec() +{ +} + + +TQString TQWindowsLocalCodec::toUnicode(const char* chars, int len) const +{ + if ( len == 1 && chars ) { // Optimization; avoids allocation + char c[2]; + c[0] = *chars; + c[1] = 0; + return qt_winMB2TQString( c, 2 ); + } + if ( len < 0 ) + return qt_winMB2TQString( chars ); + TQCString s(chars,len+1); + return qt_winMB2TQString(s); +} + +TQCString TQWindowsLocalCodec::fromUnicode(const TQString& uc, int& lenInOut ) const +{ + TQCString r = qt_winTQString2MB( uc, lenInOut ); + lenInOut = r.length(); + return r; +} + + +const char* TQWindowsLocalCodec::name() const +{ + return "System"; +} + +int TQWindowsLocalCodec::mibEnum() const +{ + return 0; +} + + +int TQWindowsLocalCodec::heuristicContentMatch(const char* chars, int len) const +{ + // ### Not a bad default implementation? + TQString t = toUnicode(chars,len); + int l = t.length(); + TQCString mb = fromUnicode(t,l); + int i=0; + while ( i < len ) { + if ( chars[i] == mb[i] ) + i++; + else + break; + } + return i; +} + +class TQWindowsLocalDecoder: public TQTextDecoder +{ + const TQWindowsLocalCodec* codec; + int nbuf; + uchar buf[4]; // hopefully this will be enough +public: + TQWindowsLocalDecoder(const TQWindowsLocalCodec *c) : codec(c), nbuf(0) + { + } + + TQString toUnicode(const char* chars, int len) + { + if (len != 1 && nbuf == 0) + return codec->toUnicode(chars, len); + if (len == 1) { + char c[sizeof buf + 2]; + memcpy(c, buf, nbuf); + c[nbuf] = *chars; + c[nbuf+1] = 0; + + // try to decode this: + TQString retval = codec->toUnicode(c, -1); + if ( retval.isEmpty() ) { + // it didn't return anything; we probably stopped mid-way in a multi-byte + // character + buf[nbuf++] = *chars; + if (nbuf + 1 == sizeof buf) { + qWarning("TQWindowsLocalDecoder: exceeded max internal buffer size"); + nbuf = 0; + } + } + else + nbuf = 0; // decoded successfully + + return retval; + } + + if (len == -1) + len = (int)strlen(chars); + + // Ugh! We need to allocate memory + char *s = new char[nbuf + len + 1]; + memcpy(s, buf, nbuf); + memcpy(s + nbuf, chars, len); + s[nbuf + len] = 0; + + TQString retval = codec->toUnicode(s, -1); + nbuf = 0; + delete[] s; + return retval; + } +}; + +TQTextDecoder* TQWindowsLocalCodec::makeDecoder() const +{ + return new TQWindowsLocalDecoder(this); +} + +#else + +/* locale names mostly copied from XFree86 */ +static const char * const iso8859_2locales[] = { + "croatian", "cs", "cs_CS", "cs_CZ","cz", "cz_CZ", "czech", "hr", + "hr_HR", "hu", "hu_HU", "hungarian", "pl", "pl_PL", "polish", "ro", + "ro_RO", "rumanian", "serbocroatian", "sh", "sh_SP", "sh_YU", "sk", + "sk_SK", "sl", "sl_CS", "sl_SI", "slovak", "slovene", "sr_SP", 0 }; + +static const char * const iso8859_3locales[] = { + "eo", 0 }; + +static const char * const iso8859_4locales[] = { + "ee", "ee_EE", 0 }; + +static const char * const iso8859_5locales[] = { + "mk", "mk_MK", "sp", "sp_YU", 0 }; + +static const char * const cp_1251locales[] = { + "be", "be_BY", "bg", "bg_BG", "bulgarian", 0 }; + +static const char * const pt_154locales[] = { + "ba_RU", "ky", "ky_KG", "kk", "kk_KZ", 0 }; + +static const char * const iso8859_6locales[] = { + "ar_AA", "ar_SA", "arabic", 0 }; + +static const char * const iso8859_7locales[] = { + "el", "el_GR", "greek", 0 }; + +static const char * const iso8859_8locales[] = { + "hebrew", "he", "he_IL", "iw", "iw_IL", 0 }; + +static const char * const iso8859_9locales[] = { + "tr", "tr_TR", "turkish", 0 }; + +static const char * const iso8859_13locales[] = { + "lt", "lt_LT", "lv", "lv_LV", 0 }; + +static const char * const iso8859_15locales[] = { + "et", "et_EE", + // Euro countries + "br_FR", "ca_ES", "de", "de_AT", "de_BE", "de_DE", "de_LU", "en_IE", + "es", "es_ES", "eu_ES", "fi", "fi_FI", "finnish", "fr", "fr_FR", + "fr_BE", "fr_LU", "french", "ga_IE", "gl_ES", "it", "it_IT", "oc_FR", + "nl", "nl_BE", "nl_NL", "pt", "pt_PT", "sv_FI", "wa_BE", + 0 }; + +static const char * const koi8_ulocales[] = { + "uk", "uk_UA", "ru_UA", "ukrainian", 0 }; + +static const char * const tis_620locales[] = { + "th", "th_TH", "thai", 0 }; + +static const char * const tcvnlocales[] = { + "vi", "vi_VN", 0 }; + +static bool try_locale_list( const char * const locale[], const char * lang ) +{ + int i; + for( i=0; locale[i] && *locale[i] && strcmp(locale[i], lang); i++ ) + ; + return locale[i] != 0; +} + +// For the probably_koi8_locales we have to look. the standard says +// these are 8859-5, but almost all Russian users use KOI8-R and +// incorrectly set $LANG to ru_RU. We'll check tolower() to see what +// tolower() thinks ru_RU means. + +// If you read the history, it seems that many Russians blame ISO and +// Perestroika for the confusion. +// +// The real bug is that some programs break if the user specifies +// ru_RU.KOI8-R. + +static const char * const probably_koi8_rlocales[] = { + "ru", "ru_SU", "ru_RU", "russian", 0 }; + +static TQTextCodec * ru_RU_hack( const char * i ) { + TQTextCodec * ru_RU_codec = 0; + + TQCString origlocale = setlocale( LC_CTYPE, i ); + // unicode koi8r latin5 name + // 0x044E 0xC0 0xEE CYRILLIC SMALL LETTER YU + // 0x042E 0xE0 0xCE CYRILLIC CAPITAL LETTER YU + int latin5 = tolower( 0xCE ); + int koi8r = tolower( 0xE0 ); + if ( koi8r == 0xC0 && latin5 != 0xEE ) { + ru_RU_codec = TQTextCodec::codecForName( "KOI8-R" ); + } else if ( koi8r != 0xC0 && latin5 == 0xEE ) { + ru_RU_codec = TQTextCodec::codecForName( "ISO 8859-5" ); + } else { + // something else again... let's assume... *throws dice* + ru_RU_codec = TQTextCodec::codecForName( "KOI8-R" ); + qWarning( "TQTextCodec: using KOI8-R, probe failed (%02x %02x %s)", + koi8r, latin5, i ); + } + setlocale( LC_CTYPE, origlocale.data() ); + + return ru_RU_codec; +} + +#endif + +/*! + Set the codec to \a c; this will be returned by codecForLocale(). + This might be needed for some applications that want to use their + own mechanism for setting the locale. + + \sa codecForLocale() +*/ +void TQTextCodec::setCodecForLocale(TQTextCodec *c) { + localeMapper = c; +} + +/*! Returns a pointer to the codec most suitable for this locale. */ + +TQTextCodec* TQTextCodec::codecForLocale() +{ + if ( localeMapper ) + return localeMapper; + + setup(); + + return localeMapper; +} + + +/*! + Searches all installed TQTextCodec objects and returns the one + which best matches \a name; the match is case-insensitive. Returns + 0 if no codec's heuristicNameMatch() reports a match better than + \a accuracy, or if \a name is a null string. + + \sa heuristicNameMatch() +*/ + +TQTextCodec* TQTextCodec::codecForName( const char* name, int accuracy ) +{ + if ( !name || !*name ) + return 0; + + setup(); + TQValueList<TQTextCodec*>::ConstIterator i; + TQTextCodec* result = 0; + int best = accuracy; + TQTextCodec* cursor; + for ( i = all->begin(); i != all->end(); ++i ) { + cursor = *i; + int s = cursor->heuristicNameMatch( name ); + if ( s > best ) { + best = s; + result = cursor; + } + } + +#if !defined(QT_NO_COMPONENT) && !defined(QT_LITE_COMPONENT) + if ( !result ) + result = TQTextCodecFactory::createForName(name); +#endif // !QT_NO_COMPONENT !QT_LITE_COMPONENT + + return result; +} + + +/*! + Searches all installed TQTextCodec objects, returning the one which + most recognizes the given content. May return 0. + + Note that this is often a poor choice, since character encodings + often use most of the available character sequences, and so only + by linguistic analysis could a true match be made. + + \a chars contains the string to check, and \a len contains the + number of characters in the string to use. + + \sa heuristicContentMatch() +*/ +TQTextCodec* TQTextCodec::codecForContent(const char* chars, int len) +{ + setup(); + TQValueList<TQTextCodec*>::ConstIterator i; + TQTextCodec* result = 0; + int best=0; + TQTextCodec* cursor; + for ( i = all->begin(); i != all->end(); ++i ) { + cursor = *i; + int s = cursor->heuristicContentMatch(chars,len); + if ( s > best ) { + best = s; + result = cursor; + } + } + return result; +} + + +/*! + \fn const char* TQTextCodec::name() const + + TQTextCodec subclasses must reimplement this function. It returns + the name of the encoding supported by the subclass. When choosing + a name for an encoding, consider these points: + \list + \i On X11, heuristicNameMatch( const char * hint ) + is used to test if a the TQTextCodec + can convert between Unicode and the encoding of a font + with encoding \e hint, such as "iso8859-1" for Latin-1 fonts, + "koi8-r" for Russian KOI8 fonts. + The default algorithm of heuristicNameMatch() uses name(). + \i Some applications may use this function to present + encodings to the end user. + \endlist + */ + +/*! + \fn int TQTextCodec::mibEnum() const + + Subclasses of TQTextCodec must reimplement this function. It + returns the MIBenum (see \link + http://www.iana.org/assignments/character-sets the + IANA character-sets encoding file\endlink for more information). + It is important that each TQTextCodec subclass returns the correct + unique value for this function. +*/ + + +/*! + Returns the preferred mime name of the encoding as defined in the + \link http://www.iana.org/assignments/character-sets + IANA character-sets encoding file\endlink. +*/ +const char* TQTextCodec::mimeName() const +{ + return name(); +} + + +/*! + \fn int TQTextCodec::heuristicContentMatch(const char* chars, int len) const + + TQTextCodec subclasses must reimplement this function. It examines + the first \a len bytes of \a chars and returns a value indicating + how likely it is that the string is a prefix of text encoded in + the encoding of the subclass. A negative return value indicates + that the text is detectably not in the encoding (e.g. it contains + characters undefined in the encoding). A return value of 0 + indicates that the text should be decoded with this codec rather + than as ASCII, but there is no particular evidence. The value + should range up to \a len. Thus, most decoders will return -1, 0, + or -\a len. + + The characters are not null terminated. + + \sa codecForContent(). +*/ + + +/*! + Creates a TQTextDecoder which stores enough state to decode chunks + of char* data to create chunks of Unicode data. The default + implementation creates a stateless decoder, which is only + sufficient for the simplest encodings where each byte corresponds + to exactly one Unicode character. + + The caller is responsible for deleting the returned object. +*/ +TQTextDecoder* TQTextCodec::makeDecoder() const +{ + return new TQTextStatelessDecoder(this); +} + + +/*! + Creates a TQTextEncoder which stores enough state to encode chunks + of Unicode data as char* data. The default implementation creates + a stateless encoder, which is only sufficient for the simplest + encodings where each Unicode character corresponds to exactly one + character. + + The caller is responsible for deleting the returned object. +*/ +TQTextEncoder* TQTextCodec::makeEncoder() const +{ + return new TQTextStatelessEncoder(this); +} + + +/*! + TQTextCodec subclasses must reimplement this function or + makeDecoder(). It converts the first \a len characters of \a chars + to Unicode. + + The default implementation makes a decoder with makeDecoder() and + converts the input with that. Note that the default makeDecoder() + implementation makes a decoder that simply calls + this function, hence subclasses \e must reimplement one function or + the other to avoid infinite recursion. +*/ +TQString TQTextCodec::toUnicode(const char* chars, int len) const +{ + if ( chars == 0 ) + return TQString::null; + TQTextDecoder* i = makeDecoder(); + TQString result = i->toUnicode(chars,len); + delete i; + return result; +} + + +/*! + TQTextCodec subclasses must reimplement either this function or + makeEncoder(). It converts the first \a lenInOut characters of \a + uc from Unicode to the encoding of the subclass. If \a lenInOut is + negative or too large, the length of \a uc is used instead. + + Converts \a lenInOut characters (not bytes) from \a uc, producing + a TQCString. \a lenInOut will be set to the \link + TQCString::length() length\endlink of the result (in bytes). + + The default implementation makes an encoder with makeEncoder() and + converts the input with that. Note that the default makeEncoder() + implementation makes an encoder that simply calls this function, + hence subclasses \e must reimplement one function or the other to + avoid infinite recursion. +*/ + +TQCString TQTextCodec::fromUnicode(const TQString& uc, int& lenInOut) const +{ + TQTextEncoder* i = makeEncoder(); + TQCString result = i->fromUnicode(uc, lenInOut); + delete i; + return result; +} + +/*! + \overload + \internal +*/ +TQByteArray TQTextCodec::fromUnicode( const TQString &str, int pos, int len ) const +{ + TQByteArray a; + if( len < 0 ) + len = str.length() - pos; + a = fromUnicode( str.mid(pos, len) ); + if( a.size() > 0 && a[(int)a.size() - 1] == '\0' ) + a.resize( a.size() - 1 ); + return a; +} + +/*! + \overload + + \a uc is the unicode source string. +*/ +TQCString TQTextCodec::fromUnicode(const TQString& uc) const +{ + int l = uc.length(); + return fromUnicode(uc,l); +} + +/*! + \overload + + \a a contains the source characters; \a len contains the number of + characters in \a a to use. +*/ +TQString TQTextCodec::toUnicode(const TQByteArray& a, int len) const +{ + int l = a.size(); + l = TQMIN( l, len ); + return toUnicode( a.data(), l ); +} + +/*! + \overload + + \a a contains the source characters. +*/ +TQString TQTextCodec::toUnicode(const TQByteArray& a) const +{ + int l = a.size(); + return toUnicode( a.data(), l ); +} + +/*! + \overload + + \a a contains the source characters; \a len contains the number of + characters in \a a to use. +*/ +TQString TQTextCodec::toUnicode(const TQCString& a, int len) const +{ + int l = a.length(); + l = TQMIN( l, len ); + return toUnicode( a.data(), l ); +} + +/*! + \overload + + \a a contains the source characters. +*/ +TQString TQTextCodec::toUnicode(const TQCString& a) const +{ + int l = a.length(); + return toUnicode( a.data(), l ); +} + +/*! + \overload + + \a chars contains the source characters. +*/ +TQString TQTextCodec::toUnicode(const char* chars) const +{ + return toUnicode(chars,qstrlen(chars)); +} + +/*! + \internal +*/ +unsigned short TQTextCodec::characterFromUnicode(const TQString &str, int pos) const +{ + TQCString result = TQTextCodec::fromUnicode(TQString(str[pos])); + uchar *ch = (uchar *) result.data(); + ushort retval = 0; + if (result.size() > 2) { + retval = (ushort) *ch << 8; + ch++; + } + return retval + *ch; +} + +/*! + Returns TRUE if the Unicode character \a ch can be fully encoded + with this codec; otherwise returns FALSE. The default + implementation tests if the result of toUnicode(fromUnicode(ch)) + is the original \a ch. Subclasses may be able to improve the + efficiency. +*/ +bool TQTextCodec::canEncode( TQChar ch ) const +{ + return toUnicode(fromUnicode(ch)) == ch; +} + +/*! + \overload + + \a s contains the string being tested for encode-ability. +*/ +bool TQTextCodec::canEncode( const TQString& s ) const +{ + if ( s.isEmpty() ) + return TRUE; + return toUnicode(fromUnicode(s)) == s; +} + + + +/*! + \class TQTextEncoder qtextcodec.h + \brief The TQTextEncoder class provides a state-based encoder. + \reentrant + \ingroup i18n + + The encoder converts Unicode into another format, remembering any + state that is retquired between calls. + + \sa TQTextCodec::makeEncoder() +*/ + +/*! + Destroys the encoder. +*/ +TQTextEncoder::~TQTextEncoder() +{ +} + +/*! + \fn TQCString TQTextEncoder::fromUnicode(const TQString& uc, int& lenInOut) + + Converts \a lenInOut characters (not bytes) from \a uc, producing + a TQCString. \a lenInOut will be set to the \link + TQCString::length() length\endlink of the result (in bytes). + + The encoder is free to record state to use when subsequent calls + are made to this function (for example, it might change modes with + escape sequences if needed during the encoding of one string, then + assume that mode applies when a subsequent call begins). +*/ + +/*! + \class TQTextDecoder qtextcodec.h + \brief The TQTextDecoder class provides a state-based decoder. + \reentrant + \ingroup i18n + + The decoder converts a text format into Unicode, remembering any + state that is retquired between calls. + + \sa TQTextCodec::makeEncoder() +*/ + + +/*! + Destroys the decoder. +*/ +TQTextDecoder::~TQTextDecoder() +{ +} + +/*! + \fn TQString TQTextDecoder::toUnicode(const char* chars, int len) + + Converts the first \a len bytes in \a chars to Unicode, returning + the result. + + If not all characters are used (e.g. if only part of a multi-byte + encoding is at the end of the characters), the decoder remembers + enough state to continue with the next call to this function. +*/ + +#define CHAINED 0xffff + +struct TQMultiByteUnicodeTable { + // If multiByte, ignore unicode and index into multiByte + // with the next character. + TQMultiByteUnicodeTable() : unicode(0xfffd), multiByte(0) { } + + ~TQMultiByteUnicodeTable() + { + if ( multiByte ) + delete [] multiByte; + } + + ushort unicode; + TQMultiByteUnicodeTable* multiByte; +}; + +static int getByte(char* &cursor) +{ + int byte = 0; + if ( *cursor ) { + if ( cursor[1] == 'x' ) + byte = strtol(cursor+2,&cursor,16); + else if ( cursor[1] == 'd' ) + byte = strtol(cursor+2,&cursor,10); + else + byte = strtol(cursor+2,&cursor,8); + } + return byte&0xff; +} + +class TQTextCodecFromIOD; + +class TQTextCodecFromIODDecoder : public TQTextDecoder { + const TQTextCodecFromIOD* codec; + TQMultiByteUnicodeTable* mb; +public: + TQTextCodecFromIODDecoder(const TQTextCodecFromIOD* c); + TQString toUnicode(const char* chars, int len); +}; + +class TQTextCodecFromIOD : public TQTextCodec { + friend class TQTextCodecFromIODDecoder; + + TQCString n; + + // If from_unicode_page[row()][cell()] is 0 and from_unicode_page_multiByte, + // use from_unicode_page_multiByte[row()][cell()] as string. + char** from_unicode_page; + char*** from_unicode_page_multiByte; + char unkn; + + // Only one of these is used + ushort* to_unicode; + TQMultiByteUnicodeTable* to_unicode_multiByte; + int max_bytes_per_char; + TQStrList aliases; + + bool stateless() const { return !to_unicode_multiByte; } + +public: + TQTextCodecFromIOD(TQIODevice* iod) + { + from_unicode_page = 0; + to_unicode_multiByte = 0; + to_unicode = 0; + from_unicode_page_multiByte = 0; + max_bytes_per_char = 1; + + const int maxlen=100; + char line[maxlen]; + char esc='\\'; + char comm='%'; + bool incmap = FALSE; + while (iod->readLine(line,maxlen) > 0) { + if (0==qstrnicmp(line,"<code_set_name>",15)) + n = line+15; + else if (0==qstrnicmp(line,"<escape_char> ",14)) + esc = line[14]; + else if (0==qstrnicmp(line,"<comment_char> ",15)) + comm = line[15]; + else if (line[0]==comm && 0==qstrnicmp(line+1," alias ",7)) { + aliases.append(line+8); + } else if (0==qstrnicmp(line,"CHARMAP",7)) { + if (!from_unicode_page) { + from_unicode_page = new char*[256]; + for (int i=0; i<256; i++) + from_unicode_page[i]=0; + } + if (!to_unicode) { + to_unicode = new ushort[256]; + } + incmap = TRUE; + } else if (0==qstrnicmp(line,"END CHARMAP",11)) + break; + else if (incmap) { + char* cursor = line; + int byte=-1,unicode=-1; + ushort* mb_unicode=0; + const int maxmb=8; // more -> we'll need to improve datastructures + char mb[maxmb+1]; + int nmb=0; + + while (*cursor) { + if (cursor[0]=='<' && cursor[1]=='U' && + cursor[2]>='0' && cursor[2]<='9' && + cursor[3]>='0' && cursor[3]<='9') { + + unicode = strtol(cursor+2,&cursor,16); + + } else if (*cursor==esc) { + + byte = getByte(cursor); + + if ( *cursor == esc ) { + if ( !to_unicode_multiByte ) { + to_unicode_multiByte = + new TQMultiByteUnicodeTable[256]; + for (int i=0; i<256; i++) { + to_unicode_multiByte[i].unicode = + to_unicode[i]; + to_unicode_multiByte[i].multiByte = 0; + } + delete [] to_unicode; + to_unicode = 0; + } + TQMultiByteUnicodeTable* mbut = + to_unicode_multiByte+byte; + mb[nmb++] = byte; + while ( nmb < maxmb && *cursor == esc ) { + // Always at least once + + mbut->unicode = CHAINED; + byte = getByte(cursor); + mb[nmb++] = byte; + if (!mbut->multiByte) { + mbut->multiByte = + new TQMultiByteUnicodeTable[256]; + } + mbut = mbut->multiByte+byte; + mb_unicode = & mbut->unicode; + } + + if ( nmb > max_bytes_per_char ) + max_bytes_per_char = nmb; + } + } else { + cursor++; + } + } + + if (unicode >= 0 && unicode <= 0xffff) + { + TQChar ch((ushort)unicode); + if (!from_unicode_page[ch.row()]) { + from_unicode_page[ch.row()] = new char[256]; + for (int i=0; i<256; i++) + from_unicode_page[ch.row()][i]=0; + } + if ( mb_unicode ) { + from_unicode_page[ch.row()][ch.cell()] = 0; + if (!from_unicode_page_multiByte) { + from_unicode_page_multiByte = new char**[256]; + for (int i=0; i<256; i++) + from_unicode_page_multiByte[i]=0; + } + if (!from_unicode_page_multiByte[ch.row()]) { + from_unicode_page_multiByte[ch.row()] = new char*[256]; + for (int i=0; i<256; i++) + from_unicode_page_multiByte[ch.row()][i] = 0; + } + mb[nmb++] = 0; + from_unicode_page_multiByte[ch.row()][ch.cell()] + = qstrdup(mb); + *mb_unicode = unicode; + } else { + from_unicode_page[ch.row()][ch.cell()] = (char)byte; + if ( to_unicode ) + to_unicode[byte] = unicode; + else + to_unicode_multiByte[byte].unicode = unicode; + } + } else { + } + } + } + n = n.stripWhiteSpace(); + + unkn = '?'; // ##### Might be a bad choice. + } + + ~TQTextCodecFromIOD() + { + if ( from_unicode_page ) { + for (int i=0; i<256; i++) + if (from_unicode_page[i]) + delete [] from_unicode_page[i]; + } + if ( from_unicode_page_multiByte ) { + for (int i=0; i<256; i++) + if (from_unicode_page_multiByte[i]) + for (int j=0; j<256; j++) + if (from_unicode_page_multiByte[i][j]) + delete [] from_unicode_page_multiByte[i][j]; + } + if ( to_unicode ) + delete [] to_unicode; + if ( to_unicode_multiByte ) + delete [] to_unicode_multiByte; + } + + bool ok() const + { + return !!from_unicode_page; + } + + TQTextDecoder* makeDecoder() const + { + if ( stateless() ) + return TQTextCodec::makeDecoder(); + else + return new TQTextCodecFromIODDecoder(this); + } + + const char* name() const + { + return n; + } + + int mibEnum() const + { + return 0; // #### Unknown. + } + + int heuristicContentMatch(const char*, int) const + { + return 0; + } + + int heuristicNameMatch(const char* hint) const + { + int bestr = TQTextCodec::heuristicNameMatch(hint); + TQStrListIterator it(aliases); + char* a; + while ((a=it.current())) { + ++it; + int r = simpleHeuristicNameMatch(a,hint); + if (r > bestr) + bestr = r; + } + return bestr; + } + + TQString toUnicode(const char* chars, int len) const + { + const uchar* uchars = (const uchar*)chars; + TQString result; + TQMultiByteUnicodeTable* multiByte=to_unicode_multiByte; + if ( multiByte ) { + while (len--) { + TQMultiByteUnicodeTable& mb = multiByte[*uchars]; + if ( mb.multiByte ) { + // Chained multi-byte + multiByte = mb.multiByte; + } else { + result += TQChar(mb.unicode); + multiByte=to_unicode_multiByte; + } + uchars++; + } + } else { + while (len--) + result += TQChar(to_unicode[*uchars++]); + } + return result; + } + +#if !defined(Q_NO_USING_KEYWORD) + using TQTextCodec::fromUnicode; +#endif + TQCString fromUnicode(const TQString& uc, int& lenInOut) const + { + if (lenInOut > (int)uc.length()) + lenInOut = uc.length(); + int rlen = lenInOut*max_bytes_per_char; + TQCString rstr(rlen+1); + char* cursor = rstr.data(); + char* s=0; + int l = lenInOut; + int lout = 0; + for (int i=0; i<l; i++) { + TQChar ch = uc[i]; + if ( ch == TQChar::null ) { + // special + *cursor++ = 0; + } else if ( from_unicode_page[ch.row()] && + from_unicode_page[ch.row()][ch.cell()] ) + { + *cursor++ = from_unicode_page[ch.row()][ch.cell()]; + lout++; + } else if ( from_unicode_page_multiByte && + from_unicode_page_multiByte[ch.row()] && + (s=from_unicode_page_multiByte[ch.row()][ch.cell()]) ) + { + while (*s) { + *cursor++ = *s++; + lout++; + } + } else { + *cursor++ = unkn; + lout++; + } + } + *cursor = 0; + lenInOut = lout; + return rstr; + } +}; + +TQTextCodecFromIODDecoder::TQTextCodecFromIODDecoder(const TQTextCodecFromIOD* c) : + codec(c) +{ + mb = codec->to_unicode_multiByte; +} + +TQString TQTextCodecFromIODDecoder::toUnicode(const char* chars, int len) +{ + const uchar* uchars = (const uchar*)chars; + TQString result; + while (len--) { + TQMultiByteUnicodeTable& t = mb[*uchars]; + if ( t.multiByte ) { + // Chained multi-byte + mb = t.multiByte; + } else { + if ( t.unicode ) + result += TQChar(t.unicode); + mb=codec->to_unicode_multiByte; + } + uchars++; + } + return result; +} + +#ifndef QT_NO_CODECS +// Cannot use <pre> or \code +/*! + Reads a POSIX2 charmap definition from \a iod. + The parser recognizes the following lines: + +<font name="sans"> + <code_set_name> <i>name</i></br> + <escape_char> <i>character</i></br> + % alias <i>alias</i></br> + CHARMAP</br> + <<i>token</i>> /x<i>hexbyte</i> <U<i>unicode</i>> ...</br> + <<i>token</i>> /d<i>decbyte</i> <U<i>unicode</i>> ...</br> + <<i>token</i>> /<i>octbyte</i> <U<i>unicode</i>> ...</br> + <<i>token</i>> /<i>any</i>/<i>any</i>... <U<i>unicode</i>> ...</br> + END CHARMAP</br> +</font> + + The resulting TQTextCodec is returned (and also added to the global + list of codecs). The name() of the result is taken from the + code_set_name. + + Note that a codec constructed in this way uses much more memory + and is slower than a hand-written TQTextCodec subclass, since + tables in code are kept in memory shared by all TQt applications. + + \sa loadCharmapFile() +*/ +TQTextCodec* TQTextCodec::loadCharmap(TQIODevice* iod) +{ + TQTextCodecFromIOD* r = new TQTextCodecFromIOD(iod); + if ( !r->ok() ) { + delete r; + r = 0; + } + return r; +} + +/*! + A convenience function for loadCharmap() that loads the charmap + definition from the file \a filename. +*/ +TQTextCodec* TQTextCodec::loadCharmapFile(TQString filename) +{ + TQFile f(filename); + if (f.open(IO_ReadOnly)) { + TQTextCodecFromIOD* r = new TQTextCodecFromIOD(&f); + if ( !r->ok() ) + delete r; + else + return r; + } + return 0; +} + +#endif //QT_NO_CODECS + +/*! + Returns a string representing the current language and + sublanguage, e.g. "pt" for Portuguese, or "pt_br" for Portuguese/Brazil. +*/ + +const char* TQTextCodec::locale() +{ + return TQLocalePrivate::systemLocaleName(); +} + +#ifndef QT_NO_CODECS + +class TQSimpleTextCodec: public TQTextCodec +{ +public: + TQSimpleTextCodec( int ); + ~TQSimpleTextCodec(); + + TQString toUnicode(const char* chars, int len) const; +#if !defined(Q_NO_USING_KEYWORD) + using TQTextCodec::fromUnicode; +#endif + TQCString fromUnicode(const TQString& uc, int& lenInOut ) const; + unsigned short characterFromUnicode(const TQString &str, int pos) const; + + const char* name() const; + const char* mimeName() const; + int mibEnum() const; + + int heuristicContentMatch(const char* chars, int len) const; + + int heuristicNameMatch(const char* hint) const; +#if !defined(Q_NO_USING_KEYWORD) + using TQTextCodec::canEncode; +#endif + bool canEncode( TQChar ch ) const; + + void fromUnicode( const TQChar *in, unsigned short *out, int length ) const; + +private: + void buildReverseMap(); + + int forwardIndex; +#ifndef Q_WS_QWS + TQMemArray<unsigned char> *reverseMap; +#endif +}; + +#ifdef Q_WS_QWS +static const TQSimpleTextCodec * reverseOwner = 0; +static TQMemArray<unsigned char> * reverseMap = 0; +#endif + +#define LAST_MIB 2004 + +static const struct { + const char *mime; + const char * cs; + int mib; + Q_UINT16 values[128]; +} unicodevalues[] = { + // from RFC 1489, ftp://ftp.isi.edu/in-notes/rfc1489.txt + { "KOI8-R", "KOI8-R", 2084, + { 0x2500, 0x2502, 0x250C, 0x2510, 0x2514, 0x2518, 0x251C, 0x2524, + 0x252C, 0x2534, 0x253C, 0x2580, 0x2584, 0x2588, 0x258C, 0x2590, + 0x2591, 0x2592, 0x2593, 0x2320, 0x25A0, 0x2219/**/, 0x221A, 0x2248, + 0x2264, 0x2265, 0x00A0, 0x2321, 0x00B0, 0x00B2, 0x00B7, 0x00F7, + 0x2550, 0x2551, 0x2552, 0x0451, 0x2553, 0x2554, 0x2555, 0x2556, + 0x2557, 0x2558, 0x2559, 0x255A, 0x255B, 0x255C, 0x255D, 0x255E, + 0x255F, 0x2560, 0x2561, 0x0401, 0x2562, 0x2563, 0x2564, 0x2565, + 0x2566, 0x2567, 0x2568, 0x2569, 0x256A, 0x256B, 0x256C, 0x00A9, + 0x044E, 0x0430, 0x0431, 0x0446, 0x0434, 0x0435, 0x0444, 0x0433, + 0x0445, 0x0438, 0x0439, 0x043A, 0x043B, 0x043C, 0x043D, 0x043E, + 0x043F, 0x044F, 0x0440, 0x0441, 0x0442, 0x0443, 0x0436, 0x0432, + 0x044C, 0x044B, 0x0437, 0x0448, 0x044D, 0x0449, 0x0447, 0x044A, + 0x042E, 0x0410, 0x0411, 0x0426, 0x0414, 0x0415, 0x0424, 0x0413, + 0x0425, 0x0418, 0x0419, 0x041A, 0x041B, 0x041C, 0x041D, 0x041E, + 0x041F, 0x042F, 0x0420, 0x0421, 0x0422, 0x0423, 0x0416, 0x0412, + 0x042C, 0x042B, 0x0417, 0x0428, 0x042D, 0x0429, 0x0427, 0x042A } }, + // /**/ - The BULLET OPERATOR is confused. Some people think + // it should be 0x2022 (BULLET). + + // from RFC 2319, ftp://ftp.isi.edu/in-notes/rfc2319.txt + { "KOI8-U", "KOI8-U", 2088, + { 0x2500, 0x2502, 0x250C, 0x2510, 0x2514, 0x2518, 0x251C, 0x2524, + 0x252C, 0x2534, 0x253C, 0x2580, 0x2584, 0x2588, 0x258C, 0x2590, + 0x2591, 0x2592, 0x2593, 0x2320, 0x25A0, 0x2219, 0x221A, 0x2248, + 0x2264, 0x2265, 0x00A0, 0x2321, 0x00B0, 0x00B2, 0x00B7, 0x00F7, + 0x2550, 0x2551, 0x2552, 0x0451, 0x0454, 0x2554, 0x0456, 0x0457, + 0x2557, 0x2558, 0x2559, 0x255A, 0x255B, 0x0491, 0x255D, 0x255E, + 0x255F, 0x2560, 0x2561, 0x0401, 0x0404, 0x2563, 0x0406, 0x0407, + 0x2566, 0x2567, 0x2568, 0x2569, 0x256A, 0x0490, 0x256C, 0x00A9, + 0x044E, 0x0430, 0x0431, 0x0446, 0x0434, 0x0435, 0x0444, 0x0433, + 0x0445, 0x0438, 0x0439, 0x043A, 0x043B, 0x043C, 0x043D, 0x043E, + 0x043F, 0x044F, 0x0440, 0x0441, 0x0442, 0x0443, 0x0436, 0x0432, + 0x044C, 0x044B, 0x0437, 0x0448, 0x044D, 0x0449, 0x0447, 0x044A, + 0x042E, 0x0410, 0x0411, 0x0426, 0x0414, 0x0415, 0x0424, 0x0413, + 0x0425, 0x0418, 0x0419, 0x041A, 0x041B, 0x041C, 0x041D, 0x041E, + 0x041F, 0x042F, 0x0420, 0x0421, 0x0422, 0x0423, 0x0416, 0x0412, + 0x042C, 0x042B, 0x0417, 0x0428, 0x042D, 0x0429, 0x0427, 0x042A } }, + + // next bits generated from tables on the Unicode 2.0 CD. we can + // use these tables since this is part of the transition to using + // unicode everywhere in qt. + + // $ for A in 8 9 A B C D E F ; do for B in 0 1 2 3 4 5 6 7 8 9 A B C D E F ; do echo 0x${A}${B} 0xFFFD ; done ; done > /tmp/digits ; for a in 8859-* ; do ( awk '/^0x[89ABCDEF]/{ print $1, $2 }' < $a ; cat /tmp/digits ) | sort | uniq -w4 | cut -c6- | paste '-d ' - - - - - - - - | sed -e 's/ /, /g' -e 's/$/,/' -e '$ s/,$/} },/' -e '1 s/^/{ /' > ~/tmp/$a ; done + + // then I inserted the files manually. + { "ISO-8859-2", "ISO 8859-2", 5, + { 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, + 0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F, + 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, + 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F, + 0x00A0, 0x0104, 0x02D8, 0x0141, 0x00A4, 0x013D, 0x015A, 0x00A7, + 0x00A8, 0x0160, 0x015E, 0x0164, 0x0179, 0x00AD, 0x017D, 0x017B, + 0x00B0, 0x0105, 0x02DB, 0x0142, 0x00B4, 0x013E, 0x015B, 0x02C7, + 0x00B8, 0x0161, 0x015F, 0x0165, 0x017A, 0x02DD, 0x017E, 0x017C, + 0x0154, 0x00C1, 0x00C2, 0x0102, 0x00C4, 0x0139, 0x0106, 0x00C7, + 0x010C, 0x00C9, 0x0118, 0x00CB, 0x011A, 0x00CD, 0x00CE, 0x010E, + 0x0110, 0x0143, 0x0147, 0x00D3, 0x00D4, 0x0150, 0x00D6, 0x00D7, + 0x0158, 0x016E, 0x00DA, 0x0170, 0x00DC, 0x00DD, 0x0162, 0x00DF, + 0x0155, 0x00E1, 0x00E2, 0x0103, 0x00E4, 0x013A, 0x0107, 0x00E7, + 0x010D, 0x00E9, 0x0119, 0x00EB, 0x011B, 0x00ED, 0x00EE, 0x010F, + 0x0111, 0x0144, 0x0148, 0x00F3, 0x00F4, 0x0151, 0x00F6, 0x00F7, + 0x0159, 0x016F, 0x00FA, 0x0171, 0x00FC, 0x00FD, 0x0163, 0x02D9} }, + { "ISO-8859-3", "ISO 8859-3", 6, + { 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, + 0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F, + 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, + 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F, + 0x00A0, 0x0126, 0x02D8, 0x00A3, 0x00A4, 0xFFFD, 0x0124, 0x00A7, + 0x00A8, 0x0130, 0x015E, 0x011E, 0x0134, 0x00AD, 0xFFFD, 0x017B, + 0x00B0, 0x0127, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x0125, 0x00B7, + 0x00B8, 0x0131, 0x015F, 0x011F, 0x0135, 0x00BD, 0xFFFD, 0x017C, + 0x00C0, 0x00C1, 0x00C2, 0xFFFD, 0x00C4, 0x010A, 0x0108, 0x00C7, + 0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF, + 0xFFFD, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x0120, 0x00D6, 0x00D7, + 0x011C, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x016C, 0x015C, 0x00DF, + 0x00E0, 0x00E1, 0x00E2, 0xFFFD, 0x00E4, 0x010B, 0x0109, 0x00E7, + 0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF, + 0xFFFD, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x0121, 0x00F6, 0x00F7, + 0x011D, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x016D, 0x015D, 0x02D9} }, + { "ISO-8859-4", "ISO 8859-4", 7, + { 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, + 0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F, + 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, + 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F, + 0x00A0, 0x0104, 0x0138, 0x0156, 0x00A4, 0x0128, 0x013B, 0x00A7, + 0x00A8, 0x0160, 0x0112, 0x0122, 0x0166, 0x00AD, 0x017D, 0x00AF, + 0x00B0, 0x0105, 0x02DB, 0x0157, 0x00B4, 0x0129, 0x013C, 0x02C7, + 0x00B8, 0x0161, 0x0113, 0x0123, 0x0167, 0x014A, 0x017E, 0x014B, + 0x0100, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x012E, + 0x010C, 0x00C9, 0x0118, 0x00CB, 0x0116, 0x00CD, 0x00CE, 0x012A, + 0x0110, 0x0145, 0x014C, 0x0136, 0x00D4, 0x00D5, 0x00D6, 0x00D7, + 0x00D8, 0x0172, 0x00DA, 0x00DB, 0x00DC, 0x0168, 0x016A, 0x00DF, + 0x0101, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x012F, + 0x010D, 0x00E9, 0x0119, 0x00EB, 0x0117, 0x00ED, 0x00EE, 0x012B, + 0x0111, 0x0146, 0x014D, 0x0137, 0x00F4, 0x00F5, 0x00F6, 0x00F7, + 0x00F8, 0x0173, 0x00FA, 0x00FB, 0x00FC, 0x0169, 0x016B, 0x02D9} }, + { "ISO-8859-5", "ISO 8859-5", 8, + { 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, + 0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F, + 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, + 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F, + 0x00A0, 0x0401, 0x0402, 0x0403, 0x0404, 0x0405, 0x0406, 0x0407, + 0x0408, 0x0409, 0x040A, 0x040B, 0x040C, 0x00AD, 0x040E, 0x040F, + 0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417, + 0x0418, 0x0419, 0x041A, 0x041B, 0x041C, 0x041D, 0x041E, 0x041F, + 0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427, + 0x0428, 0x0429, 0x042A, 0x042B, 0x042C, 0x042D, 0x042E, 0x042F, + 0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437, + 0x0438, 0x0439, 0x043A, 0x043B, 0x043C, 0x043D, 0x043E, 0x043F, + 0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447, + 0x0448, 0x0449, 0x044A, 0x044B, 0x044C, 0x044D, 0x044E, 0x044F, + 0x2116, 0x0451, 0x0452, 0x0453, 0x0454, 0x0455, 0x0456, 0x0457, + 0x0458, 0x0459, 0x045A, 0x045B, 0x045C, 0x00A7, 0x045E, 0x045F} }, + { "ISO-8859-6", "ISO 8859-6", 82, + { 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, + 0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F, + 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, + 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F, + 0x00A0, 0xFFFD, 0xFFFD, 0xFFFD, 0x00A4, 0xFFFD, 0xFFFD, 0xFFFD, + 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0x060C, 0x00AD, 0xFFFD, 0xFFFD, + 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, + 0xFFFD, 0xFFFD, 0xFFFD, 0x061B, 0xFFFD, 0xFFFD, 0xFFFD, 0x061F, + 0xFFFD, 0x0621, 0x0622, 0x0623, 0x0624, 0x0625, 0x0626, 0x0627, + 0x0628, 0x0629, 0x062A, 0x062B, 0x062C, 0x062D, 0x062E, 0x062F, + 0x0630, 0x0631, 0x0632, 0x0633, 0x0634, 0x0635, 0x0636, 0x0637, + 0x0638, 0x0639, 0x063A, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, + 0x0640, 0x0641, 0x0642, 0x0643, 0x0644, 0x0645, 0x0646, 0x0647, + 0x0648, 0x0649, 0x064A, 0x064B, 0x064C, 0x064D, 0x064E, 0x064F, + 0x0650, 0x0651, 0x0652, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, + 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD} }, + { "ISO-8859-7", "ISO 8859-7", 10, + { 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, + 0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F, + 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, + 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F, + 0x00A0, 0x2018, 0x2019, 0x00A3, 0xFFFD, 0xFFFD, 0x00A6, 0x00A7, + 0x00A8, 0x00A9, 0xFFFD, 0x00AB, 0x00AC, 0x00AD, 0xFFFD, 0x2015, + 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x0384, 0x0385, 0x0386, 0x00B7, + 0x0388, 0x0389, 0x038A, 0x00BB, 0x038C, 0x00BD, 0x038E, 0x038F, + 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, + 0x0398, 0x0399, 0x039A, 0x039B, 0x039C, 0x039D, 0x039E, 0x039F, + 0x03A0, 0x03A1, 0xFFFD, 0x03A3, 0x03A4, 0x03A5, 0x03A6, 0x03A7, + 0x03A8, 0x03A9, 0x03AA, 0x03AB, 0x03AC, 0x03AD, 0x03AE, 0x03AF, + 0x03B0, 0x03B1, 0x03B2, 0x03B3, 0x03B4, 0x03B5, 0x03B6, 0x03B7, + 0x03B8, 0x03B9, 0x03BA, 0x03BB, 0x03BC, 0x03BD, 0x03BE, 0x03BF, + 0x03C0, 0x03C1, 0x03C2, 0x03C3, 0x03C4, 0x03C5, 0x03C6, 0x03C7, + 0x03C8, 0x03C9, 0x03CA, 0x03CB, 0x03CC, 0x03CD, 0x03CE, 0xFFFD} }, + { "ISO-8859-8-I", "ISO 8859-8-I", 85, + { 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, + 0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F, + 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, + 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F, + 0x00A0, 0xFFFD, 0x00A2, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7, + 0x00A8, 0x00A9, 0x00D7, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x203E, + 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7, + 0x00B8, 0x00B9, 0x00F7, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0xFFFD, + 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, + 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, + 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, + 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0x2017, + 0x05D0, 0x05D1, 0x05D2, 0x05D3, 0x05D4, 0x05D5, 0x05D6, 0x05D7, + 0x05D8, 0x05D9, 0x05DA, 0x05DB, 0x05DC, 0x05DD, 0x05DE, 0x05DF, + 0x05E0, 0x05E1, 0x05E2, 0x05E3, 0x05E4, 0x05E5, 0x05E6, 0x05E7, + 0x05E8, 0x05E9, 0x05EA, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD} }, + { "ISO-8859-9", "ISO 8859-9", 12, + { 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, + 0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F, + 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, + 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F, + 0x00A0, 0x00A1, 0x00A2, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7, + 0x00A8, 0x00A9, 0x00AA, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF, + 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7, + 0x00B8, 0x00B9, 0x00BA, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00BF, + 0x00C0, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x00C7, + 0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF, + 0x011E, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x00D7, + 0x00D8, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x0130, 0x015E, 0x00DF, + 0x00E0, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x00E7, + 0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF, + 0x011F, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x00F7, + 0x00F8, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x0131, 0x015F, 0x00FF} }, + { "ISO-8859-10", "ISO 8859-10", 13, + { 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, + 0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F, + 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, + 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F, + 0x00A0, 0x0104, 0x0112, 0x0122, 0x012A, 0x0128, 0x0136, 0x00A7, + 0x013B, 0x0110, 0x0160, 0x0166, 0x017D, 0x00AD, 0x016A, 0x014A, + 0x00B0, 0x0105, 0x0113, 0x0123, 0x012B, 0x0129, 0x0137, 0x00B7, + 0x013C, 0x0111, 0x0161, 0x0167, 0x017E, 0x2015, 0x016B, 0x014B, + 0x0100, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x012E, + 0x010C, 0x00C9, 0x0118, 0x00CB, 0x0116, 0x00CD, 0x00CE, 0x00CF, + 0x00D0, 0x0145, 0x014C, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x0168, + 0x00D8, 0x0172, 0x00DA, 0x00DB, 0x00DC, 0x00DD, 0x00DE, 0x00DF, + 0x0101, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x012F, + 0x010D, 0x00E9, 0x0119, 0x00EB, 0x0117, 0x00ED, 0x00EE, 0x00EF, + 0x00F0, 0x0146, 0x014D, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x0169, + 0x00F8, 0x0173, 0x00FA, 0x00FB, 0x00FC, 0x00FD, 0x00FE, 0x0138} }, + { "ISO-8859-13", "ISO 8859-13", 109, + { 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, + 0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F, + 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, + 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F, + 0x00A0, 0x201D, 0x00A2, 0x00A3, 0x00A4, 0x201E, 0x00A6, 0x00A7, + 0x00D8, 0x00A9, 0x0156, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00C6, + 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x201C, 0x00B5, 0x00B6, 0x00B7, + 0x00F8, 0x00B9, 0x0157, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00E6, + 0x0104, 0x012E, 0x0100, 0x0106, 0x00C4, 0x00C5, 0x0118, 0x0112, + 0x010C, 0x00C9, 0x0179, 0x0116, 0x0122, 0x0136, 0x012A, 0x013B, + 0x0160, 0x0143, 0x0145, 0x00D3, 0x014C, 0x00D5, 0x00D6, 0x00D7, + 0x0172, 0x0141, 0x015A, 0x016A, 0x00DC, 0x017B, 0x017D, 0x00DF, + 0x0105, 0x012F, 0x0101, 0x0107, 0x00E4, 0x00E5, 0x0119, 0x0113, + 0x010D, 0x00E9, 0x017A, 0x0117, 0x0123, 0x0137, 0x012B, 0x013C, + 0x0161, 0x0144, 0x0146, 0x00F3, 0x014D, 0x00F5, 0x00F6, 0x00F7, + 0x0173, 0x0142, 0x015B, 0x016B, 0x00FC, 0x017C, 0x017E, 0x2019} }, + { "ISO-8859-14", "ISO 8859-14", 110, + { 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, + 0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F, + 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, + 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F, + 0x00A0, 0x1E02, 0x1E03, 0x00A3, 0x010A, 0x010B, 0x1E0A, 0x00A7, + 0x1E80, 0x00A9, 0x1E82, 0x1E0B, 0x1EF2, 0x00AD, 0x00AE, 0x0178, + 0x1E1E, 0x1E1F, 0x0120, 0x0121, 0x1E40, 0x1E41, 0x00B6, 0x1E56, + 0x1E81, 0x1E57, 0x1E83, 0x1E60, 0x1EF3, 0x1E84, 0x1E85, 0x1E61, + 0x00C0, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x00C7, + 0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF, + 0x0174, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x1E6A, + 0x00D8, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x00DD, 0x0176, 0x00DF, + 0x00E0, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x00E7, + 0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF, + 0x0175, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x1E6B, + 0x00F8, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x00FD, 0x0177, 0x00FF} }, + { "ISO-8859-16", "ISO 8859-16", 112, + { 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, + 0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F, + 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, + 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F, + 0x00A0, 0x0104, 0x0105, 0x0141, 0x20AC, 0x201E, 0x0160, 0x00A7, + 0x0161, 0x00A9, 0x0218, 0x00AB, 0x0179, 0x00AD, 0x017A, 0x017B, + 0x00B0, 0x00B1, 0x010C, 0x0142, 0x017D, 0x201D, 0x00B6, 0x00B7, + 0x017E, 0x010D, 0x0219, 0x00BB, 0x0152, 0x0153, 0x0178, 0x017C, + 0x00C0, 0x00C1, 0x00C2, 0x0102, 0x00C4, 0x0106, 0x00C6, 0x00C7, + 0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF, + 0x0110, 0x0143, 0x00D2, 0x00D3, 0x00D4, 0x0150, 0x00D6, 0x015A, + 0x0170, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x0118, 0x021A, 0x00DF, + 0x00E0, 0x00E1, 0x00E2, 0x0103, 0x00E4, 0x0107, 0x00E6, 0x00E7, + 0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF, + 0x0111, 0x0144, 0x00F2, 0x00F3, 0x00F4, 0x0151, 0x00F6, 0x015B, + 0x0171, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x0119, 0x021B, 0x00FF} }, + + // next bits generated again from tables on the Unicode 3.0 CD. + + // $ for a in CP* ; do ( awk '/^0x[89ABCDEF]/{ print $1, $2 }' < $a ) | sort | sed -e 's/#UNDEF.*$/0xFFFD/' | cut -c6- | paste '-d ' - - - - - - - - | sed -e 's/ /, /g' -e 's/$/,/' -e '$ s/,$/} },/' -e '1 s/^/{ /' > ~/tmp/$a ; done + + { "CP 850", "IBM 850", 2009, + { 0x00C7, 0x00FC, 0x00E9, 0x00E2, 0x00E4, 0x00E0, 0x00E5, 0x00E7, + 0x00EA, 0x00EB, 0x00E8, 0x00EF, 0x00EE, 0x00EC, 0x00C4, 0x00C5, + 0x00C9, 0x00E6, 0x00C6, 0x00F4, 0x00F6, 0x00F2, 0x00FB, 0x00F9, + 0x00FF, 0x00D6, 0x00DC, 0x00F8, 0x00A3, 0x00D8, 0x00D7, 0x0192, + 0x00E1, 0x00ED, 0x00F3, 0x00FA, 0x00F1, 0x00D1, 0x00AA, 0x00BA, + 0x00BF, 0x00AE, 0x00AC, 0x00BD, 0x00BC, 0x00A1, 0x00AB, 0x00BB, + 0x2591, 0x2592, 0x2593, 0x2502, 0x2524, 0x00C1, 0x00C2, 0x00C0, + 0x00A9, 0x2563, 0x2551, 0x2557, 0x255D, 0x00A2, 0x00A5, 0x2510, + 0x2514, 0x2534, 0x252C, 0x251C, 0x2500, 0x253C, 0x00E3, 0x00C3, + 0x255A, 0x2554, 0x2569, 0x2566, 0x2560, 0x2550, 0x256C, 0x00A4, + 0x00F0, 0x00D0, 0x00CA, 0x00CB, 0x00C8, 0x0131, 0x00CD, 0x00CE, + 0x00CF, 0x2518, 0x250C, 0x2588, 0x2584, 0x00A6, 0x00CC, 0x2580, + 0x00D3, 0x00DF, 0x00D4, 0x00D2, 0x00F5, 0x00D5, 0x00B5, 0x00FE, + 0x00DE, 0x00DA, 0x00DB, 0x00D9, 0x00FD, 0x00DD, 0x00AF, 0x00B4, + 0x00AD, 0x00B1, 0x2017, 0x00BE, 0x00B6, 0x00A7, 0x00F7, 0x00B8, + 0x00B0, 0x00A8, 0x00B7, 0x00B9, 0x00B3, 0x00B2, 0x25A0, 0x00A0} }, + { "CP 874", "CP 874", 0, //### what is the mib? + { 0x20AC, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0x2026, 0xFFFD, 0xFFFD, + 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, + 0xFFFD, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, + 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, + 0x00A0, 0x0E01, 0x0E02, 0x0E03, 0x0E04, 0x0E05, 0x0E06, 0x0E07, + 0x0E08, 0x0E09, 0x0E0A, 0x0E0B, 0x0E0C, 0x0E0D, 0x0E0E, 0x0E0F, + 0x0E10, 0x0E11, 0x0E12, 0x0E13, 0x0E14, 0x0E15, 0x0E16, 0x0E17, + 0x0E18, 0x0E19, 0x0E1A, 0x0E1B, 0x0E1C, 0x0E1D, 0x0E1E, 0x0E1F, + 0x0E20, 0x0E21, 0x0E22, 0x0E23, 0x0E24, 0x0E25, 0x0E26, 0x0E27, + 0x0E28, 0x0E29, 0x0E2A, 0x0E2B, 0x0E2C, 0x0E2D, 0x0E2E, 0x0E2F, + 0x0E30, 0x0E31, 0x0E32, 0x0E33, 0x0E34, 0x0E35, 0x0E36, 0x0E37, + 0x0E38, 0x0E39, 0x0E3A, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0x0E3F, + 0x0E40, 0x0E41, 0x0E42, 0x0E43, 0x0E44, 0x0E45, 0x0E46, 0x0E47, + 0x0E48, 0x0E49, 0x0E4A, 0x0E4B, 0x0E4C, 0x0E4D, 0x0E4E, 0x0E4F, + 0x0E50, 0x0E51, 0x0E52, 0x0E53, 0x0E54, 0x0E55, 0x0E56, 0x0E57, + 0x0E58, 0x0E59, 0x0E5A, 0x0E5B, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD} }, + { "IBM 866", "IBM 866", 2086, + { 0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417, + 0x0418, 0x0419, 0x041A, 0x041B, 0x041C, 0x041D, 0x041E, 0x041F, + 0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427, + 0x0428, 0x0429, 0x042A, 0x042B, 0x042C, 0x042D, 0x042E, 0x042F, + 0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437, + 0x0438, 0x0439, 0x043A, 0x043B, 0x043C, 0x043D, 0x043E, 0x043F, + 0x2591, 0x2592, 0x2593, 0x2502, 0x2524, 0x2561, 0x2562, 0x2556, + 0x2555, 0x2563, 0x2551, 0x2557, 0x255D, 0x255C, 0x255B, 0x2510, + 0x2514, 0x2534, 0x252C, 0x251C, 0x2500, 0x253C, 0x255E, 0x255F, + 0x255A, 0x2554, 0x2569, 0x2566, 0x2560, 0x2550, 0x256C, 0x2567, + 0x2568, 0x2564, 0x2565, 0x2559, 0x2558, 0x2552, 0x2553, 0x256B, + 0x256A, 0x2518, 0x250C, 0x2588, 0x2584, 0x258C, 0x2590, 0x2580, + 0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447, + 0x0448, 0x0449, 0x044A, 0x044B, 0x044C, 0x044D, 0x044E, 0x044F, + 0x0401, 0x0451, 0x0404, 0x0454, 0x0407, 0x0457, 0x040E, 0x045E, + 0x00B0, 0x2219, 0x00B7, 0x221A, 0x2116, 0x00A4, 0x25A0, 0x00A0} }, + + { "windows-1250", "CP 1250", 2250, + { 0x20AC, 0xFFFD, 0x201A, 0xFFFD, 0x201E, 0x2026, 0x2020, 0x2021, + 0xFFFD, 0x2030, 0x0160, 0x2039, 0x015A, 0x0164, 0x017D, 0x0179, + 0xFFFD, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, + 0xFFFD, 0x2122, 0x0161, 0x203A, 0x015B, 0x0165, 0x017E, 0x017A, + 0x00A0, 0x02C7, 0x02D8, 0x0141, 0x00A4, 0x0104, 0x00A6, 0x00A7, + 0x00A8, 0x00A9, 0x015E, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x017B, + 0x00B0, 0x00B1, 0x02DB, 0x0142, 0x00B4, 0x00B5, 0x00B6, 0x00B7, + 0x00B8, 0x0105, 0x015F, 0x00BB, 0x013D, 0x02DD, 0x013E, 0x017C, + 0x0154, 0x00C1, 0x00C2, 0x0102, 0x00C4, 0x0139, 0x0106, 0x00C7, + 0x010C, 0x00C9, 0x0118, 0x00CB, 0x011A, 0x00CD, 0x00CE, 0x010E, + 0x0110, 0x0143, 0x0147, 0x00D3, 0x00D4, 0x0150, 0x00D6, 0x00D7, + 0x0158, 0x016E, 0x00DA, 0x0170, 0x00DC, 0x00DD, 0x0162, 0x00DF, + 0x0155, 0x00E1, 0x00E2, 0x0103, 0x00E4, 0x013A, 0x0107, 0x00E7, + 0x010D, 0x00E9, 0x0119, 0x00EB, 0x011B, 0x00ED, 0x00EE, 0x010F, + 0x0111, 0x0144, 0x0148, 0x00F3, 0x00F4, 0x0151, 0x00F6, 0x00F7, + 0x0159, 0x016F, 0x00FA, 0x0171, 0x00FC, 0x00FD, 0x0163, 0x02D9} }, + { "windows-1251", "CP 1251", 2251, + { 0x0402, 0x0403, 0x201A, 0x0453, 0x201E, 0x2026, 0x2020, 0x2021, + 0x20AC, 0x2030, 0x0409, 0x2039, 0x040A, 0x040C, 0x040B, 0x040F, + 0x0452, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, + 0xFFFD, 0x2122, 0x0459, 0x203A, 0x045A, 0x045C, 0x045B, 0x045F, + 0x00A0, 0x040E, 0x045E, 0x0408, 0x00A4, 0x0490, 0x00A6, 0x00A7, + 0x0401, 0x00A9, 0x0404, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x0407, + 0x00B0, 0x00B1, 0x0406, 0x0456, 0x0491, 0x00B5, 0x00B6, 0x00B7, + 0x0451, 0x2116, 0x0454, 0x00BB, 0x0458, 0x0405, 0x0455, 0x0457, + 0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417, + 0x0418, 0x0419, 0x041A, 0x041B, 0x041C, 0x041D, 0x041E, 0x041F, + 0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427, + 0x0428, 0x0429, 0x042A, 0x042B, 0x042C, 0x042D, 0x042E, 0x042F, + 0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437, + 0x0438, 0x0439, 0x043A, 0x043B, 0x043C, 0x043D, 0x043E, 0x043F, + 0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447, + 0x0448, 0x0449, 0x044A, 0x044B, 0x044C, 0x044D, 0x044E, 0x044F} }, + { "windows-1252", "CP 1252", 2252, + { 0x20AC, 0xFFFD, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021, + 0x02C6, 0x2030, 0x0160, 0x2039, 0x0152, 0xFFFD, 0x017D, 0xFFFD, + 0xFFFD, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, + 0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, 0xFFFD, 0x017E, 0x0178, + 0x00A0, 0x00A1, 0x00A2, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7, + 0x00A8, 0x00A9, 0x00AA, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF, + 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7, + 0x00B8, 0x00B9, 0x00BA, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00BF, + 0x00C0, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x00C7, + 0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF, + 0x00D0, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x00D7, + 0x00D8, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x00DD, 0x00DE, 0x00DF, + 0x00E0, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x00E7, + 0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF, + 0x00F0, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x00F7, + 0x00F8, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x00FD, 0x00FE, 0x00FF} }, + { "windows-1253", "CP 1253", 2253, + { 0x20AC, 0xFFFD, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021, + 0xFFFD, 0x2030, 0xFFFD, 0x2039, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, + 0xFFFD, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, + 0xFFFD, 0x2122, 0xFFFD, 0x203A, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, + 0x00A0, 0x0385, 0x0386, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7, + 0x00A8, 0x00A9, 0xFFFD, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x2015, + 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x0384, 0x00B5, 0x00B6, 0x00B7, + 0x0388, 0x0389, 0x038A, 0x00BB, 0x038C, 0x00BD, 0x038E, 0x038F, + 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, + 0x0398, 0x0399, 0x039A, 0x039B, 0x039C, 0x039D, 0x039E, 0x039F, + 0x03A0, 0x03A1, 0xFFFD, 0x03A3, 0x03A4, 0x03A5, 0x03A6, 0x03A7, + 0x03A8, 0x03A9, 0x03AA, 0x03AB, 0x03AC, 0x03AD, 0x03AE, 0x03AF, + 0x03B0, 0x03B1, 0x03B2, 0x03B3, 0x03B4, 0x03B5, 0x03B6, 0x03B7, + 0x03B8, 0x03B9, 0x03BA, 0x03BB, 0x03BC, 0x03BD, 0x03BE, 0x03BF, + 0x03C0, 0x03C1, 0x03C2, 0x03C3, 0x03C4, 0x03C5, 0x03C6, 0x03C7, + 0x03C8, 0x03C9, 0x03CA, 0x03CB, 0x03CC, 0x03CD, 0x03CE, 0xFFFD} }, + { "windows-1254", "CP 1254", 2254, + { 0x20AC, 0xFFFD, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021, + 0x02C6, 0x2030, 0x0160, 0x2039, 0x0152, 0xFFFD, 0xFFFD, 0xFFFD, + 0xFFFD, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, + 0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, 0xFFFD, 0xFFFD, 0x0178, + 0x00A0, 0x00A1, 0x00A2, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7, + 0x00A8, 0x00A9, 0x00AA, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF, + 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7, + 0x00B8, 0x00B9, 0x00BA, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00BF, + 0x00C0, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x00C7, + 0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF, + 0x011E, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x00D7, + 0x00D8, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x0130, 0x015E, 0x00DF, + 0x00E0, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x00E7, + 0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF, + 0x011F, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x00F7, + 0x00F8, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x0131, 0x015F, 0x00FF} }, + { "windows-1255", "CP 1255", 2255, + { 0x20AC, 0xFFFD, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021, + 0x02C6, 0x2030, 0xFFFD, 0x2039, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, + 0xFFFD, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, + 0x02DC, 0x2122, 0xFFFD, 0x203A, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, + 0x00A0, 0x00A1, 0x00A2, 0x00A3, 0x20AA, 0x00A5, 0x00A6, 0x00A7, + 0x00A8, 0x00A9, 0x00D7, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF, + 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7, + 0x00B8, 0x00B9, 0x00F7, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00BF, + 0x05B0, 0x05B1, 0x05B2, 0x05B3, 0x05B4, 0x05B5, 0x05B6, 0x05B7, + 0x05B8, 0x05B9, 0xFFFD, 0x05BB, 0x05BC, 0x05BD, 0x05BE, 0x05BF, + 0x05C0, 0x05C1, 0x05C2, 0x05C3, 0x05F0, 0x05F1, 0x05F2, 0x05F3, + 0x05F4, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, + 0x05D0, 0x05D1, 0x05D2, 0x05D3, 0x05D4, 0x05D5, 0x05D6, 0x05D7, + 0x05D8, 0x05D9, 0x05DA, 0x05DB, 0x05DC, 0x05DD, 0x05DE, 0x05DF, + 0x05E0, 0x05E1, 0x05E2, 0x05E3, 0x05E4, 0x05E5, 0x05E6, 0x05E7, + 0x05E8, 0x05E9, 0x05EA, 0xFFFD, 0xFFFD, 0x200E, 0x200F, 0xFFFD} }, + { "windows-1256", "CP 1256", 2256, + { 0x20AC, 0x067E, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021, + 0x02C6, 0x2030, 0x0679, 0x2039, 0x0152, 0x0686, 0x0698, 0x0688, + 0x06AF, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, + 0x06A9, 0x2122, 0x0691, 0x203A, 0x0153, 0x200C, 0x200D, 0x06BA, + 0x00A0, 0x060C, 0x00A2, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7, + 0x00A8, 0x00A9, 0x06BE, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF, + 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7, + 0x00B8, 0x00B9, 0x061B, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x061F, + 0x06C1, 0x0621, 0x0622, 0x0623, 0x0624, 0x0625, 0x0626, 0x0627, + 0x0628, 0x0629, 0x062A, 0x062B, 0x062C, 0x062D, 0x062E, 0x062F, + 0x0630, 0x0631, 0x0632, 0x0633, 0x0634, 0x0635, 0x0636, 0x00D7, + 0x0637, 0x0638, 0x0639, 0x063A, 0x0640, 0x0641, 0x0642, 0x0643, + 0x00E0, 0x0644, 0x00E2, 0x0645, 0x0646, 0x0647, 0x0648, 0x00E7, + 0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x0649, 0x064A, 0x00EE, 0x00EF, + 0x064B, 0x064C, 0x064D, 0x064E, 0x00F4, 0x064F, 0x0650, 0x00F7, + 0x0651, 0x00F9, 0x0652, 0x00FB, 0x00FC, 0x200E, 0x200F, 0x06D2} }, + { "windows-1257", "CP 1257", 2257, + { 0x20AC, 0xFFFD, 0x201A, 0xFFFD, 0x201E, 0x2026, 0x2020, 0x2021, + 0xFFFD, 0x2030, 0xFFFD, 0x2039, 0xFFFD, 0x00A8, 0x02C7, 0x00B8, + 0xFFFD, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, + 0xFFFD, 0x2122, 0xFFFD, 0x203A, 0xFFFD, 0x00AF, 0x02DB, 0xFFFD, + 0x00A0, 0xFFFD, 0x00A2, 0x00A3, 0x00A4, 0xFFFD, 0x00A6, 0x00A7, + 0x00D8, 0x00A9, 0x0156, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00C6, + 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7, + 0x00F8, 0x00B9, 0x0157, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00E6, + 0x0104, 0x012E, 0x0100, 0x0106, 0x00C4, 0x00C5, 0x0118, 0x0112, + 0x010C, 0x00C9, 0x0179, 0x0116, 0x0122, 0x0136, 0x012A, 0x013B, + 0x0160, 0x0143, 0x0145, 0x00D3, 0x014C, 0x00D5, 0x00D6, 0x00D7, + 0x0172, 0x0141, 0x015A, 0x016A, 0x00DC, 0x017B, 0x017D, 0x00DF, + 0x0105, 0x012F, 0x0101, 0x0107, 0x00E4, 0x00E5, 0x0119, 0x0113, + 0x010D, 0x00E9, 0x017A, 0x0117, 0x0123, 0x0137, 0x012B, 0x013C, + 0x0161, 0x0144, 0x0146, 0x00F3, 0x014D, 0x00F5, 0x00F6, 0x00F7, + 0x0173, 0x0142, 0x015B, 0x016B, 0x00FC, 0x017C, 0x017E, 0x02D9} }, + { "windows-1258", "CP 1258", 2258, + { 0x20AC, 0xFFFD, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021, + 0x02C6, 0x2030, 0xFFFD, 0x2039, 0x0152, 0xFFFD, 0xFFFD, 0xFFFD, + 0xFFFD, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, + 0x02DC, 0x2122, 0xFFFD, 0x203A, 0x0153, 0xFFFD, 0xFFFD, 0x0178, + 0x00A0, 0x00A1, 0x00A2, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7, + 0x00A8, 0x00A9, 0x00AA, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF, + 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7, + 0x00B8, 0x00B9, 0x00BA, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00BF, + 0x00C0, 0x00C1, 0x00C2, 0x0102, 0x00C4, 0x00C5, 0x00C6, 0x00C7, + 0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x0300, 0x00CD, 0x00CE, 0x00CF, + 0x0110, 0x00D1, 0x0309, 0x00D3, 0x00D4, 0x01A0, 0x00D6, 0x00D7, + 0x00D8, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x01AF, 0x0303, 0x00DF, + 0x00E0, 0x00E1, 0x00E2, 0x0103, 0x00E4, 0x00E5, 0x00E6, 0x00E7, + 0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x0301, 0x00ED, 0x00EE, 0x00EF, + 0x0111, 0x00F1, 0x0323, 0x00F3, 0x00F4, 0x01A1, 0x00F6, 0x00F7, + 0x00F8, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x01B0, 0x20AB, 0x00FF} }, + + { "Apple Roman", "Apple Roman", 0, + { 0x00C4, 0x00C5, 0x00C7, 0x00C9, 0x00D1, 0x00D6, 0x00DC, 0x00E1, + 0x00E0, 0x00E2, 0x00E4, 0x00E3, 0x00E5, 0x00E7, 0x00E9, 0x00E8, + 0x00EA, 0x00EB, 0x00ED, 0x00EC, 0x00EE, 0x00EF, 0x00F1, 0x00F3, + 0x00F2, 0x00F4, 0x00F6, 0x00F5, 0x00FA, 0x00F9, 0x00FB, 0x00FC, + 0x2020, 0x00B0, 0x00A2, 0x00A3, 0x00A7, 0x2022, 0x00B6, 0x00DF, + 0x00AE, 0x00A9, 0x2122, 0x00B4, 0x00A8, 0x2260, 0x00C6, 0x00D8, + 0x221E, 0x00B1, 0x2264, 0x2265, 0x00A5, 0x00B5, 0x2202, 0x2211, + 0x220F, 0x03C0, 0x222B, 0x00AA, 0x00BA, 0x03A9, 0x00E6, 0x00F8, + 0x00BF, 0x00A1, 0x00AC, 0x221A, 0x0192, 0x2248, 0x2206, 0x00AB, + 0x00BB, 0x2026, 0x00A0, 0x00C0, 0x00C3, 0x00D5, 0x0152, 0x0153, + 0x2013, 0x2014, 0x201C, 0x201D, 0x2018, 0x2019, 0x00F7, 0x25CA, + 0x00FF, 0x0178, 0x2044, 0x20AC, 0x2039, 0x203A, 0xFB01, 0xFB02, + 0x2021, 0x00B7, 0x201A, 0x201E, 0x2030, 0x00C2, 0x00CA, 0x00C1, + 0x00CB, 0x00C8, 0x00CD, 0x00CE, 0x00CF, 0x00CC, 0x00D3, 0x00D4, + 0xF8FF, 0x00D2, 0x00DA, 0x00DB, 0x00D9, 0x0131, 0x02C6, 0x02DC, + 0x00AF, 0x02D8, 0x02D9, 0x02DA, 0x00B8, 0x02DD, 0x02DB, 0x02C7} }, + + + + // This one is based on the charmap file + // /usr/share/i18n/charmaps/SAMI-WS2.gz, which is manually adapted + // to this format by Børre Gaup <[email protected]> + { "WINSAMI2", "WS2", 0, + { 0x20AC, 0xFFFD, 0x010C, 0x0192, 0x010D, 0x01B7, 0x0292, 0x01EE, + 0x01EF, 0x0110, 0x0160, 0x2039, 0x0152, 0xFFFD, 0xFFFD, 0xFFFD, + 0xFFFD, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, + 0x0111, 0x01E6, 0x0161, 0x203A, 0x0153, 0xFFFD, 0xFFFD, 0x0178, + 0x00A0, 0x01E7, 0x01E4, 0x00A3, 0x00A4, 0x01E5, 0x00A6, 0x00A7, + 0x00A8, 0x00A9, 0x021E, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x021F, + 0x00B0, 0x00B1, 0x01E8, 0x01E9, 0x00B4, 0x00B5, 0x00B6, 0x00B7, + 0x014A, 0x014B, 0x0166, 0x00BB, 0x0167, 0x00BD, 0x017D, 0x017E, + 0x00C0, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x00C7, + 0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF, + 0x00D0, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x00D7, + 0x00D8, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x00DD, 0x00DE, 0x00DF, + 0x00E0, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x00E7, + 0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF, + 0x00F0, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x00F7, + 0x00F8, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x00FD, 0x00FE, 0x00FF} }, + + + // this one is generated from the charmap file located in /usr/share/i18n/charmaps + // on most Linux distributions. The thai character set tis620 is byte by byte equivalent + // to iso8859-11, so we name it 8859-11 here, but recognise the name tis620 too. + + // $ for A in 8 9 A B C D E F ; do for B in 0 1 2 3 4 5 6 7 8 9 A B C D E F ; do echo x${A}${B} 0xFFFD ; done ; done > /tmp/digits ; ( cut -c25- < TIS-620 ; cat /tmp/digits ) | awk '/^x[89ABCDEF]/{ print $1, $2 }' | sed -e 's/<U/0x/' -e 's/>//' | sort | uniq -w4 | cut -c5- | paste '-d ' - - - - - - - - | sed -e 's/ /, /g' -e 's/$/,/' -e '$ s/,$/} },/' -e '1 s/^/{ /' > ~/tmp/tis-620 + { "TIS-620", "ISO 8859-11", 2259, // Thai character set mib enum taken from tis620 (which is byte by byte equivalent) + { 0x20AC, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0x2026, 0xFFFD, 0xFFFD, + 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, + 0xFFFD, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, + 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, + 0xFFFD, 0x0E01, 0x0E02, 0x0E03, 0x0E04, 0x0E05, 0x0E06, 0x0E07, + 0x0E08, 0x0E09, 0x0E0A, 0x0E0B, 0x0E0C, 0x0E0D, 0x0E0E, 0x0E0F, + 0x0E10, 0x0E11, 0x0E12, 0x0E13, 0x0E14, 0x0E15, 0x0E16, 0x0E17, + 0x0E18, 0x0E19, 0x0E1A, 0x0E1B, 0x0E1C, 0x0E1D, 0x0E1E, 0x0E1F, + 0x0E20, 0x0E21, 0x0E22, 0x0E23, 0x0E24, 0x0E25, 0x0E26, 0x0E27, + 0x0E28, 0x0E29, 0x0E2A, 0x0E2B, 0x0E2C, 0x0E2D, 0x0E2E, 0x0E2F, + 0x0E30, 0x0E31, 0x0E32, 0x0E33, 0x0E34, 0x0E35, 0x0E36, 0x0E37, + 0x0E38, 0x0E39, 0x0E3A, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0x0E3F, + 0x0E40, 0x0E41, 0x0E42, 0x0E43, 0x0E44, 0x0E45, 0x0E46, 0x0E47, + 0x0E48, 0x0E49, 0x0E4A, 0x0E4B, 0x0E4C, 0x0E4D, 0x0E4E, 0x0E4F, + 0x0E50, 0x0E51, 0x0E52, 0x0E53, 0x0E54, 0x0E55, 0x0E56, 0x0E57, + 0x0E58, 0x0E59, 0x0E5A, 0x0E5B, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD } }, + + /* + Name: hp-roman8 [HP-PCL5,RFC1345,KXS2] + MIBenum: 2004 + Source: LaserJet IIP Printer User's Manual, + HP part no 33471-90901, Hewlet-Packard, June 1989. + Alias: roman8 + Alias: r8 + Alias: csHPRoman8 + */ + { "Roman8", "HP-Roman8", 2004, + { 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, + 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, + 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, + 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, + 0x00A0, 0x00C0, 0x00C2, 0x00C8, 0x00CA, 0x00CB, 0x00CE, 0x00CF, + 0x00B4, 0x02CB, 0x02C6, 0x00A8, 0x02DC, 0x00D9, 0x00DB, 0x20A4, + 0x00AF, 0x00DD, 0x00FD, 0x00B0, 0x00C7, 0x00E7, 0x00D1, 0x00F1, + 0x00A1, 0x00BF, 0x00A4, 0x00A3, 0x00A5, 0x00A7, 0x0192, 0x00A2, + 0x00E2, 0x00EA, 0x00F4, 0x00FB, 0x00E1, 0x00E9, 0x00F3, 0x00FA, + 0x00E0, 0x00E8, 0x00F2, 0x00F9, 0x00E4, 0x00EB, 0x00F6, 0x00FC, + 0x00C5, 0x00EE, 0x00D8, 0x00C6, 0x00E5, 0x00ED, 0x00F8, 0x00E6, + 0x00C4, 0x00EC, 0x00D6, 0x00DC, 0x00C9, 0x00EF, 0x00DF, 0x00D4, + 0x00C1, 0x00C3, 0x00E3, 0x00D0, 0x00F0, 0x00CD, 0x00CC, 0x00D3, + 0x00D2, 0x00D5, 0x00F5, 0x0160, 0x0161, 0x00DA, 0x0178, 0x00FF, + 0x00DE, 0x00FE, 0x00B7, 0x00B5, 0x00B6, 0x00BE, 0x2014, 0x00BC, + 0x00BD, 0x00AA, 0x00BA, 0x00AB, 0x25A0, 0x00BB, 0x00B1, 0xFFFD } } + + // if you add more chacater sets at the end, change LAST_MIB above +}; + +TQSimpleTextCodec::TQSimpleTextCodec( int i ) + : TQTextCodec(), forwardIndex( i ) +{ +#ifndef Q_WS_QWS + reverseMap = 0; +#endif +} + + +TQSimpleTextCodec::~TQSimpleTextCodec() +{ +#ifndef Q_WS_QWS + delete reverseMap; +#else + if ( reverseOwner == this ) { + delete reverseMap; + reverseMap = 0; + reverseOwner = 0; + } +#endif +} + +void TQSimpleTextCodec::buildReverseMap() +{ +#ifdef Q_WS_QWS + if ( reverseOwner != this ) { + int m = 0; + int i = 0; + while( i < 128 ) { + if ( unicodevalues[forwardIndex].values[i] > m && + unicodevalues[forwardIndex].values[i] < 0xfffd ) + m = unicodevalues[forwardIndex].values[i]; + i++; + } + m++; + if ( !reverseMap ) + reverseMap = new TQMemArray<unsigned char>( m ); + if ( m > (int)(reverseMap->size()) ) + reverseMap->resize( m ); + for( i = 0; i < 128 && i < m; i++ ) + (*reverseMap)[i] = (char)i; + for( ;i < m; i++ ) + (*reverseMap)[i] = 0; + for( i=128; i<256; i++ ) { + int u = unicodevalues[forwardIndex].values[i-128]; + if ( u < m ) + (*reverseMap)[u] = (char)(unsigned char)(i); + } + reverseOwner = this; + } +#else + if ( !reverseMap ) { + TQMemArray<unsigned char> **map = &((TQSimpleTextCodec *)this)->reverseMap; + int m = 0; + int i = 0; + while( i < 128 ) { + if ( unicodevalues[forwardIndex].values[i] > m && + unicodevalues[forwardIndex].values[i] < 0xfffd ) + m = unicodevalues[forwardIndex].values[i]; + i++; + } + m++; + *map = new TQMemArray<unsigned char>( m ); + for( i = 0; i < 128 && i < m; i++ ) + (**map)[i] = (char)i; + for( ;i < m; i++ ) + (**map)[i] = 0; + for( i=128; i<256; i++ ) { + int u = unicodevalues[forwardIndex].values[i-128]; + if ( u < m ) + (**map)[u] = (char)(unsigned char)(i); + } + } +#endif +} + +TQString TQSimpleTextCodec::toUnicode(const char* chars, int len) const +{ + if ( len <= 0 || chars == 0 ) + return TQString::null; + + const unsigned char * c = (const unsigned char *)chars; + int i; + + for ( i = 0; i < len; i++ ) + if ( c[i] == '\0' ) { + len = i; + break; + } + + TQString r; + r.setUnicode(0, len); + TQChar* uc = (TQChar*)r.unicode(); // const_cast + + for ( i = 0; i < len; i++ ) { + if ( c[i] > 127 ) + uc[i] = unicodevalues[forwardIndex].values[c[i]-128]; + else + uc[i] = c[i]; + } + return r; +} + + +TQCString TQSimpleTextCodec::fromUnicode(const TQString& uc, int& len ) const +{ +#ifdef Q_WS_QWS + if ( this != reverseOwner ) +#else + if ( !reverseMap ) +#endif + ((TQSimpleTextCodec *)this)->buildReverseMap(); + + if ( len <0 || len > (int)uc.length() ) + len = uc.length(); + TQCString r( len+1 ); + int i = len; + int u; + const TQChar* ucp = uc.unicode(); + unsigned char* rp = (unsigned char *)r.data(); + unsigned char* rmp = reverseMap->data(); + int rmsize = (int) reverseMap->size(); + while( i-- ) + { + u = ucp->unicode(); + *rp = u < 128 ? u : (( u < rmsize ) ? (*(rmp+u)) : '?' ); + if ( *rp == 0 ) *rp = '?'; + rp++; + ucp++; + } + r[len] = 0; + return r; +} + +void TQSimpleTextCodec::fromUnicode( const TQChar *in, unsigned short *out, int length ) const +{ +#ifdef Q_WS_QWS + if ( this != reverseOwner ) +#else + if ( !reverseMap ) +#endif + ((TQSimpleTextCodec *)this)->buildReverseMap(); + + unsigned char* rmp = reverseMap->data(); + int rmsize = (int) reverseMap->size(); + while ( length-- ) { + unsigned short u = in->unicode(); + *out = u < 128 ? u : (( u < rmsize ) ? (*(rmp+u)) : 0 ); + ++in; + ++out; + } +} + +unsigned short TQSimpleTextCodec::characterFromUnicode(const TQString &str, int pos) const +{ +#ifdef Q_WS_QWS + if ( this != reverseOwner ) +#else + if ( !reverseMap ) +#endif + ((TQSimpleTextCodec *)this)->buildReverseMap(); + + unsigned short u = str[pos].unicode(); + unsigned char* rmp = reverseMap->data(); + int rmsize = (int) reverseMap->size(); + return u < 128 ? u : (( u < rmsize ) ? (*(rmp+u)) : 0 ); +} + +bool TQSimpleTextCodec::canEncode( TQChar ch ) const +{ +#ifdef Q_WS_QWS + if ( this != reverseOwner ) +#else + if ( !reverseMap ) +#endif + ((TQSimpleTextCodec *)this)->buildReverseMap(); + + unsigned short u = ch.unicode(); + unsigned char* rmp = reverseMap->data(); + int rmsize = (int) reverseMap->size(); + return u < 128 ? TRUE : (( u < rmsize ) ? (*(rmp+u) != 0) : FALSE ); +} + +const char* TQSimpleTextCodec::name() const +{ + return unicodevalues[forwardIndex].cs; +} + +const char* TQSimpleTextCodec::mimeName() const +{ + return unicodevalues[forwardIndex].mime; +} + + +int TQSimpleTextCodec::mibEnum() const +{ + return unicodevalues[forwardIndex].mib; +} + +int TQSimpleTextCodec::heuristicNameMatch(const char* hint) const +{ + if ( qstricmp( hint, mimeName() ) == 0 ) + return 10000; // return a large value + if ( hint[0]=='k' ) { + TQCString lhint = TQCString(hint).lower(); + // Help people with messy fonts + if ( lhint == "koi8-1" ) + return TQTextCodec::heuristicNameMatch("koi8-r")-1; + if ( lhint == "koi8-ru" ) + return TQTextCodec::heuristicNameMatch("koi8-r")-1; + } else if ( hint[0] == 't' && mibEnum() == 2259 /* iso8859-11 */ ) { + // 8859-11 and tis620 are byte by byte equivalent + int i = simpleHeuristicNameMatch("tis620-0", hint); + if( !i ) + i = simpleHeuristicNameMatch("tis-620", hint); + if( i ) return i; + } else if ( mibEnum() == 82 /* ISO 8859-6 */ ) { + int i = simpleHeuristicNameMatch("ISO 8859-6-I", hint); + if ( i ) + return i; + } + return TQTextCodec::heuristicNameMatch(hint); +} + +int TQSimpleTextCodec::heuristicContentMatch(const char* chars, int len) const +{ + if ( len<1 || !chars ) + return -1; + int i = 0; + const uchar * c = (const unsigned char *)chars; + int r = 0; + while( i<len && c && *c ) { + if ( *c >= 128 ) { + if ( unicodevalues[forwardIndex].values[(*c)-128] == 0xfffd ) + return -1; + } + if ( (*c >= ' ' && *c < 127) || + *c == '\n' || *c == '\t' || *c == '\r' ) + r++; + i++; + c++; + } + if ( mibEnum()==4 ) + r+=1; + return r; +} + +#endif + +class TQLatin1Codec : public TQTextCodec +{ +public: +#if !defined(Q_NO_USING_KEYWORD) + using TQTextCodec::fromUnicode; + using TQTextCodec::toUnicode; +#endif + TQString toUnicode(const char* chars, int len) const; + TQCString fromUnicode(const TQString& uc, int& lenInOut ) const; + void fromUnicode( const TQChar *in, unsigned short *out, int length ) const; + unsigned short characterFromUnicode(const TQString &str, int pos) const; + + const char* name() const; + const char* mimeName() const; + int mibEnum() const; + + int heuristicContentMatch(const char* chars, int len) const; + +private: + int forwardIndex; +}; + + +TQString TQLatin1Codec::toUnicode(const char* chars, int len) const +{ + if ( chars == 0 ) + return TQString::null; + + return TQString::fromLatin1(chars, len); +} + + +TQCString TQLatin1Codec::fromUnicode(const TQString& uc, int& len ) const +{ + if ( len <0 || len > (int)uc.length() ) + len = uc.length(); + TQCString r( len+1 ); + char *d = r.data(); + int i = 0; + const TQChar *ch = uc.unicode(); + while ( i < len ) { + d[i] = ch->row() ? '?' : ch->cell(); + i++; + ch++; + } + r[len] = 0; + return r; +} + +void TQLatin1Codec::fromUnicode( const TQChar *in, unsigned short *out, int length ) const +{ + while ( length-- ) { + *out = in->row() ? 0 : in->cell(); + ++in; + ++out; + } +} + +unsigned short TQLatin1Codec::characterFromUnicode(const TQString &str, int pos) const +{ + const TQChar *ch = str.unicode() + pos; + if (ch->row()) + return 0; + return (unsigned short) ch->cell(); +} + + +const char* TQLatin1Codec::name() const +{ + return "ISO 8859-1"; +} + +const char* TQLatin1Codec::mimeName() const +{ + return "ISO-8859-1"; +} + + +int TQLatin1Codec::mibEnum() const +{ + return 4; +} + +int TQLatin1Codec::heuristicContentMatch(const char* chars, int len) const +{ + if ( len<1 || !chars ) + return -1; + int i = 0; + const uchar * c = (const unsigned char *)chars; + int r = 0; + while( i<len && c && *c ) { + if ( *c >= 0x80 && *c < 0xa0 ) + return -1; + if ( (*c >= ' ' && *c < 127) || + *c == '\n' || *c == '\t' || *c == '\r' ) + r++; + i++; + c++; + } + if ( this == (const TQTextCodec *)codecForLocale() ) + r += 5; + return r; +} + +class TQLatin15Codec: public TQLatin1Codec +{ +public: + TQString toUnicode(const char* chars, int len) const; +#if !defined(Q_NO_USING_KEYWORD) + using TQLatin1Codec::fromUnicode; +#endif + TQCString fromUnicode(const TQString& uc, int& lenInOut ) const; + void fromUnicode( const TQChar *in, unsigned short *out, int length ) const; + unsigned short characterFromUnicode(const TQString &str, int pos) const; + + const char* name() const; + const char* mimeName() const; + int mibEnum() const; + +private: + int forwardIndex; +}; + + +TQString TQLatin15Codec::toUnicode(const char* chars, int len) const +{ + if ( chars == 0 ) + return TQString::null; + + TQString str = TQString::fromLatin1(chars, len); + TQChar *uc = (TQChar *)str.unicode(); + while( len-- ) { + switch( uc->unicode() ) { + case 0xa4: + *uc = 0x20ac; + break; + case 0xa6: + *uc = 0x0160; + break; + case 0xa8: + *uc = 0x0161; + break; + case 0xb4: + *uc = 0x017d; + break; + case 0xb8: + *uc = 0x017e; + break; + case 0xbc: + *uc = 0x0152; + break; + case 0xbd: + *uc = 0x0153; + break; + case 0xbe: + *uc = 0x0178; + break; + default: + break; + } + uc++; + } + return str; +} + +static inline unsigned char +latin15CharFromUnicode( unsigned short uc, bool replacement = TRUE ) +{ + uchar c; + if ( uc < 0x0100 ) { + if ( uc > 0xa3 && uc < 0xbf ) { + switch( uc ) { + case 0xa4: + case 0xa6: + case 0xa8: + case 0xb4: + case 0xb8: + case 0xbc: + case 0xbd: + case 0xbe: + c = replacement ? '?' : 0; + break; + default: + c = (unsigned char) uc; + break; + } + } else { + c = (unsigned char) uc; + } + } else { + if ( uc == 0x20ac ) + c = 0xa4; + else if ( (uc & 0xff00) == 0x0100 ) { + switch( uc ) { + case 0x0160: + c = 0xa6; + break; + case 0x0161: + c = 0xa8; + break; + case 0x017d: + c = 0xb4; + break; + case 0x017e: + c = 0xb8; + break; + case 0x0152: + c = 0xbc; + break; + case 0x0153: + c = 0xbd; + break; + case 0x0178: + c = 0xbe; + break; + default: + c = replacement ? '?' : 0; + } + } else { + c = replacement ? '?' : 0; + } + } + return c; +} + + +void TQLatin15Codec::fromUnicode( const TQChar *in, unsigned short *out, int length ) const +{ + while ( length-- ) { + *out = latin15CharFromUnicode( in->unicode(), FALSE ); + ++in; + ++out; + } +} + + +TQCString TQLatin15Codec::fromUnicode(const TQString& uc, int& len ) const +{ + if ( len <0 || len > (int)uc.length() ) + len = uc.length(); + TQCString r( len+1 ); + char *d = r.data(); + int i = 0; + const TQChar *ch = uc.unicode(); + while ( i < len ) { + d[i] = latin15CharFromUnicode( ch->unicode() ); + i++; + ch++; + } + r[len] = 0; + return r; +} + +unsigned short TQLatin15Codec::characterFromUnicode(const TQString &str, int pos) const +{ + return latin15CharFromUnicode( str.unicode()[pos].unicode(), FALSE ); +} + + +const char* TQLatin15Codec::name() const +{ + return "ISO 8859-15"; +} + +const char* TQLatin15Codec::mimeName() const +{ + return "ISO-8859-15"; +} + + +int TQLatin15Codec::mibEnum() const +{ + return 111; +} + +static TQTextCodec *checkForCodec(const char *name) { + TQTextCodec *c = TQTextCodec::codecForName(name); + if (!c) { + const char *at = strchr(name, '@'); + if (at) { + TQCString n(name, at - name + 1); + c = TQTextCodec::codecForName(n.data()); + } + } + return c; +} + +/* the next two functions are implicitely thread safe, + as they are only called by setup() which uses a mutex. +*/ +static void setupLocaleMapper() +{ +#ifdef Q_OS_WIN32 + localeMapper = TQTextCodec::codecForName( "System" ); +#else + +#if defined (_XOPEN_UNIX) && !defined(Q_OS_QNX6) && !defined(Q_OS_OSF) && !defined(Q_OS_MAC) + char *charset = nl_langinfo (CODESET); + if ( charset ) + localeMapper = TQTextCodec::codecForName( charset ); +#endif + + if ( !localeMapper ) { + // Very poorly defined and followed standards causes lots of code + // to try to get all the cases... + + // Try to determine locale codeset from locale name assigned to + // LC_CTYPE category. + + // First part is getting that locale name. First try setlocale() which + // definitely knows it, but since we cannot fully trust it, get ready + // to fall back to environment variables. + char * ctype = qstrdup( setlocale( LC_CTYPE, 0 ) ); + + // Get the first nonempty value from $LC_ALL, $LC_CTYPE, and $LANG + // environment variables. + char * lang = qstrdup( getenv("LC_ALL") ); + if ( !lang || lang[0] == 0 || strcmp( lang, "C" ) == 0 ) { + if ( lang ) delete [] lang; + lang = qstrdup( getenv("LC_CTYPE") ); + } + if ( !lang || lang[0] == 0 || strcmp( lang, "C" ) == 0 ) { + if ( lang ) delete [] lang; + lang = qstrdup( getenv("LANG") ); + } + + // Now try these in order: + // 1. CODESET from ctype if it contains a .CODESET part (e.g. en_US.ISO8859-15) + // 2. CODESET from lang if it contains a .CODESET part + // 3. ctype (maybe the locale is named "ISO-8859-1" or something) + // 4. locale (ditto) + // 5. check for "@euro" + // 6. guess locale from ctype unless ctype is "C" + // 7. guess locale from lang + + // 1. CODESET from ctype if it contains a .CODESET part (e.g. en_US.ISO8859-15) + char * codeset = ctype ? strchr( ctype, '.' ) : 0; + if ( codeset && *codeset == '.' ) + localeMapper = checkForCodec( codeset + 1 ); + + // 2. CODESET from lang if it contains a .CODESET part + codeset = lang ? strchr( lang, '.' ) : 0; + if ( !localeMapper && codeset && *codeset == '.' ) + localeMapper = checkForCodec( codeset + 1 ); + + // 3. ctype (maybe the locale is named "ISO-8859-1" or something) + if ( !localeMapper && ctype && *ctype != 0 && strcmp (ctype, "C") != 0 ) + localeMapper = checkForCodec( ctype ); + + // 4. locale (ditto) + if ( !localeMapper && lang && *lang != 0 ) + localeMapper = checkForCodec( lang ); + + // 5. "@euro" + if ( !localeMapper && ctype && strstr( ctype, "@euro" ) || lang && strstr( lang, "@euro" ) ) + localeMapper = TQTextCodec::codecForName( "ISO 8859-15" ); + + // 6. guess locale from ctype unless ctype is "C" + // 7. guess locale from lang + char * try_by_name = ctype; + if ( ctype && *ctype != 0 && strcmp (ctype, "C") != 0 ) + try_by_name = lang; + + // Now do the guessing. + if ( lang && *lang && !localeMapper && try_by_name && *try_by_name ) { + if ( try_locale_list( iso8859_15locales, lang ) ) + localeMapper = TQTextCodec::codecForName( "ISO 8859-15" ); + else if ( try_locale_list( iso8859_2locales, lang ) ) + localeMapper = TQTextCodec::codecForName( "ISO 8859-2" ); + else if ( try_locale_list( iso8859_3locales, lang ) ) + localeMapper = TQTextCodec::codecForName( "ISO 8859-3" ); + else if ( try_locale_list( iso8859_4locales, lang ) ) + localeMapper = TQTextCodec::codecForName( "ISO 8859-4" ); + else if ( try_locale_list( iso8859_5locales, lang ) ) + localeMapper = TQTextCodec::codecForName( "ISO 8859-5" ); + else if ( try_locale_list( iso8859_6locales, lang ) ) + localeMapper = TQTextCodec::codecForName( "ISO 8859-6" ); + else if ( try_locale_list( iso8859_7locales, lang ) ) + localeMapper = TQTextCodec::codecForName( "ISO 8859-7" ); + else if ( try_locale_list( iso8859_8locales, lang ) ) + localeMapper = TQTextCodec::codecForName( "ISO 8859-8-I" ); + else if ( try_locale_list( iso8859_9locales, lang ) ) + localeMapper = TQTextCodec::codecForName( "ISO 8859-9" ); + else if ( try_locale_list( iso8859_13locales, lang ) ) + localeMapper = TQTextCodec::codecForName( "ISO 8859-13" ); + else if ( try_locale_list( tis_620locales, lang ) ) + localeMapper = TQTextCodec::codecForName( "ISO 8859-11" ); + else if ( try_locale_list( koi8_ulocales, lang ) ) + localeMapper = TQTextCodec::codecForName( "KOI8-U" ); + else if ( try_locale_list( cp_1251locales, lang ) ) + localeMapper = TQTextCodec::codecForName( "CP 1251" ); + else if ( try_locale_list( pt_154locales, lang ) ) + localeMapper = TQTextCodec::codecForName( "PT 154" ); + else if ( try_locale_list( probably_koi8_rlocales, lang ) ) + localeMapper = ru_RU_hack( lang ); + } + + delete [] ctype; + delete [] lang; + } + if ( localeMapper && localeMapper->mibEnum() == 11 ) + localeMapper = TQTextCodec::codecForName( "ISO 8859-8-I" ); + + // If everything failed, we default to 8859-1 + // We could perhaps default to 8859-15. + if ( !localeMapper ) + localeMapper = TQTextCodec::codecForName( "ISO 8859-1" ); +#endif +} + + +static void realSetup() +{ +#if defined(QT_CHECK_STATE) + if ( destroying_is_ok ) + qWarning( "TQTextCodec: creating new codec during codec cleanup!" ); +#endif + all = new TQValueList<TQTextCodec*>; + + (void)new TQLatin1Codec; + (void)new TQLatin15Codec; + (void)new TQUtf8Codec; + (void)new TQUtf16Codec; + +#ifndef QT_NO_CODECS + int i = 0; + do { + (void)new TQSimpleTextCodec( i ); + } while( unicodevalues[i++].mib != LAST_MIB ); + + (void)new TQTsciiCodec; + + for (i = 0; i < 9; ++i) { + (void)new TQIsciiCodec(i); + } +#endif // QT_NO_CODECS +#ifndef QT_NO_CODEC_HEBREW + (void)new TQHebrewCodec; +#endif +#ifndef QT_NO_BIG_CODECS + (void)new TQBig5Codec; + (void)new TQBig5hkscsCodec; + (void)new TQEucJpCodec; + (void)new TQEucKrCodec; + (void)new TQGb2312Codec; + (void)new TQGbkCodec; + (void)new TQGb18030Codec; + (void)new TQJisCodec; + (void)new TQSjisCodec; +#endif // QT_NO_BIG_CODECS + +#ifdef Q_OS_WIN32 + (void) new TQWindowsLocalCodec; +#endif // Q_OS_WIN32 + + if ( !localeMapper ) + setupLocaleMapper(); +} + +void TQTextCodec::fromUnicodeInternal( const TQChar *in, unsigned short *out, int length ) +{ + switch( mibEnum() ) { +#ifndef QT_NO_CODECS + case 2084: + case 2088: + case 5: + case 6: + case 7: + case 8: + case 82: + case 10: + case 85: + case 12: + case 13: + case 109: + case 110: + case 2004: + case 2009: + case 2086: + case 2250: + case 2251: + case 2252: + case 2253: + case 2254: + case 2255: + case 2256: + case 2257: + case 2258: + case 2259: + ((TQSimpleTextCodec *)this)->fromUnicode( in, out, length ); + break; + +#if !defined(QT_NO_BIG_CODECS) && defined(Q_WS_X11) + // the TQFont*Codecs are only used on X11 + + case 15: + ((TQFontJis0201Codec *) this)->fromUnicode( in, out, length ); + break; + + case 63: + ((TQFontJis0208Codec *) this)->fromUnicode( in, out, length ); + break; + + case 36: + ((TQFontKsc5601Codec *) this)->fromUnicode( in, out, length ); + break; + + case 57: + ((TQFontGb2312Codec *) this)->fromUnicode( in, out, length ); + break; + + case -113: + ((TQFontGbkCodec *) this)->fromUnicode( in, out, length ); + break; + + case -114: + ((TQFontGb18030_0Codec *) this)->fromUnicode( in, out, length ); + break; + + case -2026: + ((TQFontBig5Codec *) this)->fromUnicode( in, out, length ); + break; + + case -2101: + ((TQFontBig5hkscsCodec *) this)->fromUnicode( in, out, length ); + break; + + case -4242: + ((TQFontLaoCodec *) this)->fromUnicode( in, out, length ); + break; +#endif +#endif // QT_NO_CODECS + + case 4: + ((TQLatin1Codec *) this)->fromUnicode( in, out, length ); + break; + + case 111: + ((TQLatin15Codec *) this)->fromUnicode( in, out, length ); + break; + + default: + { + TQConstString string( in, length ); + TQString str = string.string(); + for ( int i = 0; i < length; i++ ) + out[i] = characterFromUnicode( str, i ); + } + } +} + + +/*! + \fn TQTextCodec* TQTextCodec::codecForTr() + + Returns the codec used by TQObject::tr() on its argument. If this + function returns 0 (the default), tr() assumes Latin-1. + + \sa setCodecForTr() +*/ + +/*! + \fn void TQTextCodec::setCodecForTr(TQTextCodec *c) + \nonreentrant + + Sets the codec used by TQObject::tr() on its argument to \a c. If + \a c is 0 (the default), tr() assumes Latin-1. + + If the literal quoted text in the program is not in the Latin-1 + encoding, this function can be used to set the appropriate + encoding. For example, software developed by Korean programmers + might use eucKR for all the text in the program, in which case the + main() function might look like this: + + \code + int main(int argc, char** argv) + { + TQApplication app(argc, argv); + ... install any additional codecs ... + TQTextCodec::setCodecForTr( TQTextCodec::codecForName("eucKR") ); + ... + } + \endcode + + Note that this is not the way to select the encoding that the \e + user has chosen. For example, to convert an application containing + literal English strings to Korean, all that is needed is for the + English strings to be passed through tr() and for translation + files to be loaded. For details of internationalization, see the + \link i18n.html TQt internationalization documentation\endlink. + + \sa codecForTr(), setCodecForTr(), setCodecForCStrings() +*/ + + +/*! + \fn TQTextCodec* TQTextCodec::codecForCStrings() + + Returns the codec used by TQString to convert to and from const + char* and TQCStrings. If this function returns 0 (the default), + TQString assumes Latin-1. + + \sa setCodecForCStrings() +*/ + +/*! + \fn void TQTextCodec::setCodecForCStrings(TQTextCodec *c) + \nonreentrant + + Sets the codec used by TQString to convert to and from const char* + and TQCStrings. If \a c is 0 (the default), TQString assumes Latin-1. + + \warning Some codecs do not preserve the characters in the ascii + range (0x00 to 0x7f). For example, the Japanese Shift-JIS + encoding maps the backslash character (0x5a) to the Yen character. + This leads to unexpected results when using the backslash + character to escape characters in strings used in e.g. regular + expressions. Use TQString::fromLatin1() to preserve characters in + the ascii range when needed. + + \sa codecForCStrings(), setCodecForTr(), setCodecForCStrings() +*/ + + +TQTextCodec *TQTextCodec::cftr = 0; +TQTextCodec *TQTextCodec::cfcs = 0; + + +#endif // QT_NO_TEXTCODEC |