summaryrefslogtreecommitdiffstats
path: root/src/codecs/qutfcodec.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/codecs/qutfcodec.cpp')
-rw-r--r--src/codecs/qutfcodec.cpp350
1 files changed, 350 insertions, 0 deletions
diff --git a/src/codecs/qutfcodec.cpp b/src/codecs/qutfcodec.cpp
new file mode 100644
index 000000000..79ec538c3
--- /dev/null
+++ b/src/codecs/qutfcodec.cpp
@@ -0,0 +1,350 @@
+/****************************************************************************
+**
+** Implementation of TQUtf{8,16}Codec class
+**
+** Created : 981015
+**
+** Copyright (C) 1998-2008 Trolltech ASA. All rights reserved.
+**
+** This file is part of the tools module of the TQt GUI Toolkit.
+**
+** This file may be used under the terms of the GNU General
+** Public License versions 2.0 or 3.0 as published by the Free
+** Software Foundation and appearing in the files LICENSE.GPL2
+** and LICENSE.GPL3 included in the packaging of this file.
+** Alternatively you may (at your option) use any later version
+** of the GNU General Public License if such license has been
+** publicly approved by Trolltech ASA (or its successors, if any)
+** and the KDE Free TQt Foundation.
+**
+** Please review the following information to ensure GNU General
+** Public Licensing requirements will be met:
+** http://trolltech.com/products/qt/licenses/licensing/opensource/.
+** If you are unsure which license is appropriate for your use, please
+** review the following information:
+** http://trolltech.com/products/qt/licenses/licensing/licensingoverview
+** or contact the sales department at sales@trolltech.com.
+**
+** This file may be used under the terms of the Q Public License as
+** defined by Trolltech ASA and appearing in the file LICENSE.TQPL
+** included in the packaging of this file. Licensees holding valid TQt
+** Commercial licenses may use this file in accordance with the TQt
+** Commercial License Agreement provided with the Software.
+**
+** This file is provided "AS IS" with NO WARRANTY OF ANY KIND,
+** INCLUDING THE WARRANTIES OF DESIGN, MERCHANTABILITY AND FITNESS FOR
+** A PARTICULAR PURPOSE. Trolltech reserves all rights not granted
+** herein.
+**
+**********************************************************************/
+
+#include "qutfcodec.h"
+
+#ifndef QT_NO_TEXTCODEC
+
+/*
+  Returns the MIB enum value this codec reports for UTF-8, which is 106.
+*/
+int TQUtf8Codec::mibEnum() const
+{
+    const int utf8MibEnum = 106;
+    return utf8MibEnum;
+}
+
+// Encodes uc as UTF-8 and returns the bytes as a NUL-terminated TQCString.
+//
+// If lenInOut is positive on entry, at most that many TQChars of uc are
+// encoded; otherwise the whole string is. On return lenInOut holds the
+// number of bytes written, not counting the terminating NUL.
+TQCString TQUtf8Codec::fromUnicode(const TQString& uc, int& lenInOut) const
+{
+ int l = uc.length();
+ if (lenInOut > 0)
+ l = TQMIN(l, lenInOut);
+ // Sized for three output bytes per input TQChar plus the terminator; a
+ // surrogate pair consumes two TQChars and emits four bytes, so it fits.
+ int rlen = l*3+1;
+ TQCString rstr(rlen);
+ uchar* cursor = (uchar*)rstr.data();
+ const TQChar *ch = uc.unicode();
+ // ch and i advance together: one step per loop iteration, plus one extra
+ // step whenever a low surrogate is consumed along with its high surrogate.
+ for (int i=0; i < l; i++) {
+ uint u = ch->unicode();
+ if ( u < 0x80 ) {
+ // single byte, value copied unchanged
+ *cursor++ = (uchar)u;
+ } else {
+ if ( u < 0x0800 ) {
+ // two-byte sequence: lead byte here, trailing byte written below
+ *cursor++ = 0xc0 | ((uchar) (u >> 6));
+ } else {
+ // High surrogate with at least one more TQChar available: if the next
+ // one is a low surrogate, merge the pair into one code point.
+ if (u >= 0xd800 && u < 0xdc00 && i < l-1) {
+ unsigned short low = ch[1].unicode();
+ if (low >= 0xdc00 && low < 0xe000) {
+ ++ch;
+ ++i;
+ u = (u - 0xd800)*0x400 + (low - 0xdc00) + 0x10000;
+ }
+ }
+ // A surrogate that was not merged above falls through to the three-byte
+ // branch and is encoded as-is.
+ if (u > 0xffff) {
+ // see TQString::fromUtf8() and TQString::utf8() for explanations
+ // Code points strictly between 0x10fe00 and 0x10ff00 are written as the
+ // single raw byte (u - 0x10fe00) instead of a four-byte sequence.
+ if (u > 0x10fe00 && u < 0x10ff00) {
+ *cursor++ = (u - 0x10fe00);
+ // continue skips the ++ch at the bottom of the loop, so advance here
+ ++ch;
+ continue;
+ } else {
+ // four-byte sequence: first two bytes here, last two written below
+ *cursor++ = 0xf0 | ((uchar) (u >> 18));
+ *cursor++ = 0x80 | ( ((uchar) (u >> 12)) & 0x3f);
+ }
+ } else {
+ // three-byte sequence: lead byte here, two trailing bytes written below
+ *cursor++ = 0xe0 | ((uchar) (u >> 12));
+ }
+ // second-to-last byte of a three- or four-byte sequence
+ *cursor++ = 0x80 | ( ((uchar) (u >> 6)) & 0x3f);
+ }
+ // last byte of every multi-byte sequence
+ *cursor++ = 0x80 | ((uchar) (u&0x3f));
+ }
+ ++ch;
+ }
+ *cursor = 0;
+ lenInOut = cursor - (uchar*)rstr.data();
+ // shrink the buffer to the bytes actually written plus the terminator
+ ((TQByteArray&)rstr).resize(lenInOut+1);
+ return rstr;
+}
+
+/*
+  Decodes \a len bytes of UTF-8 starting at \a chars and returns the result
+  as a TQString. A leading UTF-8 byte order mark (EF BB BF) is skipped before
+  the remaining bytes are handed to TQString::fromUtf8().
+*/
+TQString TQUtf8Codec::toUnicode(const char* chars, int len) const
+{
+    // ">= 3" rather than "> 3": input that consists of nothing but the
+    // three-byte mark must have it stripped as well.
+    if (len >= 3 && (uchar)chars[0] == 0xef && (uchar)chars[1] == 0xbb && (uchar)chars[2] == 0xbf) {
+        // starts with a byte order mark
+        chars += 3;
+        len -= 3;
+    }
+    return TQString::fromUtf8( chars, len );
+}
+
+
+/*
+  Returns the name of this codec, "UTF-8".
+*/
+const char* TQUtf8Codec::name() const
+{
+    const char* const codecName = "UTF-8";
+    return codecName;
+}
+
+/*
+  Scores how plausible it is that the \a len bytes at \a chars are UTF-8.
+
+  Returns -1 as soon as a NUL byte is seen or a two-/three-byte sequence has
+  a trailing byte that is not of the form 10xxxxxx. Otherwise returns a
+  non-negative score: +1 per ASCII byte, +3 per two-byte sequence and +2 per
+  three-byte sequence. A sequence cut short by the end of the buffer is not
+  treated as an error. Bytes that match none of the handled lead-byte
+  patterns (four-byte leads, stray continuation bytes) are skipped without
+  affecting the score.
+*/
+int TQUtf8Codec::heuristicContentMatch(const char* chars, int len) const
+{
+    int score = 0;
+    for (int i=0; i<len; i++) {
+        uchar ch = chars[i];
+        // No nulls allowed.
+        if ( !ch )
+            return -1;
+        if ( ch < 128 ) {
+            // Inconclusive
+            score++;
+        } else if ( (ch&0xe0) == 0xc0 ) {
+            // lead byte of a two-byte sequence
+            if ( i < len-1 ) {
+                uchar c2 = chars[++i];
+                if ( (c2&0xc0) != 0x80 )
+                    return -1;
+                score+=3;
+            }
+        } else if ( (ch&0xf0) == 0xe0 ) {
+            // lead byte of a three-byte sequence
+            if ( i < len-1 ) {
+                uchar c2 = chars[++i];
+                if ( (c2&0xc0) != 0x80 )
+                    return -1;
+                // Validate the third byte too when it is available. This
+                // check used to be unreachable (disabled and placed after
+                // the return above), so a malformed third byte went
+                // unnoticed. The score for well-formed input is unchanged.
+                if ( i < len-1 ) {
+                    uchar c3 = chars[++i];
+                    if ( (c3&0xc0) != 0x80 )
+                        return -1;
+                }
+                score+=2;
+            }
+        }
+    }
+    return score;
+}
+
+
+
+
+/*
+  Incremental UTF-8 decoder. The state of a partially received multi-byte
+  sequence is kept in the object, so a sequence may be split across
+  successive toUnicode() calls.
+
+  Malformed input never aborts decoding; it produces TQChar::replacement.
+  A byte order mark is dropped only if it is the very first character.
+*/
+class TQUtf8Decoder : public TQTextDecoder {
+    uint uc;          // code point bits collected so far for the open sequence
+    uint min_uc;      // smallest value the open sequence may legally encode
+    int need;         // continuation bytes still expected; 0 between sequences
+    bool headerDone;  // TRUE once a first character has been seen
+public:
+    TQUtf8Decoder() : uc(0), min_uc(0), need(0), headerDone(FALSE)
+    {
+    }
+
+    /*
+      Decodes \a len bytes from \a chars and returns the characters that
+      could be completed. An unfinished trailing sequence stays pending in
+      the decoder state for the next call.
+    */
+    TQString toUnicode(const char* chars, int len)
+    {
+        TQString result;
+        result.setLength( len + 1 ); // worst case
+        TQChar *qch = (TQChar *)result.unicode();
+        uchar ch;
+        // Index of the lead byte of the open sequence within this buffer;
+        // stays -1 when the sequence was opened by an earlier call.
+        int error = -1;
+        for (int i=0; i<len; i++) {
+            ch = chars[i];
+            if (need) {
+                if ( (ch&0xc0) == 0x80 ) {
+                    uc = (uc << 6) | (ch & 0x3f);
+                    need--;
+                    if ( !need ) {
+                        if (uc > 0x10ffff) {
+                            // Lead bytes 0xf4..0xf7 can decode past the last
+                            // Unicode code point; such a value has no valid
+                            // surrogate-pair representation.
+                            *qch++ = TQChar::replacement;
+                        } else if (uc > 0xffff) {
+                            // surrogate pair
+                            uc -= 0x10000;
+                            unsigned short high = uc/0x400 + 0xd800;
+                            unsigned short low = uc%0x400 + 0xdc00;
+                            *qch++ = TQChar(high);
+                            *qch++ = TQChar(low);
+                            headerDone = TRUE;
+                        } else if ((uc < min_uc) || (uc >= 0xd800 && uc <= 0xdfff) || (uc >= 0xfffe)) {
+                            // overlong encoding, encoded surrogate, or
+                            // U+FFFE / U+FFFF
+                            *qch++ = TQChar::replacement;
+                        } else {
+                            if (headerDone || TQChar(uc) != TQChar::byteOrderMark)
+                                *qch++ = uc;
+                            headerDone = TRUE;
+                        }
+                    }
+                } else {
+                    // error: rewind to the lead byte so that the loop's i++
+                    // resumes with the byte right after it
+                    i = error;
+                    *qch++ = TQChar::replacement;
+                    need = 0;
+                }
+            } else {
+                if ( ch < 128 ) {
+                    *qch++ = ch;
+                    headerDone = TRUE;
+                } else if ((ch & 0xe0) == 0xc0) {
+                    // two-byte sequence
+                    uc = ch & 0x1f;
+                    need = 1;
+                    error = i;
+                    min_uc = 0x80;
+                } else if ((ch & 0xf0) == 0xe0) {
+                    // three-byte sequence
+                    uc = ch & 0x0f;
+                    need = 2;
+                    error = i;
+                    min_uc = 0x800;
+                } else if ((ch&0xf8) == 0xf0) {
+                    // four-byte sequence
+                    uc = ch & 0x07;
+                    need = 3;
+                    error = i;
+                    min_uc = 0x10000;
+                } else {
+                    // error: stray continuation byte or invalid lead byte
+                    *qch++ = TQChar::replacement;
+                }
+            }
+        }
+        // drop the unused tail of the worst-case allocation
+        result.truncate( qch - result.unicode() );
+        return result;
+    }
+};
+
+/*
+  Creates a fresh TQUtf8Decoder with `new` and returns it. Nothing in this
+  function releases the object again.
+*/
+TQTextDecoder* TQUtf8Codec::makeDecoder() const
+{
+    TQTextDecoder* decoder = new TQUtf8Decoder();
+    return decoder;
+}
+
+
+
+
+
+
+/*
+  Returns the MIB enum value this codec reports, which is 1000.
+*/
+int TQUtf16Codec::mibEnum() const
+{
+    const int ucs2MibEnum = 1000;
+    return ucs2MibEnum;
+}
+
+/*
+  Returns the name of this codec, "ISO-10646-UCS-2".
+*/
+const char* TQUtf16Codec::name() const
+{
+    const char* const codecName = "ISO-10646-UCS-2";
+    return codecName;
+}
+
+/*
+  Returns \a len when the buffer starts with the byte pair FF FE or FE FF,
+  and 0 otherwise (including when fewer than two bytes are available).
+*/
+int TQUtf16Codec::heuristicContentMatch(const char* chars, int len) const
+{
+    if ( len < 2 )
+        return 0;
+
+    const uchar* bytes = (const uchar*)chars;
+    const bool startsWithFFFE = ( bytes[0] == 0xff && bytes[1] == 0xfe );
+    const bool startsWithFEFF = ( bytes[0] == 0xfe && bytes[1] == 0xff );
+    return ( startsWithFFFE || startsWithFEFF ) ? len : 0;
+}
+
+
+
+
+/*
+  Encoder that emits the TQChars of a string as raw bytes, copied straight
+  from memory. The first call on an encoder object additionally prefixes the
+  output with TQChar::byteOrderMark.
+*/
+class TQUtf16Encoder : public TQTextEncoder {
+    bool headerdone;  // TRUE once the byte order mark has been emitted
+public:
+    TQUtf16Encoder() : headerdone(FALSE)
+    {
+    }
+
+    /*
+      Returns the bytes of \a uc. The value of \a lenInOut on entry is not
+      consulted; on return it holds the number of bytes produced.
+    */
+    TQCString fromUnicode(const TQString& uc, int& lenInOut)
+    {
+        const int payloadBytes = uc.length()*sizeof(TQChar);
+        // room for the byte order mark on the first call only
+        const int markBytes = headerdone ? 0 : (int)sizeof(TQChar);
+        headerdone = TRUE;
+
+        lenInOut = markBytes + payloadBytes;
+        TQCString d(lenInOut);
+        if ( markBytes )
+            memcpy(d.data(),&TQChar::byteOrderMark,markBytes);
+        memcpy(d.data()+markBytes,uc.unicode(),payloadBytes);
+        return d;
+    }
+};
+
+/*
+  Incremental decoder that assembles TQChars from pairs of bytes. A byte left
+  over at the end of one call is kept and paired with the first byte of the
+  next call. The first assembled character is inspected once: a swapped byte
+  order mark flips the byte order used from then on, a regular byte order
+  mark is discarded, and anything else is passed through.
+*/
+class TQUtf16Decoder : public TQTextDecoder {
+    uchar buf;        // first byte of a pair whose second byte is still missing
+    bool half;        // TRUE while buf holds such a pending byte
+    bool swap;        // TRUE when the two bytes of each pair are exchanged
+    bool headerdone;  // TRUE once the first character has been inspected
+
+public:
+    TQUtf16Decoder() : half(FALSE), swap(FALSE), headerdone(FALSE)
+    {
+    }
+
+    /*
+      Decodes \a len bytes from \a chars and returns the characters that
+      could be completed.
+    */
+    TQString toUnicode(const char* chars, int len)
+    {
+        TQString result;
+        result.setLength( len + 1 ); // worst case
+        TQChar *out = (TQChar *)result.unicode();
+        TQChar ch;
+        for ( int remaining = len; remaining--; ) {
+            const uchar byte = *chars++;
+
+            // first byte of a pair: remember it and wait for the second
+            if ( !half ) {
+                buf = byte;
+                half = TRUE;
+                continue;
+            }
+            half = FALSE;
+
+            if ( swap ) {
+                ch.setRow( byte );
+                ch.setCell( buf );
+            } else {
+                ch.setRow( buf );
+                ch.setCell( byte );
+            }
+
+            if ( headerdone ) {
+                *out++ = ch;
+                continue;
+            }
+
+            // very first character: it may be a byte order mark
+            headerdone = TRUE;
+            if ( ch == TQChar::byteOrderSwapped ) {
+                swap = !swap;
+            } else if ( !(ch == TQChar::byteOrderMark) ) {
+                *out++ = ch;
+            }
+            // a regular byte order mark (ZWNBSP) is ignored
+        }
+        result.truncate( out - result.unicode() );
+        return result;
+    }
+};
+
+/*
+  Creates a fresh TQUtf16Decoder with `new` and returns it. Nothing in this
+  function releases the object again.
+*/
+TQTextDecoder* TQUtf16Codec::makeDecoder() const
+{
+    TQTextDecoder* decoder = new TQUtf16Decoder();
+    return decoder;
+}
+
+/*
+  Creates a fresh TQUtf16Encoder with `new` and returns it. Nothing in this
+  function releases the object again.
+*/
+TQTextEncoder* TQUtf16Codec::makeEncoder() const
+{
+    TQTextEncoder* encoder = new TQUtf16Encoder();
+    return encoder;
+}
+
+#endif //QT_NO_TEXTCODEC