diff options
Diffstat (limited to 'src/codecs')
-rw-r--r-- | src/codecs/tqgb18030codec.cpp | 30 | ||||
-rw-r--r-- | src/codecs/tqutfcodec.cpp | 23 |
2 files changed, 21 insertions, 32 deletions
diff --git a/src/codecs/tqgb18030codec.cpp b/src/codecs/tqgb18030codec.cpp index 0ae2fb4ff..d2578dc8e 100644 --- a/src/codecs/tqgb18030codec.cpp +++ b/src/codecs/tqgb18030codec.cpp @@ -184,18 +184,16 @@ TQCString TQGb18030Codec::fromUnicode(const TQString& uc, int& lenInOut) const if ( ch.row() == 0x00 && ch.cell() < 0x80 ) { // ASCII *cursor++ = ch.cell(); - } else if ((ch.unicode() & 0xf800) == 0xd800) { - unsigned short high = ch.unicode(); + } else if (ch.isHighSurrogate()) { // surrogates area. check for correct encoding // we need at least one more character, first the high surrogate, then the low one - if (i == l-1 || high >= 0xdc00) + if (i == l-1) *cursor++ = '?'; else { - unsigned short low = uc[i+1].unicode(); - if (low >= 0xdc00 && low <= 0xdfff) { + if (uc[i+1].isLowSurrogate()) { // valid surrogate pair + uint u = TQChar::surrogateToUcs4(uc[i], uc[i + 1]); ++i; - uint u = (high-0xd800)*0x400+(low-0xdc00)+0x10000; len = qt_UnicodeToGb18030(u, buf); if (len >= 2) { for (int j=0; j<len; j++) @@ -241,15 +239,13 @@ TQString TQGb18030Codec::toUnicode(const char* chars, int len) const uint u = qt_Gb18030ToUnicode( (const uchar*)(chars + i), clen ); if (clen == 2 || clen == 4) { - if (u < 0x10000) + if (!TQChar::requiresSurrogates(u)) { result += TQValidChar(u); + } else { // encode into surrogate pair - u -= 0x10000; - unsigned short high = u/0x400 + 0xd800; - unsigned short low = u%0x400 + 0xdc00; - result += TQChar(high); - result += TQChar(low); + result += TQChar(TQChar::highSurrogate(u)); + result += TQChar(TQChar::lowSurrogate(u)); } i += clen; } else if (i < len) { @@ -402,15 +398,13 @@ public: int clen = 4; uint u = qt_Gb18030ToUnicode(buf, clen); if (clen == 4) { - if (u < 0x10000) + if (!TQChar::requiresSurrogates(u)) { result += TQValidChar(u); + } else { // encode into surrogate pair - u -= 0x10000; - unsigned short high = u/0x400 + 0xd800; - unsigned short low = u%0x400 + 0xdc00; - result += TQChar(high); - result += TQChar(low); + result += TQChar(TQChar::highSurrogate(u)); + result += TQChar(TQChar::lowSurrogate(u)); } } else { result += TQChar::replacement; diff --git a/src/codecs/tqutfcodec.cpp b/src/codecs/tqutfcodec.cpp index 1125aa9f3..eba25e505 100644 --- a/src/codecs/tqutfcodec.cpp +++ b/src/codecs/tqutfcodec.cpp @@ -64,13 +64,10 @@ TQCString TQUtf8Codec::fromUnicode(const TQString& uc, int& lenInOut) const if ( u < 0x0800 ) { *cursor++ = 0xc0 | ((uchar) (u >> 6)); } else { - if (u >= 0xd800 && u < 0xdc00 && i < l-1) { - unsigned short low = ch[1].unicode(); - if (low >= 0xdc00 && low < 0xe000) { - ++ch; - ++i; - u = (u - 0xd800)*0x400 + (low - 0xdc00) + 0x10000; - } + if (ch[0].isHighSurrogate() && i < (l - 1) && ch[1].isLowSurrogate()) { + u = TQChar::surrogateToUcs4(ch[0], ch[1]); + ++ch; + ++i; } if (u > 0xffff) { // see TQString::fromUtf8() and TQString::utf8() for explanations @@ -179,16 +176,14 @@ public: uc = (uc << 6) | (ch & 0x3f); need--; if ( !need ) { - if (uc > 0xffff) { + if (TQChar::requiresSurrogates(uc)) { // surrogate pair - uc -= 0x10000; - unsigned short high = uc/0x400 + 0xd800; - unsigned short low = uc%0x400 + 0xdc00; - *qch++ = TQChar(high); - *qch++ = TQChar(low); + *qch++ = TQChar(TQChar::highSurrogate(uc)); + *qch++ = TQChar(TQChar::lowSurrogate(uc)); headerDone = TRUE; } else if ((uc < min_uc) || (uc >= 0xd800 && uc <= 0xdfff) || (uc >= 0xfffe)) { - *qch++ = TQChar::replacement; + // overlong sequence, UTF16 surrogate or BOM + *qch++ = TQChar::replacement; } else { if (headerDone || TQChar(uc) != TQChar::byteOrderMark) *qch++ = uc; |