diff options
author | Michele Calgaro <[email protected]> | 2025-01-29 18:05:37 +0900 |
---|---|---|
committer | Michele Calgaro <[email protected]> | 2025-01-30 19:06:16 +0900 |
commit | c5cda03125a6d34c179d968011083bceb87976bd (patch) | |
tree | 33c2ba873b23cf503ed3c3aa1c52d3fac1006245 | |
parent | d517cda6bdb0160be39a96712d4cf6036b920be3 (diff) | |
download | tqt3-c5cda03125a6d34c179d968011083bceb87976bd.tar.gz tqt3-c5cda03125a6d34c179d968011083bceb87976bd.zip |
Add support for surrogate pairs to TQChar API.
This relates to issue #162.
The new code is partially taken from Qt4 but with some local rework.
Signed-off-by: Michele Calgaro <[email protected]>
-rw-r--r-- | src/3rdparty/libpng/CHANGES | 2 | ||||
-rw-r--r-- | src/codecs/tqgb18030codec.cpp | 30 | ||||
-rw-r--r-- | src/codecs/tqutfcodec.cpp | 23 | ||||
-rw-r--r-- | src/kernel/tqfontengine_x11.cpp | 17 | ||||
-rw-r--r-- | src/kernel/tqtextengine.cpp | 3 | ||||
-rw-r--r-- | src/tools/tqstring.cpp | 22 | ||||
-rw-r--r-- | src/tools/tqstring.h | 43 |
7 files changed, 82 insertions, 58 deletions
diff --git a/src/3rdparty/libpng/CHANGES b/src/3rdparty/libpng/CHANGES index d151a41c7..eea6916ee 100644 --- a/src/3rdparty/libpng/CHANGES +++ b/src/3rdparty/libpng/CHANGES @@ -828,7 +828,7 @@ version 1.0.8 [July 24, 2000] version 1.0.9beta1 [November 10, 2000] Fixed typo in scripts/makefile.hpux Updated makevms.com in scripts and contrib/* and contrib/* (Martin Zinser) - Fixed seqence-point bug in contrib/pngminus/png2pnm (Martin Zinser) + Fixed sequence-point bug in contrib/pngminus/png2pnm (Martin Zinser) Changed "cdrom.com" in documentation to "libpng.org" Revised pnggccrd.c to get it all working, and updated makefile.gcmmx (Greg). Changed type of "params" from voidp to png_voidp in png_read|write_png(). diff --git a/src/codecs/tqgb18030codec.cpp b/src/codecs/tqgb18030codec.cpp index 0ae2fb4ff..d2578dc8e 100644 --- a/src/codecs/tqgb18030codec.cpp +++ b/src/codecs/tqgb18030codec.cpp @@ -184,18 +184,16 @@ TQCString TQGb18030Codec::fromUnicode(const TQString& uc, int& lenInOut) const if ( ch.row() == 0x00 && ch.cell() < 0x80 ) { // ASCII *cursor++ = ch.cell(); - } else if ((ch.unicode() & 0xf800) == 0xd800) { - unsigned short high = ch.unicode(); + } else if (ch.isHighSurrogate()) { // surrogates area. 
check for correct encoding // we need at least one more character, first the high surrogate, then the low one - if (i == l-1 || high >= 0xdc00) + if (i == l-1) *cursor++ = '?'; else { - unsigned short low = uc[i+1].unicode(); - if (low >= 0xdc00 && low <= 0xdfff) { + if (uc[i+1].isLowSurrogate()) { // valid surrogate pair + uint u = TQChar::surrogateToUcs4(uc[i], uc[i + 1]); ++i; - uint u = (high-0xd800)*0x400+(low-0xdc00)+0x10000; len = qt_UnicodeToGb18030(u, buf); if (len >= 2) { for (int j=0; j<len; j++) @@ -241,15 +239,13 @@ TQString TQGb18030Codec::toUnicode(const char* chars, int len) const uint u = qt_Gb18030ToUnicode( (const uchar*)(chars + i), clen ); if (clen == 2 || clen == 4) { - if (u < 0x10000) + if (!TQChar::requiresSurrogates(u)) { result += TQValidChar(u); + } else { // encode into surrogate pair - u -= 0x10000; - unsigned short high = u/0x400 + 0xd800; - unsigned short low = u%0x400 + 0xdc00; - result += TQChar(high); - result += TQChar(low); + result += TQChar(TQChar::highSurrogate(u)); + result += TQChar(TQChar::lowSurrogate(u)); } i += clen; } else if (i < len) { @@ -402,15 +398,13 @@ public: int clen = 4; uint u = qt_Gb18030ToUnicode(buf, clen); if (clen == 4) { - if (u < 0x10000) + if (!TQChar::requiresSurrogates(u)) { result += TQValidChar(u); + } else { // encode into surrogate pair - u -= 0x10000; - unsigned short high = u/0x400 + 0xd800; - unsigned short low = u%0x400 + 0xdc00; - result += TQChar(high); - result += TQChar(low); + result += TQChar(TQChar::highSurrogate(u)); + result += TQChar(TQChar::lowSurrogate(u)); } } else { result += TQChar::replacement; diff --git a/src/codecs/tqutfcodec.cpp b/src/codecs/tqutfcodec.cpp index 1125aa9f3..eba25e505 100644 --- a/src/codecs/tqutfcodec.cpp +++ b/src/codecs/tqutfcodec.cpp @@ -64,13 +64,10 @@ TQCString TQUtf8Codec::fromUnicode(const TQString& uc, int& lenInOut) const if ( u < 0x0800 ) { *cursor++ = 0xc0 | ((uchar) (u >> 6)); } else { - if (u >= 0xd800 && u < 0xdc00 && i < l-1) { - unsigned 
short low = ch[1].unicode(); - if (low >= 0xdc00 && low < 0xe000) { - ++ch; - ++i; - u = (u - 0xd800)*0x400 + (low - 0xdc00) + 0x10000; - } + if (ch[0].isHighSurrogate() && i < (l - 1) && ch[1].isLowSurrogate()) { + u = TQChar::surrogateToUcs4(ch[0], ch[1]); + ++ch; + ++i; } if (u > 0xffff) { // see TQString::fromUtf8() and TQString::utf8() for explanations @@ -179,16 +176,14 @@ public: uc = (uc << 6) | (ch & 0x3f); need--; if ( !need ) { - if (uc > 0xffff) { + if (TQChar::requiresSurrogates(uc)) { // surrogate pair - uc -= 0x10000; - unsigned short high = uc/0x400 + 0xd800; - unsigned short low = uc%0x400 + 0xdc00; - *qch++ = TQChar(high); - *qch++ = TQChar(low); + *qch++ = TQChar(TQChar::highSurrogate(uc)); + *qch++ = TQChar(TQChar::lowSurrogate(uc)); headerDone = TRUE; } else if ((uc < min_uc) || (uc >= 0xd800 && uc <= 0xdfff) || (uc >= 0xfffe)) { - *qch++ = TQChar::replacement; + // overlong sequence, UTF16 surrogate or BOM + *qch++ = TQChar::replacement; } else { if (headerDone || TQChar(uc) != TQChar::byteOrderMark) *qch++ = uc; diff --git a/src/kernel/tqfontengine_x11.cpp b/src/kernel/tqfontengine_x11.cpp index b3461a6ff..47078dea9 100644 --- a/src/kernel/tqfontengine_x11.cpp +++ b/src/kernel/tqfontengine_x11.cpp @@ -1531,16 +1531,15 @@ static glyph_t getAdobeCharIndex(XftFont *font, int cmap, uint ucs4) return g; } -static uint getChar(const TQChar *str, int &i, const int len) +static uint getUnicode(const TQChar *str, int &i, const int len) { - uint uc = str[i].unicode(); - if (uc >= 0xd800 && uc < 0xdc00 && i < len-1) { - uint low = str[++i].unicode(); - if (low >= 0xdc00 && low < 0xe000) { - uc = (uc - 0xd800)*0x400 + (low - 0xdc00) + 0x10000; - } + if (str[i].isHighSurrogate() && i < (len - 1) && str[i + 1].isLowSurrogate()) + { + ++i; // Don't delete this: it is required for correct + // advancement when handling surrogate pairs + return TQChar::surrogateToUcs4(str[i - 1], str[i]); } - return uc; + return str[i].unicode(); } TQFontEngine::Error 
TQFontEngineXft::stringToCMap( const TQChar *str, int len, glyph_t *glyphs, advance_t *advances, int *nglyphs, bool mirrored ) const @@ -1552,7 +1551,7 @@ TQFontEngine::Error TQFontEngineXft::stringToCMap( const TQChar *str, int len, g int glyph_pos = 0; for ( int i = 0; i < len; ++i ) { - uint uc = getChar(str, i, len); + uint uc = getUnicode(str, i, len); if ( uc == 0xa0 ) uc = 0x20; if ( mirrored ) diff --git a/src/kernel/tqtextengine.cpp b/src/kernel/tqtextengine.cpp index f50d849cc..05cdbcc13 100644 --- a/src/kernel/tqtextengine.cpp +++ b/src/kernel/tqtextengine.cpp @@ -819,8 +819,7 @@ static void calcLineBreaks(const TQString &str, TQCharAttributes *charAttributes if (category == TQChar::Other_Surrogate) { // char stop only on first pair - if (uc[i].unicode() >= 0xd800 && uc[i].unicode() < 0xdc00 && i < len-1 - && uc[i+1].unicode() >= 0xdc00 && uc[i+1].unicode() < 0xe000) + if (uc[i].isHighSurrogate() && i < (len - 1) && uc[i + 1].isLowSurrogate()) goto nsm; // ### correctly handle second surrogate } diff --git a/src/tools/tqstring.cpp b/src/tools/tqstring.cpp index 318f1aa77..8db00f1cc 100644 --- a/src/tools/tqstring.cpp +++ b/src/tools/tqstring.cpp @@ -6016,13 +6016,10 @@ TQCString TQString::utf8() const if ( u < 0x0800 ) { *cursor++ = 0xc0 | ((uchar) (u >> 6)); } else { - if (u >= 0xd800 && u < 0xdc00 && i < l-1) { - unsigned short low = ch[1].unicode(); - if (low >= 0xdc00 && low < 0xe000) { - ++ch; - ++i; - u = (u - 0xd800)*0x400 + (low - 0xdc00) + 0x10000; - } + if (ch[0].isHighSurrogate() && i < (l - 1) && ch[1].isLowSurrogate()) { + u = TQChar::surrogateToUcs4(ch[0], ch[1]); + ++ch; + ++i; } if (u > 0xffff) { // if people are working in utf8, but strings are encoded in eg. 
latin1, the resulting @@ -6101,15 +6098,12 @@ TQString TQString::fromUtf8( const char* utf8, int len ) uc = (uc << 6) | (ch & 0x3f); need--; if ( !need ) { - if (uc > 0xffff) { + if (TQChar::requiresSurrogates(uc)) { // surrogate pair - uc -= 0x10000; - unsigned short high = uc/0x400 + 0xd800; - unsigned short low = uc%0x400 + 0xdc00; - *qch++ = TQChar(high); - *qch++ = TQChar(low); + *qch++ = TQChar(TQChar::highSurrogate(uc)); + *qch++ = TQChar(TQChar::lowSurrogate(uc)); } else if (uc < min_uc || (uc >= 0xd800 && uc <= 0xdfff) || (uc >= 0xfffe)) { - // overlong seqence, UTF16 surrogate or BOM + // overlong sequence, UTF16 surrogate or BOM i = error; qch = addOne(qch, result); *qch++ = TQChar(0xdbff); diff --git a/src/tools/tqstring.h b/src/tools/tqstring.h index 03fcf9459..c29a9c392 100644 --- a/src/tools/tqstring.h +++ b/src/tools/tqstring.h @@ -222,6 +222,14 @@ public: bool isDigit() const; bool isSymbol() const; + // Surrogate pairs support + bool isHighSurrogate() const; + bool isLowSurrogate() const; + static bool requiresSurrogates(uint ucs4); + static ushort highSurrogate(uint ucs4); + static ushort lowSurrogate(uint ucs4); + static uint surrogateToUcs4(const TQChar &high, const TQChar &low); + uchar cell() const { return ((uchar) ucs & 0xff); } uchar row() const { return ((uchar) (ucs>>8)&0xff); } void setCell( uchar cell ) { ucs = (ucs & 0xff00) + cell; } @@ -313,6 +321,36 @@ inline TQChar::TQChar( int rc ) : ucs( (ushort) (rc & 0xffff) ) { } +inline bool TQChar::isHighSurrogate() const +{ + return ((ucs & 0xfc00) == 0xd800); +} + +inline bool TQChar::isLowSurrogate() const +{ + return ((ucs & 0xfc00) == 0xdc00); +} + +inline bool TQChar::requiresSurrogates(uint ucs4) +{ + return (ucs4 >= 0x10000); +} + +inline ushort TQChar::highSurrogate(uint ucs4) +{ + return ushort(((ucs4 - 0x10000) >> 10)) | 0xd800; +} + +inline ushort TQChar::lowSurrogate(uint ucs4) +{ + return ushort(ucs4 & 0x03FF) | 0xdc00; +} + +inline uint TQChar::surrogateToUcs4(const TQChar 
&high, const TQChar &low) +{ + // The 0x10000 offset must be added, not OR-ed: the shifted high payload already reaches bit 16 for code points >= U+20000 + return ((uint(high.ucs & 0x03FF) << 10) | (low.ucs & 0x03FF)) + 0x10000; +} + inline bool operator==( char ch, TQChar c ) { return ((uchar) ch) == c.ucs; @@ -806,6 +844,11 @@ public: bool isNumber() const { return s.constref(p).isNumber(); } bool isLetterOrNumber() { return s.constref(p).isLetterOrNumber(); } bool isDigit() const { return s.constref(p).isDigit(); } + bool isSymbol() const { return s.constref(p).isSymbol(); } + + // Surrogate pairs support + bool isHighSurrogate() const { return s.constref(p).isHighSurrogate(); } + bool isLowSurrogate() const { return s.constref(p).isLowSurrogate(); } int digitValue() const { return s.constref(p).digitValue(); } TQChar lower() const { return s.constref(p).lower(); } |