summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMichele Calgaro <[email protected]>2025-01-29 18:05:37 +0900
committerMichele Calgaro <[email protected]>2025-01-30 19:06:16 +0900
commitc5cda03125a6d34c179d968011083bceb87976bd (patch)
tree33c2ba873b23cf503ed3c3aa1c52d3fac1006245
parentd517cda6bdb0160be39a96712d4cf6036b920be3 (diff)
downloadtqt3-c5cda03125a6d34c179d968011083bceb87976bd.tar.gz
tqt3-c5cda03125a6d34c179d968011083bceb87976bd.zip
Add support for surrogate pairs to TQChar API.
This relates to issue #162. The new code is partially taken from Qt4 but with some local rework. Signed-off-by: Michele Calgaro <[email protected]>
-rw-r--r--src/3rdparty/libpng/CHANGES2
-rw-r--r--src/codecs/tqgb18030codec.cpp30
-rw-r--r--src/codecs/tqutfcodec.cpp23
-rw-r--r--src/kernel/tqfontengine_x11.cpp17
-rw-r--r--src/kernel/tqtextengine.cpp3
-rw-r--r--src/tools/tqstring.cpp22
-rw-r--r--src/tools/tqstring.h43
7 files changed, 82 insertions, 58 deletions
diff --git a/src/3rdparty/libpng/CHANGES b/src/3rdparty/libpng/CHANGES
index d151a41c7..eea6916ee 100644
--- a/src/3rdparty/libpng/CHANGES
+++ b/src/3rdparty/libpng/CHANGES
@@ -828,7 +828,7 @@ version 1.0.8 [July 24, 2000]
version 1.0.9beta1 [November 10, 2000]
Fixed typo in scripts/makefile.hpux
Updated makevms.com in scripts and contrib/* and contrib/* (Martin Zinser)
- Fixed seqence-point bug in contrib/pngminus/png2pnm (Martin Zinser)
+ Fixed sequence-point bug in contrib/pngminus/png2pnm (Martin Zinser)
Changed "cdrom.com" in documentation to "libpng.org"
Revised pnggccrd.c to get it all working, and updated makefile.gcmmx (Greg).
Changed type of "params" from voidp to png_voidp in png_read|write_png().
diff --git a/src/codecs/tqgb18030codec.cpp b/src/codecs/tqgb18030codec.cpp
index 0ae2fb4ff..d2578dc8e 100644
--- a/src/codecs/tqgb18030codec.cpp
+++ b/src/codecs/tqgb18030codec.cpp
@@ -184,18 +184,16 @@ TQCString TQGb18030Codec::fromUnicode(const TQString& uc, int& lenInOut) const
if ( ch.row() == 0x00 && ch.cell() < 0x80 ) {
// ASCII
*cursor++ = ch.cell();
- } else if ((ch.unicode() & 0xf800) == 0xd800) {
- unsigned short high = ch.unicode();
+ } else if (ch.isHighSurrogate()) {
// surrogates area. check for correct encoding
// we need at least one more character, first the high surrogate, then the low one
- if (i == l-1 || high >= 0xdc00)
+ if (i == l-1)
*cursor++ = '?';
else {
- unsigned short low = uc[i+1].unicode();
- if (low >= 0xdc00 && low <= 0xdfff) {
+ if (uc[i+1].isLowSurrogate()) {
// valid surrogate pair
+ uint u = TQChar::surrogateToUcs4(uc[i], uc[i + 1]);
++i;
- uint u = (high-0xd800)*0x400+(low-0xdc00)+0x10000;
len = qt_UnicodeToGb18030(u, buf);
if (len >= 2) {
for (int j=0; j<len; j++)
@@ -241,15 +239,13 @@ TQString TQGb18030Codec::toUnicode(const char* chars, int len) const
uint u = qt_Gb18030ToUnicode( (const uchar*)(chars + i), clen );
if (clen == 2 || clen == 4) {
- if (u < 0x10000)
+ if (!TQChar::requiresSurrogates(u)) {
result += TQValidChar(u);
+ }
else {
// encode into surrogate pair
- u -= 0x10000;
- unsigned short high = u/0x400 + 0xd800;
- unsigned short low = u%0x400 + 0xdc00;
- result += TQChar(high);
- result += TQChar(low);
+ result += TQChar(TQChar::highSurrogate(u));
+ result += TQChar(TQChar::lowSurrogate(u));
}
i += clen;
} else if (i < len) {
@@ -402,15 +398,13 @@ public:
int clen = 4;
uint u = qt_Gb18030ToUnicode(buf, clen);
if (clen == 4) {
- if (u < 0x10000)
+ if (!TQChar::requiresSurrogates(u)) {
result += TQValidChar(u);
+ }
else {
// encode into surrogate pair
- u -= 0x10000;
- unsigned short high = u/0x400 + 0xd800;
- unsigned short low = u%0x400 + 0xdc00;
- result += TQChar(high);
- result += TQChar(low);
+ result += TQChar(TQChar::highSurrogate(u));
+ result += TQChar(TQChar::lowSurrogate(u));
}
} else {
result += TQChar::replacement;
diff --git a/src/codecs/tqutfcodec.cpp b/src/codecs/tqutfcodec.cpp
index 1125aa9f3..eba25e505 100644
--- a/src/codecs/tqutfcodec.cpp
+++ b/src/codecs/tqutfcodec.cpp
@@ -64,13 +64,10 @@ TQCString TQUtf8Codec::fromUnicode(const TQString& uc, int& lenInOut) const
if ( u < 0x0800 ) {
*cursor++ = 0xc0 | ((uchar) (u >> 6));
} else {
- if (u >= 0xd800 && u < 0xdc00 && i < l-1) {
- unsigned short low = ch[1].unicode();
- if (low >= 0xdc00 && low < 0xe000) {
- ++ch;
- ++i;
- u = (u - 0xd800)*0x400 + (low - 0xdc00) + 0x10000;
- }
+ if (ch[0].isHighSurrogate() && i < (l - 1) && ch[1].isLowSurrogate()) {
+ u = TQChar::surrogateToUcs4(ch[0], ch[1]);
+ ++ch;
+ ++i;
}
if (u > 0xffff) {
// see TQString::fromUtf8() and TQString::utf8() for explanations
@@ -179,16 +176,14 @@ public:
uc = (uc << 6) | (ch & 0x3f);
need--;
if ( !need ) {
- if (uc > 0xffff) {
+ if (TQChar::requiresSurrogates(uc)) {
// surrogate pair
- uc -= 0x10000;
- unsigned short high = uc/0x400 + 0xd800;
- unsigned short low = uc%0x400 + 0xdc00;
- *qch++ = TQChar(high);
- *qch++ = TQChar(low);
+ *qch++ = TQChar(TQChar::highSurrogate(uc));
+ *qch++ = TQChar(TQChar::lowSurrogate(uc));
headerDone = TRUE;
} else if ((uc < min_uc) || (uc >= 0xd800 && uc <= 0xdfff) || (uc >= 0xfffe)) {
- *qch++ = TQChar::replacement;
+ // overlong sequence, UTF16 surrogate or BOM
+ *qch++ = TQChar::replacement;
} else {
if (headerDone || TQChar(uc) != TQChar::byteOrderMark)
*qch++ = uc;
diff --git a/src/kernel/tqfontengine_x11.cpp b/src/kernel/tqfontengine_x11.cpp
index b3461a6ff..47078dea9 100644
--- a/src/kernel/tqfontengine_x11.cpp
+++ b/src/kernel/tqfontengine_x11.cpp
@@ -1531,16 +1531,15 @@ static glyph_t getAdobeCharIndex(XftFont *font, int cmap, uint ucs4)
return g;
}
-static uint getChar(const TQChar *str, int &i, const int len)
+static uint getUnicode(const TQChar *str, int &i, const int len)
{
- uint uc = str[i].unicode();
- if (uc >= 0xd800 && uc < 0xdc00 && i < len-1) {
- uint low = str[++i].unicode();
- if (low >= 0xdc00 && low < 0xe000) {
- uc = (uc - 0xd800)*0x400 + (low - 0xdc00) + 0x10000;
- }
+ if (str[i].isHighSurrogate() && i < (len - 1) && str[i + 1].isLowSurrogate())
+ {
+ ++i; // Don't delete this: it is required for correct
+ // advancement when handling surrogate pairs
+ return TQChar::surrogateToUcs4(str[i - 1], str[i]);
}
- return uc;
+ return str[i].unicode();
}
TQFontEngine::Error TQFontEngineXft::stringToCMap( const TQChar *str, int len, glyph_t *glyphs, advance_t *advances, int *nglyphs, bool mirrored ) const
@@ -1552,7 +1551,7 @@ TQFontEngine::Error TQFontEngineXft::stringToCMap( const TQChar *str, int len, g
int glyph_pos = 0;
for ( int i = 0; i < len; ++i ) {
- uint uc = getChar(str, i, len);
+ uint uc = getUnicode(str, i, len);
if ( uc == 0xa0 )
uc = 0x20;
if ( mirrored )
diff --git a/src/kernel/tqtextengine.cpp b/src/kernel/tqtextengine.cpp
index f50d849cc..05cdbcc13 100644
--- a/src/kernel/tqtextengine.cpp
+++ b/src/kernel/tqtextengine.cpp
@@ -819,8 +819,7 @@ static void calcLineBreaks(const TQString &str, TQCharAttributes *charAttributes
if (category == TQChar::Other_Surrogate) {
// char stop only on first pair
- if (uc[i].unicode() >= 0xd800 && uc[i].unicode() < 0xdc00 && i < len-1
- && uc[i+1].unicode() >= 0xdc00 && uc[i+1].unicode() < 0xe000)
+ if (uc[i].isHighSurrogate() && i < (len - 1) && uc[i + 1].isLowSurrogate())
goto nsm;
// ### correctly handle second surrogate
}
diff --git a/src/tools/tqstring.cpp b/src/tools/tqstring.cpp
index 318f1aa77..8db00f1cc 100644
--- a/src/tools/tqstring.cpp
+++ b/src/tools/tqstring.cpp
@@ -6016,13 +6016,10 @@ TQCString TQString::utf8() const
if ( u < 0x0800 ) {
*cursor++ = 0xc0 | ((uchar) (u >> 6));
} else {
- if (u >= 0xd800 && u < 0xdc00 && i < l-1) {
- unsigned short low = ch[1].unicode();
- if (low >= 0xdc00 && low < 0xe000) {
- ++ch;
- ++i;
- u = (u - 0xd800)*0x400 + (low - 0xdc00) + 0x10000;
- }
+ if (ch[0].isHighSurrogate() && i < (l - 1) && ch[1].isLowSurrogate()) {
+ u = TQChar::surrogateToUcs4(ch[0], ch[1]);
+ ++ch;
+ ++i;
}
if (u > 0xffff) {
// if people are working in utf8, but strings are encoded in eg. latin1, the resulting
@@ -6101,15 +6098,12 @@ TQString TQString::fromUtf8( const char* utf8, int len )
uc = (uc << 6) | (ch & 0x3f);
need--;
if ( !need ) {
- if (uc > 0xffff) {
+ if (TQChar::requiresSurrogates(uc)) {
// surrogate pair
- uc -= 0x10000;
- unsigned short high = uc/0x400 + 0xd800;
- unsigned short low = uc%0x400 + 0xdc00;
- *qch++ = TQChar(high);
- *qch++ = TQChar(low);
+ *qch++ = TQChar(TQChar::highSurrogate(uc));
+ *qch++ = TQChar(TQChar::lowSurrogate(uc));
} else if (uc < min_uc || (uc >= 0xd800 && uc <= 0xdfff) || (uc >= 0xfffe)) {
- // overlong seqence, UTF16 surrogate or BOM
+ // overlong sequence, UTF16 surrogate or BOM
i = error;
qch = addOne(qch, result);
*qch++ = TQChar(0xdbff);
diff --git a/src/tools/tqstring.h b/src/tools/tqstring.h
index 03fcf9459..c29a9c392 100644
--- a/src/tools/tqstring.h
+++ b/src/tools/tqstring.h
@@ -222,6 +222,14 @@ public:
bool isDigit() const;
bool isSymbol() const;
+ // Surrogate pairs support
+ bool isHighSurrogate() const;
+ bool isLowSurrogate() const;
+ static bool requiresSurrogates(uint ucs4);
+ static ushort highSurrogate(uint ucs4);
+ static ushort lowSurrogate(uint ucs4);
+ static uint surrogateToUcs4(const TQChar &high, const TQChar &low);
+
uchar cell() const { return ((uchar) ucs & 0xff); }
uchar row() const { return ((uchar) (ucs>>8)&0xff); }
void setCell( uchar cell ) { ucs = (ucs & 0xff00) + cell; }
@@ -313,6 +321,36 @@ inline TQChar::TQChar( int rc ) : ucs( (ushort) (rc & 0xffff) )
{
}
+inline bool TQChar::isHighSurrogate() const
+{
+ return ((ucs & 0xfc00) == 0xd800); // top six bits 110110 -> code unit in 0xD800..0xDBFF
+}
+
+inline bool TQChar::isLowSurrogate() const
+{
+ return ((ucs & 0xfc00) == 0xdc00); // top six bits 110111 -> code unit in 0xDC00..0xDFFF
+}
+
+inline bool TQChar::requiresSurrogates(uint ucs4)
+{
+ return (ucs4 >= 0x10000); // value does not fit in a single 16-bit code unit
+}
+
+inline ushort TQChar::highSurrogate(uint ucs4)
+{
+ return ushort(((ucs4 - 0x10000) >> 10)) | 0xd800; // unsigned subtraction wraps for ucs4 < 0x10000, so the result is only meaningful when requiresSurrogates(ucs4) holds
+}
+
+inline ushort TQChar::lowSurrogate(uint ucs4)
+{
+ return ushort(ucs4 & 0x03FF) | 0xdc00; // no 0x10000 subtraction needed: it is a multiple of 0x400 and leaves the low 10 bits unchanged
+}
+
+inline uint TQChar::surrogateToUcs4(const TQChar &high, const TQChar &low)
+{
+ return ((uint(high.ucs & 0x03FF) << 10) | (low.ucs & 0x03FF)) + 0x10000; // the offset must be added, not OR-ed: the shifted high bits can already occupy bit 16 (D840 DC00 must yield 0x20000, not 0x10000)
+}
+
inline bool operator==( char ch, TQChar c )
{
return ((uchar) ch) == c.ucs;
@@ -806,6 +844,11 @@ public:
bool isNumber() const { return s.constref(p).isNumber(); }
bool isLetterOrNumber() { return s.constref(p).isLetterOrNumber(); }
bool isDigit() const { return s.constref(p).isDigit(); }
+ bool isSymbol() const { return s.constref(p).isSymbol(); }
+
+ // Surrogate pairs support
+ bool isHighSurrogate() const { return s.constref(p).isHighSurrogate(); }
+ bool isLowSurrogate() const { return s.constref(p).isLowSurrogate(); }
int digitValue() const { return s.constref(p).digitValue(); }
TQChar lower() const { return s.constref(p).lower(); }