* More TQt/Qt4 features

* Various compilation fixes for Slackware git-svn-id: svn://anonsvn.kde.org/home/kde/branches/trinity/dependencies/tqtinterface@1170159 283d02a7-25f6-0310-bc7c-ecb5cbfe19da
author: tpearson <tpearson@283d02a7-25f6-0310-bc7c-ecb5cbfe19da> 2010-08-30 23:26:07 +0000
committer: tpearson <tpearson@283d02a7-25f6-0310-bc7c-ecb5cbfe19da> 2010-08-30 23:26:07 +0000
commit: e5e5db14bf9a12b17fefe650fface82bb250aaec (patch)
tree: 04c3848e58635eaa773ef7d85a4ed24597be33dd /qtinterface/tqtextcodec.cpp
parent: f7c45454128a78405313e1ebb86ef5f597481ebe (diff)
download: tqtinterface-e5e5db14bf9a12b17fefe650fface82bb250aaec.tar.gz
tqtinterface-e5e5db14bf9a12b17fefe650fface82bb250aaec.zip
1 files changed, 492 insertions, 0 deletions
diff --git a/qtinterface/tqtextcodec.cpp b/qtinterface/tqtextcodec.cpp
index 7958168..6e047a5 100644
--- a/qtinterface/tqtextcodec.cpp
+++ b/qtinterface/tqtextcodec.cpp
@@ -21,3 +21,495 @@ Boston, MA 02110-1301, USA.
 
 #include <tqt.h>
 #include <tqtextcodec.h>
+
+#ifdef USE_QT4
+
+// Normalises a codec name for fuzzy comparison: keeps only the letters
+// and digits of input (lower-cased) and puts a space wherever the
+// character class changes, e.g. "iso8859-1" becomes "iso 8859 1".
+// A null input produces an empty result.
+static QString lettersAndNumbers( const char * input )
+{
+    QString normalized;
+
+    if ( input ) {
+        for ( const char *p = input; *p; ++p ) {
+            QChar cur( *p );
+            if ( cur.isLetter() || cur.isNumber() )
+                normalized += cur.lower();
+
+            // Nothing follows the last character, so no boundary to mark.
+            if ( !p[1] )
+                continue;
+
+            QChar next( p[1] );
+            bool boundary;
+            if ( cur.isLetter() && next.isLetter() ) {
+                // Between two letters only a lower-case -> upper-case
+                // step counts as a boundary.
+                boundary = ( cur == cur.lower() && next == next.upper() );
+            } else {
+                boundary = ( cur.category() != next.category() );
+            }
+            if ( boundary )
+                normalized += ' ';
+        }
+    }
+
+    // Collapse the separators that were emitted next to skipped characters.
+    return normalized.simplifyWhiteSpace();
+}
+
+#define CHAINED 0xffff
+
+// One entry of a byte -> Unicode lookup table.  An entry either holds
+// the code point directly or owns a further table (allocated with
+// new[]) that is indexed with the next input byte.
+struct QMultiByteUnicodeTable {
+    QMultiByteUnicodeTable()
+        : unicode(0xfffd),
+          multiByte(0)
+    {
+    }
+
+    ~QMultiByteUnicodeTable()
+    {
+        // delete [] on a null pointer is a no-op, so no check is needed.
+        delete [] multiByte;
+    }
+
+    // Code point for this entry; starts out as 0xfffd.
+    ushort unicode;
+    // If multiByte, ignore unicode and index into multiByte
+    //  with the next character.
+    QMultiByteUnicodeTable* multiByte;
+};
+
+// Parses one escaped byte value and advances cursor past it.
+//
+// cursor must point at the escape character.  The text after it is read
+// as "x<hex digits>", "d<decimal digits>" or plain "<octal digits>".
+// Returns the parsed value masked to the range 0..255; returns 0 without
+// moving if cursor already points at the end of the string.
+static int getByte(char* &cursor)
+{
+    int byte = 0;
+    if ( *cursor ) {
+        if ( cursor[1] == 'x' ) {
+            byte = strtol(cursor+2,&cursor,16);
+        } else if ( cursor[1] == 'd' ) {
+            byte = strtol(cursor+2,&cursor,10);
+        } else if ( cursor[1] ) {
+            // Octal values have no prefix letter, so the digits start
+            // directly after the escape character (parsing from cursor+2
+            // would drop the first digit).
+            byte = strtol(cursor+1,&cursor,8);
+        } else {
+            // Lone escape character at the end of the string: step over
+            // it instead of reading beyond the terminator.
+            cursor++;
+        }
+    }
+    return byte&0xff;
+}
+
+class QTextCodecFromIOD;
+
+// Decoder returned by QTextCodecFromIOD::makeDecoder() when the codec is
+// not stateless.  The current position in the multi-byte lookup chain is
+// kept in a member, so it carries over between convertToUnicode() calls.
+class QTextCodecFromIODDecoder : public QTextDecoder {
+private:
+    // Codec whose lookup tables are consulted.
+    const QTextCodecFromIOD* codec;
+    // Table that the next input byte is looked up in.
+    QMultiByteUnicodeTable* mb;
+
+public:
+    // NOTE(review): the out-of-class definition of this constructor is
+    // commented out further down in this file -- confirm where codec and
+    // mb get initialised.
+    QTextCodecFromIODDecoder(const QTextCodecFromIOD* c);
+    //QString toUnicode(const char* chars, int len);
+    QString convertToUnicode(const char* chars, int len, int *state);
+};
+
+// Text codec whose conversion tables are built at run time from a
+// charmap description read from a QIODevice.
+//
+// Encoding (Unicode -> bytes) uses from_unicode_page and, for characters
+// that need more than one byte, from_unicode_page_multiByte.  Decoding
+// uses either the flat to_unicode table or, once a multi-byte sequence
+// has been seen in the charmap, the chained to_unicode_multiByte tables.
+class QTextCodecFromIOD : public QTextCodec {
+    friend class QTextCodecFromIODDecoder;
+
+    // Codec name, taken from the <code_set_name> line.
+    TQCString n;
+
+    // If from_unicode_page[row()][cell()] is 0 and from_unicode_page_multiByte,
+    //  use from_unicode_page_multiByte[row()][cell()] as string.
+    char** from_unicode_page;
+    char*** from_unicode_page_multiByte;
+    // Byte written by fromUnicode() for characters without a mapping.
+    char unkn;
+
+    // Only one of these is used
+    ushort* to_unicode;
+    QMultiByteUnicodeTable* to_unicode_multiByte;
+    // Longest byte sequence recorded for one character; used to size the
+    // output buffer in fromUnicode().
+    int max_bytes_per_char;
+    // Names collected from "<comment_char> alias <name>" lines.
+    TQStrList aliases;
+
+    bool stateless() const { return !to_unicode_multiByte; }
+
+public:
+    // Builds the tables by reading iod line by line (100-byte line
+    // buffer) until "END CHARMAP" or the end of the input.  Check ok()
+    // afterwards: it is false if no "CHARMAP" line was found.
+    QTextCodecFromIOD(QIODevice* iod)
+    {
+        from_unicode_page = 0;
+        to_unicode_multiByte = 0;
+        to_unicode = 0;
+        from_unicode_page_multiByte = 0;
+        max_bytes_per_char = 1;
+
+        const int maxlen=100;
+        char line[maxlen];
+        char esc='\\';
+        char comm='%';
+        bool incmap = FALSE;
+        while (iod->readLine(line,maxlen) > 0) {
+            if (0==qstrnicmp(line,"<code_set_name>",15))
+                n = line+15;
+            else if (0==qstrnicmp(line,"<escape_char> ",14))
+                esc = line[14];
+            else if (0==qstrnicmp(line,"<comment_char> ",15))
+                comm = line[15];
+            else if (line[0]==comm && 0==qstrnicmp(line+1," alias ",7)) {
+                aliases.append(line+8);
+            } else if (0==qstrnicmp(line,"CHARMAP",7)) {
+                if (!from_unicode_page) {
+                    from_unicode_page = new char*[256];
+                    for (int i=0; i<256; i++)
+                        from_unicode_page[i]=0;
+                }
+                // Create the flat table only while no decoding table
+                // exists yet, so that only one of the two is ever live.
+                if (!to_unicode && !to_unicode_multiByte) {
+                    to_unicode = new ushort[256];
+                    // Bytes the charmap never mentions decode to 0xfffd,
+                    // the same default QMultiByteUnicodeTable uses,
+                    // instead of uninitialised memory.
+                    for (int i=0; i<256; i++)
+                        to_unicode[i]=0xfffd;
+                }
+                incmap = TRUE;
+            } else if (0==qstrnicmp(line,"END CHARMAP",11))
+                break;
+            else if (incmap) {
+                char* cursor = line;
+                int byte=-1,unicode=-1;
+                ushort* mb_unicode=0;
+                const int maxmb=8; // more -> we'll need to improve datastructures
+                char mb[maxmb+1];
+                int nmb=0;
+
+                while (*cursor) {
+                    // NOTE(review): only "<U" followed by two decimal
+                    // digits is recognised here, so code points whose
+                    // first two hex digits are A-F are skipped -- confirm
+                    // this is intended.
+                    if (cursor[0]=='<' && cursor[1]=='U' &&
+                        cursor[2]>='0' && cursor[2]<='9' &&
+                        cursor[3]>='0' && cursor[3]<='9') {
+
+                        unicode = strtol(cursor+2,&cursor,16);
+
+                    } else if (*cursor==esc) {
+
+                        byte = getByte(cursor);
+
+                        if ( *cursor == esc ) {
+                            // A second escaped byte follows directly: this
+                            // is a multi-byte sequence.  Switch decoding
+                            // from the flat table to chained tables.
+                            if ( !to_unicode_multiByte ) {
+                                to_unicode_multiByte =
+                                    new QMultiByteUnicodeTable[256];
+                                for (int i=0; i<256; i++) {
+                                    to_unicode_multiByte[i].unicode =
+                                        to_unicode[i];
+                                    to_unicode_multiByte[i].multiByte = 0;
+                                }
+                                delete [] to_unicode;
+                                to_unicode = 0;
+                            }
+                            QMultiByteUnicodeTable* mbut =
+                                to_unicode_multiByte+byte;
+                            // Start the byte string afresh for every
+                            // sequence so that mb[] (maxmb+1 bytes) cannot
+                            // overflow when a line holds several sequences
+                            // or one longer than maxmb bytes.
+                            nmb = 0;
+                            mb[nmb++] = byte;
+                            while ( nmb < maxmb && *cursor == esc ) {
+                                // Always at least once
+
+                                mbut->unicode = CHAINED;
+                                byte = getByte(cursor);
+                                mb[nmb++] = byte;
+                                if (!mbut->multiByte) {
+                                    mbut->multiByte =
+                                        new QMultiByteUnicodeTable[256];
+                                }
+                                mbut = mbut->multiByte+byte;
+                                mb_unicode = & mbut->unicode;
+                            }
+
+                            if ( nmb > max_bytes_per_char )
+                                max_bytes_per_char = nmb;
+                        }
+                    } else {
+                        cursor++;
+                    }
+                }
+
+                // Record the mapping only if the line supplied both a byte
+                // value and a 16-bit code point; without the byte check a
+                // line with just "<Uxxxx>" would index the tables with -1.
+                if (byte >= 0 && unicode >= 0 && unicode <= 0xffff)
+                {
+                    QChar ch((ushort)unicode);
+                    if (!from_unicode_page[ch.row()]) {
+                        from_unicode_page[ch.row()] = new char[256];
+                        for (int i=0; i<256; i++)
+                            from_unicode_page[ch.row()][i]=0;
+                    }
+                    if ( mb_unicode ) {
+                        from_unicode_page[ch.row()][ch.cell()] = 0;
+                        if (!from_unicode_page_multiByte) {
+                            from_unicode_page_multiByte = new char**[256];
+                            for (int i=0; i<256; i++)
+                                from_unicode_page_multiByte[i]=0;
+                        }
+                        if (!from_unicode_page_multiByte[ch.row()]) {
+                            from_unicode_page_multiByte[ch.row()] = new char*[256];
+                            for (int i=0; i<256; i++)
+                                from_unicode_page_multiByte[ch.row()][i] = 0;
+                        }
+                        mb[nmb++] = 0;
+                        // Free a string stored by an earlier line for the
+                        // same character before replacing it.
+                        delete [] from_unicode_page_multiByte[ch.row()][ch.cell()];
+                        from_unicode_page_multiByte[ch.row()][ch.cell()]
+                            = qstrdup(mb);
+                        *mb_unicode = unicode;
+                    } else {
+                        from_unicode_page[ch.row()][ch.cell()] = (char)byte;
+                        if ( to_unicode )
+                            to_unicode[byte] = unicode;
+                        else
+                            to_unicode_multiByte[byte].unicode = unicode;
+                    }
+                }
+            }
+        }
+        n = n.stripWhiteSpace();
+
+        unkn = '?'; // ##### Might be a bad choice.
+    }
+
+    // Releases every table allocated by the constructor, including the
+    // top-level arrays and the per-row arrays of the multi-byte table.
+    ~QTextCodecFromIOD()
+    {
+        if ( from_unicode_page ) {
+            for (int i=0; i<256; i++)
+                delete [] from_unicode_page[i];
+            delete [] from_unicode_page;
+        }
+        if ( from_unicode_page_multiByte ) {
+            for (int i=0; i<256; i++) {
+                if (from_unicode_page_multiByte[i]) {
+                    for (int j=0; j<256; j++)
+                        delete [] from_unicode_page_multiByte[i][j];
+                    delete [] from_unicode_page_multiByte[i];
+                }
+            }
+            delete [] from_unicode_page_multiByte;
+        }
+        delete [] to_unicode;
+        delete [] to_unicode_multiByte;
+    }
+
+    // True once a "CHARMAP" line has been seen, i.e. the tables exist.
+    bool ok() const
+    {
+        return !!from_unicode_page;
+    }
+
+    // Uses the base-class decoder for single-byte charmaps and a
+    // QTextCodecFromIODDecoder when multi-byte tables are in use.
+    QTextDecoder* makeDecoder() const
+    {
+        if ( stateless() )
+            return QTextCodec::makeDecoder();
+        else
+            return new QTextCodecFromIODDecoder(this);
+    }
+
+    // Returns the codec name read from the charmap.
+    const char* qtio_name() const
+    {
+        return n;
+    }
+
+    int mibEnum() const
+    {
+        return 0; // #### Unknown.
+    }
+
+    // Content-based detection is not implemented; always returns 0.
+    int heuristicContentMatch(const char*, int) const
+    {
+        return 0;
+    }
+
+    // Returns the best score among the base-class match and the match
+    // of hint against each alias.
+    int heuristicNameMatch(const char* hint) const
+    {
+        int bestr = QTextCodec::heuristicNameMatch(hint);
+        TQStrListIterator it(aliases);
+        char* a;
+        while ((a=it.current())) {
+            ++it;
+            int r = simpleHeuristicNameMatch(a,hint);
+            if (r > bestr)
+                bestr = r;
+        }
+        return bestr;
+    }
+
+    // Decodes len bytes starting at chars.  With multi-byte tables each
+    // byte either descends into a chained table or yields a character
+    // and returns to the top-level table; a sequence cut off at the end
+    // of the input yields nothing.
+    QString toUnicode(const char* chars, int len) const
+    {
+        const uchar* uchars = (const uchar*)chars;
+        QString result;
+        QMultiByteUnicodeTable* multiByte=to_unicode_multiByte;
+        if ( multiByte ) {
+            while (len--) {
+                QMultiByteUnicodeTable& mb = multiByte[*uchars];
+                if ( mb.multiByte ) {
+                    // Chained multi-byte
+                    multiByte = mb.multiByte;
+                } else {
+                    result += QChar(mb.unicode);
+                    multiByte=to_unicode_multiByte;
+                }
+                uchars++;
+            }
+        } else {
+            while (len--)
+                result += QChar(to_unicode[*uchars++]);
+        }
+        return result;
+    }
+
+    // Forwards to toUnicode(); state is not used.
+    QString convertToUnicode(const char* chars, int len, ConverterState *state) const
+    {
+        return toUnicode(chars, len);
+    }
+
+#if !defined(Q_NO_USING_KEYWORD)
+   using QTextCodec::fromUnicode;
+#endif
+    // Encodes the first lenInOut characters of uc.  On return lenInOut
+    // holds the number of bytes counted for the output.  Characters
+    // without a mapping are written as unkn.
+    TQCString fromUnicode(const QString& uc, int& lenInOut) const
+    {
+        // Clamp the requested length to the valid range of uc.
+        if (lenInOut < 0)
+            lenInOut = 0;
+        if (lenInOut > (int)uc.length())
+            lenInOut = uc.length();
+        int rlen = lenInOut*max_bytes_per_char;
+        TQCString rstr(rlen+1);
+        char* cursor = rstr.data();
+        char* s=0;
+        int l = lenInOut;
+        int lout = 0;
+        for (int i=0; i<l; i++) {
+            QChar ch = uc[i];
+            if ( ch == QChar() ) {
+                // special: written as a 0 byte, not counted in lout
+                *cursor++ = 0;
+            } else if ( from_unicode_page[ch.row()] &&
+                from_unicode_page[ch.row()][ch.cell()] )
+            {
+                *cursor++ = from_unicode_page[ch.row()][ch.cell()];
+                lout++;
+            } else if ( from_unicode_page_multiByte &&
+                      from_unicode_page_multiByte[ch.row()] &&
+                      (s=from_unicode_page_multiByte[ch.row()][ch.cell()]) )
+            {
+                while (*s) {
+                    *cursor++ = *s++;
+                    lout++;
+                }
+            } else {
+                *cursor++ = unkn;
+                lout++;
+            }
+        }
+        *cursor = 0;
+        lenInOut = lout;
+        return rstr;
+    }
+
+    // Encodes len characters starting at charin; state is not used.
+    //
+    // The input is wrapped in a QString so that the call below selects
+    // the (const QString&, int&) overload defined in this class.  Calling
+    // fromUnicode(charin, len) directly can instead pick a base-class
+    // (const QChar*, int, ...) overload made visible by the using
+    // declaration above; in Qt 4 that overload forwards back to
+    // convertFromUnicode(), which would recurse without end.
+    // NOTE(review): the returned array keeps the size of the buffer
+    // allocated by fromUnicode(), not the encoded length -- confirm
+    // whether callers expect it to be trimmed.
+    QByteArray convertFromUnicode(const QChar *charin, int len, ConverterState *state) const
+    {
+        QString text(charin, len);
+        int length = len;
+        return fromUnicode(text, length);
+    }
+
+    // Returns the codec name read from the charmap.
+    QByteArray name() const
+    {
+        return qtio_name();
+    }
+};
+
+// QTextCodecFromIODDecoder::QTextCodecFromIODDecoder(const QTextCodecFromIOD* c) :
+//     codec(c)
+// {
+//     mb = codec->to_unicode_multiByte;
+// }
+
+// Decodes len bytes starting at chars using the codec's chained
+// multi-byte tables.  The table reached so far is kept in the member mb,
+// so a sequence split across two calls continues where it stopped.
+// Entries whose code point is 0 add nothing to the result.  state is
+// not used.
+QString QTextCodecFromIODDecoder::convertToUnicode(const char* chars, int len, int *state)
+{
+    QString decoded;
+    const uchar* next = (const uchar*)chars;
+    for ( ; len != 0; --len, ++next ) {
+        QMultiByteUnicodeTable& entry = mb[*next];
+        if ( entry.multiByte ) {
+            // Chained multi-byte: descend and wait for the next byte.
+            mb = entry.multiByte;
+            continue;
+        }
+        if ( entry.unicode )
+            decoded += QChar(entry.unicode);
+        // Sequence finished; go back to the top-level table.
+        mb = codec->to_unicode_multiByte;
+    }
+    return decoded;
+}
+
+#ifndef QT_NO_CODECS
+// Cannot use <pre> or \code
+/*!
+    Reads a POSIX2 charmap definition from \a iod.
+    The parser recognizes the following lines:
+
+<font name="sans">
+&nbsp;&nbsp;&lt;code_set_name&gt; <i>name</i></br>
+&nbsp;&nbsp;&lt;escape_char&gt; <i>character</i></br>
+&nbsp;&nbsp;% alias <i>alias</i></br>
+&nbsp;&nbsp;CHARMAP</br>
+&nbsp;&nbsp;&lt;<i>token</i>&gt; /x<i>hexbyte</i> &lt;U<i>unicode</i>&gt; ...</br>
+&nbsp;&nbsp;&lt;<i>token</i>&gt; /d<i>decbyte</i> &lt;U<i>unicode</i>&gt; ...</br>
+&nbsp;&nbsp;&lt;<i>token</i>&gt; /<i>octbyte</i> &lt;U<i>unicode</i>&gt; ...</br>
+&nbsp;&nbsp;&lt;<i>token</i>&gt; /<i>any</i>/<i>any</i>... &lt;U<i>unicode</i>&gt; ...</br>
+&nbsp;&nbsp;END CHARMAP</br>
+</font>
+
+    The resulting QTextCodec is returned (and also added to the global
+    list of codecs). The name() of the result is taken from the
+    code_set_name.
+
+    Note that a codec constructed in this way uses much more memory
+    and is slower than a hand-written QTextCodec subclass, since
+    tables in code are kept in memory shared by all Qt applications.
+
+    \sa loadCharmapFile()
+*/
+QTextCodec* QTextCodec::loadCharmap(QIODevice* iod)
+{
+    QTextCodecFromIOD* codec = new QTextCodecFromIOD(iod);
+    if ( codec->ok() )
+        return codec;
+
+    // The input did not yield usable tables: discard the codec.
+    delete codec;
+    return 0;
+}
+
+/*!
+    A convenience function for loadCharmap() that loads the charmap
+    definition from the file \a filename.
+*/
+QTextCodec* QTextCodec::loadCharmapFile(QString filename)
+{
+    QFile f(filename);
+    // Returns 0 if the file cannot be opened for reading.
+    if ( !f.open(IO_ReadOnly) )
+        return 0;
+
+    // loadCharmap() builds the codec and returns 0 if it is unusable.
+    return loadCharmap(&f);
+}
+
+/*!
+    Returns a value indicating how likely it is that this decoder is
+    appropriate for decoding some format that has the given name. The
+    name is compared with the \a hint.
+
+    A good match returns a positive number around the length of the
+    string. A bad match is negative.
+
+    The default implementation calls simpleHeuristicNameMatch() with
+    the name of the codec.
+*/
+int QTextCodec::heuristicNameMatch(const char* hint) const
+{
+    // Score this codec's own name() against the hint.
+    const int score = simpleHeuristicNameMatch( name(), hint );
+    return score;
+}
+
+/*!
+    A simple utility function for heuristicNameMatch(): it does some
+    very minor character-skipping so that almost-exact matches score
+    high. \a name is the text we're matching and \a hint is used for
+    the comparison.
+*/
+int QTextCodec::simpleHeuristicNameMatch(const char* name, const char* hint)
+{
+    // Identical names (ignoring case) get the full score.
+    const bool bothNonEmpty = name && hint && *name && *hint;
+    if ( bothNonEmpty && qstricmp( name, hint ) == 0 )
+        return qstrlen( hint );
+
+    // Same letters and digits with the same class boundaries: one point
+    // short of a perfect score.
+    const QString normalizedHint( lettersAndNumbers( hint ) );
+    const QString normalizedName( lettersAndNumbers( name ) );
+    if ( normalizedHint == normalizedName )
+        return qstrlen( hint )-1;
+
+    // NOTE(review): lettersAndNumbers() already returns a
+    // simplifyWhiteSpace()'d string, so this comparison looks equivalent
+    // to the one above -- confirm whether a looser match was intended.
+    if ( normalizedHint.stripWhiteSpace() == normalizedName.stripWhiteSpace() )
+        return qstrlen( hint )-2;
+
+    // could do some more here, but I don't think it's worth it
+    return 0;
+}
+
+#endif //QT_NO_CODECS
+
+#endif // USE_QT4
+\ No newline at end of file
author	tpearson <tpearson@283d02a7-25f6-0310-bc7c-ecb5cbfe19da>	2010-08-30 23:26:07 +0000
committer	tpearson <tpearson@283d02a7-25f6-0310-bc7c-ecb5cbfe19da>	2010-08-30 23:26:07 +0000
commit	e5e5db14bf9a12b17fefe650fface82bb250aaec (patch)
tree	04c3848e58635eaa773ef7d85a4ed24597be33dd /qtinterface/tqtextcodec.cpp
parent	f7c45454128a78405313e1ebb86ef5f597481ebe (diff)
download	tqtinterface-e5e5db14bf9a12b17fefe650fface82bb250aaec.tar.gz tqtinterface-e5e5db14bf9a12b17fefe650fface82bb250aaec.zip