diff options
Diffstat (limited to 'qtinterface/tqtextcodec.cpp')
-rw-r--r-- | qtinterface/tqtextcodec.cpp | 515 |
1 files changed, 0 insertions, 515 deletions
diff --git a/qtinterface/tqtextcodec.cpp b/qtinterface/tqtextcodec.cpp deleted file mode 100644 index 6e047a5..0000000 --- a/qtinterface/tqtextcodec.cpp +++ /dev/null @@ -1,515 +0,0 @@ -/* - -Copyright (C) 2010 Timothy Pearson <[email protected]> - -This library is free software; you can redistribute it and/or -modify it under the terms of the GNU General Public -License as published by the Free Software Foundation; either -version 2 of the License, or (at your option) any later version. - -This library is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -Library General Public License for more details. - -You should have received a copy of the GNU Library General Public License -along with this library; see the file COPYING.LIB. If not, write to -the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -Boston, MA 02110-1301, USA. - -*/ - -#include <tqt.h> -#include <tqtextcodec.h> - -#ifdef USE_QT4 - -// returns a string containing the letters and numbers from input, -// with a space separating run of a character class. e.g. "iso8859-1" -// becomes "iso 8859 1" -static QString lettersAndNumbers( const char * input ) -{ - QString result; - QChar c; - - while( input && *input ) { - c = *input; - if ( c.isLetter() || c.isNumber() ) - result += c.lower(); - if ( input[1] ) { - // add space at character class transition, except - // transition from upper-case to lower-case letter - QChar n( input[1] ); - if ( c.isLetter() && n.isLetter() ) { - if ( c == c.lower() && n == n.upper() ) - result += ' '; - } else if ( c.category() != n.category() ) { - result += ' '; - } - } - input++; - } - return result.simplifyWhiteSpace(); -} - -#define CHAINED 0xffff - -struct QMultiByteUnicodeTable { - // If multiByte, ignore unicode and index into multiByte - // with the next character. - QMultiByteUnicodeTable() : unicode(0xfffd), multiByte(0) { } - - ~QMultiByteUnicodeTable() - { - if ( multiByte ) - delete [] multiByte; - } - - ushort unicode; - QMultiByteUnicodeTable* multiByte; -}; - -static int getByte(char* &cursor) -{ - int byte = 0; - if ( *cursor ) { - if ( cursor[1] == 'x' ) - byte = strtol(cursor+2,&cursor,16); - else if ( cursor[1] == 'd' ) - byte = strtol(cursor+2,&cursor,10); - else - byte = strtol(cursor+2,&cursor,8); - } - return byte&0xff; -} - -class QTextCodecFromIOD; - -class QTextCodecFromIODDecoder : public QTextDecoder { - const QTextCodecFromIOD* codec; - QMultiByteUnicodeTable* mb; -public: - QTextCodecFromIODDecoder(const QTextCodecFromIOD* c); - //QString toUnicode(const char* chars, int len); - QString convertToUnicode(const char* chars, int len, int *state); -}; - -class QTextCodecFromIOD : public QTextCodec { - friend class QTextCodecFromIODDecoder; - - TQCString n; - - // If from_unicode_page[row()][cell()] is 0 and from_unicode_page_multiByte, - // use from_unicode_page_multiByte[row()][cell()] as string. - char** from_unicode_page; - char*** from_unicode_page_multiByte; - char unkn; - - // Only one of these is used - ushort* to_unicode; - QMultiByteUnicodeTable* to_unicode_multiByte; - int max_bytes_per_char; - TQStrList aliases; - - bool stateless() const { return !to_unicode_multiByte; } - -public: - QTextCodecFromIOD(QIODevice* iod) - { - from_unicode_page = 0; - to_unicode_multiByte = 0; - to_unicode = 0; - from_unicode_page_multiByte = 0; - max_bytes_per_char = 1; - - const int maxlen=100; - char line[maxlen]; - char esc='\\'; - char comm='%'; - bool incmap = FALSE; - while (iod->readLine(line,maxlen) > 0) { - if (0==qstrnicmp(line,"<code_set_name>",15)) - n = line+15; - else if (0==qstrnicmp(line,"<escape_char> ",14)) - esc = line[14]; - else if (0==qstrnicmp(line,"<comment_char> ",15)) - comm = line[15]; - else if (line[0]==comm && 0==qstrnicmp(line+1," alias ",7)) { - aliases.append(line+8); - } else if (0==qstrnicmp(line,"CHARMAP",7)) { - if (!from_unicode_page) { - from_unicode_page = new char*[256]; - for (int i=0; i<256; i++) - from_unicode_page[i]=0; - } - if (!to_unicode) { - to_unicode = new ushort[256]; - } - incmap = TRUE; - } else if (0==qstrnicmp(line,"END CHARMAP",11)) - break; - else if (incmap) { - char* cursor = line; - int byte=-1,unicode=-1; - ushort* mb_unicode=0; - const int maxmb=8; // more -> we'll need to improve datastructures - char mb[maxmb+1]; - int nmb=0; - - while (*cursor) { - if (cursor[0]=='<' && cursor[1]=='U' && - cursor[2]>='0' && cursor[2]<='9' && - cursor[3]>='0' && cursor[3]<='9') { - - unicode = strtol(cursor+2,&cursor,16); - - } else if (*cursor==esc) { - - byte = getByte(cursor); - - if ( *cursor == esc ) { - if ( !to_unicode_multiByte ) { - to_unicode_multiByte = - new QMultiByteUnicodeTable[256]; - for (int i=0; i<256; i++) { - to_unicode_multiByte[i].unicode = - to_unicode[i]; - to_unicode_multiByte[i].multiByte = 0; - } - delete [] to_unicode; - to_unicode = 0; - } - QMultiByteUnicodeTable* mbut = - to_unicode_multiByte+byte; - mb[nmb++] = byte; - while ( nmb < maxmb && *cursor == esc ) { - // Always at least once - - mbut->unicode = CHAINED; - byte = getByte(cursor); - mb[nmb++] = byte; - if (!mbut->multiByte) { - mbut->multiByte = - new QMultiByteUnicodeTable[256]; - } - mbut = mbut->multiByte+byte; - mb_unicode = & mbut->unicode; - } - - if ( nmb > max_bytes_per_char ) - max_bytes_per_char = nmb; - } - } else { - cursor++; - } - } - - if (unicode >= 0 && unicode <= 0xffff) - { - QChar ch((ushort)unicode); - if (!from_unicode_page[ch.row()]) { - from_unicode_page[ch.row()] = new char[256]; - for (int i=0; i<256; i++) - from_unicode_page[ch.row()][i]=0; - } - if ( mb_unicode ) { - from_unicode_page[ch.row()][ch.cell()] = 0; - if (!from_unicode_page_multiByte) { - from_unicode_page_multiByte = new char**[256]; - for (int i=0; i<256; i++) - from_unicode_page_multiByte[i]=0; - } - if (!from_unicode_page_multiByte[ch.row()]) { - from_unicode_page_multiByte[ch.row()] = new char*[256]; - for (int i=0; i<256; i++) - from_unicode_page_multiByte[ch.row()][i] = 0; - } - mb[nmb++] = 0; - from_unicode_page_multiByte[ch.row()][ch.cell()] - = qstrdup(mb); - *mb_unicode = unicode; - } else { - from_unicode_page[ch.row()][ch.cell()] = (char)byte; - if ( to_unicode ) - to_unicode[byte] = unicode; - else - to_unicode_multiByte[byte].unicode = unicode; - } - } else { - } - } - } - n = n.stripWhiteSpace(); - - unkn = '?'; // ##### Might be a bad choice. - } - - ~QTextCodecFromIOD() - { - if ( from_unicode_page ) { - for (int i=0; i<256; i++) - if (from_unicode_page[i]) - delete [] from_unicode_page[i]; - } - if ( from_unicode_page_multiByte ) { - for (int i=0; i<256; i++) - if (from_unicode_page_multiByte[i]) - for (int j=0; j<256; j++) - if (from_unicode_page_multiByte[i][j]) - delete [] from_unicode_page_multiByte[i][j]; - } - if ( to_unicode ) - delete [] to_unicode; - if ( to_unicode_multiByte ) - delete [] to_unicode_multiByte; - } - - bool ok() const - { - return !!from_unicode_page; - } - - QTextDecoder* makeDecoder() const - { - if ( stateless() ) - return QTextCodec::makeDecoder(); - else - return new QTextCodecFromIODDecoder(this); - } - - const char* qtio_name() const - { - return n; - } - - int mibEnum() const - { - return 0; // #### Unknown. - } - - int heuristicContentMatch(const char*, int) const - { - return 0; - } - - int heuristicNameMatch(const char* hint) const - { - int bestr = QTextCodec::heuristicNameMatch(hint); - TQStrListIterator it(aliases); - char* a; - while ((a=it.current())) { - ++it; - int r = simpleHeuristicNameMatch(a,hint); - if (r > bestr) - bestr = r; - } - return bestr; - } - - QString toUnicode(const char* chars, int len) const - { - const uchar* uchars = (const uchar*)chars; - QString result; - QMultiByteUnicodeTable* multiByte=to_unicode_multiByte; - if ( multiByte ) { - while (len--) { - QMultiByteUnicodeTable& mb = multiByte[*uchars]; - if ( mb.multiByte ) { - // Chained multi-byte - multiByte = mb.multiByte; - } else { - result += QChar(mb.unicode); - multiByte=to_unicode_multiByte; - } - uchars++; - } - } else { - while (len--) - result += QChar(to_unicode[*uchars++]); - } - return result; - } - - QString convertToUnicode(const char* chars, int len, ConverterState *state) const - { - return toUnicode(chars, len); - } - -#if !defined(Q_NO_USING_KEYWORD) - using QTextCodec::fromUnicode; -#endif - TQCString fromUnicode(const QString& uc, int& lenInOut) const - { - if (lenInOut > (int)uc.length()) - lenInOut = uc.length(); - int rlen = lenInOut*max_bytes_per_char; - TQCString rstr(rlen+1); - char* cursor = rstr.data(); - char* s=0; - int l = lenInOut; - int lout = 0; - for (int i=0; i<l; i++) { - QChar ch = uc[i]; - if ( ch == QChar() ) { - // special - *cursor++ = 0; - } else if ( from_unicode_page[ch.row()] && - from_unicode_page[ch.row()][ch.cell()] ) - { - *cursor++ = from_unicode_page[ch.row()][ch.cell()]; - lout++; - } else if ( from_unicode_page_multiByte && - from_unicode_page_multiByte[ch.row()] && - (s=from_unicode_page_multiByte[ch.row()][ch.cell()]) ) - { - while (*s) { - *cursor++ = *s++; - lout++; - } - } else { - *cursor++ = unkn; - lout++; - } - } - *cursor = 0; - lenInOut = lout; - return rstr; - } - - QByteArray convertFromUnicode(const QChar *charin, int len, ConverterState *state) const - { - return fromUnicode(charin, len); - } - - QByteArray name() const - { - return qtio_name(); - } -}; - -// QTextCodecFromIODDecoder::QTextCodecFromIODDecoder(const QTextCodecFromIOD* c) : -// codec(c) -// { -// mb = codec->to_unicode_multiByte; -// } - -QString QTextCodecFromIODDecoder::convertToUnicode(const char* chars, int len, int *state) -{ - const uchar* uchars = (const uchar*)chars; - QString result; - while (len--) { - QMultiByteUnicodeTable& t = mb[*uchars]; - if ( t.multiByte ) { - // Chained multi-byte - mb = t.multiByte; - } else { - if ( t.unicode ) - result += QChar(t.unicode); - mb=codec->to_unicode_multiByte; - } - uchars++; - } - return result; -} - -#ifndef QT_NO_CODECS -// Cannot use <pre> or \code -/*! - Reads a POSIX2 charmap definition from \a iod. - The parser recognizes the following lines: - -<font name="sans"> - <code_set_name> <i>name</i></br> - <escape_char> <i>character</i></br> - % alias <i>alias</i></br> - CHARMAP</br> - <<i>token</i>> /x<i>hexbyte</i> <U<i>unicode</i>> ...</br> - <<i>token</i>> /d<i>decbyte</i> <U<i>unicode</i>> ...</br> - <<i>token</i>> /<i>octbyte</i> <U<i>unicode</i>> ...</br> - <<i>token</i>> /<i>any</i>/<i>any</i>... <U<i>unicode</i>> ...</br> - END CHARMAP</br> -</font> - - The resulting QTextCodec is returned (and also added to the global - list of codecs). The name() of the result is taken from the - code_set_name. - - Note that a codec constructed in this way uses much more memory - and is slower than a hand-written QTextCodec subclass, since - tables in code are kept in memory shared by all Qt applications. - - \sa loadCharmapFile() -*/ -QTextCodec* QTextCodec::loadCharmap(QIODevice* iod) -{ - QTextCodecFromIOD* r = new QTextCodecFromIOD(iod); - if ( !r->ok() ) { - delete r; - r = 0; - } - return r; -} - -/*! - A convenience function for loadCharmap() that loads the charmap - definition from the file \a filename. -*/ -QTextCodec* QTextCodec::loadCharmapFile(QString filename) -{ - QFile f(filename); - if (f.open(IO_ReadOnly)) { - QTextCodecFromIOD* r = new QTextCodecFromIOD(&f); - if ( !r->ok() ) - delete r; - else - return r; - } - return 0; -} - -/*! - Returns a value indicating how likely it is that this decoder is - appropriate for decoding some format that has the given name. The - name is compared with the \a hint. - - A good match returns a positive number around the length of the - string. A bad match is negative. - - The default implementation calls simpleHeuristicNameMatch() with - the name of the codec. -*/ -int QTextCodec::heuristicNameMatch(const char* hint) const -{ - return simpleHeuristicNameMatch(name(),hint); -} - -/*! - A simple utility function for heuristicNameMatch(): it does some - very minor character-skipping so that almost-exact matches score - high. \a name is the text we're matching and \a hint is used for - the comparison. -*/ -int QTextCodec::simpleHeuristicNameMatch(const char* name, const char* hint) -{ - // if they're the same, return a perfect score. - if ( name && hint && *name && *hint && qstricmp( name, hint ) == 0 ) - return qstrlen( hint ); - - // if the letters and numbers are the same, we have an "almost" - // perfect match. - QString h( lettersAndNumbers( hint ) ); - QString n( lettersAndNumbers( name ) ); - if ( h == n ) - return qstrlen( hint )-1; - - if ( h.stripWhiteSpace() == n.stripWhiteSpace() ) - return qstrlen( hint )-2; - - // could do some more here, but I don't think it's worth it - - return 0; -} - -#endif //QT_NO_CODECS - -#endif // USE_QT4
\ No newline at end of file |