summary | refs | log | tree | commit | diff | stats
path: root/qtinterface/tqtextcodec.cpp
diff options
context:
space:
mode:
author Timothy Pearson <[email protected]> 2012-01-26 20:43:47 -0600
committer Timothy Pearson <[email protected]> 2012-01-26 20:43:47 -0600
commit 27917305452f2a55ae3be74e83b8a724248ec43f (patch)
tree 0bfa420031e546c3cfba2a63e92d7d4191d885b6 /qtinterface/tqtextcodec.cpp
parent dc87fbcfcf77bc9bed86b9ec03aa8163a7bf15d4 (diff)
download tqtinterface-27917305452f2a55ae3be74e83b8a724248ec43f.tar.gz
download tqtinterface-27917305452f2a55ae3be74e83b8a724248ec43f.zip
Split out qt3 and tqt3 files
Diffstat (limited to 'qtinterface/tqtextcodec.cpp')
-rw-r--r-- qtinterface/tqtextcodec.cpp 515
1 files changed, 0 insertions, 515 deletions
diff --git a/qtinterface/tqtextcodec.cpp b/qtinterface/tqtextcodec.cpp
deleted file mode 100644
index 6e047a5..0000000
--- a/qtinterface/tqtextcodec.cpp
+++ /dev/null
@@ -1,515 +0,0 @@
-/*
-
-Copyright (C) 2010 Timothy Pearson <[email protected]>
-
-This library is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public
-License as published by the Free Software Foundation; either
-version 2 of the License, or (at your option) any later version.
-
-This library is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-Library General Public License for more details.
-
-You should have received a copy of the GNU Library General Public License
-along with this library; see the file COPYING.LIB. If not, write to
-the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
-Boston, MA 02110-1301, USA.
-
-*/
-
-#include <tqt.h>
-#include <tqtextcodec.h>
-
-#ifdef USE_QT4
-
-// returns a string containing the letters and numbers from input,
-// with a space separating run of a character class. e.g. "iso8859-1"
-// becomes "iso 8859 1"
-static QString lettersAndNumbers( const char * input )
-{
- QString result;
- QChar c;
-
- while( input && *input ) {
- c = *input;
- if ( c.isLetter() || c.isNumber() )
- result += c.lower();
- if ( input[1] ) {
- // add space at character class transition, except
- // transition from upper-case to lower-case letter
- QChar n( input[1] );
- if ( c.isLetter() && n.isLetter() ) {
- if ( c == c.lower() && n == n.upper() )
- result += ' ';
- } else if ( c.category() != n.category() ) {
- result += ' ';
- }
- }
- input++;
- }
- return result.simplifyWhiteSpace();
-}
-
-#define CHAINED 0xffff
-
-struct QMultiByteUnicodeTable {
- // If multiByte, ignore unicode and index into multiByte
- // with the next character.
- QMultiByteUnicodeTable() : unicode(0xfffd), multiByte(0) { }
-
- ~QMultiByteUnicodeTable()
- {
- if ( multiByte )
- delete [] multiByte;
- }
-
- ushort unicode;
- QMultiByteUnicodeTable* multiByte;
-};
-
-static int getByte(char* &cursor)
-{
- int byte = 0;
- if ( *cursor ) {
- if ( cursor[1] == 'x' )
- byte = strtol(cursor+2,&cursor,16);
- else if ( cursor[1] == 'd' )
- byte = strtol(cursor+2,&cursor,10);
- else
- byte = strtol(cursor+2,&cursor,8);
- }
- return byte&0xff;
-}
-
-class QTextCodecFromIOD;
-
-class QTextCodecFromIODDecoder : public QTextDecoder {
- const QTextCodecFromIOD* codec;
- QMultiByteUnicodeTable* mb;
-public:
- QTextCodecFromIODDecoder(const QTextCodecFromIOD* c);
- //QString toUnicode(const char* chars, int len);
- QString convertToUnicode(const char* chars, int len, int *state);
-};
-
-class QTextCodecFromIOD : public QTextCodec {
- friend class QTextCodecFromIODDecoder;
-
- TQCString n;
-
- // If from_unicode_page[row()][cell()] is 0 and from_unicode_page_multiByte,
- // use from_unicode_page_multiByte[row()][cell()] as string.
- char** from_unicode_page;
- char*** from_unicode_page_multiByte;
- char unkn;
-
- // Only one of these is used
- ushort* to_unicode;
- QMultiByteUnicodeTable* to_unicode_multiByte;
- int max_bytes_per_char;
- TQStrList aliases;
-
- bool stateless() const { return !to_unicode_multiByte; }
-
-public:
- QTextCodecFromIOD(QIODevice* iod)
- {
- from_unicode_page = 0;
- to_unicode_multiByte = 0;
- to_unicode = 0;
- from_unicode_page_multiByte = 0;
- max_bytes_per_char = 1;
-
- const int maxlen=100;
- char line[maxlen];
- char esc='\\';
- char comm='%';
- bool incmap = FALSE;
- while (iod->readLine(line,maxlen) > 0) {
- if (0==qstrnicmp(line,"<code_set_name>",15))
- n = line+15;
- else if (0==qstrnicmp(line,"<escape_char> ",14))
- esc = line[14];
- else if (0==qstrnicmp(line,"<comment_char> ",15))
- comm = line[15];
- else if (line[0]==comm && 0==qstrnicmp(line+1," alias ",7)) {
- aliases.append(line+8);
- } else if (0==qstrnicmp(line,"CHARMAP",7)) {
- if (!from_unicode_page) {
- from_unicode_page = new char*[256];
- for (int i=0; i<256; i++)
- from_unicode_page[i]=0;
- }
- if (!to_unicode) {
- to_unicode = new ushort[256];
- }
- incmap = TRUE;
- } else if (0==qstrnicmp(line,"END CHARMAP",11))
- break;
- else if (incmap) {
- char* cursor = line;
- int byte=-1,unicode=-1;
- ushort* mb_unicode=0;
- const int maxmb=8; // more -> we'll need to improve datastructures
- char mb[maxmb+1];
- int nmb=0;
-
- while (*cursor) {
- if (cursor[0]=='<' && cursor[1]=='U' &&
- cursor[2]>='0' && cursor[2]<='9' &&
- cursor[3]>='0' && cursor[3]<='9') {
-
- unicode = strtol(cursor+2,&cursor,16);
-
- } else if (*cursor==esc) {
-
- byte = getByte(cursor);
-
- if ( *cursor == esc ) {
- if ( !to_unicode_multiByte ) {
- to_unicode_multiByte =
- new QMultiByteUnicodeTable[256];
- for (int i=0; i<256; i++) {
- to_unicode_multiByte[i].unicode =
- to_unicode[i];
- to_unicode_multiByte[i].multiByte = 0;
- }
- delete [] to_unicode;
- to_unicode = 0;
- }
- QMultiByteUnicodeTable* mbut =
- to_unicode_multiByte+byte;
- mb[nmb++] = byte;
- while ( nmb < maxmb && *cursor == esc ) {
- // Always at least once
-
- mbut->unicode = CHAINED;
- byte = getByte(cursor);
- mb[nmb++] = byte;
- if (!mbut->multiByte) {
- mbut->multiByte =
- new QMultiByteUnicodeTable[256];
- }
- mbut = mbut->multiByte+byte;
- mb_unicode = & mbut->unicode;
- }
-
- if ( nmb > max_bytes_per_char )
- max_bytes_per_char = nmb;
- }
- } else {
- cursor++;
- }
- }
-
- if (unicode >= 0 && unicode <= 0xffff)
- {
- QChar ch((ushort)unicode);
- if (!from_unicode_page[ch.row()]) {
- from_unicode_page[ch.row()] = new char[256];
- for (int i=0; i<256; i++)
- from_unicode_page[ch.row()][i]=0;
- }
- if ( mb_unicode ) {
- from_unicode_page[ch.row()][ch.cell()] = 0;
- if (!from_unicode_page_multiByte) {
- from_unicode_page_multiByte = new char**[256];
- for (int i=0; i<256; i++)
- from_unicode_page_multiByte[i]=0;
- }
- if (!from_unicode_page_multiByte[ch.row()]) {
- from_unicode_page_multiByte[ch.row()] = new char*[256];
- for (int i=0; i<256; i++)
- from_unicode_page_multiByte[ch.row()][i] = 0;
- }
- mb[nmb++] = 0;
- from_unicode_page_multiByte[ch.row()][ch.cell()]
- = qstrdup(mb);
- *mb_unicode = unicode;
- } else {
- from_unicode_page[ch.row()][ch.cell()] = (char)byte;
- if ( to_unicode )
- to_unicode[byte] = unicode;
- else
- to_unicode_multiByte[byte].unicode = unicode;
- }
- } else {
- }
- }
- }
- n = n.stripWhiteSpace();
-
- unkn = '?'; // ##### Might be a bad choice.
- }
-
- ~QTextCodecFromIOD()
- {
- if ( from_unicode_page ) {
- for (int i=0; i<256; i++)
- if (from_unicode_page[i])
- delete [] from_unicode_page[i];
- }
- if ( from_unicode_page_multiByte ) {
- for (int i=0; i<256; i++)
- if (from_unicode_page_multiByte[i])
- for (int j=0; j<256; j++)
- if (from_unicode_page_multiByte[i][j])
- delete [] from_unicode_page_multiByte[i][j];
- }
- if ( to_unicode )
- delete [] to_unicode;
- if ( to_unicode_multiByte )
- delete [] to_unicode_multiByte;
- }
-
- bool ok() const
- {
- return !!from_unicode_page;
- }
-
- QTextDecoder* makeDecoder() const
- {
- if ( stateless() )
- return QTextCodec::makeDecoder();
- else
- return new QTextCodecFromIODDecoder(this);
- }
-
- const char* qtio_name() const
- {
- return n;
- }
-
- int mibEnum() const
- {
- return 0; // #### Unknown.
- }
-
- int heuristicContentMatch(const char*, int) const
- {
- return 0;
- }
-
- int heuristicNameMatch(const char* hint) const
- {
- int bestr = QTextCodec::heuristicNameMatch(hint);
- TQStrListIterator it(aliases);
- char* a;
- while ((a=it.current())) {
- ++it;
- int r = simpleHeuristicNameMatch(a,hint);
- if (r > bestr)
- bestr = r;
- }
- return bestr;
- }
-
- QString toUnicode(const char* chars, int len) const
- {
- const uchar* uchars = (const uchar*)chars;
- QString result;
- QMultiByteUnicodeTable* multiByte=to_unicode_multiByte;
- if ( multiByte ) {
- while (len--) {
- QMultiByteUnicodeTable& mb = multiByte[*uchars];
- if ( mb.multiByte ) {
- // Chained multi-byte
- multiByte = mb.multiByte;
- } else {
- result += QChar(mb.unicode);
- multiByte=to_unicode_multiByte;
- }
- uchars++;
- }
- } else {
- while (len--)
- result += QChar(to_unicode[*uchars++]);
- }
- return result;
- }
-
- QString convertToUnicode(const char* chars, int len, ConverterState *state) const
- {
- return toUnicode(chars, len);
- }
-
-#if !defined(Q_NO_USING_KEYWORD)
- using QTextCodec::fromUnicode;
-#endif
- TQCString fromUnicode(const QString& uc, int& lenInOut) const
- {
- if (lenInOut > (int)uc.length())
- lenInOut = uc.length();
- int rlen = lenInOut*max_bytes_per_char;
- TQCString rstr(rlen+1);
- char* cursor = rstr.data();
- char* s=0;
- int l = lenInOut;
- int lout = 0;
- for (int i=0; i<l; i++) {
- QChar ch = uc[i];
- if ( ch == QChar() ) {
- // special
- *cursor++ = 0;
- } else if ( from_unicode_page[ch.row()] &&
- from_unicode_page[ch.row()][ch.cell()] )
- {
- *cursor++ = from_unicode_page[ch.row()][ch.cell()];
- lout++;
- } else if ( from_unicode_page_multiByte &&
- from_unicode_page_multiByte[ch.row()] &&
- (s=from_unicode_page_multiByte[ch.row()][ch.cell()]) )
- {
- while (*s) {
- *cursor++ = *s++;
- lout++;
- }
- } else {
- *cursor++ = unkn;
- lout++;
- }
- }
- *cursor = 0;
- lenInOut = lout;
- return rstr;
- }
-
- QByteArray convertFromUnicode(const QChar *charin, int len, ConverterState *state) const
- {
- return fromUnicode(charin, len);
- }
-
- QByteArray name() const
- {
- return qtio_name();
- }
-};
-
-// QTextCodecFromIODDecoder::QTextCodecFromIODDecoder(const QTextCodecFromIOD* c) :
-// codec(c)
-// {
-// mb = codec->to_unicode_multiByte;
-// }
-
-QString QTextCodecFromIODDecoder::convertToUnicode(const char* chars, int len, int *state)
-{
- const uchar* uchars = (const uchar*)chars;
- QString result;
- while (len--) {
- QMultiByteUnicodeTable& t = mb[*uchars];
- if ( t.multiByte ) {
- // Chained multi-byte
- mb = t.multiByte;
- } else {
- if ( t.unicode )
- result += QChar(t.unicode);
- mb=codec->to_unicode_multiByte;
- }
- uchars++;
- }
- return result;
-}
-
-#ifndef QT_NO_CODECS
-// Cannot use <pre> or \code
-/*!
- Reads a POSIX2 charmap definition from \a iod.
- The parser recognizes the following lines:
-
-<font name="sans">
-&nbsp;&nbsp;&lt;code_set_name&gt; <i>name</i></br>
-&nbsp;&nbsp;&lt;escape_char&gt; <i>character</i></br>
-&nbsp;&nbsp;% alias <i>alias</i></br>
-&nbsp;&nbsp;CHARMAP</br>
-&nbsp;&nbsp;&lt;<i>token</i>&gt; /x<i>hexbyte</i> &lt;U<i>unicode</i>&gt; ...</br>
-&nbsp;&nbsp;&lt;<i>token</i>&gt; /d<i>decbyte</i> &lt;U<i>unicode</i>&gt; ...</br>
-&nbsp;&nbsp;&lt;<i>token</i>&gt; /<i>octbyte</i> &lt;U<i>unicode</i>&gt; ...</br>
-&nbsp;&nbsp;&lt;<i>token</i>&gt; /<i>any</i>/<i>any</i>... &lt;U<i>unicode</i>&gt; ...</br>
-&nbsp;&nbsp;END CHARMAP</br>
-</font>
-
- The resulting QTextCodec is returned (and also added to the global
- list of codecs). The name() of the result is taken from the
- code_set_name.
-
- Note that a codec constructed in this way uses much more memory
- and is slower than a hand-written QTextCodec subclass, since
- tables in code are kept in memory shared by all Qt applications.
-
- \sa loadCharmapFile()
-*/
-QTextCodec* QTextCodec::loadCharmap(QIODevice* iod)
-{
- QTextCodecFromIOD* r = new QTextCodecFromIOD(iod);
- if ( !r->ok() ) {
- delete r;
- r = 0;
- }
- return r;
-}
-
-/*!
- A convenience function for loadCharmap() that loads the charmap
- definition from the file \a filename.
-*/
-QTextCodec* QTextCodec::loadCharmapFile(QString filename)
-{
- QFile f(filename);
- if (f.open(IO_ReadOnly)) {
- QTextCodecFromIOD* r = new QTextCodecFromIOD(&f);
- if ( !r->ok() )
- delete r;
- else
- return r;
- }
- return 0;
-}
-
-/*!
- Returns a value indicating how likely it is that this decoder is
- appropriate for decoding some format that has the given name. The
- name is compared with the \a hint.
-
- A good match returns a positive number around the length of the
- string. A bad match is negative.
-
- The default implementation calls simpleHeuristicNameMatch() with
- the name of the codec.
-*/
-int QTextCodec::heuristicNameMatch(const char* hint) const
-{
- return simpleHeuristicNameMatch(name(),hint);
-}
-
-/*!
- A simple utility function for heuristicNameMatch(): it does some
- very minor character-skipping so that almost-exact matches score
- high. \a name is the text we're matching and \a hint is used for
- the comparison.
-*/
-int QTextCodec::simpleHeuristicNameMatch(const char* name, const char* hint)
-{
- // if they're the same, return a perfect score.
- if ( name && hint && *name && *hint && qstricmp( name, hint ) == 0 )
- return qstrlen( hint );
-
- // if the letters and numbers are the same, we have an "almost"
- // perfect match.
- QString h( lettersAndNumbers( hint ) );
- QString n( lettersAndNumbers( name ) );
- if ( h == n )
- return qstrlen( hint )-1;
-
- if ( h.stripWhiteSpace() == n.stripWhiteSpace() )
- return qstrlen( hint )-2;
-
- // could do some more here, but I don't think it's worth it
-
- return 0;
-}
-
-#endif //QT_NO_CODECS
-
-#endif // USE_QT4
\ No newline at end of file