1 files changed, 460 insertions, 0 deletions
diff --git a/tdehtml/xml/dom_stringimpl.cpp b/tdehtml/xml/dom_stringimpl.cpp
new file mode 100644
index 000000000..12f1481c0
--- /dev/null
+++ b/tdehtml/xml/dom_stringimpl.cpp
@@ -0,0 +1,460 @@
+/**
+ * This file is part of the DOM implementation for KDE.
+ *
+ * Copyright (C) 1999-2003 Lars Knoll ([email protected])
+ *           (C) 1999 Antti Koivisto ([email protected])
+ *           (C) 2001-2003 Dirk Mueller ( [email protected] )
+ *           (C) 2002 Apple Computer, Inc.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public License
+ * along with this library; see the file COPYING.LIB.  If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+#include "dom_stringimpl.h"
+
+#include <kdebug.h>
+
+#include <string.h>
+#include <tqstringlist.h>
+
+using namespace DOM;
+using namespace tdehtml;
+
+
+/**
+ * Builds the string from a NUL-terminated C string, converting each char
+ * to a TQChar.
+ *
+ * A null or empty @p str produces a zero-length string that still owns a
+ * one-element buffer holding a null character.
+ */
+DOMStringImpl::DOMStringImpl(const char *str)
+{
+    if (!str || !*str)
+    {
+        // No text: keep a valid buffer around anyway.
+        l = 0;
+        s = QT_ALLOC_QCHAR_VEC( 1 );  // crash protection
+        s[0] = 0x0; // == TQChar::null;
+        return;
+    }
+
+    l = strlen(str);
+    s = QT_ALLOC_QCHAR_VEC( l );
+    // Copy character by character; a plain memcpy would not widen the chars.
+    for (unsigned int idx = 0; idx < l; ++idx)
+        s[idx] = str[idx];
+}
+
+// FIXME: should be a cached flag maybe.
+/**
+ * Tells whether the string consists solely of whitespace.
+ *
+ * Code units up to 0x7F count as whitespace when they are not greater than
+ * ' '; anything above 0x7F counts when its direction is TQChar::DirWS.
+ * A string without a buffer, or with zero length, yields true.
+ */
+bool DOMStringImpl::containsOnlyWhitespace() const
+{
+    if (!s)
+        return true;
+
+    const TQChar *const end = s + l;
+    for (const TQChar *cur = s; cur != end; ++cur) {
+        const unsigned int code = cur->unicode();
+        const bool blank = (code <= 0x7F) ? (code <= ' ')
+                                          : (cur->direction() == TQChar::DirWS);
+        if (!blank)
+            return false;
+    }
+    return true;
+}
+
+
+/**
+ * Appends the contents of @p str to this string.
+ *
+ * A null or empty @p str leaves the string untouched. Otherwise a new
+ * buffer of the combined length is allocated, filled and swapped in, and
+ * the previous buffer (if any) is released.
+ */
+void DOMStringImpl::append(DOMStringImpl *str)
+{
+    if (!str || str->l == 0)
+        return;
+
+    const unsigned int newlen = l + str->l;
+    TQChar *c = QT_ALLOC_QCHAR_VEC(newlen);
+    // This string may have no buffer at all (s == 0); memcpy() must not be
+    // given a null source, not even for a zero-byte copy.
+    if (s && l)
+        memcpy(c, s, l*sizeof(TQChar));
+    // str->s is read before the old buffer is freed, so str == this is fine.
+    memcpy(c+l, str->s, str->l*sizeof(TQChar));
+    if (s) QT_DELETE_QCHAR_VEC(s);
+    s = c;
+    l = newlen;
+}
+
+/**
+ * Inserts the contents of @p str in front of the character at @p pos.
+ *
+ * A @p pos beyond the end of the string is treated as an append. A null
+ * or empty @p str leaves the string untouched.
+ */
+void DOMStringImpl::insert(DOMStringImpl *str, unsigned int pos)
+{
+    if (pos > l)
+    {
+        append(str);
+        return;
+    }
+    if (!str || str->l == 0)
+        return;
+
+    const unsigned int newlen = l + str->l;
+    TQChar *c = QT_ALLOC_QCHAR_VEC(newlen);
+    // This string may have no buffer (s == 0, l == 0); skip the copies of
+    // the old contents then, so neither memcpy() nor the s+pos arithmetic
+    // ever sees a null pointer.
+    if (s)
+    {
+        memcpy(c, s, pos*sizeof(TQChar));                        // head
+        memcpy(c+pos+str->l, s+pos, (l-pos)*sizeof(TQChar));     // tail
+    }
+    memcpy(c+pos, str->s, str->l*sizeof(TQChar));                // inserted text
+    if (s) QT_DELETE_QCHAR_VEC(s);
+    s = c;
+    l = newlen;
+}
+
+/**
+ * Shortens the string to its first @p len characters.
+ *
+ * A @p len larger than the current length is ignored. A negative @p len is
+ * treated as 0. The contents are always moved into a freshly allocated
+ * buffer; an empty result keeps a one-element buffer holding a null
+ * character, like the constructors do.
+ */
+void DOMStringImpl::truncate(int len)
+{
+    if(len > (int)l) return;
+    // A negative value must never reach the unsigned length member.
+    if(len < 0) len = 0;
+
+    TQChar *c;
+    if(len > 0)
+    {
+        // 0 < len <= l here, so there are at least len characters to copy.
+        c = QT_ALLOC_QCHAR_VEC(len);
+        memcpy(c, s, len*sizeof(TQChar));
+    }
+    else
+    {
+        // Nothing to keep: do not read from s, which may be null or empty.
+        c = QT_ALLOC_QCHAR_VEC(1);
+        c[0] = 0x0;
+    }
+    if(s) QT_DELETE_QCHAR_VEC(s);
+    s = c;
+    l = len;
+}
+
+/**
+ * Removes up to @p len characters starting at @p pos.
+ *
+ * Nothing happens when @p pos is at or past the end, or when @p len is
+ * zero or negative. A range running past the end is clipped to the end of
+ * the string. If everything is removed, a one-element buffer holding a
+ * null character is kept.
+ */
+void DOMStringImpl::remove(unsigned int pos, int len)
+{
+  if(pos >= l ) return;
+  // A negative count would wrap around in the unsigned arithmetic below.
+  if(len <= 0) return;
+
+  // Clip against the remaining length; comparing with l - pos cannot
+  // overflow, unlike pos + len.
+  uint count = len;
+  if(count > l - pos)
+    count = l - pos;
+
+  const uint newLen = l-count;
+  const uint allocLen = newLen ? newLen : 1;
+  TQChar *c = QT_ALLOC_QCHAR_VEC(allocLen);
+  memcpy(c, s, pos*sizeof(TQChar));
+  memcpy(c+pos, s+pos+count, (newLen-pos)*sizeof(TQChar));
+  if(!newLen)
+    c[0] = 0x0;
+  if(s) QT_DELETE_QCHAR_VEC(s);
+  s = c;
+  l = newLen;
+}
+
+/**
+ * Splits the string in two at @p pos.
+ *
+ * This string keeps the characters before @p pos; the characters from
+ * @p pos onwards are returned in a newly allocated DOMStringImpl. When
+ * @p pos is at or past the end, this string is left alone and a
+ * default-constructed DOMStringImpl is returned.
+ */
+DOMStringImpl *DOMStringImpl::split(unsigned int pos)
+{
+  if( pos < l ) {
+    // Copy the tail first, then drop it from this string.
+    DOMStringImpl *tail = new DOMStringImpl(s + pos, l - pos);
+    truncate(pos);
+    return tail;
+  }
+
+  return new DOMStringImpl();
+}
+
+/**
+ * Returns a newly allocated copy of up to @p len characters starting at
+ * @p pos.
+ *
+ * A range running past the end is clipped to the end of the string, so a
+ * very large @p len means "everything from @p pos". When @p pos is at or
+ * past the end, a default-constructed DOMStringImpl is returned.
+ */
+DOMStringImpl *DOMStringImpl::substring(unsigned int pos, unsigned int len)
+{
+  if( pos >=l ) return new DOMStringImpl();
+  // Compare against the remaining length: pos + len can wrap around for
+  // large len values, which would skip the clipping entirely.
+  if(len > l - pos)
+    len = l - pos;
+
+  return new DOMStringImpl(s + pos, len);
+}
+
+// Collapses white-space according to CSS 2.1 rules
+//
+// preserveLF: when false, every linefeed is turned into a space.
+// preserveWS: when false, each run of spaces/linefeeds is replaced by a
+//             single character: '\n' if the run contained a linefeed that
+//             was kept, ' ' otherwise.
+//
+// '\r' is handled as '\n' and '\t' as ' ' before any other rule applies.
+//
+// Returns this object itself when both flags are set, or when the output is
+// as long as the input and no linefeed was turned into a space. Otherwise
+// returns a newly allocated DOMStringImpl holding the collapsed text.
+//
+// NOTE(review): in the "return this" case the '\r' -> '\n' and '\t' -> ' '
+// substitutions are not reflected in the result, since the original buffer
+// is handed back untouched - confirm callers do not rely on them.
+DOMStringImpl *DOMStringImpl::collapseWhiteSpace(bool preserveLF, bool preserveWS)
+{
+    if (preserveLF && preserveWS) return this;
+
+    // Notice we are likely allocating more space than needed (worst case)
+    TQChar *n = QT_ALLOC_QCHAR_VEC(l);
+
+    unsigned int pos = 0;      // number of characters written to n so far
+    bool collapsing = false;   // collapsing white-space
+    bool collapsingLF = false; // collapsing around linefeed
+    bool changedLF = false;    // at least one linefeed was turned into a space
+    for(unsigned int i=0; i<l; i++) {
+        TQChar ch = s[i];
+
+        // We act on \r as we would on \n because CSS uses it to indicate new-line
+        if (ch == '\r') ch = '\n';
+        else
+        // ### The XML parser lets \t through, for now treat them as spaces
+        if (ch == '\t') ch = ' ';
+
+        if (!preserveLF && ch == '\n') {
+            // ### Not strictly correct according to CSS3 text-module.
+            // - In ideographic languages linefeed should be ignored
+            // - and in Thai and Khmer it should be treated as a zero-width space
+            ch = ' '; // Treat as space
+            changedLF = true;
+        }
+
+        if (collapsing) {
+            // Inside a white-space run: swallow further spaces and linefeeds,
+            // remembering whether a linefeed was part of the run.
+            if (ch == ' ')
+                continue;
+            if (ch == '\n') {
+                collapsingLF = true;
+                continue;
+            }
+
+            // The run ended: emit its single replacement character, then
+            // fall through to emit the current character below.
+            n[pos++] = (collapsingLF) ? '\n' : ' ';
+            collapsing = false;
+            collapsingLF = false;
+        }
+        else
+        if (!preserveWS && ch == ' ') {
+            // A space opens a new run; nothing is written yet.
+            collapsing = true;
+            continue;
+        }
+        else
+        if (!preserveWS && ch == '\n') {
+            // A linefeed opens a new run that will collapse to '\n'.
+            collapsing = true;
+            collapsingLF = true;
+            continue;
+        }
+
+        n[pos++] = ch;
+    }
+    // A run that reaches the end of the string still yields one character.
+    if (collapsing)
+        n[pos++] = ((collapsingLF) ? '\n' : ' ');
+
+    if (pos == l && !changedLF) {
+        // Same length and no linefeed replaced: discard the scratch buffer.
+        QT_DELETE_QCHAR_VEC(n);
+        return this;
+    }
+    else {
+        // Hand the scratch buffer over to the new string object.
+        DOMStringImpl* out = new DOMStringImpl();
+        out->s = n;
+        out->l = pos;
+
+        return out;
+    }
+}
+
+/**
+ * Parses one length token of @p l characters starting at @p s.
+ *
+ * The token is read as optional whitespace, an optional sign and digits; a
+ * fractional part and trailing whitespace are skipped. The character that
+ * follows selects the unit:
+ *  - '%'  -> Length(value, Percent)
+ *  - '*'  -> Length(value, Relative)
+ *  - else -> Length(value, Fixed)
+ * If no number could be read, a following '*' or '%' yields
+ * Length(1, Relative) and anything else Length(0, Relative). An empty
+ * token yields Length(1, Relative).
+ */
+static Length parseLength(const TQChar *s, unsigned int l)
+{
+    if (!l)
+        return Length(1, Relative);
+
+    // Find the end of the integer part: \s*[+-]?\d*
+    unsigned cursor = 0;
+    for (; cursor < l && s[cursor].isSpace(); ++cursor) {}
+    if (cursor < l && (s[cursor] == '+' || s[cursor] == '-'))
+        ++cursor;
+    for (; cursor < l && s[cursor].isDigit(); ++cursor) {}
+
+    bool ok;
+    const int value = TQConstString(s, cursor).string().toInt(&ok);
+
+    /* Skip over any remaining digits, we are not that accurate (5.5% => 5%) */
+    for (; cursor < l && (s[cursor].isDigit() || s[cursor] == '.'); ++cursor) {}
+
+    /* IE Quirk: Skip any whitespace (20 % => 20%) */
+    for (; cursor < l && s[cursor].isSpace(); ++cursor) {}
+
+    const bool atEnd = (cursor == l);
+
+    if (!ok) {
+        // No usable number: a bare unit character still means "1 share".
+        if (!atEnd && (s[cursor] == '*' || s[cursor] == '%'))
+            return Length(1, Relative);
+        return Length(0, Relative);
+    }
+
+    if (!atEnd) {
+        if (s[cursor] == '%')
+            return Length(value, Percent);
+        if (s[cursor] == '*')
+            return Length(value, Relative);
+    }
+    return Length(value, Fixed);
+}
+
+/**
+ * Parses the string as a list of coordinates.
+ *
+ * Every character that is not a digit, '-', '*' or '.' acts as a
+ * separator. Each resulting token is parsed with parseLength().
+ *
+ * @param len receives the number of entries in the returned array
+ * @return an array allocated with new[]; it always has at least one entry
+ */
+tdehtml::Length* DOMStringImpl::toCoordsArray(int& len) const
+{
+    // Work on a copy in which all separator characters become spaces.
+    TQString str(s, l);
+    for (unsigned int i = 0; i < l; i++) {
+        const TQChar cc = s[i];
+        const bool isDigit = !(cc < TQChar('0')) && !(cc > TQChar('9'));
+        const bool keep = isDigit || cc == '-' || cc == '*' || cc == '.';
+        if (!keep)
+            str[i] = ' ';
+    }
+    str = str.simplifyWhiteSpace();
+
+    len = str.contains(' ') + 1;
+    tdehtml::Length* coords = new tdehtml::Length[len];
+
+    const TQChar *chars = str.unicode();
+    int count = 0;
+    int start = 0;
+    for (int sep = str.find(' ', start); sep != -1; sep = str.find(' ', start)) {
+        coords[count++] = parseLength(chars + start, sep - start);
+        start = sep + 1;
+    }
+    // Whatever follows the last separator is the final token.
+    coords[count] = parseLength(chars + start, str.length() - start);
+
+    return coords;
+}
+
+/**
+ * Parses the string as a comma-separated list of lengths.
+ *
+ * Each token is parsed with parseLength().
+ *
+ * @param len receives the number of entries; it is set to 1 when the
+ *            string contains no comma
+ * @return an array allocated with new[], or 0 when the string contains no
+ *         comma
+ */
+tdehtml::Length* DOMStringImpl::toLengthArray(int& len) const
+{
+    TQString str(s, l);
+    str = str.simplifyWhiteSpace();
+
+    len = str.contains(',') + 1;
+
+    // If we have no commas, we have no array.
+    if (len == 1)
+        return 0L;
+
+    tdehtml::Length* lengths = new tdehtml::Length[len];
+
+    const TQChar *chars = str.unicode();
+    int count = 0;
+    int start = 0;
+    for (int comma = str.find(',', start); comma != -1; comma = str.find(',', start)) {
+        lengths[count++] = parseLength(chars + start, comma - start);
+        start = comma + 1;
+    }
+
+    /* IE Quirk: If the last comma is the last char skip it and reduce len by one */
+    const unsigned int remaining = str.length() - start;
+    if (remaining > 0)
+        lengths[count] = parseLength(chars + start, remaining);
+    else
+        len--;
+
+    return lengths;
+}
+
+/**
+ * Tells whether lower-casing would leave every character unchanged.
+ * An empty string yields true.
+ */
+bool DOMStringImpl::isLower() const
+{
+    // Advance past every character that is its own lower-case form.
+    unsigned int idx = 0;
+    while (idx < l && s[idx].lower() == s[idx])
+        ++idx;
+    return idx == l;
+}
+
+/**
+ * Returns a newly allocated string in which every character has been
+ * lower-cased. For an empty string the result is a default-constructed
+ * DOMStringImpl.
+ */
+DOMStringImpl *DOMStringImpl::lower() const
+{
+    DOMStringImpl *result = new DOMStringImpl;
+    if (l) {
+        result->s = QT_ALLOC_QCHAR_VEC(l);
+        result->l = l;
+
+        TQChar *dst = result->s;
+        const TQChar *const end = s + l;
+        for (const TQChar *src = s; src != end; ++src, ++dst)
+            *dst = src->lower();
+    }
+    return result;
+}
+
+/**
+ * Returns a newly allocated string in which every character has been
+ * upper-cased. For an empty string the result is a default-constructed
+ * DOMStringImpl.
+ */
+DOMStringImpl *DOMStringImpl::upper() const
+{
+    DOMStringImpl *result = new DOMStringImpl;
+    if (l) {
+        result->s = QT_ALLOC_QCHAR_VEC(l);
+        result->l = l;
+
+        TQChar *dst = result->s;
+        const TQChar *const end = s + l;
+        for (const TQChar *src = s; src != end; ++src, ++dst)
+            *dst = src->upper();
+    }
+    return result;
+}
+
+/**
+ * Returns a newly allocated copy in which the first letter or digit of
+ * each word is upper-cased. A new word starts after any character for
+ * which isSpace() is true.
+ *
+ * @param noFirstCap when true, the first word is left as it is and
+ *                   capitalization only starts after the first space
+ */
+DOMStringImpl *DOMStringImpl::capitalize(bool noFirstCap) const
+{
+    DOMStringImpl *result = new DOMStringImpl;
+    if (!l)
+        return result;
+
+    result->s = QT_ALLOC_QCHAR_VEC(l);
+    result->l = l;
+
+    // True while the next letter or digit should be upper-cased.
+    bool atWordStart = !noFirstCap;
+    for (unsigned int i = 0; i < l; i++) {
+        const TQChar ch = s[i];
+        if (atWordStart && ch.isLetterOrNumber()) {
+            result->s[i] = ch.upper();
+            atWordStart = false;
+            continue;
+        }
+
+        result->s[i] = ch;
+        if (ch.isSpace())
+            atWordStart = true;
+    }
+
+    return result;
+}
+
+/**
+ * Returns the contents as a TQString built from the buffer and length.
+ */
+TQString DOMStringImpl::string() const
+{
+    TQString copy(s, l);
+    return copy;
+}
+
+/**
+ * Converts the leading integer of the string.
+ *
+ * Only the prefix matching \s*[+-]?\d* is handed to TQString::toInt();
+ * anything after it is ignored.
+ *
+ * @param ok passed through to TQString::toInt()
+ */
+int DOMStringImpl::toInt(bool* ok) const
+{
+    // match \s*[+-]?\d*
+    unsigned prefixLen = 0;
+    for (; prefixLen < l && s[prefixLen].isSpace(); ++prefixLen) {}
+    if (prefixLen < l && (s[prefixLen] == '+' || s[prefixLen] == '-'))
+        ++prefixLen;
+    for (; prefixLen < l && s[prefixLen].isDigit(); ++prefixLen) {}
+
+    const TQConstString prefix(s, prefixLen);
+    return prefix.string().toInt(ok);
+}
+
+// Replacement text for the characters escaped by escapeHTML(), stored as
+// 16-bit code units so it can be copied straight into a TQChar buffer.
+static const unsigned short amp[] = {'&', 'a', 'm', 'p', ';'};
+static const unsigned short lt[] =  {'&', 'l', 't', ';'};
+static const unsigned short gt[] =  {'&', 'g', 't', ';'};
+
+/**
+ * Escapes '&', '<' and '>' as "&amp;", "&lt;" and "&gt;".
+ *
+ * Returns this object itself when nothing needs escaping, otherwise a
+ * newly allocated DOMStringImpl with the escaped text. No other character
+ * is altered.
+ */
+DOMStringImpl *DOMStringImpl::escapeHTML()
+{
+    // First pass: work out how long the escaped text will be.
+    unsigned outL = 0;
+    for (unsigned int i = 0; i < l; ++i ) {
+        switch (s[i].unicode()) {
+        case '&':
+            outL += 5; //&amp;
+            break;
+        case '<':
+        case '>':
+            outL += 4; //&gt;/&lt;
+            break;
+        default:
+            ++outL;
+        }
+    }
+    if (outL == l)
+        return this;
+
+    DOMStringImpl* toRet = new DOMStringImpl();
+    toRet->s = QT_ALLOC_QCHAR_VEC(outL);
+    toRet->l = outL;
+
+    // Second pass: copy, substituting the entity where needed.
+    TQChar *dst = toRet->s;
+    for (unsigned int i = 0; i < l; ++i ) {
+        const unsigned short *entity = 0;
+        unsigned entityLen = 0;
+        switch (s[i].unicode()) {
+        case '&':
+            entity = amp;
+            entityLen = sizeof(amp) / sizeof(amp[0]);
+            break;
+        case '<':
+            entity = lt;
+            entityLen = sizeof(lt) / sizeof(lt[0]);
+            break;
+        case '>':
+            entity = gt;
+            entityLen = sizeof(gt) / sizeof(gt[0]);
+            break;
+        default:
+            break;
+        }
+
+        if (entity) {
+            // Raw copy; assumes a TQChar is laid out as one unsigned short.
+            memcpy(dst, entity, entityLen * sizeof(unsigned short));
+            dst += entityLen;
+        } else {
+            *dst++ = s[i];
+        }
+    }
+    return toRet;
+}
+