summaryrefslogtreecommitdiffstats
path: root/tdehtml/xml/dom_stringimpl.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'tdehtml/xml/dom_stringimpl.cpp')
-rw-r--r--tdehtml/xml/dom_stringimpl.cpp460
1 files changed, 460 insertions, 0 deletions
diff --git a/tdehtml/xml/dom_stringimpl.cpp b/tdehtml/xml/dom_stringimpl.cpp
new file mode 100644
index 000000000..12f1481c0
--- /dev/null
+++ b/tdehtml/xml/dom_stringimpl.cpp
@@ -0,0 +1,460 @@
+/**
+ * This file is part of the DOM implementation for KDE.
+ *
+ * Copyright (C) 1999-2003 Lars Knoll ([email protected])
+ * (C) 1999 Antti Koivisto ([email protected])
+ * (C) 2001-2003 Dirk Mueller ( [email protected] )
+ * (C) 2002 Apple Computer, Inc.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public License
+ * along with this library; see the file COPYING.LIB. If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+#include "dom_stringimpl.h"
+
+#include <kdebug.h>
+
+#include <string.h>
+#include <tqstringlist.h>
+
+using namespace DOM;
+using namespace tdehtml;
+
+
+// Builds the string from a NUL-terminated 8-bit C string, widening each
+// char into a TQChar. A null or empty input yields a zero-length string
+// that still owns a one-element buffer.
+DOMStringImpl::DOMStringImpl(const char *str)
+{
+    if (!str || !*str) {
+        // crash protection: keep s pointing at valid storage
+        s = QT_ALLOC_QCHAR_VEC( 1 );
+        s[0] = 0x0; // == TQChar::null;
+        l = 0;
+        return;
+    }
+
+    l = strlen(str);
+    s = QT_ALLOC_QCHAR_VEC( l );
+    for (unsigned int idx = 0; idx < l; ++idx)
+        s[idx] = str[idx];
+}
+
+// FIXME: should be a cached flag maybe.
+// Returns true when the buffer is null or every character counts as
+// white-space: ASCII characters must be <= ' ', anything else must have
+// the TQChar::DirWS direction.
+bool DOMStringImpl::containsOnlyWhitespace() const
+{
+    if (!s)
+        return true;
+
+    for (uint idx = 0; idx < l; ++idx) {
+        const TQChar ch = s[idx];
+        const bool isAscii = ch.unicode() <= 0x7F;
+        const bool isBlank = isAscii ? !(ch.unicode() > ' ')
+                                     : !(ch.direction() != TQChar::DirWS);
+        if (!isBlank)
+            return false;
+    }
+    return true;
+}
+
+
+// Appends the characters of str to this string by allocating a new
+// buffer, copying both parts into it and releasing the old buffer.
+// A null or empty str leaves this string untouched.
+void DOMStringImpl::append(DOMStringImpl *str)
+{
+    if (!str || str->l == 0)
+        return;
+
+    const int newlen = l+str->l;
+    TQChar *merged = QT_ALLOC_QCHAR_VEC(newlen);
+    memcpy(merged, s, l*sizeof(TQChar));
+    memcpy(merged+l, str->s, str->l*sizeof(TQChar));
+    if (s)
+        QT_DELETE_QCHAR_VEC(s);
+    s = merged;
+    l = newlen;
+}
+
+// Inserts the characters of str before index pos. A pos beyond the end
+// of the string is delegated to append(); a null or empty str is a no-op.
+void DOMStringImpl::insert(DOMStringImpl *str, unsigned int pos)
+{
+    if (pos > l) {
+        append(str);
+        return;
+    }
+    if (!str || str->l == 0)
+        return;
+
+    const int newlen = l+str->l;
+    TQChar *merged = QT_ALLOC_QCHAR_VEC(newlen);
+
+    // head of this string, then the inserted text, then the tail
+    memcpy(merged, s, pos*sizeof(TQChar));
+    memcpy(merged+pos, str->s, str->l*sizeof(TQChar));
+    memcpy(merged+pos+str->l, s+pos, (l-pos)*sizeof(TQChar));
+
+    if (s)
+        QT_DELETE_QCHAR_VEC(s);
+    s = merged;
+    l = newlen;
+}
+
+// Shortens the string to len characters.
+//  - len greater than the current length: nothing happens.
+//  - negative len: treated as 0 (it used to be stored into the unsigned
+//    length as a huge value).
+// The buffer is always reallocated, with a minimum capacity of one
+// character, so s is non-null afterwards even for an empty result.
+void DOMStringImpl::truncate(int len)
+{
+    if(len > (int)l) return;
+    if(len < 0) len = 0;
+
+    int nl = len < 1 ? 1 : len;
+    TQChar *c = QT_ALLOC_QCHAR_VEC(nl);
+    if(l == 0) {
+        // Nothing to copy: the old buffer may be null or hold no valid
+        // character, so initialise the single slot instead of reading it.
+        c[0] = 0x0;
+    } else {
+        // len <= l and l >= 1 here, hence nl <= l and the copy stays in bounds.
+        memcpy(c, s, nl*sizeof(TQChar));
+    }
+    if(s) QT_DELETE_QCHAR_VEC(s);
+    s = c;
+    l = len;
+}
+
+// Removes up to len characters starting at index pos.
+//  - pos at or beyond the end: nothing happens.
+//  - len <= 0: nothing happens (a negative len used to be mixed into
+//    unsigned arithmetic and could grow or mangle the string).
+//  - len reaching past the end is clamped to the remaining characters.
+void DOMStringImpl::remove(unsigned int pos, int len)
+{
+    if(len <= 0) return;
+    if(pos >= l ) return;
+    // pos < l, so l - pos cannot underflow; comparing against it avoids
+    // the wrap-around that pos + len could produce.
+    if((unsigned int)len > l - pos)
+        len = l - pos;
+
+    uint newLen = l-len;
+    TQChar *c = QT_ALLOC_QCHAR_VEC(newLen);
+    memcpy(c, s, pos*sizeof(TQChar));
+    memcpy(c+pos, s+pos+len, (newLen-pos)*sizeof(TQChar));
+    if(s) QT_DELETE_QCHAR_VEC(s);
+    s = c;
+    l = newLen;
+}
+
+// Splits the string at index pos: the characters from pos onwards are
+// returned as a newly allocated DOMStringImpl and this string is
+// truncated to pos characters. A pos at or beyond the end returns a new
+// default-constructed string and leaves this one unchanged.
+DOMStringImpl *DOMStringImpl::split(unsigned int pos)
+{
+    if (pos < l) {
+        const uint tailLen = l-pos;
+        DOMStringImpl *tail = new DOMStringImpl(s + pos, tailLen);
+        truncate(pos);
+        return tail;
+    }
+
+    return new DOMStringImpl();
+}
+
+// Returns a newly allocated string holding up to len characters starting
+// at index pos. A pos at or beyond the end yields a new default-constructed
+// string; a len reaching past the end is clamped to the remaining characters.
+DOMStringImpl *DOMStringImpl::substring(unsigned int pos, unsigned int len)
+{
+    if( pos >=l ) return new DOMStringImpl();
+    // pos < l here, so l - pos cannot underflow. Comparing against it
+    // (rather than testing pos + len > l) keeps the clamp working when
+    // pos + len would wrap around, e.g. for len == UINT_MAX.
+    if(len > l - pos)
+        len = l - pos;
+
+    return new DOMStringImpl(s + pos, len);
+}
+
+// Collapses white-space according to CSS 2.1 rules
+//
+// preserveLF: when false, every linefeed is turned into a space.
+// preserveWS: when false, each run of spaces/linefeeds is reduced to a
+//             single character.
+//
+// Returns `this` when both flags are set, or when the output has the same
+// length as the input and no linefeed was converted; otherwise returns a
+// newly allocated DOMStringImpl that takes over the work buffer.
+// NOTE(review): callers presumably need to cope with either result
+// (same object vs. new object) -- confirm against the call sites.
+DOMStringImpl *DOMStringImpl::collapseWhiteSpace(bool preserveLF, bool preserveWS)
+{
+ if (preserveLF && preserveWS) return this;
+
+ // Notice we are likely allocating more space than needed (worst case)
+ TQChar *n = QT_ALLOC_QCHAR_VEC(l);
+
+ unsigned int pos = 0;
+ bool collapsing = false; // collapsing white-space
+ bool collapsingLF = false; // collapsing around linefeed
+ bool changedLF = false;
+ for(unsigned int i=0; i<l; i++) {
+ TQChar ch = s[i];
+
+ // We act on \r as we would on \n because CSS uses it to indicate new-line
+ if (ch == '\r') ch = '\n';
+ else
+ // ### The XML parser lets \t through, for now treat them as spaces
+ if (ch == '\t') ch = ' ';
+
+ if (!preserveLF && ch == '\n') {
+ // ### Not strictly correct according to CSS3 text-module.
+ // - In ideographic languages linefeed should be ignored
+ // - and in Thai and Khmer it should be treated as a zero-width space
+ ch = ' '; // Treat as space
+ changedLF = true;
+ }
+
+ // Inside a white-space run: swallow further spaces/linefeeds, and on
+ // the first other character emit one placeholder for the whole run
+ // ('\n' if the run contained a linefeed, ' ' otherwise) before
+ // falling through to emit the character itself.
+ if (collapsing) {
+ if (ch == ' ')
+ continue;
+ if (ch == '\n') {
+ collapsingLF = true;
+ continue;
+ }
+
+ n[pos++] = (collapsingLF) ? '\n' : ' ';
+ collapsing = false;
+ collapsingLF = false;
+ }
+ else
+ // Not in a run yet: a space or linefeed opens one, but only when
+ // white-space is not being preserved.
+ if (!preserveWS && ch == ' ') {
+ collapsing = true;
+ continue;
+ }
+ else
+ if (!preserveWS && ch == '\n') {
+ collapsing = true;
+ collapsingLF = true;
+ continue;
+ }
+
+ n[pos++] = ch;
+ }
+ // Flush a run that reaches the end of the string.
+ if (collapsing)
+ n[pos++] = ((collapsingLF) ? '\n' : ' ');
+
+ // Same length and no linefeed conversion: drop the work buffer and hand
+ // back the original object.
+ // NOTE(review): a \r or \t substituted above does not set changedLF, so
+ // such a substitution alone still takes this path -- confirm intended.
+ if (pos == l && !changedLF) {
+ QT_DELETE_QCHAR_VEC(n);
+ return this;
+ }
+ else {
+ DOMStringImpl* out = new DOMStringImpl();
+ out->s = n;
+ out->l = pos;
+
+ return out;
+ }
+}
+
+// Parses one length token of l characters starting at s.
+//  - empty input                      -> Length(1, Relative)
+//  - integer followed by '%'          -> Length(value, Percent)
+//  - integer followed by '*'          -> Length(value, Relative)
+//  - integer followed by anything else, or nothing -> Length(value, Fixed)
+//  - no parsable integer but '*' or '%' next -> Length(1, Relative)
+//  - everything else                  -> Length(0, Relative)
+static Length parseLength(const TQChar *s, unsigned int l)
+{
+    if (l == 0)
+        return Length(1, Relative);
+
+    // Scan \s*[+-]?\d* to find the end of the integer part.
+    unsigned cursor = 0;
+    for (; cursor < l && s[cursor].isSpace(); ++cursor) {}
+    if (cursor < l && (s[cursor] == '+' || s[cursor] == '-'))
+        ++cursor;
+    for (; cursor < l && s[cursor].isDigit(); ++cursor) {}
+
+    bool ok;
+    const int value = TQConstString(s, cursor).string().toInt(&ok);
+
+    /* Skip over any remaining digits, we are not that accurate (5.5% => 5%) */
+    for (; cursor < l && (s[cursor].isDigit() || s[cursor] == '.'); ++cursor) {}
+
+    /* IE Quirk: Skip any whitespace (20 % => 20%) */
+    for (; cursor < l && s[cursor].isSpace(); ++cursor) {}
+
+    // cursor never runs past l, so "not at the end" means cursor < l.
+    const bool hasSuffix = cursor < l;
+
+    if (ok) {
+        if (hasSuffix) {
+            if (s[cursor] == '%')
+                return Length(value, Percent);
+            if (s[cursor] == '*')
+                return Length(value, Relative);
+        }
+        return Length(value, Fixed);
+    }
+
+    if (hasSuffix && (s[cursor] == '*' || s[cursor] == '%'))
+        return Length(1, Relative);
+
+    return Length(0, Relative);
+}
+
+// Converts the string into a new[]-allocated array of lengths. Every
+// character other than a digit, '-', '*' or '.' is treated as a separator;
+// the tokens between separators are handed to parseLength(). The number
+// of array entries is stored in len.
+tdehtml::Length* DOMStringImpl::toCoordsArray(int& len) const
+{
+    TQString str(s, l);
+    for (unsigned int idx = 0; idx < l; idx++) {
+        const TQChar cc = s[idx];
+        const bool aboveDigits = cc > TQChar('9');
+        const bool belowDigits = cc < TQChar('0');
+        const bool otherBelow = belowDigits && cc != '-' && cc != '*' && cc != '.';
+        if (aboveDigits || otherBelow)
+            str[idx] = ' ';
+    }
+    str = str.simplifyWhiteSpace();
+
+    len = str.contains(' ') + 1;
+    tdehtml::Length* result = new tdehtml::Length[len];
+
+    int count = 0;
+    int start = 0;
+    for (int sep = str.find(' ', start); sep != -1; sep = str.find(' ', start)) {
+        result[count++] = parseLength((TQChar *) str.unicode()+start, sep-start);
+        start = sep+1;
+    }
+    // last token: everything after the final separator
+    result[count] = parseLength((TQChar *) str.unicode()+start, str.length()-start);
+
+    return result;
+}
+
+// Converts a comma separated list into a new[]-allocated array of lengths,
+// parsing each entry with parseLength(). len receives the entry count.
+// Without any comma no array is created: 0 is returned and len is 1.
+tdehtml::Length* DOMStringImpl::toLengthArray(int& len) const
+{
+    TQString str = TQString(s, l).simplifyWhiteSpace();
+
+    len = str.contains(',') + 1;
+
+    // If we have no commas, we have no array.
+    if (len == 1)
+        return 0L;
+
+    tdehtml::Length* result = new tdehtml::Length[len];
+
+    int count = 0;
+    int start = 0;
+    for (int comma = str.find(',', start); comma != -1; comma = str.find(',', start)) {
+        result[count++] = parseLength((TQChar *) str.unicode()+start, comma-start);
+        start = comma+1;
+    }
+
+    /* IE Quirk: If the last comma is the last char skip it and reduce len by one */
+    const unsigned int remaining = str.length()-start;
+    if (remaining > 0)
+        result[count] = parseLength((TQChar *) str.unicode()+start, remaining);
+    else
+        len--;
+
+    return result;
+}
+
+// Returns true when lower-casing would leave every character unchanged
+// (trivially true for an empty string).
+bool DOMStringImpl::isLower() const
+{
+    unsigned int idx = 0;
+    while (idx < l) {
+        if (s[idx].lower() != s[idx])
+            return false;
+        ++idx;
+    }
+    return true;
+}
+
+// Returns a newly allocated copy of this string with every character
+// lower-cased. An empty string yields a default-constructed result.
+DOMStringImpl *DOMStringImpl::lower() const
+{
+    DOMStringImpl *result = new DOMStringImpl;
+    if (l) {
+        result->l = l;
+        result->s = QT_ALLOC_QCHAR_VEC(l);
+
+        unsigned int idx = 0;
+        while (idx < l) {
+            result->s[idx] = s[idx].lower();
+            ++idx;
+        }
+    }
+    return result;
+}
+
+// Returns a newly allocated copy of this string with every character
+// upper-cased. An empty string yields a default-constructed result.
+DOMStringImpl *DOMStringImpl::upper() const
+{
+    DOMStringImpl *result = new DOMStringImpl;
+    if (l) {
+        result->l = l;
+        result->s = QT_ALLOC_QCHAR_VEC(l);
+
+        unsigned int idx = 0;
+        while (idx < l) {
+            result->s[idx] = s[idx].upper();
+            ++idx;
+        }
+    }
+    return result;
+}
+
+// Returns a newly allocated copy in which the first letter or digit
+// following white-space is upper-cased. The first word is capitalised
+// too unless noFirstCap is set. An empty string yields a
+// default-constructed result.
+DOMStringImpl *DOMStringImpl::capitalize(bool noFirstCap) const
+{
+    DOMStringImpl *result = new DOMStringImpl;
+    if (!l)
+        return result;
+
+    result->s = QT_ALLOC_QCHAR_VEC(l);
+    result->l = l;
+
+    // true while the next letter/digit should be upper-cased
+    bool wordStart = !noFirstCap;
+    for (unsigned int idx = 0; idx < l; ++idx) {
+        const TQChar ch = s[idx];
+        if (wordStart && ch.isLetterOrNumber()) {
+            result->s[idx] = ch.upper();
+            wordStart = false;
+            continue;
+        }
+
+        result->s[idx] = ch;
+        if (ch.isSpace())
+            wordStart = true;
+    }
+
+    return result;
+}
+
+// Returns the contents as a TQString built from the buffer and length.
+TQString DOMStringImpl::string() const
+{
+    TQString copy(s, l);
+    return copy;
+}
+
+// Converts the leading integer of the string to an int. Only the prefix
+// matching \s*[+-]?\d* is handed to TQString::toInt(), so trailing
+// characters are ignored; ok is forwarded to that call.
+int DOMStringImpl::toInt(bool* ok) const
+{
+    unsigned end = 0;
+
+    // leading white-space
+    for (; end < l && s[end].isSpace(); ++end) {}
+
+    // optional sign
+    if (end < l && (s[end] == '+' || s[end] == '-'))
+        ++end;
+
+    // digits
+    for (; end < l && s[end].isDigit(); ++end) {}
+
+    return TQConstString(s, end).string().toInt(ok);
+}
+
+// UTF-16 code units of the entity replacements used by escapeHTML().
+static const unsigned short amp[] = {'&', 'a', 'm', 'p', ';'};
+static const unsigned short lt[] = {'&', 'l', 't', ';'};
+static const unsigned short gt[] = {'&', 'g', 't', ';'};
+
+// Replaces '&', '<' and '>' by "&amp;", "&lt;" and "&gt;". Returns `this`
+// when the string contains none of them, otherwise a newly allocated
+// DOMStringImpl holding the escaped text.
+DOMStringImpl *DOMStringImpl::escapeHTML()
+{
+    // First pass: compute the length of the escaped string.
+    unsigned outL = 0;
+    for (unsigned int idx = 0; idx < l; ++idx) {
+        switch (s[idx].unicode()) {
+        case '&':
+            outL += 5; //&amp;
+            break;
+        case '<':
+        case '>':
+            outL += 4; //&gt;/&lt;
+            break;
+        default:
+            ++outL;
+            break;
+        }
+    }
+    if (outL == l)
+        return this;
+
+    DOMStringImpl* toRet = new DOMStringImpl();
+    toRet->s = QT_ALLOC_QCHAR_VEC(outL);
+    toRet->l = outL;
+
+    // Second pass: copy characters, substituting the entity tables.
+    unsigned outP = 0;
+    for (unsigned int idx = 0; idx < l; ++idx) {
+        const unsigned short *entity = 0;
+        unsigned entityLen = 0;
+        switch (s[idx].unicode()) {
+        case '&':
+            entity = amp;
+            entityLen = 5;
+            break;
+        case '<':
+            entity = lt;
+            entityLen = 4;
+            break;
+        case '>':
+            entity = gt;
+            entityLen = 4;
+            break;
+        default:
+            break;
+        }
+
+        if (entity) {
+            memcpy(&toRet->s[outP], entity, entityLen*sizeof(unsigned short));
+            outP += entityLen;
+        } else {
+            toRet->s[outP] = s[idx];
+            ++outP;
+        }
+    }
+    return toRet;
+}
+