summaryrefslogtreecommitdiffstats
path: root/khtml/html/htmltokenizer.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'khtml/html/htmltokenizer.cpp')
-rw-r--r--khtml/html/htmltokenizer.cpp1798
1 files changed, 0 insertions, 1798 deletions
diff --git a/khtml/html/htmltokenizer.cpp b/khtml/html/htmltokenizer.cpp
deleted file mode 100644
index 50e3ea69e..000000000
--- a/khtml/html/htmltokenizer.cpp
+++ /dev/null
@@ -1,1798 +0,0 @@
-/*
- This file is part of the KDE libraries
-
- Copyright (C) 1997 Martin Jones ([email protected])
- (C) 1997 Torben Weis ([email protected])
- (C) 1998 Waldo Bastian ([email protected])
- (C) 1999 Lars Knoll ([email protected])
- (C) 1999 Antti Koivisto ([email protected])
- (C) 2001-2003 Dirk Mueller ([email protected])
- (C) 2004 Apple Computer, Inc.
- (C) 2006 Germain Garand ([email protected])
-
- This library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Library General Public
- License as published by the Free Software Foundation; either
- version 2 of the License, or (at your option) any later version.
-
- This library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Library General Public License for more details.
-
- You should have received a copy of the GNU Library General Public License
- along with this library; see the file COPYING.LIB. If not, write to
- the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
- Boston, MA 02110-1301, USA.
-*/
-//----------------------------------------------------------------------------
-//
-// KDE HTML Widget - Tokenizers
-
-//#define TOKEN_DEBUG 1
-//#define TOKEN_DEBUG 2
-
-#ifdef HAVE_CONFIG_H
-#include "config.h"
-#endif
-
-#include "html/htmltokenizer.h"
-#include "html/html_documentimpl.h"
-#include "html/htmlparser.h"
-#include "html/dtd.h"
-
-#include "misc/loader.h"
-#include "misc/htmlhashes.h"
-
-#include "khtmlview.h"
-#include "khtml_part.h"
-#include "xml/dom_docimpl.h"
-#include "css/csshelper.h"
-#include "ecma/kjs_proxy.h"
-#include <kcharsets.h>
-#include <kglobal.h>
-#include <ctype.h>
-#include <assert.h>
-#include <tqvariant.h>
-#include <kdebug.h>
-#include <stdlib.h>
-
-#include "kentities.c"
-
-using namespace khtml;
-
-static const TQChar commentStart [] = { '<','!','-','-', TQChar::null };
-
-static const char scriptEnd [] = "</script";
-static const char xmpEnd [] = "</xmp";
-static const char styleEnd [] = "</style";
-static const char textareaEnd [] = "</textarea";
-static const char titleEnd [] = "</title";
-
-#define KHTML_ALLOC_QCHAR_VEC( N ) (TQChar*) malloc( sizeof(TQChar)*( N ) )
-#define KHTML_REALLOC_QCHAR_VEC(P, N ) (TQChar*) realloc(P, sizeof(TQChar)*( N ))
-#define KHTML_DELETE_QCHAR_VEC( P ) free((char*)( P ))
-
-// Full support for MS Windows extensions to Latin-1.
-// Technically these extensions should only be activated for pages
-// marked "windows-1252" or "cp1252", but
-// in the standard Microsoft way, these extensions infect hundreds of thousands
-// of web pages. Note that people with non-latin-1 Microsoft extensions
-// are SOL.
-//
-// See: http://www.microsoft.com/globaldev/reference/WinCP.asp
-// http://www.bbsinc.com/iso8859.html
-// http://www.obviously.com/
-//
-// There may be better equivalents
-#if 0
-#define fixUpChar(x)
-#else
-#define fixUpChar(x) \
- switch ((x).unicode()) \
- { \
- case 0x80: (x) = 0x20ac; break; \
- case 0x82: (x) = 0x201a; break; \
- case 0x83: (x) = 0x0192; break; \
- case 0x84: (x) = 0x201e; break; \
- case 0x85: (x) = 0x2026; break; \
- case 0x86: (x) = 0x2020; break; \
- case 0x87: (x) = 0x2021; break; \
- case 0x88: (x) = 0x02C6; break; \
- case 0x89: (x) = 0x2030; break; \
- case 0x8A: (x) = 0x0160; break; \
- case 0x8b: (x) = 0x2039; break; \
- case 0x8C: (x) = 0x0152; break; \
- case 0x8E: (x) = 0x017D; break; \
- case 0x91: (x) = 0x2018; break; \
- case 0x92: (x) = 0x2019; break; \
- case 0x93: (x) = 0x201C; break; \
- case 0x94: (x) = 0X201D; break; \
- case 0x95: (x) = 0x2022; break; \
- case 0x96: (x) = 0x2013; break; \
- case 0x97: (x) = 0x2014; break; \
- case 0x98: (x) = 0x02DC; break; \
- case 0x99: (x) = 0x2122; break; \
- case 0x9A: (x) = 0x0161; break; \
- case 0x9b: (x) = 0x203A; break; \
- case 0x9C: (x) = 0x0153; break; \
- case 0x9E: (x) = 0x017E; break; \
- case 0x9F: (x) = 0x0178; break; \
- default: break; \
- }
-#endif
-// ----------------------------------------------------------------------------
-
-HTMLTokenizer::HTMLTokenizer(DOM::DocumentImpl *_doc, KHTMLView *_view)
-{
- view = _view;
- buffer = 0;
- scriptCode = 0;
- scriptCodeSize = scriptCodeMaxSize = scriptCodeResync = 0;
- charsets = TDEGlobal::charsets();
- parser = new KHTMLParser(_view, _doc);
- m_executingScript = 0;
- m_autoCloseTimer = 0;
- onHold = false;
-
- reset();
-}
-
-HTMLTokenizer::HTMLTokenizer(DOM::DocumentImpl *_doc, DOM::DocumentFragmentImpl *i)
-{
- view = 0;
- buffer = 0;
- scriptCode = 0;
- scriptCodeSize = scriptCodeMaxSize = scriptCodeResync = 0;
- charsets = TDEGlobal::charsets();
- parser = new KHTMLParser( i, _doc );
- m_executingScript = 0;
- m_autoCloseTimer = 0;
- onHold = false;
-
- reset();
-}
-
-void HTMLTokenizer::reset()
-{
- assert(m_executingScript == 0);
- Q_ASSERT(onHold == false);
- m_abort = false;
-
- while (!cachedScript.isEmpty())
- cachedScript.dequeue()->deref(this);
-
- if ( buffer )
- KHTML_DELETE_QCHAR_VEC(buffer);
- buffer = dest = 0;
- size = 0;
-
- if ( scriptCode )
- KHTML_DELETE_QCHAR_VEC(scriptCode);
- scriptCode = 0;
- scriptCodeSize = scriptCodeMaxSize = scriptCodeResync = 0;
-
- if (m_autoCloseTimer) {
- killTimer(m_autoCloseTimer);
- m_autoCloseTimer = 0;
- }
-
- currToken.reset();
-}
-
-void HTMLTokenizer::begin()
-{
- m_executingScript = 0;
- onHold = false;
- reset();
- size = 254;
- buffer = KHTML_ALLOC_QCHAR_VEC( 255 );
- dest = buffer;
- tag = NoTag;
- pending = NonePending;
- discard = NoneDiscard;
- pre = false;
- prePos = 0;
- plaintext = false;
- xmp = false;
- processingInstruction = false;
- script = false;
- escaped = false;
- style = false;
- skipLF = false;
- select = false;
- comment = false;
- server = false;
- textarea = false;
- title = false;
- startTag = false;
- tquote = NoQuote;
- searchCount = 0;
- Entity = NoEntity;
- noMoreData = false;
- brokenComments = false;
- brokenServer = false;
- brokenScript = false;
- lineno = 0;
- scriptStartLineno = 0;
- tagStartLineno = 0;
-}
-
-void HTMLTokenizer::processListing(TokenizerString list)
-{
- bool old_pre = pre;
-
- // This function adds the listing 'list' as
- // preformatted text-tokens to the token-collection
- // thereby converting TABs.
- if(!style) pre = true;
- prePos = 0;
-
- while ( !list.isEmpty() )
- {
- checkBuffer(3*TAB_SIZE);
-
- if (skipLF && ( *list != '\n' ))
- {
- skipLF = false;
- }
-
- if (skipLF)
- {
- skipLF = false;
- ++list;
- }
- else if (( *list == '\n' ) || ( *list == '\r' ))
- {
- if (discard == LFDiscard)
- {
- // Ignore this LF
- discard = NoneDiscard; // We have discarded 1 LF
- }
- else
- {
- // Process this LF
- if (pending)
- addPending();
-
- // we used to do it not at all and we want to have
- // it fixed for textarea. So here we are
- if ( textarea ) {
- prePos++;
- *dest++ = *list;
- } else
- pending = LFPending;
- }
- /* Check for MS-DOS CRLF sequence */
- if (*list == '\r')
- {
- skipLF = true;
- }
- ++list;
- }
- else if (( *list == ' ' ) || ( *list == '\t'))
- {
- if (pending)
- addPending();
- if (*list == ' ')
- pending = SpacePending;
- else
- pending = TabPending;
-
- ++list;
- }
- else
- {
- discard = NoneDiscard;
- if (pending)
- addPending();
-
- prePos++;
- *dest++ = *list;
- ++list;
- }
-
- }
-
- if ((pending == SpacePending) || (pending == TabPending))
- addPending();
- else
- pending = NonePending;
-
- prePos = 0;
- pre = old_pre;
-}
-
-void HTMLTokenizer::parseSpecial(TokenizerString &src)
-{
- assert( textarea || title || !Entity );
- assert( !tag );
- assert( xmp+textarea+title+style+script == 1 );
- if (script)
- scriptStartLineno = lineno+src.lineCount();
-
- if ( comment ) parseComment( src );
-
- while ( !src.isEmpty() ) {
- checkScriptBuffer();
- unsigned char ch = src->latin1();
- if ( !scriptCodeResync && !brokenComments && !textarea && !xmp && ch == '-' && scriptCodeSize >= 3 && !src.escaped() && TQConstString( scriptCode+scriptCodeSize-3, 3 ).string() == "<!-" ) {
- comment = true;
- scriptCode[ scriptCodeSize++ ] = ch;
- ++src;
- parseComment( src );
- continue;
- }
- if ( scriptCodeResync && !tquote && ( ch == '>' ) ) {
- ++src;
- scriptCodeSize = scriptCodeResync-1;
- scriptCodeResync = 0;
- scriptCode[ scriptCodeSize ] = scriptCode[ scriptCodeSize + 1 ] = 0;
- if ( script )
- scriptHandler();
- else {
- processListing(TokenizerString(scriptCode, scriptCodeSize));
- processToken();
- if ( style ) { currToken.tid = ID_STYLE + ID_CLOSE_TAG; }
- else if ( textarea ) { currToken.tid = ID_TEXTAREA + ID_CLOSE_TAG; }
- else if ( title ) { currToken.tid = ID_TITLE + ID_CLOSE_TAG; }
- else if ( xmp ) { currToken.tid = ID_XMP + ID_CLOSE_TAG; }
- processToken();
- script = style = textarea = title = xmp = false;
- tquote = NoQuote;
- scriptCodeSize = scriptCodeResync = 0;
- }
- return;
- }
- // possible end of tagname, lets check.
- if ( !scriptCodeResync && !escaped && !src.escaped() && ( ch == '>' || ch == '/' || ch <= ' ' ) && ch &&
- scriptCodeSize >= searchStopperLen &&
- !TQConstString( scriptCode+scriptCodeSize-searchStopperLen, searchStopperLen ).string().find( searchStopper, 0, false )) {
- scriptCodeResync = scriptCodeSize-searchStopperLen+1;
- tquote = NoQuote;
- continue;
- }
- if ( scriptCodeResync && !escaped ) {
- if(ch == '\"')
- tquote = (tquote == NoQuote) ? DoubleQuote : ((tquote == SingleQuote) ? SingleQuote : NoQuote);
- else if(ch == '\'')
- tquote = (tquote == NoQuote) ? SingleQuote : (tquote == DoubleQuote) ? DoubleQuote : NoQuote;
- else if (tquote != NoQuote && (ch == '\r' || ch == '\n'))
- tquote = NoQuote;
- }
- escaped = ( !escaped && ch == '\\' );
- if (!scriptCodeResync && (textarea||title) && !src.escaped() && ch == '&') {
- TQChar *scriptCodeDest = scriptCode+scriptCodeSize;
- ++src;
- parseEntity(src,scriptCodeDest,true);
- scriptCodeSize = scriptCodeDest-scriptCode;
- }
- else {
- scriptCode[ scriptCodeSize++ ] = *src;
- ++src;
- }
- }
-}
-
-void HTMLTokenizer::scriptHandler()
-{
- TQString currentScriptSrc = scriptSrc;
- scriptSrc = TQString::null;
-
- processListing(TokenizerString(scriptCode, scriptCodeSize));
- TQString exScript( buffer, dest-buffer );
-
- processToken();
- currToken.tid = ID_SCRIPT + ID_CLOSE_TAG;
- processToken();
-
- // Scripts following a frameset element should not be executed or even loaded in the case of extern scripts.
- bool followingFrameset = (parser->doc()->body() && parser->doc()->body()->id() == ID_FRAMESET);
- bool effectiveScript = !parser->skipMode() && !followingFrameset;
- bool deferredScript = false;
-
- if ( effectiveScript ) {
- CachedScript* cs = 0;
-
- // forget what we just got, load from src url instead
- if ( !currentScriptSrc.isEmpty() && javascript &&
- (cs = parser->doc()->docLoader()->requestScript(currentScriptSrc, scriptSrcCharset) )) {
- cachedScript.enqueue(cs);
- }
-
- if (cs) {
- pendingQueue.push(src);
- uint scriptCount = cachedScript.count();
- setSrc(TokenizerString());
- scriptCodeSize = scriptCodeResync = 0;
- cs->ref(this);
- if (cachedScript.count() == scriptCount)
- deferredScript = true;
- }
- else if (currentScriptSrc.isEmpty() && view && javascript ) {
- pendingQueue.push(src);
- setSrc(TokenizerString());
- scriptCodeSize = scriptCodeResync = 0;
- scriptExecution( exScript, TQString::null, tagStartLineno /*scriptStartLineno*/ );
- } else {
- // script was filtered or disallowed
- effectiveScript = false;
- }
- }
-
- script = false;
- scriptCodeSize = scriptCodeResync = 0;
-
- if ( !effectiveScript )
- return;
-
- if ( !m_executingScript && cachedScript.isEmpty() ) {
- src.append(pendingQueue.pop());
- } else if ( cachedScript.isEmpty() ) {
- write( pendingQueue.pop(), false );
- } else if ( !deferredScript && pendingQueue.count() > 1) {
- TokenizerString t = pendingQueue.pop();
- pendingQueue.top().prepend( t );
- }
-}
-
-void HTMLTokenizer::scriptExecution( const TQString& str, const TQString& scriptURL,
- int baseLine)
-{
- bool oldscript = script;
- m_executingScript++;
- script = false;
- TQString url;
- if (scriptURL.isNull() && view)
- url = static_cast<DocumentImpl*>(view->part()->document().handle())->URL().url();
- else
- url = scriptURL;
-
- if (view)
- view->part()->executeScript(url,baseLine+1,Node(),str);
- m_executingScript--;
- script = oldscript;
-}
-
-void HTMLTokenizer::parseComment(TokenizerString &src)
-{
- // SGML strict
- bool strict = parser->doc()->inStrictMode() && parser->doc()->htmlMode() != DocumentImpl::XHtml && !script && !style;
- int delimiterCount = 0;
- bool canClose = false;
-
- checkScriptBuffer(src.length());
- while ( src.length() ) {
- scriptCode[ scriptCodeSize++ ] = *src;
-
-#if defined(TOKEN_DEBUG) && TOKEN_DEBUG > 1
- tqDebug("comment is now: *%s*", src.toString().left(16).latin1());
-#endif
-
- if (strict)
- {
- if (src->unicode() == '-') {
- delimiterCount++;
- if (delimiterCount == 2) {
- delimiterCount = 0;
- canClose = !canClose;
- }
- }
- else
- delimiterCount = 0;
- }
-
- if ((!strict || canClose) && src->unicode() == '>')
- {
- bool handleBrokenComments = brokenComments && !( script || style );
- bool scriptEnd=false;
- if (!strict)
- {
- if ( scriptCodeSize > 2 && scriptCode[scriptCodeSize-3] == '-' &&
- scriptCode[scriptCodeSize-2] == '-' )
- scriptEnd=true;
- }
-
- if (canClose || handleBrokenComments || scriptEnd ){
- ++src;
- if ( !( title || script || xmp || textarea || style) ) {
-#ifdef COMMENTS_IN_DOM
- checkScriptBuffer();
- scriptCode[ scriptCodeSize ] = 0;
- scriptCode[ scriptCodeSize + 1 ] = 0;
- currToken.tid = ID_COMMENT;
- processListing(DOMStringIt(scriptCode, scriptCodeSize - 2));
- processToken();
- currToken.tid = ID_COMMENT + ID_CLOSE_TAG;
- processToken();
-#endif
- scriptCodeSize = 0;
- }
- comment = false;
- return; // Finished parsing comment
- }
- }
- ++src;
- }
-}
-
-void HTMLTokenizer::parseServer(TokenizerString &src)
-{
- checkScriptBuffer(src.length());
- while ( !src.isEmpty() ) {
- scriptCode[ scriptCodeSize++ ] = *src;
- if (src->unicode() == '>' &&
- scriptCodeSize > 1 && scriptCode[scriptCodeSize-2] == '%') {
- ++src;
- server = false;
- scriptCodeSize = 0;
- return; // Finished parsing server include
- }
- ++src;
- }
-}
-
-void HTMLTokenizer::parseProcessingInstruction(TokenizerString &src)
-{
- char oldchar = 0;
- while ( !src.isEmpty() )
- {
- unsigned char chbegin = src->latin1();
- if(chbegin == '\'') {
- tquote = tquote == SingleQuote ? NoQuote : SingleQuote;
- }
- else if(chbegin == '\"') {
- tquote = tquote == DoubleQuote ? NoQuote : DoubleQuote;
- }
- // Look for '?>'
- // some crappy sites omit the "?" before it, so
- // we look for an unquoted '>' instead. (IE compatible)
- else if ( chbegin == '>' && ( !tquote || oldchar == '?' ) )
- {
- // We got a '?>' sequence
- processingInstruction = false;
- ++src;
- discard=LFDiscard;
- return; // Finished parsing comment!
- }
- ++src;
- oldchar = chbegin;
- }
-}
-
-void HTMLTokenizer::parseText(TokenizerString &src)
-{
- while ( !src.isEmpty() )
- {
- // do we need to enlarge the buffer?
- checkBuffer();
-
- // ascii is okay because we only do ascii comparisons
- unsigned char chbegin = src->latin1();
-
- if (skipLF && ( chbegin != '\n' ))
- {
- skipLF = false;
- }
-
- if (skipLF)
- {
- skipLF = false;
- ++src;
- }
- else if (( chbegin == '\n' ) || ( chbegin == '\r' ))
- {
- if (chbegin == '\r')
- skipLF = true;
-
- *dest++ = '\n';
- ++src;
- }
- else {
- *dest++ = *src;
- ++src;
- }
- }
-}
-
-
-void HTMLTokenizer::parseEntity(TokenizerString &src, TQChar *&dest, bool start)
-{
- if( start )
- {
- cBufferPos = 0;
- entityLen = 0;
- Entity = SearchEntity;
- }
-
- while( !src.isEmpty() )
- {
- ushort cc = src->unicode();
- switch(Entity) {
- case NoEntity:
- return;
-
- break;
- case SearchEntity:
- if(cc == '#') {
- cBuffer[cBufferPos++] = cc;
- ++src;
- Entity = NumericSearch;
- }
- else
- Entity = EntityName;
-
- break;
-
- case NumericSearch:
- if(cc == 'x' || cc == 'X') {
- cBuffer[cBufferPos++] = cc;
- ++src;
- Entity = Hexadecimal;
- }
- else if(cc >= '0' && cc <= '9')
- Entity = Decimal;
- else
- Entity = SearchSemicolon;
-
- break;
-
- case Hexadecimal:
- {
- int uc = EntityChar.unicode();
- int ll = kMin<uint>(src.length(), 8);
- while(ll--) {
- TQChar csrc(src->lower());
- cc = csrc.cell();
-
- if(csrc.row() || !((cc >= '0' && cc <= '9') || (cc >= 'a' && cc <= 'f'))) {
- break;
- }
- uc = uc*16 + (cc - ( cc < 'a' ? '0' : 'a' - 10));
- cBuffer[cBufferPos++] = cc;
- ++src;
- }
- EntityChar = TQChar(uc);
- Entity = SearchSemicolon;
- break;
- }
- case Decimal:
- {
- int uc = EntityChar.unicode();
- int ll = kMin(src.length(), 9-cBufferPos);
- while(ll--) {
- cc = src->cell();
-
- if(src->row() || !(cc >= '0' && cc <= '9')) {
- Entity = SearchSemicolon;
- break;
- }
-
- uc = uc * 10 + (cc - '0');
- cBuffer[cBufferPos++] = cc;
- ++src;
- }
- EntityChar = TQChar(uc);
- if(cBufferPos == 9) Entity = SearchSemicolon;
- break;
- }
- case EntityName:
- {
- int ll = kMin(src.length(), 9-cBufferPos);
- while(ll--) {
- TQChar csrc = *src;
- cc = csrc.cell();
-
- if(csrc.row() || !((cc >= 'a' && cc <= 'z') ||
- (cc >= '0' && cc <= '9') || (cc >= 'A' && cc <= 'Z'))) {
- Entity = SearchSemicolon;
- break;
- }
-
- cBuffer[cBufferPos++] = cc;
- ++src;
-
- // be IE compatible and interpret even unterminated entities
- // outside tags. like "foo &nbspstuff bla".
- if ( tag == NoTag ) {
- const entity* e = kde_findEntity(cBuffer, cBufferPos);
- if ( e && e->code < 256 ) {
- EntityChar = e->code;
- entityLen = cBufferPos;
- }
- }
- }
- if(cBufferPos == 9) Entity = SearchSemicolon;
- if(Entity == SearchSemicolon) {
- if(cBufferPos > 1) {
- const entity *e = kde_findEntity(cBuffer, cBufferPos);
- // IE only accepts unterminated entities < 256,
- // Gecko accepts them all, but only outside tags
- if(e && ( tag == NoTag || e->code < 256 || *src == ';' )) {
- EntityChar = e->code;
- entityLen = cBufferPos;
- }
- }
- }
- break;
- }
- case SearchSemicolon:
-#ifdef TOKEN_DEBUG
- kdDebug( 6036 ) << "ENTITY " << EntityChar.unicode() << endl;
-#endif
- fixUpChar(EntityChar);
-
- if (*src == ';')
- ++src;
-
- if ( !EntityChar.isNull() ) {
- checkBuffer();
- if (entityLen > 0 && entityLen < cBufferPos) {
- int rem = cBufferPos - entityLen;
- src.prepend( TokenizerString(TQString::fromAscii(cBuffer+entityLen, rem)) );
- }
- src.push( EntityChar );
- } else {
-#ifdef TOKEN_DEBUG
- kdDebug( 6036 ) << "unknown entity!" << endl;
-#endif
- checkBuffer(11);
- // ignore the sequence, add it to the buffer as plaintext
- *dest++ = '&';
- for(unsigned int i = 0; i < cBufferPos; i++)
- dest[i] = cBuffer[i];
- dest += cBufferPos;
- if (pre)
- prePos += cBufferPos+1;
- }
-
- Entity = NoEntity;
- EntityChar = TQChar::null;
- return;
- };
- }
-}
-
-void HTMLTokenizer::parseTag(TokenizerString &src)
-{
- assert(!Entity );
- checkScriptBuffer( src.length() );
-
- while ( !src.isEmpty() )
- {
- checkBuffer();
-#if defined(TOKEN_DEBUG) && TOKEN_DEBUG > 1
- uint l = 0;
- while(l < src.length() && (src.toString()[l]).latin1() != '>')
- l++;
- tqDebug("src is now: *%s*, tquote: %d",
- src.toString().left(l).latin1(), tquote);
-#endif
- switch(tag) {
- case NoTag:
- return;
- case TagName:
- {
-#if defined(TOKEN_DEBUG) && TOKEN_DEBUG > 1
- tqDebug("TagName");
-#endif
- if (searchCount > 0)
- {
- if (*src == commentStart[searchCount])
- {
- searchCount++;
- if (searchCount == 4)
- {
-#ifdef TOKEN_DEBUG
- kdDebug( 6036 ) << "Found comment" << endl;
-#endif
- // Found '<!--' sequence
- ++src;
- dest = buffer; // ignore the previous part of this tag
- tag = NoTag;
-
- comment = true;
- parseComment(src);
- return; // Finished parsing tag!
- }
- // cuts of high part, is okay
- cBuffer[cBufferPos++] = src->cell();
- ++src;
- break;
- }
- else
- searchCount = 0; // Stop looking for '<!--' sequence
- }
-
- bool finish = false;
- unsigned int ll = kMin(src.length(), CBUFLEN-cBufferPos);
- while(ll--) {
- ushort curchar = *src;
- if(curchar <= ' ' || curchar == '>' ) {
- finish = true;
- break;
- }
- // this is a nasty performance trick. will work for the A-Z
- // characters, but not for others. if it contains one,
- // we fail anyway
- char cc = curchar;
- cBuffer[cBufferPos++] = cc | 0x20;
- ++src;
- }
-
- // Disadvantage: we add the possible rest of the tag
- // as attribute names. ### judge if this causes problems
- if(finish || CBUFLEN == cBufferPos) {
- bool beginTag;
- char* ptr = cBuffer;
- unsigned int len = cBufferPos;
- cBuffer[cBufferPos] = '\0';
- if ((cBufferPos > 0) && (*ptr == '/'))
- {
- // End Tag
- beginTag = false;
- ptr++;
- len--;
- }
- else
- // Start Tag
- beginTag = true;
- // Accept empty xml tags like <br/>
- if(len > 1 && ptr[len-1] == '/' ) {
- ptr[--len] = '\0';
- // if its like <br/> and not like <input/ value=foo>, take it as flat
- if (*src == '>')
- currToken.flat = true;
- }
-
- uint tagID = khtml::getTagID(ptr, len);
- if (!tagID) {
-#ifdef TOKEN_DEBUG
- TQCString tmp(ptr, len+1);
- kdDebug( 6036 ) << "Unknown tag: \"" << tmp.data() << "\"" << endl;
-#endif
- dest = buffer;
- }
- else
- {
-#ifdef TOKEN_DEBUG
- TQCString tmp(ptr, len+1);
- kdDebug( 6036 ) << "found tag id=" << tagID << ": " << tmp.data() << endl;
-#endif
- currToken.tid = beginTag ? tagID : tagID + ID_CLOSE_TAG;
- dest = buffer;
- }
- tag = SearchAttribute;
- cBufferPos = 0;
- }
- break;
- }
- case SearchAttribute:
- {
-#if defined(TOKEN_DEBUG) && TOKEN_DEBUG > 1
- tqDebug("SearchAttribute");
-#endif
- bool atespace = false;
- ushort curchar;
- while(!src.isEmpty()) {
- curchar = *src;
- if(curchar > ' ') {
- if(curchar == '<' || curchar == '>')
- tag = SearchEnd;
- else if(atespace && (curchar == '\'' || curchar == '"'))
- {
- tag = SearchValue;
- *dest++ = 0;
- attrName = TQString::null;
- }
- else
- tag = AttributeName;
-
- cBufferPos = 0;
- break;
- }
- atespace = true;
- ++src;
- }
- break;
- }
- case AttributeName:
- {
-#if defined(TOKEN_DEBUG) && TOKEN_DEBUG > 1
- tqDebug("AttributeName");
-#endif
- ushort curchar;
- int ll = kMin(src.length(), CBUFLEN-cBufferPos);
-
- while(ll--) {
- curchar = *src;
- if(curchar <= '>') {
- if(curchar <= ' ' || curchar == '=' || curchar == '>') {
- unsigned int a;
- cBuffer[cBufferPos] = '\0';
- a = khtml::getAttrID(cBuffer, cBufferPos);
- if ( !a ) {
- // did we just get /> or e.g checked/>
- if (curchar == '>' && cBufferPos >=1 && cBuffer[cBufferPos-1] == '/') {
- currToken.flat = true;
- if (cBufferPos>1)
- a = khtml::getAttrID(cBuffer, cBufferPos-1);
- }
- if (!a)
- attrName = TQString::fromLatin1(TQCString(cBuffer, cBufferPos+1).data());
- }
-
- dest = buffer;
- *dest++ = a;
-#ifdef TOKEN_DEBUG
- if (!a || (cBufferPos && *cBuffer == '!'))
- kdDebug( 6036 ) << "Unknown attribute: *" << TQCString(cBuffer, cBufferPos+1).data() << "*" << endl;
- else
- kdDebug( 6036 ) << "Known attribute: " << TQCString(cBuffer, cBufferPos+1).data() << endl;
-#endif
-
- tag = SearchEqual;
- break;
- }
- }
- cBuffer[cBufferPos++] =
- ( curchar >= 'A' && curchar <= 'Z' ) ? curchar | 0x20 : curchar;
- ++src;
- }
- if ( cBufferPos == CBUFLEN ) {
- cBuffer[cBufferPos] = '\0';
- attrName = TQString::fromLatin1(TQCString(cBuffer, cBufferPos+1).data());
- dest = buffer;
- *dest++ = 0;
- tag = SearchEqual;
- }
- break;
- }
- case SearchEqual:
- {
-#if defined(TOKEN_DEBUG) && TOKEN_DEBUG > 1
- tqDebug("SearchEqual");
-#endif
- ushort curchar;
- bool atespace = false;
- while(!src.isEmpty()) {
- curchar = src->unicode();
- if(curchar > ' ') {
- if(curchar == '=') {
-#ifdef TOKEN_DEBUG
- kdDebug(6036) << "found equal" << endl;
-#endif
- tag = SearchValue;
- ++src;
- }
- else if(atespace && (curchar == '\'' || curchar == '"'))
- {
- tag = SearchValue;
- *dest++ = 0;
- attrName = TQString::null;
- }
- else {
- DOMString v("");
- currToken.addAttribute(parser->docPtr(), buffer, attrName, v);
- dest = buffer;
- tag = SearchAttribute;
- }
- break;
- }
- atespace = true;
- ++src;
- }
- break;
- }
- case SearchValue:
- {
- ushort curchar;
- while(!src.isEmpty()) {
- curchar = src->unicode();
- if(curchar > ' ') {
- if(( curchar == '\'' || curchar == '\"' )) {
- tquote = curchar == '\"' ? DoubleQuote : SingleQuote;
- tag = QuotedValue;
- ++src;
- } else
- tag = Value;
-
- break;
- }
- ++src;
- }
- break;
- }
- case QuotedValue:
- {
-#if defined(TOKEN_DEBUG) && TOKEN_DEBUG > 1
- tqDebug("QuotedValue");
-#endif
- ushort curchar;
- while(!src.isEmpty()) {
- checkBuffer();
-
- curchar = src->unicode();
- if(curchar <= '\'' && !src.escaped()) {
- // ### attributes like '&{blaa....};' are supposed to be treated as jscript.
- if ( curchar == '&' )
- {
- ++src;
- parseEntity(src, dest, true);
- break;
- }
- else if ( (tquote == SingleQuote && curchar == '\'') ||
- (tquote == DoubleQuote && curchar == '\"') )
- {
- // some <input type=hidden> rely on trailing spaces. argh
- while(dest > buffer+1 && (*(dest-1) == '\n' || *(dest-1) == '\r'))
- dest--; // remove trailing newlines
- DOMString v(buffer+1, dest-buffer-1);
- currToken.addAttribute(parser->docPtr(), buffer, attrName, v);
-
- dest = buffer;
- tag = SearchAttribute;
- tquote = NoQuote;
- ++src;
- break;
- }
- }
- *dest++ = *src;
- ++src;
- }
- break;
- }
- case Value:
- {
-#if defined(TOKEN_DEBUG) && TOKEN_DEBUG > 1
- tqDebug("Value");
-#endif
- ushort curchar;
- while(!src.isEmpty()) {
- checkBuffer();
- curchar = src->unicode();
- if(curchar <= '>' && !src.escaped()) {
- // parse Entities
- if ( curchar == '&' )
- {
- ++src;
- parseEntity(src, dest, true);
- break;
- }
- // no quotes. Every space means end of value
- // '/' does not delimit in IE!
- if ( curchar <= ' ' || curchar == '>' )
- {
- DOMString v(buffer+1, dest-buffer-1);
- currToken.addAttribute(parser->docPtr(), buffer, attrName, v);
- dest = buffer;
- tag = SearchAttribute;
- break;
- }
- }
-
- *dest++ = *src;
- ++src;
- }
- break;
- }
- case SearchEnd:
- {
-#if defined(TOKEN_DEBUG) && TOKEN_DEBUG > 1
- tqDebug("SearchEnd");
-#endif
- while(!src.isEmpty()) {
- if(*src == '<' || *src == '>')
- break;
-
- if (*src == '/')
- currToken.flat = true;
-
- ++src;
- }
- if(src.isEmpty() && *src != '<' && *src != '>') break;
-
- searchCount = 0; // Stop looking for '<!--' sequence
- tag = NoTag;
- tquote = NoQuote;
- if ( *src == '>' )
- ++src;
-
- if ( !currToken.tid ) //stop if tag is unknown
- return;
-
- uint tagID = currToken.tid;
-#if defined(TOKEN_DEBUG) && TOKEN_DEBUG > 0
- kdDebug( 6036 ) << "appending Tag: " << tagID << endl;
-#endif
- // If the tag requires an end tag it cannot be flat,
- // unless we are using the HTML parser to parse XHTML
- // The only exception is SCRIPT and priority 0 tokens.
- if (tagID < ID_CLOSE_TAG && tagID != ID_SCRIPT &&
- DOM::endTag[tagID] == DOM::REQUIRED &&
- parser->doc()->htmlMode() != DocumentImpl::XHtml)
- currToken.flat = false;
-
- bool beginTag = !currToken.flat && (tagID < ID_CLOSE_TAG);
-
- if(tagID >= ID_CLOSE_TAG)
- tagID -= ID_CLOSE_TAG;
- else if ( !brokenScript && tagID == ID_SCRIPT ) {
- DOMStringImpl* a = 0;
- bool foundTypeAttribute = false;
- scriptSrc = scriptSrcCharset = TQString::null;
- if ( currToken.attrs && /* potentially have a ATTR_SRC ? */
- view && /* are we a regular tokenizer or just for innerHTML ? */
- parser->doc()->view()->part()->jScriptEnabled() /* jscript allowed at all? */
- ) {
- if ( ( a = currToken.attrs->getValue( ATTR_SRC ) ) )
- scriptSrc = parser->doc()->completeURL(khtml::parseURL( DOMString(a) ).string() );
- if ( ( a = currToken.attrs->getValue( ATTR_CHARSET ) ) )
- scriptSrcCharset = DOMString(a).string().stripWhiteSpace();
- if ( scriptSrcCharset.isEmpty() && view)
- scriptSrcCharset = parser->doc()->view()->part()->encoding();
- /* Check type before language, since language is deprecated */
- if ((a = currToken.attrs->getValue(ATTR_TYPE)) != 0 && !DOMString(a).string().isEmpty())
- foundTypeAttribute = true;
- else
- a = currToken.attrs->getValue(ATTR_LANGUAGE);
- }
- javascript = true;
-
- if( foundTypeAttribute ) {
- /*
- Mozilla 1.5 doesn't accept the text/javascript1.x formats, but WinIE 6 does.
- Mozilla 1.5 doesn't accept text/jscript, text/ecmascript, and text/livescript, but WinIE 6 does.
- Mozilla 1.5 accepts application/x-javascript, WinIE 6 doesn't.
- Mozilla 1.5 allows leading and trailing whitespace, but WinIE 6 doesn't.
- Mozilla 1.5 and WinIE 6 both accept the empty string, but neither accept a whitespace-only string.
- We want to accept all the values that either of these browsers accept, but not other values.
- */
- TQString type = DOMString(a).string().stripWhiteSpace().lower();
- if( type.compare("text/javascript") != 0 &&
- type.compare("text/javascript1.0") != 0 &&
- type.compare("text/javascript1.1") != 0 &&
- type.compare("text/javascript1.2") != 0 &&
- type.compare("text/javascript1.3") != 0 &&
- type.compare("text/javascript1.4") != 0 &&
- type.compare("text/javascript1.5") != 0 &&
- type.compare("text/jscript") != 0 &&
- type.compare("text/ecmascript") != 0 &&
- type.compare("text/livescript") != 0 &&
- type.compare("application/x-javascript") != 0 &&
- type.compare("application/x-ecmascript") != 0 &&
- type.compare("application/javascript") != 0 &&
- type.compare("application/ecmascript") != 0 )
- javascript = false;
- } else if( a ) {
- /*
- Mozilla 1.5 doesn't accept jscript or ecmascript, but WinIE 6 does.
- Mozilla 1.5 accepts javascript1.0, javascript1.4, and javascript1.5, but WinIE 6 accepts only 1.1 - 1.3.
- Neither Mozilla 1.5 nor WinIE 6 accept leading or trailing whitespace.
- We want to accept all the values that either of these browsers accept, but not other values.
- */
- TQString lang = DOMString(a).string();
- lang = lang.lower();
- if( lang.compare("") != 0 &&
- lang.compare("javascript") != 0 &&
- lang.compare("javascript1.0") != 0 &&
- lang.compare("javascript1.1") != 0 &&
- lang.compare("javascript1.2") != 0 &&
- lang.compare("javascript1.3") != 0 &&
- lang.compare("javascript1.4") != 0 &&
- lang.compare("javascript1.5") != 0 &&
- lang.compare("ecmascript") != 0 &&
- lang.compare("livescript") != 0 &&
- lang.compare("jscript") )
- javascript = false;
- }
- }
-
- processToken();
-
- if ( parser->selectMode() && beginTag)
- discard = AllDiscard;
-
- switch( tagID ) {
- case ID_PRE:
- pre = beginTag;
- if (beginTag)
- discard = LFDiscard;
- prePos = 0;
- break;
- case ID_BR:
- prePos = 0;
- break;
- case ID_SCRIPT:
- if (beginTag) {
- searchStopper = scriptEnd;
- searchStopperLen = 8;
- script = true;
- parseSpecial(src);
- }
- else if (tagID < ID_CLOSE_TAG) // Handle <script src="foo"/>
- scriptHandler();
- break;
- case ID_STYLE:
- if (beginTag) {
- searchStopper = styleEnd;
- searchStopperLen = 7;
- style = true;
- parseSpecial(src);
- }
- break;
- case ID_TEXTAREA:
- if(beginTag) {
- searchStopper = textareaEnd;
- searchStopperLen = 10;
- textarea = true;
- discard = NoneDiscard;
- parseSpecial(src);
- }
- break;
- case ID_TITLE:
- if (beginTag) {
- searchStopper = titleEnd;
- searchStopperLen = 7;
- title = true;
- parseSpecial(src);
- }
- break;
- case ID_XMP:
- if (beginTag) {
- searchStopper = xmpEnd;
- searchStopperLen = 5;
- xmp = true;
- parseSpecial(src);
- }
- break;
- case ID_SELECT:
- select = beginTag;
- break;
- case ID_PLAINTEXT:
- plaintext = beginTag;
- break;
- }
- return; // Finished parsing tag!
- }
- } // end switch
- }
- return;
-}
-
-void HTMLTokenizer::addPending()
-{
- if ( select && !(comment || script))
- {
- *dest++ = ' ';
- }
- else if ( textarea )
- {
- switch(pending) {
- case LFPending: *dest++ = '\n'; prePos = 0; break;
- case SpacePending: *dest++ = ' '; ++prePos; break;
- case TabPending: *dest++ = '\t'; prePos += TAB_SIZE - (prePos % TAB_SIZE); break;
- case NonePending:
- assert(0);
- }
- }
- else
- {
- int p;
-
- switch (pending)
- {
- case SpacePending:
- // Insert a breaking space
- *dest++ = TQChar(' ');
- prePos++;
- break;
-
- case LFPending:
- *dest = '\n';
- dest++;
- prePos = 0;
- break;
-
- case TabPending:
- p = TAB_SIZE - ( prePos % TAB_SIZE );
- for ( int x = 0; x < p; x++ )
- *dest++ = TQChar(' ');
- prePos += p;
- break;
-
- case NonePending:
- assert(0);
- break;
- }
- }
-
- pending = NonePending;
-}
-
-void HTMLTokenizer::write( const TokenizerString &str, bool appendData )
-{
-#ifdef TOKEN_DEBUG
- kdDebug( 6036 ) << this << " Tokenizer::write(\"" << str.toString() << "\"," << appendData << ")" << endl;
-#endif
-
- if ( !buffer )
- return;
-
- if ( ( m_executingScript && appendData ) || cachedScript.count() ) {
- // don't parse; we will do this later
- if (pendingQueue.isEmpty())
- pendingQueue.push(str);
- else if (appendData)
- pendingQueue.bottom().append(str);
- else
- pendingQueue.top().append(str);
- return;
- }
-
- if ( onHold ) {
- src.append(str);
- return;
- }
-
- if (!src.isEmpty())
- src.append(str);
- else
- setSrc(str);
- m_abort = false;
-
-// if (Entity)
-// parseEntity(src, dest);
-
- while ( !src.isEmpty() )
- {
- if ( m_abort )
- return;
- // do we need to enlarge the buffer?
- checkBuffer();
-
- ushort cc = src->unicode();
-
- if (skipLF && (cc != '\n'))
- skipLF = false;
-
- if (skipLF) {
- skipLF = false;
- ++src;
- }
- else if ( Entity )
- parseEntity( src, dest );
- else if ( plaintext )
- parseText( src );
- else if (script)
- parseSpecial(src);
- else if (style)
- parseSpecial(src);
- else if (xmp)
- parseSpecial(src);
- else if (textarea)
- parseSpecial(src);
- else if (title)
- parseSpecial(src);
- else if (comment)
- parseComment(src);
- else if (server)
- parseServer(src);
- else if (processingInstruction)
- parseProcessingInstruction(src);
- else if (tag)
- parseTag(src);
- else if ( startTag )
- {
- startTag = false;
- bool endTag = false;
-
- switch(cc) {
- case '/':
- endTag = true;
- break;
- case '!':
- {
- // <!-- comment -->
- searchCount = 1; // Look for '<!--' sequence to start comment
-
- break;
- }
- case '?':
- {
- // xml processing instruction
- processingInstruction = true;
- tquote = NoQuote;
- parseProcessingInstruction(src);
- continue;
-
- break;
- }
- case '%':
- if (!brokenServer) {
- // <% server stuff, handle as comment %>
- server = true;
- tquote = NoQuote;
- parseServer(src);
- continue;
- }
- // else fall through
- default:
- {
- if( ((cc >= 'a') && (cc <= 'z')) || ((cc >= 'A') && (cc <= 'Z')))
- {
- // Start of a Start-Tag
- }
- else
- {
- // Invalid tag
- // Add as is
- if (pending)
- addPending();
- *dest = '<';
- dest++;
- continue;
- }
- }
- }; // end case
-
- // According to SGML any LF immediately after a starttag, or
- // immediately before an endtag should be ignored.
- // ### Gecko and MSIE though only ignores LF immediately after
- // starttags and only for PRE elements -- asj (28/06-2005)
- if ( pending )
- if (!select)
- addPending();
- else
- pending = NonePending;
-
- // Cancel unused discards
- discard = NoneDiscard;
- // if (!endTag) discard = LFDiscard;
-
- processToken();
-
- cBufferPos = 0;
- tag = TagName;
- parseTag(src);
- }
- else if ( cc == '&' && !src.escaped())
- {
- ++src;
- if ( pending )
- addPending();
- discard = NoneDiscard;
- parseEntity(src, dest, true);
- }
- else if ( cc == '<' && !src.escaped())
- {
- tagStartLineno = lineno+src.lineCount();
- ++src;
- discard = NoneDiscard;
- startTag = true;
- }
- else if (( cc == '\n' ) || ( cc == '\r' ))
- {
- if (discard == SpaceDiscard)
- discard = NoneDiscard;
-
- if (discard == LFDiscard) {
- // Ignore one LF
- discard = NoneDiscard;
- }
- else if (discard == AllDiscard)
- {
- // Ignore
- }
- else
- {
- if (select && !script) {
- pending = LFPending;
- } else {
- if (pending)
- addPending();
- pending = LFPending;
- }
- }
-
- /* Check for MS-DOS CRLF sequence */
- if (cc == '\r')
- {
- skipLF = true;
- }
- ++src;
- }
- else if (( cc == ' ' ) || ( cc == '\t' ))
- {
- if(discard == LFDiscard)
- discard = NoneDiscard;
-
- if(discard == SpaceDiscard) {
- // Ignore one space
- discard = NoneDiscard;
- }
- else if(discard == AllDiscard)
- {
- // Ignore
- }
- else {
- if (select && !script) {
- if (!pending)
- pending = SpacePending;
- } else {
- if (pending)
- addPending();
- if (cc == ' ')
- pending = SpacePending;
- else
- pending = TabPending;
- }
- }
-
- ++src;
- }
- else
- {
- if (pending)
- addPending();
-
- discard = NoneDiscard;
- if ( pre )
- {
- prePos++;
- }
- *dest = *src;
- fixUpChar( *dest );
- ++dest;
- ++src;
- }
- }
-
- if (noMoreData && cachedScript.isEmpty() && !m_executingScript)
- end(); // this actually causes us to be deleted
-}
-
-void HTMLTokenizer::timerEvent( TQTimerEvent *e )
-{
- if ( e->timerId() == m_autoCloseTimer && cachedScript.isEmpty() ) {
- finish();
- }
-}
-
-void HTMLTokenizer::setAutoClose( bool b ) {
- killTimer( m_autoCloseTimer );
- m_autoCloseTimer = 0;
- if ( b )
- m_autoCloseTimer = startTimer(100);
-}
-
-void HTMLTokenizer::end()
-{
- if ( buffer == 0 ) {
- emit finishedParsing();
- return;
- }
-
- // parseTag is using the buffer for different matters
- if ( !tag )
- processToken();
-
- if(buffer)
- KHTML_DELETE_QCHAR_VEC(buffer);
-
- if(scriptCode)
- KHTML_DELETE_QCHAR_VEC(scriptCode);
-
- scriptCode = 0;
- scriptCodeSize = scriptCodeMaxSize = scriptCodeResync = 0;
- buffer = 0;
- emit finishedParsing();
-}
-
-void HTMLTokenizer::finish()
-{
- if ( m_autoCloseTimer ) {
- killTimer( m_autoCloseTimer );
- m_autoCloseTimer = 0;
- }
- // do this as long as we don't find matching comment ends
- while((title || script || comment || server) && scriptCode && scriptCodeSize)
- {
- // we've found an unmatched comment start
- if (comment)
- brokenComments = true;
- else if (server)
- brokenServer = true;
- else if (script)
- brokenScript = true;
-
- checkScriptBuffer();
- scriptCode[ scriptCodeSize ] = 0;
- scriptCode[ scriptCodeSize + 1 ] = 0;
- int pos;
- TQString food;
- if (title || style || script)
- food.setUnicode(scriptCode, scriptCodeSize);
- else if (server) {
- food = "<";
- food += TQString(scriptCode, scriptCodeSize);
- }
- else {
- pos = TQConstString(scriptCode, scriptCodeSize).string().find('>');
- food.setUnicode(scriptCode+pos+1, scriptCodeSize-pos-1); // deep copy
- }
- KHTML_DELETE_QCHAR_VEC(scriptCode);
- scriptCode = 0;
- scriptCodeSize = scriptCodeMaxSize = scriptCodeResync = 0;
- if (script)
- scriptHandler();
-
- comment = title = server = script = false;
- if ( !food.isEmpty() )
- write(food, true);
- }
- // this indicates we will not receive any more data... but if we are waiting on
- // an external script to load, we can't finish parsing until that is done
- noMoreData = true;
- if (cachedScript.isEmpty() && !m_executingScript && !onHold)
- end(); // this actually causes us to be deleted
-}
-
-void HTMLTokenizer::processToken()
-{
- KJSProxy *jsProxy = view ? view->part()->jScript() : 0L;
- if (jsProxy)
- jsProxy->setEventHandlerLineno(tagStartLineno+1);
- if ( dest > buffer )
- {
-#if 0
- if(currToken.tid) {
- tqDebug( "unexpected token id: %d, str: *%s*", currToken.tid,TQConstString( buffer,dest-buffer ).string().latin1() );
- assert(0);
- }
-
-#endif
- currToken.text = new DOMStringImpl( buffer, dest - buffer );
- currToken.text->ref();
- currToken.tid = ID_TEXT;
- }
- else if(!currToken.tid) {
- currToken.reset();
- if (jsProxy)
- jsProxy->setEventHandlerLineno(lineno+src.lineCount()+1);
- return;
- }
-
- dest = buffer;
-
-#ifdef TOKEN_DEBUG
- TQString name = TQString( getTagName(currToken.tid) );
- TQString text;
- if(currToken.text)
- text = TQConstString(currToken.text->s, currToken.text->l).string();
-
- kdDebug( 6036 ) << "Token --> " << name << " id = " << currToken.tid << endl;
- if (currToken.flat)
- kdDebug( 6036 ) << "Token is FLAT!" << endl;
- if(!text.isNull())
- kdDebug( 6036 ) << "text: \"" << text << "\"" << endl;
- unsigned long l = currToken.attrs ? currToken.attrs->length() : 0;
- if(l) {
- kdDebug( 6036 ) << "Attributes: " << l << endl;
- for (unsigned long i = 0; i < l; ++i) {
- NodeImpl::Id tid = currToken.attrs->idAt(i);
- DOMString value = currToken.attrs->valueAt(i);
- kdDebug( 6036 ) << " " << tid << " " << parser->doc()->getDocument()->getName(NodeImpl::AttributeId, tid).string()
- << "=\"" << value.string() << "\"" << endl;
- }
- }
- kdDebug( 6036 ) << endl;
-#endif
-
- // In some cases, parseToken() can cause javascript code to be executed
- // (for example, when setting an attribute that causes an event handler
- // to be created). So we need to protect against re-entrancy into the parser
- m_executingScript++;
-
- // pass the token over to the parser, the parser DOES NOT delete the token
- parser->parseToken(&currToken);
-
- m_executingScript--;
-
- if ( currToken.flat && currToken.tid != ID_TEXT && !parser->noSpaces() )
- discard = NoneDiscard;
-
- currToken.reset();
- if (jsProxy)
- jsProxy->setEventHandlerLineno(1);
-}
-
-
-HTMLTokenizer::~HTMLTokenizer()
-{
- reset();
- delete parser;
-}
-
-
-void HTMLTokenizer::enlargeBuffer(int len)
-{
- int newsize = kMax(size*2, size+len);
- int oldoffs = (dest - buffer);
-
- buffer = KHTML_REALLOC_QCHAR_VEC(buffer, newsize);
- dest = buffer + oldoffs;
- size = newsize;
-}
-
-void HTMLTokenizer::enlargeScriptBuffer(int len)
-{
- int newsize = kMax(scriptCodeMaxSize*2, scriptCodeMaxSize+len);
- scriptCode = KHTML_REALLOC_QCHAR_VEC(scriptCode, newsize);
- scriptCodeMaxSize = newsize;
-}
-
-void HTMLTokenizer::notifyFinished(CachedObject* /*finishedObj*/)
-{
- assert(!cachedScript.isEmpty());
- bool done = false;
- while (!done && cachedScript.head()->isLoaded()) {
-
- kdDebug( 6036 ) << "Finished loading an external script" << endl;
-
- CachedScript* cs = cachedScript.dequeue();
- DOMString scriptSource = cs->script();
-#ifdef TOKEN_DEBUG
- kdDebug( 6036 ) << "External script is:" << endl << scriptSource.string() << endl;
-#endif
- setSrc(TokenizerString());
-
- // make sure we forget about the script before we execute the new one
- // infinite recursion might happen otherwise
- TQString cachedScriptUrl( cs->url().string() );
- cs->deref(this);
-
- scriptExecution( scriptSource.string(), cachedScriptUrl );
-
- done = cachedScript.isEmpty();
-
- // 'script' is true when we are called synchronously from
- // scriptHandler(). In that case scriptHandler() will take care
- // of 'scriptOutput'.
- if ( !script ) {
- while (pendingQueue.count() > 1) {
- TokenizerString t = pendingQueue.pop();
- pendingQueue.top().prepend( t );
- }
- if (done) {
- write(pendingQueue.pop(), false);
- }
- // we might be deleted at this point, do not
- // access any members.
- }
- }
-}
-
-bool HTMLTokenizer::isWaitingForScripts() const
-{
- return cachedScript.count();
-}
-
-bool HTMLTokenizer::isExecutingScript() const
-{
- return (m_executingScript > 0);
-}
-
-void HTMLTokenizer::setSrc(const TokenizerString& source)
-{
- lineno += src.lineCount();
- src = source;
- src.resetLineCount();
-}
-
-void HTMLTokenizer::setOnHold(bool _onHold)
-{
- if (onHold == _onHold) return;
- onHold = _onHold;
-}
-