summary refs log tree commit diff stats
path: root/khtml/html/htmltokenizer.h
diff options
context:
space:
mode:
Diffstat (limited to 'khtml/html/htmltokenizer.h')
-rw-r--r-- khtml/html/htmltokenizer.h 358
1 files changed, 0 insertions, 358 deletions
diff --git a/khtml/html/htmltokenizer.h b/khtml/html/htmltokenizer.h
deleted file mode 100644
index 10ef76176..000000000
--- a/khtml/html/htmltokenizer.h
+++ /dev/null
@@ -1,358 +0,0 @@
-/*
- This file is part of the KDE libraries
-
- Copyright (C) 1997 Martin Jones ([email protected])
- (C) 1997 Torben Weis ([email protected])
- (C) 1998 Waldo Bastian ([email protected])
- (C) 2001 Dirk Mueller ([email protected])
-
- This library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Library General Public
- License as published by the Free Software Foundation; either
- version 2 of the License, or (at your option) any later version.
-
- This library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Library General Public License for more details.
-
- You should have received a copy of the GNU Library General Public License
- along with this library; see the file COPYING.LIB. If not, write to
- the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
- Boston, MA 02110-1301, USA.
-*/
-//----------------------------------------------------------------------------
-//
-// KDE HTML Widget -- Tokenizers
-
-#ifndef HTMLTOKENIZER_H
-#define HTMLTOKENIZER_H
-
-#include <tqstring.h>
-#include <tqobject.h>
-#include <tqptrqueue.h>
-
-#include "misc/loader_client.h"
-#include "misc/htmltags.h"
-#include "misc/stringit.h"
-#include "xml/dom_stringimpl.h"
-#include "xml/xml_tokenizer.h"
-#include "xml/dom_elementimpl.h"
-#include "xml/dom_docimpl.h"
-
-class KCharsets;
-class KHTMLView;
-
-namespace DOM {
- class DocumentImpl;
- class DocumentFragmentImpl;
-}
-
-namespace khtml {
- class CachedScript;
- class KHTMLParser;
-
- /**
- * @internal
- * represents one HTML tag. Consists of a numerical id, and the list
- * of attributes. Can also represent text. In this case the id = 0 and
- * text contains the text.
- */
- class Token // one tokenizer output unit: a tag id plus attribute map, or a text run
- {
- public:
- Token() { // start empty: no id, no attribute map, no text
- tid = 0;
- attrs = 0;
- text = 0;
- flat = false;
- //tqDebug("new token, creating %08lx", attrs);
- }
- ~Token() { // drops this token's references; NOTE(review): no copy ctor/assignment is declared, so an implicit copy would share the raw pointers without ref()
- if(attrs) attrs->deref();
- if(text) text->deref();
- }
- void addAttribute(DocumentImpl* doc, TQChar* buffer, const TQString& attrName, const DOMString& v) // records attribute value v; the id comes from buffer[0] if non-zero, otherwise from looking up attrName in doc
- {
- DOMStringImpl *value = 0;
- NodeImpl::Id tid = 0; // attribute id; this local shadows the member 'tid' (the tag id), which is not touched here
- if(buffer->unicode()) { // first character of buffer is non-zero: its code unit is used directly as the attribute id
- tid = buffer->unicode();
- value = v.implementation();
- }
- else if ( !attrName.isEmpty() && attrName != "/" ) { // otherwise resolve the name through the document; an empty name or a lone "/" is skipped
- tid = doc->getId(NodeImpl::AttributeId, DOMString(attrName).implementation(), false, true);
- value = v.implementation();
- }
-
- if (value && tid) { // nothing is stored when no id was resolved or v has no implementation
- if(!attrs) { // the attribute map is created lazily on the first stored attribute
- attrs = new DOM::NamedAttrMapImpl(0);
- attrs->ref();
- }
- if (!attrs->getValue(tid)) // keep the existing value when getValue() already returns one for this id
- attrs->setValue(tid,value);
- }
- }
- void reset() // releases attrs and text and returns every field to its constructor state
- {
- if(attrs) {
- attrs->deref();
- attrs = 0;
- }
- tid = 0;
- if(text) {
- text->deref();
- text = 0;
- }
- flat = false;
- }
- DOM::NamedAttrMapImpl* attrs; // attribute map, 0 until addAttribute() stores something; ref()'d on creation, deref()'d in reset()/~Token()
- DOMStringImpl* text; // text payload or 0; deref()'d in reset()/~Token()
- ushort tid; // token id; 0 after construction and reset()
- bool flat; // NOTE(review): meaning not visible in this header -- confirm against htmltokenizer.cpp
- };
-
-// The count of spaces used for each tab.
-#define TAB_SIZE 8
-
-//-----------------------------------------------------------------------------
-
-class HTMLTokenizer : public Tokenizer, public CachedObjectClient // tokenizer for HTML input; also a CachedObjectClient (see notifyFinished below)
-{
- friend class KHTMLParser;
-public:
- HTMLTokenizer(DOM::DocumentImpl *, KHTMLView * = 0);
- HTMLTokenizer(DOM::DocumentImpl *, DOM::DocumentFragmentImpl *frag);
- virtual ~HTMLTokenizer();
-
- void begin();
- void write( const khtml::TokenizerString &str, bool appendData );
- void end();
- void finish();
- void timerEvent( TQTimerEvent *e );
- virtual void setOnHold(bool _onHold);
- void abort() { m_abort = true; } // only raises the m_abort flag; see the comment on m_abort
- virtual void setAutoClose(bool b=true);
- virtual bool isWaitingForScripts() const;
- virtual bool isExecutingScript() const;
-
-protected:
- void reset();
- void addPending();
- void processToken();
- void processListing(khtml::TokenizerString list);
-
- void parseComment(khtml::TokenizerString &str);
- void parseServer(khtml::TokenizerString &str);
- void parseText(khtml::TokenizerString &str);
- void parseListing(khtml::TokenizerString &str);
- void parseSpecial(khtml::TokenizerString &str);
- void parseTag(khtml::TokenizerString &str);
- void parseEntity(khtml::TokenizerString &str, TQChar *&dest, bool start = false);
- void parseProcessingInstruction(khtml::TokenizerString &str);
- void scriptHandler();
- void scriptExecution(const TQString& script, const TQString& scriptURL = TQString::null, int baseLine = 0);
- void setSrc(const TokenizerString& source);
-
- // Make sure at least len more TQChars fit into buffer behind dest;
- // if they do not, enlarge the buffer.
- inline void checkBuffer(int len = 10)
- {
- if ( (dest - buffer) > size-len )
- enlargeBuffer(len);
- }
- inline void checkScriptBuffer(int len = 10) // same check for scriptCode: grows once scriptCodeSize + len reaches scriptCodeMaxSize (note '>=' here vs '>' in checkBuffer)
- {
- if ( scriptCodeSize + len >= scriptCodeMaxSize )
- enlargeScriptBuffer(len);
- }
-
- void enlargeBuffer(int len);
- void enlargeScriptBuffer(int len);
-
- // from CachedObjectClient
- void notifyFinished(khtml::CachedObject *finishedObj);
-
-protected:
- // Internal buffers
- ///////////////////
- TQChar *buffer; // start of the output buffer checked by checkBuffer()
- TQChar *dest; // current position inside buffer (checkBuffer() measures dest - buffer)
-
- khtml::Token currToken;
-
- // the size of buffer
- int size;
-
- // Tokenizer flags
- //////////////////
- // are we in quotes within a html tag
- enum
- {
- NoQuote = 0,
- SingleQuote,
- DoubleQuote
- } tquote;
-
- enum
- {
- NonePending = 0,
- SpacePending,
- LFPending,
- TabPending
- } pending;
-
- enum
- {
- NoneDiscard = 0,
- SpaceDiscard, // Discard spaces after '=' within tags
- LFDiscard, // Discard line breaks immediately after start-tags
- AllDiscard // discard all spaces, LF's etc until next non white char
- } discard;
-
- // Discard the LF part of CRLF sequence
- bool skipLF;
-
- // Flag to say that we have the '<' but not the character following it.
- bool startTag;
-
- // Flag to say, we are just parsing a tag, meaning, we are in the middle
- // of <tag...
- enum {
- NoTag = 0,
- TagName,
- SearchAttribute,
- AttributeName,
- SearchEqual,
- SearchValue,
- QuotedValue,
- Value,
- SearchEnd
- } tag;
-
- // Are we in a &... character entity description?
- enum {
- NoEntity = 0,
- SearchEntity,
- NumericSearch,
- Hexadecimal,
- Decimal,
- EntityName,
- SearchSemicolon
- } Entity;
-
- // are we in a <script> ... </script> block
- bool script;
-
- TQChar EntityChar;
-
- // Are we in a <pre> ... </pre> block
- bool pre;
-
- // if 'pre == true' we track in which column we are
- int prePos;
-
- // Are we in a <style> ... </style> block
- bool style;
-
- // Are we in a <select> ... </select> block
- bool select;
-
- // Are we in a <xmp> ... </xmp> block
- bool xmp;
-
- // Are we in a <title> ... </title> block
- bool title;
-
- // Are we in plain textmode ?
- bool plaintext;
-
- // XML processing instructions. Ignored at the moment
- bool processingInstruction;
-
- // Are we in a <!-- comment --> block
- bool comment;
-
- // Are we in a <textarea> ... </textarea> block
- bool textarea;
-
- // was the previous character escaped ?
- bool escaped;
-
- // are we in a server includes statement?
- bool server;
-
- bool brokenServer; // NOTE(review): undocumented; presumably flags a malformed server-include block -- confirm
-
- bool brokenScript; // NOTE(review): undocumented; presumably flags a malformed <script> block -- confirm
-
- // name of an unknown attribute
- TQString attrName;
-
- // Used to store the code of a scripting sequence
- TQChar *scriptCode;
- // Size of the script sequence stored in scriptCode
- int scriptCodeSize;
- // Maximal size that can be stored in scriptCode
- int scriptCodeMaxSize;
- // resync point of script code size
- int scriptCodeResync;
-
- // Stores characters if we are scanning for a string like "</script>"
- TQChar searchBuffer[ 10 ];
- // Counts where we are in the string we are scanning for
- int searchCount;
- // The string we are searching for
- const TQChar *searchFor;
- // the stopper string
- const char* searchStopper;
- // the stopper len
- int searchStopperLen;
- // if no more data is coming, just parse what we have (including ext scripts that
- // may be still downloading) and finish
- bool noMoreData;
- // URL to get source code of script from
- TQString scriptSrc;
- TQString scriptSrcCharset; // presumably the charset declared for the script at scriptSrc -- confirm
- bool javascript; // NOTE(review): undocumented; presumably whether the current script is JavaScript -- confirm
- // the HTML code we will parse after the external script we are waiting for has loaded
- TokenizerQueue pendingQueue;
- // non-zero while we are executing a script during document parsing (an int, presumably a
- // nesting depth -- confirm). Parsing of the script's output is postponed until it has finished
- int m_executingScript;
- TQPtrQueue<khtml::CachedScript> cachedScript;
- // you can pause the tokenizer if you need to display a dialog or something
- bool onHold;
- // you can ask the tokenizer to abort the current write() call, e.g. to redirect somewhere else
- bool m_abort;
-
- // if we found one broken comment, there are most likely others as well
- // store a flag to get rid of the O(n^2) behavior in such a case.
- bool brokenComments;
- // current line number
- int lineno;
- // line number at which the current <script> started
- int scriptStartLineno;
- int tagStartLineno; // presumably the line number at which the current tag started -- confirm
- // autoClose mode is used when the tokenizer was created by a script document.writing
- // on an already loaded document
- int m_autoCloseTimer; // presumably a timer id handled in timerEvent() -- confirm
-
-#define CBUFLEN 1024
- char cBuffer[CBUFLEN+2]; // NOTE(review): char scratch buffer of CBUFLEN+2 bytes; purpose not visible in this header -- confirm
- unsigned int cBufferPos; // presumably the write position inside cBuffer -- confirm
- unsigned int entityLen; // NOTE(review): undocumented; looks related to parseEntity() -- confirm
-
- khtml::TokenizerString src; // input source; setSrc() is declared above
-
- KCharsets *charsets;
- KHTMLParser *parser;
-
- KHTMLView *view;
-};
-
-} // namespace
-
-#endif // HTMLTOKENIZER_H
-