 * This file is part of the DOM implementation for KDE.
 * Copyright (C) 2000 Peter Kelly (pmk@post.com)
 * Copyright (C) 2003 Apple Computer, Inc.
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Library General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Library General Public License for more details.
 * You should have received a copy of the GNU Library General Public License
 * along with this library; see the file COPYING.LIB.  If not, write to
 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
 * Boston, MA 02110-1301, USA.

#include "xml_tokenizer.h"
#include "xml/dom_docimpl.h"
#include "xml/dom_textimpl.h"
#include "xml/dom_xmlimpl.h"
#include "html/html_tableimpl.h"
#include "html/html_headimpl.h"
#include "rendering/render_object.h"
#include "misc/htmltags.h"
#include "misc/htmlattrs.h"
#include "misc/loader.h"

#include "tdehtmlview.h"
#include "tdehtml_part.h"
#include <tqvariant.h>
#include <kdebug.h>
#include <tdelocale.h>

using namespace DOM;
using namespace tdehtml;

    // Member-initializer list of a constructor whose signature line is not visible in this
    // view (presumably XMLIncrementalSource's default constructor -- TODO confirm).
    // Initial state: read position 0, no cached character pointer, "finished" flag cleared.
    : TQXmlInputSource(), m_pos( 0 ), m_unicode( 0 ),
      m_finished( false )

// Deliberately empty: exists only to replace the default fetchData() behaviour.
// No statements are visible in the body.
void XMLIncrementalSource::fetchData()
    //just a dummy to overwrite default behavior

// Hands the next input character to the caller.
// Returns:
//   - TQXmlInputSource::EndOfDocument when the source has been marked finished
//     (checked first, so it wins even if unread characters remain),
//   - TQXmlInputSource::EndOfData when the read position has reached the end of m_data,
//   - otherwise the character at m_pos, after which m_pos is advanced by one.
// NOTE(review): the last return is indented like a branch body but no `else` precedes it
// in this view -- confirm that a line was not lost.
TQChar XMLIncrementalSource::next()
    if ( m_finished )
        return TQXmlInputSource::EndOfDocument;
    else if ( m_data.length() <= m_pos )
        return TQXmlInputSource::EndOfData;
        return m_unicode[m_pos++];

// Replaces the whole buffered input with `str` and rewinds the read position.
// The cached character pointer m_unicode is refreshed from the new m_data.
// A non-empty string also clears the "finished" flag; an empty string leaves it untouched.
void XMLIncrementalSource::setData( const TQString& str )
    m_data = str;
    m_unicode = m_data.unicode();
    m_pos = 0;
    if ( !str.isEmpty() )
        m_finished = false;
// Byte-array overload: converts `data` to a string with fromRawData() and forwards the
// result to the TQString overload of setData().
// NOTE(review): the meaning of the `true` flag passed to fromRawData() is not shown here;
// presumably it marks the bytes as the beginning of the input -- confirm against the
// TQXmlInputSource documentation.
void XMLIncrementalSource::setData( const TQByteArray& data )
    setData( fromRawData( data, true ) );

// Appends `str` to the buffered input without touching the read position.
// m_unicode is re-read from m_data after the append so that it refers to the current
// contents of the string.
void XMLIncrementalSource::appendXML( const TQString& str )
    m_data += str;
    m_unicode = m_data.unicode();

// Returns the complete buffered input (m_data), regardless of how much has been read.
TQString XMLIncrementalSource::data()
    return m_data;

// Sets the "finished" flag. While it is true, next() reports EndOfDocument.
void XMLIncrementalSource::setFinished( bool finished )
    m_finished = finished;

// Builds a handler that populates `_doc`; `_view` is stored and may be null (it is
// null-checked wherever it is used elsewhere in this file).
// errorLine starts at 0, which finish() treats as "no error location recorded".
// The document itself is pushed as the first entry of the node stack, so it is the
// initial current node.
XMLHandler::XMLHandler(DocumentImpl *_doc, TDEHTMLView *_view)
    : errorLine(0)
    m_doc = _doc;
    m_view = _view;
    pushNode( _doc );


// Pushes `node` onto the stack of open nodes; it becomes the current node.
void XMLHandler::pushNode( NodeImpl *node )
    m_nodes.push( node );

// Removes the top entry of the open-node stack and returns it.
// endElement() treats a null result as failure.
NodeImpl *XMLHandler::popNode()
    return m_nodes.pop();

// Returns the top entry of the open-node stack without removing it.
NodeImpl *XMLHandler::currentNode() const
    return m_nodes.current();

// Returns the error text accumulated so far (filled in by fatalError()).
TQString XMLHandler::errorProtocol()
    return errorProt;

// Called at the start of a parse: clears the accumulated error text and resets the
// handler state to StateInit. Always returns true.
bool XMLHandler::startDocument()
    // at the beginning of parsing: do some initialization
    errorProt = "";
    state = StateInit;

    return true;

// Start of a namespace prefix mapping. Always returns true.
// NOTE(review): neither `prefix` nor `uri` is used in the lines visible here, although
// endPrefixMapping() and fixUpNSURI() read namespaceInfo[prefix] -- confirm whether a
// statement that records the mapping is missing.
bool XMLHandler::startPrefixMapping(const TQString& prefix, const TQString& uri)
    return true;

// End of a namespace prefix mapping. Looks up the URI stack for `prefix` in
// namespaceInfo and returns true.
// NOTE(review): the `if (stack.isEmpty())` below has no visible body (the following
// `return true;` is at function indentation) -- confirm what should happen when the
// stack is empty, and whether a pop of the stack is missing before the test.
bool XMLHandler::endPrefixMapping(const TQString& prefix)
    TQValueStack<TQString>& stack = namespaceInfo[prefix];
    if (stack.isEmpty())
    return true;

// Fills in a missing namespace URI for a prefixed qualified name.
//   uri   - in/out: only modified when it is empty on entry.
//   qname - qualified name; only names containing ':' are considered.
// When the prefix split from `qname` is known to namespaceInfo, `uri` is set to the top
// entry of that prefix's URI stack; otherwise `uri` is left unchanged.
void XMLHandler::fixUpNSURI(TQString& uri, const TQString& qname)
    /* QXml does not resolve the namespaces of attributes in the same
       tag that precede the xmlns declaration. This fixes up that case */
    if (uri.isEmpty() && qname.find(':') != -1) {
        TQXmlNamespaceSupport ns;
        TQString localName, prefix;
        ns.splitName(qname, prefix, localName);
        if (namespaceInfo.contains(prefix)) {
            uri = namespaceInfo[prefix].top();

// Handles an element start tag: creates the element, copies its attributes, attaches it
// to the tree and makes it the current node.
//   namespaceURI - namespace of the element; a null string is mapped to the empty namespace.
//   qName        - qualified name passed to createElementNS().
//   atts         - attributes to copy onto the new element.
// Returns false when the element cannot be created, when setting an attribute reports an
// exception code, or when no node from the current node upwards accepts the element as a
// child (the element is deleted in that case); true otherwise.
bool XMLHandler::startElement( const TQString& namespaceURI, const TQString& /*localName*/,
                               const TQString& qName, const TQXmlAttributes& atts )
    // NOTE(review): this `if` has no visible body -- presumably an open text node is
    // closed here (compare exitText()); confirm.
    if (currentNode()->nodeType() == Node::TEXT_NODE)

    DOMString nsURI;
    if (!namespaceURI.isNull())
        nsURI = DOMString(namespaceURI);
        // NOTE(review): the comment and indentation below suggest an `else` branch, but no
        // `else` is visible in this view -- confirm.
        // No namespace declared, default to the no namespace
        nsURI = DOMString("");
    ElementImpl *newElement = m_doc->createElementNS(nsURI,qName);
    if (!newElement)
        return false;
    int i;
    // Copy every attribute, repairing namespace URIs that the parser left empty.
    for (i = 0; i < atts.length(); i++) {
        int exceptioncode = 0;
        TQString uriString = atts.uri(i);
        TQString qnString  = atts.qName(i);
        fixUpNSURI(uriString, qnString);
        DOMString uri(uriString);
        DOMString qn(qnString);
        DOMString val(atts.value(i));
        newElement->setAttributeNS(uri, qn, val, exceptioncode);
        if (exceptioncode) // exception setting attributes
            return false;

    // Script elements (plain or XHTML-namespaced id) are flagged as created by the parser.
    if (newElement->id() == ID_SCRIPT || newElement->id() == makeId(xhtmlNamespace, ID_SCRIPT))
        static_cast<HTMLScriptElementImpl *>(newElement)->setCreatedByParser(true);

    //this is tricky. in general the node doesn't have to attach to the one it's in. as far
    //as standards go this is wrong, but there's literally thousands of documents where
    //we see <p><ul>...</ul></p>. the following code is there for those cases.
    //when we can't attach to the currently holding us node we try to attach to its parent
    bool attached = false;
    for ( NodeImpl *current = currentNode(); current; current = current->parent() ) {
        attached = current->addChild( newElement );
        // NOTE(review): no statement follows this `if` in the visible lines -- presumably
        // the loop stops at the first ancestor that accepts the element; confirm.
        if ( attached )
    if (attached) {
        // NOTE(review): the body of the next `if` is not visible in this view.
        if (m_view && !newElement->attached() && !m_doc->hasPendingSheets())
        pushNode( newElement );
        return true;
    else {
        delete newElement;
        return false;

    // ### DOM spec states: "if there is no markup inside an element's content, the text is contained in a
    // single object implementing the Text interface that is the only child of the element."... do we
    // need to ensure that empty elements always have an empty text child?

// Handles an element end tag by popping the current node off the open-node stack.
// Returns false when the stack yields no node, true otherwise. All three name arguments
// are ignored.
bool XMLHandler::endElement( const TQString& /*namespaceURI*/, const TQString& /*localName*/, const TQString& /*qName*/ )
    // NOTE(review): this `if` has no visible body -- presumably an open text node is
    // closed first; confirm.
    if (currentNode()->nodeType() == Node::TEXT_NODE)

    NodeImpl *node = popNode();
    if ( node ) {
        // NOTE(review): the body of this loop is not visible; the condition shows it runs
        // while the new current node reports implicitNode().
        while ( currentNode()  && currentNode()->implicitNode() ) //for the implicit HTMLTableSectionElementImpl
    } else
        return false;

    return true;

// Start of a CDATA section: creates an empty CDATA section node, adds it under the current
// node and pushes it so that it becomes the current node.
// Returns true when the node was added; otherwise the node is deleted and false is returned.
bool XMLHandler::startCDATA()
    // NOTE(review): this `if` has no visible body -- presumably an open text node is
    // closed first; confirm.
    if (currentNode()->nodeType() == Node::TEXT_NODE)

    NodeImpl *newNode = m_doc->createCDATASection(new DOMStringImpl(""));
    if (currentNode()->addChild(newNode)) {
        // NOTE(review): the body of the next `if` is not visible in this view.
        if (m_view && !newNode->attached() && !m_doc->hasPendingSheets())
        pushNode( newNode );
        return true;
    else {
        delete newNode;
        return false;


// End of a CDATA section. Asserts that there is a current node and returns whether the
// current node is non-null (pointer converted to bool).
// NOTE(review): no pop of the CDATA node pushed by startCDATA() is visible here -- confirm
// whether a statement is missing.
bool XMLHandler::endCDATA()
    Q_ASSERT( currentNode() );
    return currentNode();

// Handles character data `ch`.
// The first branch is taken when the current node is already a text or CDATA section node,
// or when enterText() succeeds in opening a new text node. Otherwise whitespace-only data
// is accepted (returns true) and anything else is rejected (returns false).
bool XMLHandler::characters( const TQString& ch )
    if (currentNode()->nodeType() == Node::TEXT_NODE ||
        currentNode()->nodeType() == Node::CDATA_SECTION_NODE ||
        enterText()) {
        // NOTE(review): in the visible lines nothing writes to `exceptioncode` between its
        // initialisation and the test, and `ch` is never stored -- a statement that appends
        // the data to the current node appears to be missing; confirm.
        int exceptioncode = 0;
        if (exceptioncode)
            return false;
        return true;
    else {
        // Don't worry about white-space violating DTD
        if (ch.stripWhiteSpace().isEmpty()) return true;

        return false;


// Handles an XML comment: creates a comment node holding a copy of `ch` and adds it under
// the current node. Always returns true; the result of addChild() is not checked.
bool XMLHandler::comment(const TQString & ch)
    // NOTE(review): this `if` has no visible body -- presumably an open text node is
    // closed first; confirm.
    if (currentNode()->nodeType() == Node::TEXT_NODE)
    // ### handle exceptions
    currentNode()->addChild(m_doc->createComment(new DOMStringImpl(ch.unicode(), ch.length())));
    return true;

// Handles a processing instruction: creates a ProcessingInstructionImpl for `target`
// with a copy of `data`. Always returns true.
// NOTE(review): `pi` is not used after creation in the visible lines -- the statements
// that insert it into the tree appear to be missing; confirm.
bool XMLHandler::processingInstruction(const TQString &target, const TQString &data)
    // NOTE(review): this `if` has no visible body -- presumably an open text node is
    // closed first; confirm.
    if (currentNode()->nodeType() == Node::TEXT_NODE)
    // ### handle exceptions
    ProcessingInstructionImpl *pi =
        m_doc->createProcessingInstruction(target, new DOMStringImpl(data.unicode(), data.length()));
    return true;

// Returns a fixed, translated generic error message; it does not depend on handler state.
TQString XMLHandler::errorString()
    // ### Make better error-messages
    return i18n("the document is not in the correct file format");

// Records a fatal parse error: appends a translated message (parser message, line, column)
// to errorProt and stores the line and column in errorLine / errorCol for later display.
// Always returns false.
bool XMLHandler::fatalError( const TQXmlParseException& exception )
    errorProt += i18n( "fatal parsing error: %1 in line %2, column %3" )
        .arg( exception.message() )
        .arg( exception.lineNumber() )
        .arg( exception.columnNumber() );

    errorLine = exception.lineNumber();
    errorCol = exception.columnNumber();

    return false;

// Opens a new, empty text node under the current node and makes it the current node.
// Returns true on success; when the current node refuses the child, the text node is
// deleted and false is returned.
bool XMLHandler::enterText()
    NodeImpl *newNode = m_doc->createTextNode("");
    if (currentNode()->addChild(newNode)) {
        pushNode( newNode );
        return true;
    else {
        delete newNode;
        return false;

// Leaves the current text node.
// NOTE(review): only the condition is visible (a view exists, the current node is not yet
// attached, and the document has no pending style sheets); the guarded statement and any
// following pop of the node stack are not shown in this view -- confirm.
void XMLHandler::exitText()
    if ( m_view && !currentNode()->attached() && !m_doc->hasPendingSheets() )

// Attribute declaration from the DTD. All arguments are ignored; always returns true.
bool XMLHandler::attributeDecl(const TQString &/*eName*/, const TQString &/*aName*/, const TQString &/*type*/,
                               const TQString &/*valueDefault*/, const TQString &/*value*/)
    // qt's xml parser (as of 2.2.3) does not currently give us values for type, valueDefault and
    // value. When it does, we can store these somewhere and have default attributes on elements
    return true;

// External entity declaration from the DTD. All arguments are ignored; always returns true.
bool XMLHandler::externalEntityDecl(const TQString &/*name*/, const TQString &/*publicId*/, const TQString &/*systemId*/)
    // ### insert these too - is there anything special we have to do here?
    return true;

// Internal entity declaration from the DTD: creates an EntityImpl named `name` whose only
// child is a text node holding a copy of `value`. Always returns true.
bool XMLHandler::internalEntityDecl(const TQString &name, const TQString &value)
    EntityImpl *e = new EntityImpl(m_doc,name);
    // ### further parse entities inside the value and add them as separate nodes (or entityreferences)?
    e->addChild(m_doc->createTextNode(new DOMStringImpl(value.unicode(), value.length())));
     // NOTE(review): the statement guarded by this `if` is not visible -- presumably it
     // registers `e` with the doctype. As shown, nothing takes ownership of `e`; confirm
     // what happens to it when the document has no doctype.
     if (m_doc->doctype())
    return true;

// Notation declaration from the DTD. Currently a no-op: the intended implementation is
// commented out below, all arguments are ignored, and true is always returned.
bool XMLHandler::notationDecl(const TQString &/*name*/, const TQString &/*publicId*/, const TQString &/*systemId*/)
// ### FIXME
//     if (m_doc->document()->doctype()) {
//         NotationImpl *n = new NotationImpl(m_doc,name,publicId,systemId);
//         static_cast<GenericRONamedNodeMapImpl*>(m_doc->document()->doctype()->notations())->addNode(n);
//     }
    return true;

// Unparsed entity declaration from the DTD. Not implemented: all arguments are ignored and
// true is always returned.
bool XMLHandler::unparsedEntityDecl(const TQString &/*name*/, const TQString &/*publicId*/,
                                    const TQString &/*systemId*/, const TQString &/*notationName*/)
    // ###
    return true;


// Builds a tokenizer for `_doc` (and optional `_view`), constructing the embedded handler
// with the same pair.
// The script iterator and the pending cached script start out null, and the tokenizer
// starts in the "no errors" state. The single m_handler instance is registered with the
// reader as content, lexical, error, declaration and DTD handler, and the SAX
// "namespace-prefixes" feature is switched on.
XMLTokenizer::XMLTokenizer(DOM::DocumentImpl *_doc, TDEHTMLView *_view)
    : m_handler(_doc,_view)
    m_doc = _doc;
    m_view = _view;
    m_scriptsIt = 0;
    m_cachedScript = 0;
    m_noErrors = true;
    m_reader.setContentHandler( &m_handler );
    m_reader.setLexicalHandler( &m_handler );
    m_reader.setErrorHandler( &m_handler );
    m_reader.setDeclHandler( &m_handler );
    m_reader.setDTDHandler( &m_handler );
    m_reader.setFeature("http://xml.org/sax/features/namespace-prefixes", true);

    // Cleanup code whose enclosing signature is not visible in this view (presumably the
    // XMLTokenizer destructor -- TODO confirm). Frees the script iterator when one exists.
    if (m_scriptsIt)
        delete m_scriptsIt;
    // NOTE(review): the statement guarded by this `if` is not visible -- presumably it
    // releases the reference taken on m_cachedScript in executeScripts(); confirm.
    if (m_cachedScript)

// Starts parsing from m_source.
// NOTE(review): the `true` argument presumably selects the reader's incremental mode
// (write() later calls parseContinue()) -- confirm against the TQXmlSimpleReader::parse
// documentation.
void XMLTokenizer::begin()
    // parse xml file
    m_reader.parse( &m_source, true );

// Feeds more input to the parser.
//   str        - the new chunk of input.
//   appendData - true: append to the data already buffered; false: replace the buffer.
// Afterwards parsing is resumed and m_noErrors is updated with the parser's result.
void XMLTokenizer::write( const TokenizerString &str, bool appendData )
    // NOTE(review): this `if` has no visible body -- presumably appended data is ignored
    // once an error has occurred; confirm.
    if ( !m_noErrors && appendData )
    if ( appendData ) {
        m_source.appendXML( str.toString() );

    } else {
        m_source.setData( str.toString() );
    m_noErrors = m_reader.parseContinue();

// Marks the input source as finished and emits finishedParsing().
// The final parseContinue() call is commented out, so no further parsing happens here.
void XMLTokenizer::end()
    m_source.setFinished( true );
    //if ( m_noErrors )
    //m_noErrors = m_reader.parseContinue();
    emit finishedParsing();

// Called once all input has been delivered. Marks the source as finished, then either
//   - (parse error) builds an XHTML error page in m_doc showing the error text and, when a
//     location was recorded, the offending source line with a caret under the column, or
//   - (success) prepares to run the document's <script> elements.
void XMLTokenizer::finish()
    m_source.setFinished( true );
    if (!m_noErrors) {
        // An error occurred during parsing of the code. Display an error page to the user (the DOM
        // tree is created manually and includes an excerpt from the code where the error is located)

        // ### for multiple error messages, display the code for each (can this happen?)

        // Clear the document
        int exceptioncode = 0;
        // NOTE(review): the loop body is not visible -- presumably it removes one child per
        // iteration (using `exceptioncode`); confirm.
        while (m_doc->hasChildNodes())

        TQString line, errorLocPtr;
        // errorLine == 0 means no error location was recorded.
        if ( m_handler.errorLine ) {
            TQString xmlCode = m_source.data();
            TQTextIStream stream(&xmlCode);
            // NOTE(review): the body of this loop is not visible -- presumably it skips the
            // lines before the error line; as shown, the readLine() below would be the loop
            // body. Confirm.
            for (unsigned long lineno = 0; lineno < m_handler.errorLine-1; lineno++)
            line = stream.readLine();

            // Pad with errorCol-1 spaces so that the caret sits under the error column.
            for (unsigned long colno = 0; colno < m_handler.errorCol-1; colno++)
                errorLocPtr += " ";
            errorLocPtr += "^";

        // Create elements for display
        DocumentImpl *doc = m_doc;
        NodeImpl *html = doc->createElementNS(XHTML_NAMESPACE,"html");
        NodeImpl   *body = doc->createElementNS(XHTML_NAMESPACE,"body");
        NodeImpl     *h1 = doc->createElementNS(XHTML_NAMESPACE,"h1");
        NodeImpl       *headingText = doc->createTextNode(i18n("XML parsing error"));
        NodeImpl     *errorText = doc->createTextNode(m_handler.errorProtocol());
        NodeImpl     *hr = 0;
        NodeImpl     *pre = 0;
        NodeImpl     *lineText = 0;
        NodeImpl     *errorLocText = 0;
        // The source excerpt nodes are only created when a source line was read above.
        if ( !line.isNull() ) {
            hr = doc->createElementNS(XHTML_NAMESPACE,"hr");
            pre = doc->createElementNS(XHTML_NAMESPACE,"pre");
            lineText = doc->createTextNode(line+"\n");
            errorLocText = doc->createTextNode(errorLocPtr);

        // Construct DOM tree. We ignore exceptions as we assume they will not be thrown here (due to the
        // fact we are using a known tag set)
        // NOTE(review): the statements that link the nodes created above into a tree are not
        // visible in this view; only the two guards remain.
        if ( body )
        if ( pre ) {

        // Close the renderers so that they update their display correctly
        // ### this should not be necessary, but requires changes in the rendering code...
        if ( pre ) pre->close();

        m_doc->recalcStyle( NodeImpl::Inherit );

    else {
        // Parsing was successful. Now locate all html <script> tags in the document and execute them
        // one by one
        // NOTE(review): only the iterator creation is visible; the calls that fill m_scripts
        // and start execution (see addScripts() / executeScripts()) are not shown here.
        m_scriptsIt = new TQPtrListIterator<HTMLScriptElementImpl>(m_scripts);


// Collects script elements from the subtree rooted at `n`. `n` must not be null (it is
// dereferenced unconditionally).
void XMLTokenizer::addScripts(NodeImpl *n)
    // Recursively go through the entire document tree, looking for html <script> tags. For each of these
    // that is found, add it to the m_scripts list from which they will be executed

    // NOTE(review): the body of this `if` is not visible -- per the comment above it
    // presumably appends `n` to m_scripts; confirm.
    if (n->id() == ID_SCRIPT || n->id() == makeId(xhtmlNamespace, ID_SCRIPT)) {

    NodeImpl *child;
    // NOTE(review): the loop body is not visible -- presumably the recursive call on each
    // child; confirm.
    for (child = n->firstChild(); child; child = child->nextSibling())

// Runs the collected script elements in order, starting at the element the iterator
// m_scriptsIt currently points to.
// NOTE(review): several statements are not visible in this view (advancing the iterator,
// returning while an external script is loading, and the final clean-up announced by the
// trailing comments) -- confirm against the complete file.
void XMLTokenizer::executeScripts()
    // Iterate through all of the html <script> tags in the document. For those that have a src attribute,
    // start loading the script and return (executeScripts() will be called again once the script is loaded
    // and continue where it left off). For scripts that don't have a src attribute, execute the code
    // inside the tag
    while (m_scriptsIt->current()) {
        DOMString scriptSrc = m_scriptsIt->current()->getAttribute(ATTR_SRC);
        TQString charset = m_scriptsIt->current()->getAttribute(ATTR_CHARSET).string();

        if (!scriptSrc.isEmpty()) {
            // we have a src attribute
            // Request the external script from the document's loader; the result is null-checked.
            m_cachedScript = m_doc->docLoader()->requestScript(scriptSrc, charset);
            if (m_cachedScript) {
                m_cachedScript->ref(this); // will call executeScripts() again if already cached
        else {
            // no src attribute - execute from contents of tag
            TQString scriptCode = "";
            NodeImpl *child;
            // Concatenate the contents of every text / CDATA child that has a string.
            for (child = m_scriptsIt->current()->firstChild(); child; child = child->nextSibling()) {
                if ( ( child->nodeType() == Node::TEXT_NODE || child->nodeType() == Node::CDATA_SECTION_NODE) &&
                     static_cast<TextImpl*>(child)->string() )
                    // NOTE(review): the expression below is cut off after the first argument
                    // in this view -- the remaining arguments are not visible.
                    scriptCode += TQConstString(static_cast<TextImpl*>(child)->string()->s,
            // the script cannot do document.write until we support incremental parsing
            // ### handle the case where the script deletes the node or redirects to
            // another page, etc. (also in notifyFinished())
            // ### the script may add another script node after this one which should be executed
            // Inline scripts are only executed when a view is present.
            if (m_view) {
                m_view->part()->executeScript(DOM::Node(), scriptCode);

    // All scripts have finished executing, so calculate the style for the document and close
    // the last element

    // We are now finished parsing

// Callback for a finished cached object. Objects other than the pending m_cachedScript are
// ignored. For the pending script, its source is copied, m_cachedScript is reset to null
// and, when a view exists, the source is executed through the view's part.
// NOTE(review): the comment below mentions a follow-up call to executeScripts(), and a
// release of the reference taken in executeScripts() would be expected; neither is
// visible in this view -- confirm.
void XMLTokenizer::notifyFinished(CachedObject *finishedObj)
    // This is called when a script has finished loading that was requested from executeScripts(). We execute
    // the script, and then call executeScripts() again to continue iterating through the list of scripts in
    // the document
    if (finishedObj == m_cachedScript) {
        DOMString scriptSource = m_cachedScript->script();
        m_cachedScript = 0;
        if (m_view)
            m_view->part()->executeScript(DOM::Node(), scriptSource.string());

// True while an external script requested by executeScripts() is still pending, i.e.
// while m_cachedScript is non-null.
bool XMLTokenizer::isWaitingForScripts() const
    return m_cachedScript != 0;

#include "xml_tokenizer.moc"