// -*- c-basic-offset: 2 -*-
/*
 *  This file is part of the KDE libraries
 *  Copyright (C) 1999-2000 Harri Porten (porten@kde.org)
 *
 *  This library is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU Library General Public
 *  License as published by the Free Software Foundation; either
 *  version 2 of the License, or (at your option) any later version.
 *
 *  This library is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 *  Library General Public License for more details.
 *
 *  You should have received a copy of the GNU Library General Public License
 *  along with this library; see the file COPYING.LIB.  If not, write to
 *  the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
 *  Boston, MA 02110-1301, USA.
 *
 */

#ifndef _KJSLEXER_H_
#define _KJSLEXER_H_

#include "ustring.h"


namespace KJS {

  // Forward declaration: this header refers to KJS::Identifier only through
  // pointers (see Lexer::makeIdentifier), so the full definition is not needed.
  class Identifier;

  // Forward declaration; nothing else in this header refers to RegExp.
  class RegExp;

  /**
   * Tokenizer ("lexer") class of the KJS library.
   *
   * This header only declares the interface: apart from the three inline
   * accessors below, every member function is defined elsewhere.
   *
   * NOTE(review): the class declares a destructor and keeps several raw
   * pointers, yet no copy constructor / assignment operator is declared.
   * Confirm that Lexer objects are never copied.
   */
  class Lexer {
  public:
    /**
     * Scanner states.
     *
     * Each enumerator's numeric value follows from its position, so the
     * order must not change. The enumerator @c Identifier hides the class
     * KJS::Identifier inside Lexer; that is why the class is always spelled
     * with its namespace qualifier further down.
     */
    enum State {
      Start,
      IdentifierOrKeyword,
      Identifier,
      InIdentifierOrKeyword,
      InIdentifier,
      InIdentifierUnicodeEscapeStart,
      InIdentifierUnicodeEscape,
      InSingleLineComment,
      InMultiLineComment,
      InNum,
      InNum0,
      InHex,
      InOctal,
      InDecimal,
      InExponentIndicator,
      InExponent,
      Hex,
      Octal,
      Number,
      String,
      Eof,
      InString,
      InEscapeSequence,
      InHexEscape,
      InUnicodeEscape,
      Other,
      Bad
    };

    Lexer();
    ~Lexer();

    /**
     * @return a pointer to a Lexer; presumably the instance currently in
     *         use (TODO confirm against the implementation).
     */
    static Lexer *curr();

    /**
     * Presumably sets the text to scan: @p c points at the characters and
     * @p len is their count (TODO confirm against the implementation).
     */
    void setCode(const UChar *c, unsigned int len);

    /**
     * Runs the scanner.
     * @return an int; presumably the code of the next token (TODO confirm).
     */
    int lex();

    /**
     * Presumably scans a regular expression literal and stores the result
     * in #pattern and #flags (TODO confirm).
     */
    bool scanRegExp();

    void doneParsing();

    /** @return the internal line counter plus one. */
    int lineNo() const
    {
      return yylineno + 1;
    }

    /** @return the current value of the internal @c terminator flag. */
    bool prevTerminator() const
    {
      return terminator;
    }

    /** @return the current value of the internal @c foundBad flag. */
    bool hadError() const
    {
      return foundBad;
    }

    // Character classification predicates, each taking one 16-bit value.
    static bool isWhiteSpace(unsigned short c);
    static bool isIdentLetter(unsigned short c);
    static bool isDecimalDigit(unsigned short c);
    static bool isHexDigit(unsigned short c);
    static bool isOctalDigit(unsigned short c);

    // Conversion helpers. NOTE(review): going by the names these turn one or
    // two hex digits into a byte and four hex digits into a UChar - confirm
    // against the implementation.
    static unsigned char convertHex(unsigned short c1);
    static unsigned char convertHex(unsigned short c1, unsigned short c2);
    static UChar convertUnicode(unsigned short c1, unsigned short c2,
                                unsigned short c3, unsigned short c4);

#ifdef KJS_DEBUG_MEM
    /**
     * Clear statically allocated resources
     */
    static void globalClear();
#endif

    // Public data members; presumably written by scanRegExp() (TODO confirm).
    UString pattern;
    UString flags;

  private:
    // ---- internal helpers (all defined out of line) ----
    void setDone(State s);
    void shift(unsigned int p);
    void nextLine();
    int lookupKeyword(const char *);

    int matchPunctuator(unsigned short c1, unsigned short c2,
                        unsigned short c3, unsigned short c4);
    unsigned short singleEscape(unsigned short c) const;
    unsigned short convertOctal(unsigned short c1, unsigned short c2,
                                unsigned short c3) const;

    void record8(unsigned short c);
    void record16(int c);
    void record16(UChar c);

    // KJS:: is required here: inside Lexer the unqualified name Identifier
    // denotes the State enumerator.
    KJS::Identifier *makeIdentifier(UChar *buffer, unsigned int pos);
    UString *makeUString(UChar *buffer, unsigned int pos);

    // ---- data members ----
    // Declaration order is kept exactly as before: it determines the object
    // layout and the order in which the constructor initialises members.

    // line counter; lineNo() reports this value plus one
    int yylineno;
    bool done;

    // 8-bit and 16-bit buffers with, judging by the names, their sizes and
    // positions (TODO confirm how record8()/record16() use them)
    char *buffer8;
    UChar *buffer16;
    unsigned int size8;
    unsigned int size16;
    unsigned int pos8;
    unsigned int pos16;

    // flag reported by prevTerminator()
    bool terminator;
    bool restrKeyword;
    // encountered delimiter like "'" and "}" on last run
    bool delimited;
    bool skipLF;
    bool skipCR;
    bool convertNextIdentifier;
    int stackToken;
    int lastToken;
    // flag reported by hadError()
    bool foundBad;

    State state;
    unsigned int pos;

    const UChar *code;
    unsigned int length;
    int yycolumn;
#ifndef KJS_PURE_ECMA
    int bol;     // begin of line
#endif

    // current and following unicode characters (int to allow for -1 for end-of-file marker)
    int current;
    int next1;
    int next2;
    int next3;

    // array of UString pointers with its element count and capacity
    // (meaning inferred from the member names)
    UString **strings;
    unsigned int numStrings;
    unsigned int stringsCapacity;

    // array of Identifier pointers with its element count and capacity
    // (meaning inferred from the member names)
    KJS::Identifier **identifiers;
    unsigned int numIdentifiers;
    unsigned int identifiersCapacity;

    // for future extensions
    class LexerPrivate;
    LexerPrivate *priv;
  };

} // namespace

#endif