18 files changed, 3662 insertions, 0 deletions
diff --git a/libksieve/Makefile.am b/libksieve/Makefile.am
new file mode 100644
index 000000000..fa7ff1a32
--- /dev/null
+++ b/libksieve/Makefile.am
@@ -0,0 +1,17 @@
+SUBDIRS = ksieve shared parser . tests
+
+INCLUDES = $(all_includes)
+
+lib_LTLIBRARIES = libksieve.la
+
+CLEANFILES = dummy.cpp
+
+libksieve_la_SOURCES = dummy.cpp
+libksieve_la_LIBADD = parser/libksieve_parser.la
+libksieve_la_LDFLAGS = $(all_libraries) -version-info 0:0:0 -no-undefined
+
+dummy.cpp:
+	echo > dummy.cpp
+
+messages:
+	$(XGETTEXT) shared/*.cpp parser/*.cpp impl/*.h ksieve/*.h -o $(podir)/libksieve.pot
diff --git a/libksieve/impl/lexer.h b/libksieve/impl/lexer.h
new file mode 100644
index 000000000..0eba0e585
--- /dev/null
+++ b/libksieve/impl/lexer.h
@@ -0,0 +1,189 @@
+/*  -*- c++ -*-
+    impl/lexer.h
+
+    Internal header file. Subject to change without notice. DO NOT USE.
+
+    This file is part of KSieve,
+    the KDE internet mail/usenet news message filtering library.
+    Copyright (c) 2003 Marc Mutz <[email protected]>
+
+    KSieve is free software; you can redistribute it and/or modify it
+    under the terms of the GNU General Public License, version 2, as
+    published by the Free Software Foundation.
+
+    KSieve is distributed in the hope that it will be useful, but
+    WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+    General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+
+    In addition, as a special exception, the copyright holders give
+    permission to link the code of this program with any edition of
+    the Qt library by Trolltech AS, Norway (or with modified versions
+    of Qt that use the same license as Qt), and distribute linked
+    combinations including the two.  You must obey the GNU General
+    Public License in all respects for all of the code used other than
+    Qt.  If you modify this file, you may extend this exception to
+    your version of the file, but you are not obligated to do so.  If
+    you do not wish to do so, delete this exception statement from
+    your version.
+*/
+
+#ifndef __KSIEVE_IMPL_LEXER_H__
+#define __KSIEVE_IMPL_LEXER_H__
+
+#include <ksieve/lexer.h>
+#include <ksieve/error.h>
+
+#include <qvaluestack.h>
+#include <qcstring.h>
+
+namespace KSieve {
+
+  class Lexer::Impl { // private implementation that the Lexer bridge class points to
+  public:
+    Impl( const char * scursor, const char * send, int options );
+
+    bool ignoreComments() const {
+      return mIgnoreComments;
+    }
+
+    bool ignoreLineFeeds() const {
+      return mIgnoreLF;
+    }
+
+    const Error & error() const { // error stored in the current state
+      return mState.error;
+    }
+
+    bool atEnd() const { // true once the cursor has reached (or passed) mEnd
+      return mState.cursor >= mEnd;
+    }
+
+    int column() const { // distance of the cursor from the start of the current line
+      return mState.cursor - mState.beginOfLine;
+    }
+
+    int line() const { // starts at 0 (see State) and is incremented by newLine()
+      return mState.line;
+    }
+
+    void save() { // push a copy of the current state (cursor, line, line start, error)
+      mStateStack.push( mState );
+    }
+
+    void restore() { // return to the last saved state; NOTE(review): assumes a prior save()
+      mState = mStateStack.pop();
+    }
+
+    Lexer::Token nextToken( QString & tokenValue );
+
+  private:
+    /** Cursor must be positioned on the \r or the \n. */
+    bool eatCRLF();
+
+    /** Cursor must be positioned after the opening hash (#). If
+	parsing is successful, cursor is positioned behind the CRLF
+	that ended the comment's line (or past the end). */
+    bool parseHashComment( QString & result, bool reallySave=false );
+    
+    /** Cursor must be positioned after the opening slash-asterisk. */
+    bool parseBracketComment( QString & result, bool reallySave=false );
+    
+    /** Cursor must be positioned on the opening '/' or '#'. */
+    bool parseComment( QString & result, bool reallySave=false );
+
+    /** Eats whitespace, but not comments */
+    bool eatWS();
+
+    /** Eats comments and whitespace */
+    bool eatCWS();
+
+    /** Cursor must be positioned on the first character */
+    bool parseIdentifier( QString & result );
+
+    /** Cursor must be positioned after the initial ':' */
+    bool parseTag( QString & result );
+
+    /** Cursor must be positioned on the first digit */
+    bool parseNumber( QString & result );
+
+    /** Cursor must be positioned after the "text:" token. */
+    bool parseMultiLine( QString & result );
+
+    /** Cursor must be positioned after the initial " */
+    bool parseQuotedString( QString & result );
+
+    struct State { // everything that save()/restore() snapshot
+      State( const char * s=0 )
+	: cursor( s ), line( 0 ), beginOfLine( s ), error() {}
+      const char * cursor; // current read position
+      int line; // number of newLine() calls so far
+      const char * beginOfLine; // first character of the current line
+      Error error;
+    } mState;
+
+    const char * const mEnd; // atEnd() and charsLeft() treat this as the end of the input
+    const bool mIgnoreComments : 1;
+    const bool mIgnoreLF : 1;
+    QValueStack<State> mStateStack; // states pushed by save(), popped by restore()
+
+    const char * beginOfLine() const { return mState.beginOfLine; }
+
+    int _strnicmp( const char * left, const char * right, size_t len ) const { // yields 1 without comparing if fewer than len chars are left
+      return charsLeft() >= len ? qstrnicmp( left, right, len ) : 1 ;
+    }
+
+    void clearErrors() { mState.error = Error(); }
+
+    unsigned int charsLeft() const { // chars between cursor and mEnd, never negative
+      return mEnd - mState.cursor < 0 ? 0 : mEnd - mState.cursor ;
+    }
+    void makeError( Error::Type e ) { // records e at the current line()/column()
+      makeError( e, line(), column() );
+    }
+    void makeError( Error::Type e, int errorLine, int errorCol ) {
+      mState.error = Error( e, errorLine, errorCol );
+    }
+    void makeIllegalCharError( char ch );
+    void makeIllegalCharError() { // uses the character under the cursor
+      makeIllegalCharError( *mState.cursor );
+    }
+    /** Defines the current char to end a line.
+	Warning: increases mState.cursor!
+    **/
+    void newLine() {
+      ++mState.line;
+      mState.beginOfLine = ++mState.cursor;
+    }
+    bool skipTo( char c, bool acceptEnd=false ) { // stop on the next c; line breaks are handed to eatCRLF()
+      while( !atEnd() ) {
+	if ( *mState.cursor == '\n' || *mState.cursor == '\r' ) {
+	  if ( !eatCRLF() ) return false;
+	} else if ( *mState.cursor == c ) {
+	  return true;
+	} else {
+	  ++mState.cursor;
+	}
+      }
+      return acceptEnd; // input ended before c was found
+    }
+    bool skipToCRLF( bool acceptEnd=true ) { // advance to the next \r or \n and let eatCRLF() handle it
+      for ( ; !atEnd() ; ++mState.cursor )
+	if ( *mState.cursor == '\n' || *mState.cursor == '\r' )
+	  return eatCRLF();
+      return acceptEnd; // input ended without a line break
+    }
+    void skipTo8BitEnd() { // skip the run of bytes whose high bit is set
+      while ( !atEnd() && (signed char)*mState.cursor < 0 )
+	++mState.cursor;
+    }
+    void skipToDelim();
+  };
+
+}
+
+#endif // __KSIEVE_IMPL_LEXER_H__
diff --git a/libksieve/impl/parser.h b/libksieve/impl/parser.h
new file mode 100644
index 000000000..736678b43
--- /dev/null
+++ b/libksieve/impl/parser.h
@@ -0,0 +1,118 @@
+/*  -*- c++ -*-
+    impl/parser.h
+
+    Internal header file. Subject to change without notice. DO NOT USE.
+
+    This file is part of KSieve,
+    the KDE internet mail/usenet news message filtering library.
+    Copyright (c) 2003 Marc Mutz <[email protected]>
+
+    KSieve is free software; you can redistribute it and/or modify it
+    under the terms of the GNU General Public License, version 2, as
+    published by the Free Software Foundation.
+
+    KSieve is distributed in the hope that it will be useful, but
+    WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+    General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+
+    In addition, as a special exception, the copyright holders give
+    permission to link the code of this program with any edition of
+    the Qt library by Trolltech AS, Norway (or with modified versions
+    of Qt that use the same license as Qt), and distribute linked
+    combinations including the two.  You must obey the GNU General
+    Public License in all respects for all of the code used other than
+    Qt.  If you modify this file, you may extend this exception to
+    your version of the file, but you are not obligated to do so.  If
+    you do not wish to do so, delete this exception statement from
+    your version.
+*/
+
+#ifndef __KSIEVE_IMPL_PARSER_H__
+#define __KSIEVE_IMPL_PARSER_H__
+
+#include <ksieve/parser.h>
+
+#include <ksieve/error.h>
+#include <ksieve/lexer.h>
+#include <impl/lexer.h>
+
+#include <ksieve/scriptbuilder.h>
+
+
+namespace KSieve {
+
+  class Parser::Impl { // private implementation of Parser; all members are private, Parser is a friend
+    friend class Parser;
+  private:
+    Impl( const char * scursor, const char * const send, int options=0 );
+
+    void setScriptBuilder( ScriptBuilder * builder ) {
+      mBuilder = builder;
+    }
+    ScriptBuilder * scriptBuilder() const {
+      return mBuilder;
+    }
+
+    bool parse();
+
+    const Error & error() const { return mError.type() == Error::None ? lexer.error() : mError ; } // parser error if set, else the lexer's
+
+    bool parseCommandList();
+
+    bool parseCommand();
+
+    bool parseArgumentList();
+
+    bool parseArgument();
+
+    bool parseTestList();
+
+    bool parseTest();
+
+    bool parseBlock();
+
+    bool parseStringList();
+
+    bool parseNumber();
+
+
+    Lexer::Token token() const { return mToken; }
+    QString tokenValue() const { return mTokenValue; }
+
+    bool atEnd() const { // no token pending (Lexer::None) and the lexer has no input left
+      return !mToken && lexer.atEnd() ;
+    }
+    bool obtainToken();
+    void consumeToken() { // forget the current token and its value
+      mToken = Lexer::None;
+      mTokenValue = QString::null;
+    }
+    void makeError( Error::Type e, int line, int col ) { // record the error and report it to the builder, if any
+      mError = Error( e, line, col );
+      if ( scriptBuilder() )
+	scriptBuilder()->error( mError );
+    }
+    void makeError( Error::Type e ) { // same, positioned at the lexer's current line/column
+      makeError( e, lexer.line(), lexer.column() );
+    }
+    void makeUnexpectedTokenError( Error::Type e ) {
+      makeError( e ); // ### save wrong token...
+    }
+    bool isArgumentToken() const;
+    bool isStringToken() const;
+
+    Error mError; // parser-level error; type None while no parser error was recorded
+    Lexer::Token mToken; // current token, Lexer::None after consumeToken()
+    QString mTokenValue;
+    Lexer::Impl lexer;
+    ScriptBuilder * mBuilder; // may be null, see makeError()
+  };
+
+}
+
+#endif // __KSIEVE_IMPL_PARSER_H__
diff --git a/libksieve/impl/utf8validator.h b/libksieve/impl/utf8validator.h
new file mode 100644
index 000000000..aa10bad2a
--- /dev/null
+++ b/libksieve/impl/utf8validator.h
@@ -0,0 +1,42 @@
+/*  -*- c++ -*-
+    impl/utf8validator.h
+
+    This file is part of KSieve,
+    the KDE internet mail/usenet news message filtering library.
+    Copyright (c) 2003 Marc Mutz <[email protected]>
+
+    KSieve is free software; you can redistribute it and/or modify it
+    under the terms of the GNU General Public License, version 2, as
+    published by the Free Software Foundation.
+
+    KSieve is distributed in the hope that it will be useful, but
+    WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+    General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+
+    In addition, as a special exception, the copyright holders give
+    permission to link the code of this program with any edition of
+    the Qt library by Trolltech AS, Norway (or with modified versions
+    of Qt that use the same license as Qt), and distribute linked
+    combinations including the two.  You must obey the GNU General
+    Public License in all respects for all of the code used other than
+    Qt.  If you modify this file, you may extend this exception to
+    your version of the file, but you are not obligated to do so.  If
+    you do not wish to do so, delete this exception statement from
+    your version.
+*/
+
+#ifndef __KSIEVE_UTF8VALIDATOR_H__
+#define __KSIEVE_UTF8VALIDATOR_H__
+
+namespace KSieve {
+
+  extern bool isValidUtf8( const char * s, unsigned int len ); // NOTE(review): presumably checks the len bytes at s for valid UTF-8; definition not in this header
+
+} // namespace KSieve
+
+#endif // __KSIEVE_UTF8VALIDATOR_H__
diff --git a/libksieve/ksieve/Makefile.am b/libksieve/ksieve/Makefile.am
new file mode 100644
index 000000000..9d086dd2a
--- /dev/null
+++ b/libksieve/ksieve/Makefile.am
@@ -0,0 +1,8 @@
+# here are header files that are part of the public api:
+ksievedir = $(includedir)/ksieve
+
+ksieve_HEADERS = \
+	error.h \
+	lexer.h \
+	parser.h \
+	scriptbuilder.h
diff --git a/libksieve/ksieve/error.h b/libksieve/ksieve/error.h
new file mode 100644
index 000000000..2dbed32c8
--- /dev/null
+++ b/libksieve/ksieve/error.h
@@ -0,0 +1,139 @@
+/*  -*- c++ -*-
+    ksieve/error.h
+
+    This file is part of KSieve,
+    the KDE internet mail/usenet news message filtering library.
+    Copyright (c) 2002-2003 Marc Mutz <[email protected]>
+
+    KSieve is free software; you can redistribute it and/or modify it
+    under the terms of the GNU General Public License, version 2, as
+    published by the Free Software Foundation.
+
+    KSieve is distributed in the hope that it will be useful, but
+    WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+    General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+
+    In addition, as a special exception, the copyright holders give
+    permission to link the code of this program with any edition of
+    the Qt library by Trolltech AS, Norway (or with modified versions
+    of Qt that use the same license as Qt), and distribute linked
+    combinations including the two.  You must obey the GNU General
+    Public License in all respects for all of the code used other than
+    Qt.  If you modify this file, you may extend this exception to
+    your version of the file, but you are not obligated to do so.  If
+    you do not wish to do so, delete this exception statement from
+    your version.
+*/
+
+#ifndef __KSIEVE_ERROR_H__
+#define __KSIEVE_ERROR_H__
+
+#include <qstring.h>
+
+#include <kdepimmacros.h>
+
+#ifdef None // X headers
+#  undef None
+#endif
+
+namespace KSieve {
+
+  class KDE_EXPORT Error { // value class: an error type, a line/column position and up to two strings
+  public:
+    enum Type {
+      None = 0, // no error; operator bool() is false for this type only
+      Custom,
+      // parse (well-formedness in XML speak) errors:
+      FirstParseError, // First*/Last* are range markers aliasing the first/last entry of each group
+
+      CRWithoutLF = FirstParseError,
+      SlashWithoutAsterisk,
+      IllegalCharacter,
+      UnexpectedCharacter,
+      NoLeadingDigits,
+      NonCWSAfterTextColon,
+
+      NumberOutOfRange,
+      InvalidUTF8,
+
+      UnfinishedBracketComment,
+      PrematureEndOfMultiLine,
+      PrematureEndOfQuotedString,
+      PrematureEndOfStringList,
+      PrematureEndOfTestList,
+      PrematureEndOfBlock,
+      MissingWhitespace,
+      MissingSemicolonOrBlock,
+
+      ExpectedBlockOrSemicolon,
+      ExpectedCommand,
+      ConsecutiveCommasInStringList,
+      ConsecutiveCommasInTestList,
+      MissingCommaInTestList,
+      MissingCommaInStringList,
+      NonStringInStringList,
+      NonCommandInCommandList,
+      NonTestInTestList,
+      LastParseError = NonTestInTestList,
+      // validity errors:
+      FirstValidityError,
+      RequireNotFirst = FirstValidityError, // rfc3028, 3.2
+      RequireMissingForCommand,
+      RequireMissingForTest,
+      RequireMissingForComparator,
+      UnsupportedCommand,
+      UnsupportedTest,
+      UnsupportedComparator,
+      TestNestingTooDeep,  // site policy
+      BlockNestingTooDeep, // site policy
+      InvalidArgument,
+      ConflictingArguments, // e.g. rfc3028, 2.7.{1,3}
+      ArgumentsRepeated, // similar to ConflictingArguments, e.g. :is :is
+      CommandOrderingConstraintViolation, // e.g. else w/o if, rfc3028, 3.1
+      LastValidityError = CommandOrderingConstraintViolation,
+      // runtime errors:
+      FirstRuntimeError,
+      IncompatibleActionsRequested = FirstRuntimeError,
+      MailLoopDetected,
+      TooManyActions,
+      LastRuntimeError = TooManyActions
+    };
+
+    static const char * typeToString( Type type );
+
+    Error( Type type=None, // also serves as the default constructor (type None)
+	   const QString & s1=QString::null, const QString & s2=QString::null,
+	   int line=-1, int col=-1 ) // line and column default to -1
+      : mType( type ), mLine( line ), mCol( col ),
+        mStringOne( s1 ), mStringTwo( s2 ) {}
+    Error( Type type, int line, int col ) // both strings stay default-constructed
+      : mType( type ), mLine( line ), mCol( col ) {}
+
+    QString asString() const;
+
+    /** So you can write <pre>if( error() )</pre> with e.g. @ref Lexer */
+    operator bool() const { // true for every type except None
+      return type() != None;
+    }
+
+    Type type() const { return mType; }
+    int line() const { return mLine; }
+    int column() const { return mCol; }
+    QString firstString() const { return mStringOne; }
+    QString secondString() const { return mStringTwo; }
+
+  protected:
+    Type mType;
+    int mLine;
+    int mCol;
+    QString mStringOne, mStringTwo;
+  };
+
+} // namespace KSieve
+
+#endif // __KSIEVE_ERROR_H__
diff --git a/libksieve/ksieve/lexer.h b/libksieve/ksieve/lexer.h
new file mode 100644
index 000000000..d5bb1fc3b
--- /dev/null
+++ b/libksieve/ksieve/lexer.h
@@ -0,0 +1,108 @@
+/*  -*- c++ -*-
+    ksieve/lexer.h
+
+    This file is part of KSieve,
+    the KDE internet mail/usenet news message filtering library.
+    Copyright (c) 2003 Marc Mutz <[email protected]>
+
+    KSieve is free software; you can redistribute it and/or modify it
+    under the terms of the GNU General Public License, version 2, as
+    published by the Free Software Foundation.
+
+    KSieve is distributed in the hope that it will be useful, but
+    WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+    General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+
+    In addition, as a special exception, the copyright holders give
+    permission to link the code of this program with any edition of
+    the Qt library by Trolltech AS, Norway (or with modified versions
+    of Qt that use the same license as Qt), and distribute linked
+    combinations including the two.  You must obey the GNU General
+    Public License in all respects for all of the code used other than
+    Qt.  If you modify this file, you may extend this exception to
+    your version of the file, but you are not obligated to do so.  If
+    you do not wish to do so, delete this exception statement from
+    your version.
+*/
+
+#ifndef __KSIEVE_LEXER_H__
+#define __KSIEVE_LEXER_H__
+
+class QString;
+
+namespace KSieve {
+
+  class Error;
+
+  class Lexer { // public tokenizer interface; the work is delegated to the private Impl
+  public:
+    enum Options { // the two non-zero values are distinct bits; 0 is the default for options
+      IncludeComments = 0,
+      IgnoreComments = 1,
+      IncludeLineFeeds = 0,
+      IgnoreLineFeeds = 2
+    };
+
+    Lexer( const char * scursor, const char * send, int options=0 );
+    ~Lexer();
+
+    /** Return whether comments are ignored, i.e. not returned by
+	@ref nextToken. Default is to not ignore comments. Ignoring them
+	can speed up script parsing a bit, and can be used when the
+	internal representation of the script won't be serialized into
+	string form again (or if you simply want to delete all
+	comments).
+    **/
+    bool ignoreComments() const;
+
+    /** Return whether line feeds are ignored, i.e. not returned by
+	@ref nextToken. Default is to not ignore line feeds. Ignoring them
+	can speed up script parsing a bit, and can be used when the
+	internal representation of the script won't be serialized into
+	string form again.
+    **/
+    bool ignoreLineFeeds() const;
+
+    const Error & error() const;
+
+    bool atEnd() const;
+    int column() const;
+    int line() const;
+
+    enum Token {
+      None = 0,
+      Number,          // 1, 100, 1M, 10k, 1G, 2g, 3m
+      Identifier,      // atom
+      Tag,             // :tag
+      Special,         // {} [] () ,;
+      QuotedString,    // "foo\"bar" -> foo"bar
+      MultiLineString, // text: \nfoo\n. -> foo
+      HashComment,     // # foo
+      BracketComment,  // /* foo */
+      LineFeeds        // the number of line feeds encountered
+    };
+
+    /** Parse the next token and return its type. @p result will contain
+	the value of the token. */
+    Token nextToken( QString & result );
+
+    void save();
+    void restore();
+      
+    class Impl;
+  private:
+    Impl * i; // pointer to the private implementation
+
+  private:
+    const Lexer & operator=( const Lexer & ); // copy operations are declared private
+    Lexer( const Lexer & );
+  };
+
+} // namespace KSieve
+
+#endif // __KSIEVE_LEXER_H__
diff --git a/libksieve/ksieve/parser.h b/libksieve/ksieve/parser.h
new file mode 100644
index 000000000..e70e1db4d
--- /dev/null
+++ b/libksieve/ksieve/parser.h
@@ -0,0 +1,72 @@
+/*  -*- c++ -*-
+    ksieve/parser.h
+
+    This file is part of KSieve,
+    the KDE internet mail/usenet news message filtering library.
+    Copyright (c) 2002-2003 Marc Mutz <[email protected]>
+
+    KSieve is free software; you can redistribute it and/or modify it
+    under the terms of the GNU General Public License, version 2, as
+    published by the Free Software Foundation.
+
+    KSieve is distributed in the hope that it will be useful, but
+    WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+    General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+
+    In addition, as a special exception, the copyright holders give
+    permission to link the code of this program with any edition of
+    the Qt library by Trolltech AS, Norway (or with modified versions
+    of Qt that use the same license as Qt), and distribute linked
+    combinations including the two.  You must obey the GNU General
+    Public License in all respects for all of the code used other than
+    Qt.  If you modify this file, you may extend this exception to
+    your version of the file, but you are not obligated to do so.  If
+    you do not wish to do so, delete this exception statement from
+    your version.
+*/
+
+#ifndef __KSIEVE_PARSING_H__
+#define __KSIEVE_PARSING_H__
+
+#include <kdepimmacros.h>
+
+class QString;
+
+namespace KSieve {
+
+  class ScriptBuilder;
+  class Error;
+
+  /** @short Parser for the Sieve grammar.
+      @author Marc Mutz <[email protected]>
+  **/
+  class KDE_EXPORT Parser {
+  public:
+
+    Parser( const char * scursor, const char * const send, int options=0 ); // NOTE(review): options presumably take Lexer::Options flags - confirm in parser.cpp
+    ~Parser();
+
+    void setScriptBuilder( ScriptBuilder * builder ); // NOTE(review): presumably the receiver of parse callbacks and errors - confirm
+    ScriptBuilder * scriptBuilder() const;
+
+    bool parse();
+
+    const Error & error() const;
+
+    class Impl;
+  private:
+    Impl * i; // pointer to the private implementation
+
+  private:
+    const Parser & operator=( const Parser & ); // copy operations are declared private
+    Parser( const Parser & );
+  };
+
+} // namespace KSieve
+
+#endif // __KSIEVE_PARSING_H__
diff --git a/libksieve/ksieve/scriptbuilder.h b/libksieve/ksieve/scriptbuilder.h
new file mode 100644
index 000000000..5e0a955bb
--- /dev/null
+++ b/libksieve/ksieve/scriptbuilder.h
@@ -0,0 +1,80 @@
+/*  -*- c++ -*-
+    ksieve/scriptbuilder.h
+
+    This file is part of KSieve,
+    the KDE internet mail/usenet news message filtering library.
+    Copyright (c) 2002-2003 Marc Mutz <[email protected]>
+
+    KSieve is free software; you can redistribute it and/or modify it
+    under the terms of the GNU General Public License, version 2, as
+    published by the Free Software Foundation.
+
+    KSieve is distributed in the hope that it will be useful, but
+    WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+    General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+
+    In addition, as a special exception, the copyright holders give
+    permission to link the code of this program with any edition of
+    the Qt library by Trolltech AS, Norway (or with modified versions
+    of Qt that use the same license as Qt), and distribute linked
+    combinations including the two.  You must obey the GNU General
+    Public License in all respects for all of the code used other than
+    Qt.  If you modify this file, you may extend this exception to
+    your version of the file, but you are not obligated to do so.  If
+    you do not wish to do so, delete this exception statement from
+    your version.
+*/
+
+#ifndef __KSIEVE_INTERFACES_SCRIPTBUILDER_H__
+#define __KSIEVE_INTERFACES_SCRIPTBUILDER_H__
+
+class QString;
+
+namespace KSieve {
+
+  class Error;
+
+  class ScriptBuilder { // abstract callback interface: every handler below is pure virtual
+  public:
+    virtual ~ScriptBuilder() {}
+
+    virtual void taggedArgument( const QString & tag ) = 0;
+    virtual void stringArgument( const QString & string, bool multiLine, const QString & embeddedHashComment ) = 0;
+    virtual void numberArgument( unsigned long number, char quantifier ) = 0;
+
+    virtual void stringListArgumentStart() = 0;
+    virtual void stringListEntry( const QString & string, bool multiLine, const QString & embeddedHashComment ) = 0;
+    virtual void stringListArgumentEnd() = 0;
+
+    virtual void commandStart( const QString & identifier ) = 0;
+    virtual void commandEnd() = 0;
+
+    virtual void testStart( const QString & identifier ) = 0;
+    virtual void testEnd() = 0;
+
+    virtual void testListStart() = 0;
+    virtual void testListEnd() = 0;
+
+    virtual void blockStart() = 0;
+    virtual void blockEnd() = 0;
+
+    /** A hash comment always includes an implicit lineFeed() at its end. */
+    virtual void hashComment( const QString & comment ) = 0;
+    /** Bracket comments include explicit lineFeed()s in their content. */
+    virtual void bracketComment( const QString & comment ) = 0;
+
+    virtual void lineFeed() = 0;
+
+    virtual void error( const Error & error ) = 0;
+
+    virtual void finished() = 0;
+  };
+
+} // namespace KSieve
+
+#endif // __KSIEVE_INTERFACES_SCRIPTBUILDER_H__
diff --git a/libksieve/parser/Makefile.am b/libksieve/parser/Makefile.am
new file mode 100644
index 000000000..044d045cf
--- /dev/null
+++ b/libksieve/parser/Makefile.am
@@ -0,0 +1,12 @@
+# final breaks static use:
+# If you feel like "fixing" it, better talk to [email protected] first :)
+KDE_OPTIONS = nofinal
+
+INCLUDES = -I$(top_srcdir)/libksieve $(all_includes)
+
+noinst_LTLIBRARIES = libksieve_parser.la
+
+libksieve_parser_la_SOURCES = utf8validator.cpp lexer.cpp parser.cpp
+libksieve_parser_la_LIBADD = ../shared/libksieve_shared.la
+libksieve_parser_la_LDFLAGS = $(all_libraries) -no-undefined
+
diff --git a/libksieve/parser/lexer.cpp b/libksieve/parser/lexer.cpp
new file mode 100644
index 000000000..d8b76da71
--- /dev/null
+++ b/libksieve/parser/lexer.cpp
@@ -0,0 +1,666 @@
+/*  -*- c++ -*-
+    parser/lexer.cpp
+
+    This file is part of KSieve,
+    the KDE internet mail/usenet news message filtering library.
+    Copyright (c) 2002-2003 Marc Mutz <[email protected]>
+
+    KSieve is free software; you can redistribute it and/or modify it
+    under the terms of the GNU General Public License, version 2, as
+    published by the Free Software Foundation.
+
+    KSieve is distributed in the hope that it will be useful, but
+    WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+    General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+
+    In addition, as a special exception, the copyright holders give
+    permission to link the code of this program with any edition of
+    the Qt library by Trolltech AS, Norway (or with modified versions
+    of Qt that use the same license as Qt), and distribute linked
+    combinations including the two.  You must obey the GNU General
+    Public License in all respects for all of the code used other than
+    Qt.  If you modify this file, you may extend this exception to
+    your version of the file, but you are not obligated to do so.  If
+    you do not wish to do so, delete this exception statement from
+    your version.
+*/
+
+#include <config.h>
+
+#include <ksieve/lexer.h>
+#include <impl/lexer.h>
+
+#include <impl/utf8validator.h>
+#include <ksieve/error.h>
+
+#include <qstring.h>
+#include <qstringlist.h>
+#include <qtextcodec.h>
+
+#include <memory> // std::auto_ptr
+
+#include <assert.h>
+#include <ctype.h> // isdigit
+
+#ifdef STR_DIM
+# undef STR_DIM
+#endif
+#define STR_DIM(x) (sizeof(x) - 1)
+
+namespace KSieve {
+
+  //
+  // Lexer Bridge implementation
+  //
+  // Apart from constructor and destructor, every method below asserts
+  // that the Impl pointer i is set and forwards to the same-named Impl method.
+
+  Lexer::Lexer( const char * scursor, const char * send, int options )
+    : i( 0 )
+  {
+    i = new Impl( scursor, send, options ); // the arguments are passed on unchanged
+  }
+
+  Lexer::~Lexer() {
+    delete i; i = 0;
+  }
+
+  bool Lexer::ignoreComments() const {
+    assert( i );
+    return i->ignoreComments();
+  }
+
+  const Error & Lexer::error() const {
+    assert( i );
+    return i->error();
+  }
+
+  bool Lexer::atEnd() const {
+    assert( i );
+    return i->atEnd();
+  }
+
+  int Lexer::column() const {
+    assert( i );
+    return i->column();
+  }
+
+  int Lexer::line() const {
+    assert( i );
+    return i->line();
+  }
+
+  void Lexer::save() {
+    assert( i );
+    i->save();
+  }
+
+  void Lexer::restore() {
+    assert( i );
+    i->restore();
+  }
+
+  Lexer::Token Lexer::nextToken( QString & result ) {
+    assert( i );
+    return i->nextToken( result );
+  }
+
+} // namespace KSieve
+
+
+// Identifier chars (a-zA-Z0-9_): 128-bit map, one bit per US-ASCII code, MSB first
+static const unsigned char iTextMap[16] = {
+    0x00, 0x00, 0x00, 0x00, // CTLs:        none
+    0x00, 0x00, 0xFF, 0xC0, // SP ... '?':  0-9
+    0x7F, 0xFF, 0xFF, 0xE1, // '@' ... '_': A-Z_
+    0x7F, 0xFF, 0xFF, 0xE0  // '`' ... DEL: a-z
+};
+
+// Delimiters: SP, HT, CR, LF, {}[]();,#/ (128-bit map, one bit per US-ASCII code, MSB first)
+// ### exclude '['? Why would one want to write identifier["foo"]?
+static const unsigned char delimMap[16] = {
+    0x00, 0x64, 0x00, 0x00, // CTLs:        CR, HT, LF
+    0x90, 0xC9, 0x00, 0x10, // SP ... '?':  SP, #(),/;
+    0x00, 0x00, 0x00, 0x16, // '@' ... '_': [] -- NOTE(review): 0x16 also sets the '^' bit; was 0x14 intended? confirm
+    0x00, 0x00, 0x00, 0x16  // '`' ... DEL: {} -- the '~' bit is set too, but isDelim() rejects chars above '}'
+};
+
+// All except iText, delim, "*: (128-bit map, one bit per US-ASCII code, MSB first)
+static const unsigned char illegalMap[16] = {
+    0xFF, 0x9B, 0xFF, 0xFF, // CTLs:        all but HT, LF, CR
+    0x4F, 0x16, 0x00, 0x0F, // SP ... '?':  !$%&'+-.<=>?
+    0x80, 0x00, 0x00, 0x0A, // '@' ... '_': '@', backslash, '^'
+    0x80, 0x00, 0x00, 0x0A  // '`' ... DEL: '`', '|', '~'
+};
+
+static inline bool isOfSet( const unsigned char map[16], unsigned char ch ) {
+    assert( ch < 128 ); // the maps hold one bit per 7-bit code only
+    return ( map[ ch >> 3 ] & ( 0x80 >> ( ch & 7 ) ) ) != 0; // byte ch/8, bit ch%8 counted from the MSB
+}
+
+static inline bool isIText( unsigned char ch ) {
+    return ch <= 'z' && isOfSet( iTextMap, ch ); // range check first: isOfSet() asserts ch < 128
+}
+
+static inline bool isDelim( unsigned char ch ) {
+    return ch <= '}' && isOfSet( delimMap, ch ); // range check first: isOfSet() asserts ch < 128
+}
+
+static inline bool isIllegal( unsigned char ch ) {
+    return ch >= '~' || isOfSet( illegalMap, ch ); // '~', DEL and all 8-bit chars never reach the map
+}
+
+static inline bool is8Bit( signed char ch ) {
+    return ( ch & 0x80 ) != 0; // top bit set <=> negative as signed char
+}
+
+static QString removeCRLF( const QString & s ) {
+  // strip one trailing line terminator, preferring CRLF over a bare LF:
+  int chop = 0;
+  if ( s.endsWith( "\r\n" ) ) chop = 2;
+  else if ( s.endsWith( "\n" ) ) chop = 1;
+
+  return s.left( s.length() - chop );
+}
+
+static QString removeDotStuff( const QString & s ) {
+    return s.startsWith( ".." ) ? s.mid( 1 ) : s ; // drop the first of two leading dots, else return s as is
+}
+
+namespace KSieve {
+
+  //
+  //
+  // Lexer Implementation
+  //
+  //
+
+  Lexer::Impl::Impl( const char * scursor, const char * send, int options )
+    : mState( scursor ? scursor : send ), // a null pointer is replaced by the other
+      mEnd( send ? send : scursor ),      // one, so cursor and end then coincide
+      mIgnoreComments( options & IgnoreComments ),
+      mIgnoreLF( options & IgnoreLineFeeds )
+  {
+    if ( !scursor || !send )
+      assert( atEnd() );
+  }
+
+  Lexer::Token Lexer::Impl::nextToken( QString & result ) {
+    assert( !atEnd() );
+    result = QString::null;
+    // Skips leading whitespace (and comments, if they are ignored), then
+    // scans one token; its text is stored in result, failures in error().
+    const int oldLine = line();
+
+    const bool eatingWSSucceeded = ignoreComments() ? eatCWS() : eatWS() ;
+
+    if ( !ignoreLineFeeds() && oldLine != line() ) {
+      result.setNum( line() - oldLine ); // return number of linefeeds encountered
+      return LineFeeds;
+    }
+
+    if ( !eatingWSSucceeded )
+      return None;
+
+    if ( atEnd() )
+      return None;
+
+    switch ( *mState.cursor ) {
+    case '#': // HashComment
+      assert( !ignoreComments() );
+      ++mState.cursor;
+      if ( !atEnd() )
+	parseHashComment( result, true );
+      return HashComment;
+    case '/': // BracketComment
+      assert( !ignoreComments() );
+      ++mState.cursor; // eat slash
+      if ( atEnd() || *mState.cursor != '*' ) {
+	makeError( Error::SlashWithoutAsterisk );
+	return BracketComment;
+      }
+      ++mState.cursor; // eat asterisk
+      if ( atEnd() ) {
+	makeError( Error::UnfinishedBracketComment );
+	return BracketComment;
+      }
+      parseBracketComment( result, true );
+      return BracketComment;
+    case ':': // Tag
+      ++mState.cursor;
+      if ( atEnd() ) {
+	makeError( Error::UnexpectedCharacter, line(), column() - 1 );
+	return Tag;
+      }
+      if ( !isIText( *mState.cursor ) ) {
+	makeIllegalCharError( *mState.cursor );
+	return Tag;
+      }
+      parseTag( result );
+      return Tag;
+    case '"': // QuotedString
+      ++mState.cursor;
+      parseQuotedString( result );
+      return QuotedString;
+    case '{':
+    case '}':
+    case '[':
+    case ']':
+    case '(':
+    case ')':
+    case ';':
+    case ',': // Special
+      result = *mState.cursor++;
+      return Special;
+    case '0':
+    case '1':
+    case '2':
+    case '3':
+    case '4':
+    case '5':
+    case '6':
+    case '7':
+    case '8':
+    case '9': // Number
+      parseNumber( result );
+      return Number;
+    case 't': // maybe MultiLineString, else Identifier
+      if ( charsLeft() >= int( STR_DIM("text:") ) && // never compare past the end of the input
+	   _strnicmp( mState.cursor, "text:", STR_DIM("text:") ) == 0 ) {
+	// MultiLineString
+	mState.cursor += STR_DIM("text:");
+	parseMultiLine( result );
+	// ### FIXME: a hash-comment between "text:" and CRLF should be preserved somehow...
+	return MultiLineString;
+      }
+      // else fall through:
+    default: // Identifier (first must not be 0-9, and can't (caught by Number above))
+      if ( !isIText( *mState.cursor ) ) {
+	makeError( Error::IllegalCharacter );
+	return None;
+      }
+      parseIdentifier( result );
+      return Identifier;
+    }
+  }
+
+  bool Lexer::Impl::eatWS() { // skips SP, HTAB and line breaks; false only if eatCRLF() fails
+    while ( !atEnd() )
+      switch ( *mState.cursor ) {
+      case '\r':
+      case '\n':
+	if ( !eatCRLF() )
+	  return false;
+	break;
+      case ' ':
+      case '\t':
+	++mState.cursor;
+	break;
+      default:
+	return true; // first non-whitespace char: the cursor stays on it
+      }
+
+    // at end:
+    return true;
+  }
+
+  bool Lexer::Impl::eatCRLF() { // accepts CRLF or a bare LF; a CR without LF is an error
+    assert( !atEnd() );
+    assert( *mState.cursor == '\n' || *mState.cursor == '\r' );
+
+    if ( *mState.cursor == '\r' ) {
+      ++mState.cursor;
+      if ( atEnd() || *mState.cursor != '\n' ) {
+	// CR w/o LF -> error
+	makeError( Error::CRWithoutLF );
+	return false;
+      } else {
+	// good CRLF
+	newLine(); // defined elsewhere; presumably steps over the LF and starts a new line - confirm
+	return true;
+      }
+    } else /* *mState.cursor == '\n' */ {
+      // good, LF only
+      newLine();
+      return true;
+    }
+  }
+      
+
+  bool Lexer::Impl::parseHashComment( QString & result, bool reallySave ) {
+    // hash-comment := "#" *CHAR-NOT-CRLF CRLF
+    // Expects the cursor right behind the '#'. Appends the comment text
+    // (without the line terminator) to result if reallySave is set.
+    assert( *(mState.cursor-1) == '#' );
+
+    const char * const commentStart = mState.cursor;
+
+    // find next CRLF:
+    while ( !atEnd() ) {
+      if ( *mState.cursor == '\n' || *mState.cursor == '\r' ) break;
+      ++mState.cursor;
+    }
+
+    const char * const commentEnd = mState.cursor - 1;
+
+    // No shortcut for short comments: a one-character comment must be
+    // saved and have its CRLF eaten just like any other comment.
+    if ( atEnd() || eatCRLF() ) {
+      const int commentLength = commentEnd - commentStart + 1;
+      if ( commentLength > 0 ) {
+	if ( !isValidUtf8( commentStart, commentLength ) ) {
+	  makeError( Error::InvalidUTF8 );
+	  return false;
+	}
+	if ( reallySave )
+	  result += QString::fromUtf8( commentStart, commentLength );
+      }
+      return true;
+    }
+
+    return false;
+  }
+
+  bool Lexer::Impl::parseBracketComment( QString & result, bool reallySave ) {
+    // bracket-comment := "/*" *(CHAR-NOT-STAR / ("*" CHAR-NOT-SLASH )) "*/"
+    // Expects the cursor right behind the opening "/*"; appends the
+    // comment text (CRs stripped) to result if reallySave is set.
+    assert( *(mState.cursor-2) == '/' );
+    assert( *(mState.cursor-1) == '*' );
+
+    const char * const commentStart = mState.cursor;
+    const int commentCol = column() - 2;
+    const int commentLine = line();
+
+    // find the closing "*/" without ever dereferencing the end pointer:
+    do {
+      if ( !skipTo( '*' ) ) {
+	if ( !error() )
+	  makeError( Error::UnfinishedBracketComment, commentLine, commentCol );
+	return false;
+      }
+    } while ( !atEnd() && ( ( ++mState.cursor, atEnd() ) || *mState.cursor != '/' ) );
+
+    if ( atEnd() ) {
+      makeError( Error::UnfinishedBracketComment, commentLine, commentCol );
+      return false;
+    }
+
+    assert( *mState.cursor == '/' );
+
+    const int commentLength = mState.cursor - commentStart - 1;
+    if ( commentLength > 0 ) {
+      if ( !isValidUtf8( commentStart, commentLength ) ) {
+	makeError( Error::InvalidUTF8 );
+	return false;
+      }
+      if ( reallySave ) {
+	QString tmp = QString::fromUtf8( commentStart, commentLength );
+	result += tmp.remove( '\r' ); // get rid of CR in CRLF pairs
+      }
+    }
+
+    ++mState.cursor; // eat '/'
+    return true;
+  }
+
+  bool Lexer::Impl::parseComment( QString & result, bool reallySave ) {
+    // comment := hash-comment / bracket-comment
+    // Dispatches on the char under the cursor; the cursor is dereferenced unchecked.
+    switch( *mState.cursor ) {
+    case '#':
+      ++mState.cursor;
+      return parseHashComment( result, reallySave );
+    case '/':
+      if ( charsLeft() < 2 || mState.cursor[1] != '*' ) {
+	makeError( Error::IllegalCharacter );
+	return false;
+      } else {
+	mState.cursor += 2; // eat "/*"
+	return parseBracketComment( result, reallySave );
+      }
+    default:
+      return false; // don't set an error here - there was no comment
+    }
+  }
+
+  bool Lexer::Impl::eatCWS() {
+    // white-space := 1*(SP / CRLF / HTAB / comment )
+    // Comment text is parsed but discarded; false if eatCRLF() or parseComment() fails.
+    while ( !atEnd() ) {
+      switch( *mState.cursor ) {
+      case ' ':
+      case '\t': // SP / HTAB
+	++mState.cursor;
+	break;
+      case '\n':
+      case '\r': // CRLF
+	if ( !eatCRLF() )
+	  return false;
+	break;
+      case '#':
+      case '/': // comments
+	{
+	  QString dummy;
+	  if ( !parseComment( dummy ) )
+	    return false;
+	}
+	break;
+      default:
+	return true;
+      }
+    }
+    return true;
+  }
+
+  bool Lexer::Impl::parseIdentifier( QString & result ) {
+    // identifier := (ALPHA / "_") *(ALPHA / DIGIT / "_")
+    // Appends the identifier to result; it must be followed by a delimiter or the end.
+    assert( isIText( *mState.cursor ) );
+
+    const char * const identifierStart = mState.cursor;
+
+    // first char:
+    if ( isdigit( *mState.cursor ) ) { // no digits for the first
+      makeError( Error::NoLeadingDigits );
+      return false;
+    }
+
+    // rest of identifier chars ( now digits are allowed ):
+    for ( ++mState.cursor ; !atEnd() && isIText( *mState.cursor ) ; ++mState.cursor );
+
+    const int identifierLength = mState.cursor - identifierStart;
+
+    // Can use the fast fromLatin1 here, since identifiers are always
+    // in the us-ascii subset:
+    result += QString::fromLatin1( identifierStart, identifierLength );
+
+    if ( atEnd() || isDelim( *mState.cursor ) )
+      return true;
+
+    makeIllegalCharError( *mState.cursor ); // note: result already holds the identifier text
+    return false;
+  }
+
+  bool Lexer::Impl::parseTag( QString & result ) {
+    // tag := ":" identifier
+    // The ':' has been eaten by the caller, so result gets the bare identifier.
+    // check that the caller plays by the rules:
+    assert( *(mState.cursor-1) == ':' );
+    assert( !atEnd() );
+    assert( isIText( *mState.cursor ) );
+
+    return parseIdentifier( result );
+  }
+
+  bool Lexer::Impl::parseNumber( QString & result ) {
+    // number     := 1*DIGIT [QUANTIFIER]
+    // QUANTIFIER := "K" / "M" / "G"
+    // Appends digits and quantifier to result. isdigit() needs an unsigned char value:
+    assert( isdigit( static_cast<unsigned char>( *mState.cursor ) ) );
+
+    while ( !atEnd() && isdigit( static_cast<unsigned char>( *mState.cursor ) ) )
+      result += *mState.cursor++;
+
+    if ( atEnd() || isDelim( *mState.cursor ) )
+      return true;
+
+    switch ( *mState.cursor ) {
+    case 'G':
+    case 'g':
+    case 'M':
+    case 'm':
+    case 'K':
+    case 'k':
+      result += *mState.cursor++;
+      break;
+    default:
+      makeIllegalCharError();
+      return false;
+    }
+
+    // quantifier found. Check for delimiter:
+    if ( atEnd() || isDelim( *mState.cursor ) )
+      return true;
+    makeIllegalCharError();
+    return false;
+  }
+
+  bool Lexer::Impl::parseMultiLine( QString & result ) {
+    // multi-line          := "text:" *(SP / HTAB) (hash-comment / CRLF)
+    //                        *(multi-line-literal / multi-line-dotstuff)
+    //                        "." CRLF
+    // multi-line-literal  := [CHAR-NOT-DOT *CHAR-NOT-CRLF] CRLF
+    // multi-line-dotstuff := "." 1*CHAR-NOT-CRLF CRLF
+    //         ;; A line containing only "." ends the multi-line.
+    //         ;; Remove a leading '.' if followed by another '.'.
+    // Expects the cursor right behind "text:". On success, result holds
+    // the text's lines, un-dot-stuffed and joined with '\n'.
+
+    assert( _strnicmp( mState.cursor - 5, "text:", STR_DIM("text:") ) == 0 );
+
+    const int mlBeginLine = line();
+    const int mlBeginCol = column() - 5;
+
+    while ( !atEnd() ) {
+      switch ( *mState.cursor ) {
+      case ' ':
+      case '\t':
+	++mState.cursor;
+	break;
+      case '#':
+	{
+	  ++mState.cursor;
+	  QString dummy;
+	  if ( !parseHashComment( dummy ) )
+	    return false;
+	  goto MultiLineStart; // break from switch _and_ while
+	}
+      case '\n':
+      case '\r':
+	if ( !eatCRLF() ) return false;
+	goto MultiLineStart; // break from switch _and_ while
+      default:
+	makeError( Error::NonCWSAfterTextColon );
+	return false;
+      }
+    }
+
+  MultiLineStart:
+    if ( atEnd() ) {
+      makeError( Error::PrematureEndOfMultiLine, mlBeginLine, mlBeginCol );
+      return false;
+    }
+
+    // Now, collect the single lines until one with only a single dot is found:
+    QStringList lines;
+    while ( !atEnd() ) {
+      const char * const oldBeginOfLine = beginOfLine();
+      if ( !skipToCRLF() )
+	return false;
+      const int lineLength = mState.cursor - oldBeginOfLine;
+      if ( lineLength > 0 ) {
+	if ( !isValidUtf8( oldBeginOfLine, lineLength ) ) {
+	  makeError( Error::InvalidUTF8 );
+	  return false;
+	}
+	const QString line = removeCRLF( QString::fromUtf8( oldBeginOfLine, lineLength ) );
+	if ( line == "." ) { // the lone dot ends the text and is not part of it
+	  result = lines.join("\n");
+	  return true;
+	}
+	// Only the raw line decides about termination: a dot-stuffed ".."
+	// line unstuffs to "." but is ordinary content.
+	lines.push_back( removeDotStuff( line ) );
+      } else {
+	lines.push_back( QString::null );
+      }
+    }
+
+    // input exhausted without seeing the terminating lone dot:
+    makeError( Error::PrematureEndOfMultiLine, mlBeginLine, mlBeginCol );
+    return false;
+  }
+
+  bool Lexer::Impl::parseQuotedString( QString & result ) {
+    // quoted-string := DQUOTE *CHAR DQUOTE
+    // Appends the unescaped string content to result; line breaks become '\n'.
+    // check that caller plays by the rules:
+    assert( *(mState.cursor-1) == '"' );
+
+    const int qsBeginCol = column() - 1;
+    const int qsBeginLine = line();
+
+    const QTextCodec * const codec = QTextCodec::codecForMib( 106 ); // UTF-8
+    assert( codec );
+    const std::auto_ptr<QTextDecoder> dec( codec->makeDecoder() );
+    assert( dec.get() );
+
+    while ( !atEnd() )
+      switch ( *mState.cursor ) {
+      case '"':
+	++mState.cursor; // eat the closing quote
+	return true;
+      case '\r':
+      case '\n':
+	if ( !eatCRLF() )
+	  return false;
+	result += '\n';
+	break;
+      case '\\':
+	++mState.cursor; // drop the backslash, take the next char literally
+	if ( atEnd() )
+	  break;
+	// else fall through:
+      default:
+	if ( !is8Bit( *mState.cursor ) )
+	  result += *mState.cursor++;
+	else { // probably UTF-8
+	  const char * const eightBitBegin = mState.cursor;
+	  skipTo8BitEnd();
+	  const int eightBitLen = mState.cursor - eightBitBegin;
+	  assert( eightBitLen > 0 );
+	  if ( isValidUtf8( eightBitBegin, eightBitLen ) )
+	    result += dec->toUnicode( eightBitBegin, eightBitLen );
+	  else {
+	    assert( column() >= eightBitLen );
+	    makeError( Error::InvalidUTF8, line(), column() - eightBitLen );
+	    return false;
+	  }
+	}
+      }
+
+    makeError( Error::PrematureEndOfQuotedString, qsBeginLine, qsBeginCol ); // input ended before the closing quote
+    return false;
+  }
+
+  void Lexer::Impl::makeIllegalCharError( char ch ) {
+    makeError( isIllegal( ch ) ? Error::IllegalCharacter : Error::UnexpectedCharacter ); // legal but misplaced chars are merely "unexpected"
+  }
+
+} // namespace KSieve
diff --git a/libksieve/parser/parser.cpp b/libksieve/parser/parser.cpp
new file mode 100644
index 000000000..8c2db050e
--- /dev/null
+++ b/libksieve/parser/parser.cpp
@@ -0,0 +1,651 @@
+/*  -*- c++ -*-
+    parser/parser.cpp
+
+    This file is part of KSieve,
+    the KDE internet mail/usenet news message filtering library.
+    Copyright (c) 2002-2003 Marc Mutz <[email protected]>
+
+    KSieve is free software; you can redistribute it and/or modify it
+    under the terms of the GNU General Public License, version 2, as
+    published by the Free Software Foundation.
+
+    KSieve is distributed in the hope that it will be useful, but
+    WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+    General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+
+    In addition, as a special exception, the copyright holders give
+    permission to link the code of this program with any edition of
+    the Qt library by Trolltech AS, Norway (or with modified versions
+    of Qt that use the same license as Qt), and distribute linked
+    combinations including the two.  You must obey the GNU General
+    Public License in all respects for all of the code used other than
+    Qt.  If you modify this file, you may extend this exception to
+    your version of the file, but you are not obligated to do so.  If
+    you do not wish to do so, delete this exception statement from
+    your version.
+*/
+
+#include <config.h>
+
+#include <ksieve/parser.h>
+#include <impl/parser.h>
+
+#include <ksieve/error.h>
+
+#include <qstring.h>
+
+#include <assert.h>
+#include <limits.h> // ULONG_MAX
+#include <ctype.h> // isdigit
+
+namespace KSieve {
+
+  //
+  // Parser Bridge implementation
+  //
+  // The methods below assert that the Impl pointer i is set and then
+  // either forward to Impl or access its mBuilder member directly.
+
+  Parser::Parser( const char * scursor, const char * const send, int options )
+    : i( 0 )
+  {
+    i = new Impl( scursor, send, options ); // the arguments are passed on unchanged
+  }
+
+  Parser::~Parser() {
+    delete i; i = 0;
+  }
+
+  void Parser::setScriptBuilder( ScriptBuilder * builder ) {
+    assert( i );
+    i->mBuilder = builder; // only the pointer is stored
+  }
+
+  ScriptBuilder * Parser::scriptBuilder() const {
+    assert( i );
+    return i->mBuilder;
+  }
+
+  const Error & Parser::error() const {
+    assert( i );
+    return i->error();
+  }
+
+  bool Parser::parse() {
+    assert( i );
+    return i->parse();
+  }
+
+}
+
+static inline unsigned long factorForQuantifier( char ch ) {
+  // Maps a size quantifier (either case) to its binary multiplier:
+  // K = 2^10, M = 2^20, G = 2^30.
+  unsigned int shift = 0;
+  if ( ch == 'k' || ch == 'K' )
+    shift = 10;
+  else if ( ch == 'm' || ch == 'M' )
+    shift = 20;
+  else if ( ch == 'g' || ch == 'G' )
+    shift = 30;
+  else {
+    assert( 0 ); // lexer should prohibit this
+    return 1; // make compiler happy
+  }
+  return 1UL << shift;
+}
+
+static inline bool willOverflowULong( unsigned long result, unsigned long add ) { // true if 10 * result + add exceeds ULONG_MAX
+  static const unsigned long maxULongByTen = ULONG_MAX / 10 ; // integer division: a double cannot represent a 64-bit ULONG_MAX exactly
+  return result > maxULongByTen || ULONG_MAX - 10 * result < add ;
+}
+
+namespace KSieve {
+
+  //
+  //
+  // Parser Implementation
+  //
+  //
+
+  Parser::Impl::Impl( const char * scursor, const char * const send, int options )
+    : mToken( Lexer::None ), // no token pending yet
+      lexer( scursor, send, options ),
+      mBuilder( 0 ) // no script builder until one is set
+  {
+
+  }
+
+  bool Parser::Impl::isStringToken() const { // quoted and multi-line strings both count as strings
+    return token() == Lexer::QuotedString ||
+           token() == Lexer::MultiLineString ;
+  }
+
+
+  bool Parser::Impl::isArgumentToken() const { // string, number, tag, or the "[" opening a string list
+    return isStringToken() ||
+           token() == Lexer::Number ||
+           token() == Lexer::Tag ||
+           token() == Lexer::Special && mTokenValue == "[" ; // && binds tighter than ||, as intended
+  }
+
+  bool Parser::Impl::obtainToken() { // fetches tokens until one is pending, input ends or the lexer fails; false on lexer error
+    while ( !mToken && !lexer.atEnd() && !lexer.error() ) {
+      mToken = lexer.nextToken( mTokenValue );
+      if ( lexer.error() )
+	break;
+      // comments and line feeds are semantically invisible and may
+      // appear anywhere, so we handle them here centrally:
+      switch ( token() ) {
+      case Lexer::HashComment:
+	if ( scriptBuilder() )
+	  scriptBuilder()->hashComment( tokenValue() );
+	consumeToken();
+	break;
+      case Lexer::BracketComment:
+	if ( scriptBuilder() )
+	  scriptBuilder()->bracketComment( tokenValue() );
+	consumeToken();
+	break;
+      case Lexer::LineFeeds:
+	for ( unsigned int i = 0, end = tokenValue().toUInt() ; i < end ; ++i ) // the token value is the line feed count
+	  if ( scriptBuilder() ) // better check every iteration, b/c
+				 // we call out to ScriptBuilder,
+				 // where nasty things might happen!
+	    scriptBuilder()->lineFeed();
+	consumeToken();
+	break;
+      default: ; // make compiler happy
+      }
+    }
+    if ( lexer.error() && scriptBuilder() )
+      scriptBuilder()->error( lexer.error() ); // lexer errors are reported to the builder here
+    return !lexer.error();
+  }
+
+  bool Parser::Impl::parse() {
+    // this is the entry point: START := command-list
+    if ( !parseCommandList() )
+      return false;
+    if ( !atEnd() ) { // parseCommandList() stopped at something that is not a command
+      makeUnexpectedTokenError( Error::ExpectedCommand );
+      return false;
+    }
+    if ( scriptBuilder() )
+      scriptBuilder()->finished(); // only reached when the whole script parsed
+    return true;
+  }
+
+
+  bool Parser::Impl::parseCommandList() {
+    // our ABNF:
+    // command-list := *command
+
+    while ( !atEnd() ) {
+      if ( !obtainToken() )
+	return false;
+      if ( token() == Lexer::None )
+	continue;
+      if ( token() != Lexer::Identifier )
+	return true; // not a command: the token stays pending for the caller to judge
+      if ( !parseCommand() ) {
+	assert( error() );
+	return false;
+      }
+    }
+    return true;
+  }
+
+
+  bool Parser::Impl::parseCommand() {
+    // command   := identifier arguments ( ";" / block )
+    // arguments := *argument [ test / test-list ]
+    // block     := "{" *command "}"
+    // our ABNF:
+    // block     := "{" [ command-list ] "}"
+
+    if ( atEnd() )
+      return false;
+
+    //
+    // identifier
+    //
+
+    if ( !obtainToken() || token() != Lexer::Identifier )
+      return false;
+
+    if ( scriptBuilder() )
+      scriptBuilder()->commandStart( tokenValue() );
+    consumeToken();
+
+    //
+    // *argument
+    //
+
+    if ( !obtainToken() )
+      return false;
+
+    if ( atEnd() ) {
+      makeError( Error::MissingSemicolonOrBlock );
+      return false;
+    }
+
+    if ( isArgumentToken() && !parseArgumentList() ) {
+      assert( error() );
+      return false;
+    }
+
+    //
+    // test / test-list
+    //
+
+    if ( !obtainToken() )
+      return false;
+
+    if ( atEnd() ) {
+      makeError( Error::MissingSemicolonOrBlock );
+      return false;
+    }
+
+    if ( token() == Lexer::Special && tokenValue() == "(" ) { // test-list
+      if ( !parseTestList() ) {
+	assert( error() );
+	return false;
+      }
+    } else if ( token() == Lexer::Identifier ) { // should be test:
+      if ( !parseTest() ) {
+	assert( error() );
+	return false;
+      }
+    }
+
+    //
+    // ";" / block
+    //
+
+    if ( !obtainToken() )
+      return false;
+
+    if ( atEnd() ) {
+      makeError( Error::MissingSemicolonOrBlock );
+      return false;
+    }
+
+    if ( token() != Lexer::Special ) {
+      makeUnexpectedTokenError( Error::ExpectedBlockOrSemicolon );
+      return false;
+    }
+
+    if ( tokenValue() == ";" )
+      consumeToken();
+    else if ( tokenValue() == "{" ) { // block
+      if ( !parseBlock() )
+	return false; // it's an error since we saw '{'
+    } else {
+      makeError( Error::MissingSemicolonOrBlock ); // some other Special token, e.g. a stray bracket
+      return false;
+    }
+
+    if ( scriptBuilder() )
+      scriptBuilder()->commandEnd(); // only reported for a completely parsed command
+    return true;
+  }
+
+
+  bool Parser::Impl::parseArgumentList() {
+    // our ABNF:
+    // argument-list := *argument
+
+    while ( !atEnd() ) {
+      if ( !obtainToken() )
+	return false;
+      if ( !isArgumentToken() )
+	return true; // first non-argument token ends the list and stays pending
+      if ( !parseArgument() )
+	return !error(); // a failure without a recorded error is not treated as fatal here
+    }
+    return true;
+  }
+
+
+  bool Parser::Impl::parseArgument() {
+    // argument := string-list / number / tag
+    // Reports the argument to the script builder (if any) and consumes its token(s).
+    if ( !obtainToken() || atEnd() )
+      return false;
+
+    if ( token() == Lexer::Number ) {
+      if ( !parseNumber() ) {
+	assert( error() );
+	return false;
+      }
+      return true;
+    } else if ( token() == Lexer::Tag ) {
+      if ( scriptBuilder() )
+	scriptBuilder()->taggedArgument( tokenValue() );
+      consumeToken();
+      return true;
+    } else if ( isStringToken() ) {
+      if ( scriptBuilder() )
+	scriptBuilder()->stringArgument( tokenValue(), token() == Lexer::MultiLineString, QString::null );
+      consumeToken();
+      return true;
+    } else if ( token() == Lexer::Special && tokenValue() == "[" ) {
+      if ( !parseStringList() ) {
+	assert( error() );
+	return false;
+      }
+      return true;
+    }
+
+    return false; // not an argument token; no error is set here
+  }
+
+
+  bool Parser::Impl::parseTestList() {
+    // test-list := "(" test *("," test) ")"
+    // Reports testListStart()/testListEnd() to the script builder, if any.
+    if ( !obtainToken() || atEnd() )
+      return false;
+
+    if ( token() != Lexer::Special || tokenValue() != "(" )
+      return false;
+    if ( scriptBuilder() )
+      scriptBuilder()->testListStart();
+    consumeToken();
+
+    // generic while/switch construct for comma-separated lists. See
+    // parseStringList() for another one. Any fix here is likely to apply there, too.
+    bool lastWasComma = true; // also true right after "(": a test must come first
+    while ( !atEnd() ) {
+      if ( !obtainToken() )
+	return false;
+
+      switch ( token() ) {
+      case Lexer::None:
+	break;
+      case Lexer::Special:
+	assert( tokenValue().length() == 1 );
+	assert( tokenValue()[0].latin1() );
+	switch ( tokenValue()[0].latin1() ) {
+	case ')':
+	  consumeToken();
+	  if ( lastWasComma ) {
+	    makeError( Error::ConsecutiveCommasInTestList );
+	    return false;
+	  }
+	  if ( scriptBuilder() )
+	    scriptBuilder()->testListEnd();
+	  return true;
+	case ',':
+	  consumeToken();
+	  if( lastWasComma ) {
+	    makeError( Error::ConsecutiveCommasInTestList );
+	    return false;
+	  }
+	  lastWasComma = true;
+	  break;
+	default:
+	  makeError( Error::NonTestInTestList ); // this is a test list, not a string list
+	  return false;
+	}
+	break;
+
+      case Lexer::Identifier:
+	if ( !lastWasComma ) {
+	  makeError( Error::MissingCommaInTestList );
+	  return false;
+	} else {
+	  lastWasComma = false;
+	  if ( !parseTest() ) {
+	    assert( error() );
+	    return false;
+	  }
+	}
+	break;
+
+      default:
+	makeUnexpectedTokenError( Error::NonTestInTestList );
+	return false;
+      }
+    }
+
+    makeError( Error::PrematureEndOfTestList );
+    return false;
+  }
+
+
+  bool Parser::Impl::parseTest() {
+    // test := identifier arguments
+    // arguments := *argument [ test / test-list ]
+
+    //
+    // identifier
+    //
+
+    if ( !obtainToken() || atEnd() )
+      return false;
+
+    if ( token() != Lexer::Identifier )
+      return false;
+
+    if ( scriptBuilder() )
+      scriptBuilder()->testStart( tokenValue() );
+    consumeToken();
+
+    //
+    // *argument
+    //
+
+    if ( !obtainToken() )
+      return false;
+
+    if ( atEnd() ) // a test w/o args
+      goto TestEnd;
+
+    if ( isArgumentToken() && !parseArgumentList() ) {
+      assert( error() );
+      return false;
+    }
+
+    //
+    // test / test-list
+    //
+
+    if ( !obtainToken() )
+      return false;
+
+    if ( atEnd() ) // a test w/o nested tests
+      goto TestEnd;
+
+    if ( token() == Lexer::Special && tokenValue() == "(" ) { // test-list
+      if ( !parseTestList() ) {
+	assert( error() );
+	return false;
+      }
+    } else if ( token() == Lexer::Identifier ) { // should be test:
+      if ( !parseTest() ) { // nested test, handled recursively
+	assert( error() );
+	return false;
+      }
+    }
+
+  TestEnd:
+    if ( scriptBuilder() )
+      scriptBuilder()->testEnd(); // every successful path ends here
+    return true;
+  }
+
+
+  bool Parser::Impl::parseBlock() {
+    // our ABNF:
+    // block := "{" [ command-list ] "}"
+
+    if ( !obtainToken() || atEnd() )
+      return false;
+
+    if ( token() != Lexer::Special || tokenValue() != "{" )
+      return false;
+    if ( scriptBuilder() )
+      scriptBuilder()->blockStart();
+    consumeToken();
+
+    if ( !obtainToken() )
+      return false;
+
+    if ( atEnd() ) {
+      makeError( Error::PrematureEndOfBlock );
+      return false;
+    }
+
+    if ( token() == Lexer::Identifier ) { // a non-empty block: parse its commands
+      if ( !parseCommandList() ) {
+	assert( error() );
+	return false;
+      }
+    }
+
+    if ( !obtainToken() )
+      return false;
+
+    if ( atEnd() ) {
+      makeError( Error::PrematureEndOfBlock );
+      return false;
+    }
+
+    if ( token() != Lexer::Special || tokenValue() != "}" ) { // whatever stopped the command list must be "}"
+      makeError( Error::NonCommandInCommandList );
+      return false;
+    }
+    if ( scriptBuilder() )
+      scriptBuilder()->blockEnd();
+    consumeToken();
+    return true;
+  }
+
+  bool Parser::Impl::parseStringList() {
+    // string-list := "[" string *("," string) "]" / string
+    //  ;; if there is only a single string, the brackets are optional
+    //
+    // However, since strings are already handled separately from
+    // string lists in parseArgument(), our ABNF is modified to:
+    // string-list := "[" string *("," string) "]"
+
+    if ( !obtainToken() || atEnd() )
+      return false;
+
+    if ( token() != Lexer::Special || tokenValue() != "[" )
+      return false;
+
+    if ( scriptBuilder() )
+      scriptBuilder()->stringListArgumentStart();
+    consumeToken();
+
+    // generic while/switch construct for comma-separated lists. See
+    // parseTestList() for another one. Any fix here is likely to apply there, too.
+    bool lastWasComma = true; // also true right after "[": a string must come first
+    while ( !atEnd() ) {
+      if ( !obtainToken() )
+	return false;
+
+      switch ( token() ) {
+      case Lexer::None:
+	break;
+      case Lexer::Special:
+	assert( tokenValue().length() == 1 );
+	switch ( tokenValue()[0].latin1() ) {
+	case ']':
+	  consumeToken();
+	  if ( lastWasComma ) {
+	    makeError( Error::ConsecutiveCommasInStringList );
+	    return false;
+	  }
+	  if ( scriptBuilder() )
+	    scriptBuilder()->stringListArgumentEnd();
+	  return true;
+	case ',':
+	  consumeToken();
+	  if ( lastWasComma ) {
+	    makeError( Error::ConsecutiveCommasInStringList );
+	    return false;
+	  }
+	  lastWasComma = true;
+	  break;
+	default:
+	  makeError( Error::NonStringInStringList );
+	  return false;
+	}
+	break;
+
+      case Lexer::QuotedString:
+      case Lexer::MultiLineString:
+	if ( !lastWasComma ) {
+	  makeError( Error::MissingCommaInStringList );
+	  return false;
+	}
+	lastWasComma = false;
+	if ( scriptBuilder() )
+	  scriptBuilder()->stringListEntry( tokenValue(), token() == Lexer::MultiLineString, QString::null );
+	consumeToken();
+	break;
+
+      default:
+	makeError( Error::NonStringInStringList );
+	return false;
+      }
+    }
+
+    makeError( Error::PrematureEndOfStringList ); // input ended before the closing "]"
+    return false;
+  }
+
+  bool Parser::Impl::parseNumber() {
+    // The lexer returns the number including the quantifier as a
+    // single token value. Here, we split it and check that the number
+    // is not out of range:
+
+    if ( !obtainToken() || atEnd() )
+      return false;
+
+    if ( token() != Lexer::Number )
+      return false;
+
+    // number:
+    unsigned long result = 0;
+    unsigned int i = 0;
+    const QCString s = tokenValue().latin1();
+    for ( const unsigned int len = s.length() ; i < len && isdigit( s[i] ) ; ++i ) {
+      const unsigned long digitValue = s[i] - '0' ;
+      if ( willOverflowULong( result, digitValue ) ) {
+	makeError( Error::NumberOutOfRange );
+	return false;
+      } else {
+	result *= 10 ; result += digitValue ;
+      }
+    }
+
+    // optional quantifier:
+    char quantifier = '\0';
+    if ( i < s.length() ) {
+      assert( i + 1 == s.length() );
+      quantifier = s[i];
+      const unsigned long factor = factorForQuantifier( quantifier );
+      if ( result > ULONG_MAX / factor ) { // exact integer bound; a double rounds a 64-bit ULONG_MAX up
+	makeError( Error::NumberOutOfRange );
+	return false;
+      }
+      result *= factor;
+    }
+
+    if ( scriptBuilder() )
+      scriptBuilder()->numberArgument( result, quantifier ); // the value is already scaled by the quantifier
+    consumeToken();
+    return true;
+  }
+
+} // namespace KSieve
diff --git a/libksieve/parser/utf8validator.cpp b/libksieve/parser/utf8validator.cpp
new file mode 100644
index 000000000..248a1f5e9
--- /dev/null
+++ b/libksieve/parser/utf8validator.cpp
@@ -0,0 +1,141 @@
+/*  -*- c++ -*-
+    utf8validator.cpp
+
+    This file is part of KSieve,
+    the KDE internet mail/usenet news message filtering library.
+    Copyright (c) 2003 Marc Mutz <[email protected]>
+
+    KSieve is free software; you can redistribute it and/or modify it
+    under the terms of the GNU General Public License, version 2, as
+    published by the Free Software Foundation.
+
+    KSieve is distributed in the hope that it will be useful, but
+    WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+    General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+
+    In addition, as a special exception, the copyright holders give
+    permission to link the code of this program with any edition of
+    the Qt library by Trolltech AS, Norway (or with modified versions
+    of Qt that use the same license as Qt), and distribute linked
+    combinations including the two.  You must obey the GNU General
+    Public License in all respects for all of the code used other than
+    Qt.  If you modify this file, you may extend this exception to
+    your version of the file, but you are not obligated to do so.  If
+    you do not wish to do so, delete this exception statement from
+    your version.
+*/
+
+#include <impl/utf8validator.h>
+
+#include <qglobal.h>
+#include <qcstring.h>
+
+static inline bool is8Bit( signed char ch ) { // true iff the top bit of the byte is set
+    return ( ch & 0x80 ) != 0;
+}
+
+static inline bool isUtf8TupelIndicator( unsigned char ch ) { // lead byte of a 2-byte sequence?
+    return ch >= 0xC0 && ch <= 0xDF; // 110x xxxx
+}
+
+static inline bool isUtf8OverlongTupel( unsigned char ch ) { // 2-byte lead that encodes a 7-bit value?
+    return ch == 0xC0 || ch == 0xC1;
+}
+
+static inline bool isUtf8TripleIndicator( unsigned char ch ) { // lead byte of a 3-byte sequence?
+    return ch >= 0xE0 && ch <= 0xEF; // 1110 xxxx
+}
+
+static inline bool isUtf8OverlongTriple( unsigned char ch1, unsigned char ch2 ) { // 0xE0 followed by 100x xxxx?
+    return ch1 == 0xE0  &&  ch2 >= 0x80  &&  ch2 <= 0x9F ;
+}
+
+static inline bool isUtf8QuartetIndicator( unsigned char ch ) { // lead byte of a 4-byte sequence?
+    return ch >= 0xF0 && ch <= 0xF7; // 1111 0xxx
+}
+
+static inline bool isUtf8OverlongQuartet( unsigned char ch1, unsigned char ch2 ) { // 0xF0 followed by 1000 xxxx?
+    return ch1 == 0xF0  &&  ch2 >= 0x80  &&  ch2 <= 0x8F ;
+}
+
+static inline bool isUtf8QuintetIndicator( unsigned char ch ) { // lead byte of a 5-byte sequence?
+    return ch >= 0xF8 && ch <= 0xFB; // 1111 10xx
+}
+
+static inline bool isUtf8OverlongQuintet( unsigned char ch1, unsigned char ch2 ) { // 0xF8 followed by 1000 0xxx?
+    return ch1 == 0xF8  &&  ch2 >= 0x80  &&  ch2 <= 0x87 ;
+}
+
+static inline bool isUtf8SextetIndicator( unsigned char ch ) { // lead byte of a 6-byte sequence?
+    return ch == 0xFC || ch == 0xFD; // 1111 110x
+}
+
+static inline bool isUtf8OverlongSextet( unsigned char ch1, unsigned char ch2 ) { // 0xFC followed by 1000 00xx?
+    return ch1 == 0xFC  &&  ch2 >= 0x80  &&  ch2 <= 0x83 ;
+}
+
+static inline bool isUtf8Continuation( unsigned char ch ) { // continuation byte, i.e. 10xx xxxx?
+    return ch >= 0x80 && ch <= 0xBF;
+}
+
+bool KSieve::isValidUtf8( const char * s, unsigned int len ) {
+  // Checks whether the len bytes starting at s form structurally valid
+  // UTF-8: every byte is either 7-bit, or the lead byte of a 2- to 6-byte
+  // sequence that is complete, minimally encoded and followed by the
+  // announced number of continuation bytes (10xx xxxx).
+  //
+  // s   - buffer to check; it is only read up to s[len-1] and need not
+  //       be NUL-terminated.
+  // len - number of bytes in s; 0 yields true.
+  //
+  // Returns true if no malformed sequence was found, false otherwise.
+  // NOTE(review): 5- and 6-byte forms, surrogates and code points above
+  // U+10FFFF are accepted here although RFC 3629 excludes them; this is
+  // presumably intentional (older UTF-8 definition) - confirm before
+  // tightening.
+  for ( unsigned int i = 0 ; i < len ; ++i ) {
+    const unsigned char ch = s[i];
+    if ( !is8Bit( ch ) )
+      continue; // plain 7-bit character
+
+    // How many continuation bytes does the lead byte announce?
+    unsigned int trailing = 0;
+    if ( isUtf8TupelIndicator( ch ) )
+      trailing = 1;
+    else if ( isUtf8TripleIndicator( ch ) )
+      trailing = 2;
+    else if ( isUtf8QuartetIndicator( ch ) )
+      trailing = 3;
+    else if ( isUtf8QuintetIndicator( ch ) )
+      trailing = 4;
+    else if ( isUtf8SextetIndicator( ch ) )
+      trailing = 5;
+    else
+      return false; // stray continuation byte, or 0xFE / 0xFF
+
+    // The sequence occupies trailing + 1 bytes starting at i, so at least
+    // that many must be left; otherwise s[i+1] etc. would be read past
+    // the end of the buffer.
+    if ( len - i <= trailing ) // too short
+      return false;
+
+    const unsigned char second = s[i+1];
+    if ( isUtf8OverlongTupel( ch ) ||
+         isUtf8OverlongTriple( ch, second ) ||
+         isUtf8OverlongQuartet( ch, second ) ||
+         isUtf8OverlongQuintet( ch, second ) ||
+         isUtf8OverlongSextet( ch, second ) ) // not minimally encoded
+      return false;
+
+    for ( unsigned int k = 1 ; k <= trailing ; ++k )
+      if ( !isUtf8Continuation( s[i+k] ) ) // not followed by 10xx xxxx
+        return false;
+    i += trailing;
+  }
+  return true;
+}
diff --git a/libksieve/shared/Makefile.am b/libksieve/shared/Makefile.am
new file mode 100644
index 000000000..a2999f687
--- /dev/null
+++ b/libksieve/shared/Makefile.am
@@ -0,0 +1,8 @@
+INCLUDES = -I$(top_srcdir)/libksieve $(all_includes)
+
+noinst_LTLIBRARIES = libksieve_shared.la
+
+libksieve_shared_la_SOURCES = error.cpp
+libksieve_shared_la_LIBADD = $(LIB_KDECORE)
+libksieve_shared_la_LDFLAGS = $(all_libraries) -no-undefined
+
diff --git a/libksieve/shared/error.cpp b/libksieve/shared/error.cpp
new file mode 100644
index 000000000..e53b0c252
--- /dev/null
+++ b/libksieve/shared/error.cpp
@@ -0,0 +1,247 @@
+/*  -*- c++ -*-
+    error.cpp
+
+    This file is part of KSieve,
+    the KDE internet mail/usenet news message filtering library.
+    Copyright (c) 2002-2003 Marc Mutz <[email protected]>
+
+    KSieve is free software; you can redistribute it and/or modify it
+    under the terms of the GNU General Public License, version 2, as
+    published by the Free Software Foundation.
+
+    KSieve is distributed in the hope that it will be useful, but
+    WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+    General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+
+    In addition, as a special exception, the copyright holders give
+    permission to link the code of this program with any edition of
+    the Qt library by Trolltech AS, Norway (or with modified versions
+    of Qt that use the same license as Qt), and distribute linked
+    combinations including the two.  You must obey the GNU General
+    Public License in all respects for all of the code used other than
+    Qt.  If you modify this file, you may extend this exception to
+    your version of the file, but you are not obligated to do so.  If
+    you do not wish to do so, delete this exception statement from
+    your version.
+*/
+
+#include <config.h>
+
+#include <ksieve/error.h>
+
+#include <klocale.h> // i18n
+
+#include <climits> // UINT_MAX
+
+namespace KSieve {
+
+  const char * Error::typeToString( Type t ) {
+    // Returns the enumerator name of t (e.g. "CRWithoutLF"), or "<unknown>"
+    // for a value that is not listed below.
+    const char * name = "<unknown>";
+    switch ( t ) {
+#define NAME_OF(x) case x: name = #x; break
+      NAME_OF( None );
+      NAME_OF( Custom );
+
+      NAME_OF( CRWithoutLF );
+      NAME_OF( SlashWithoutAsterisk );
+      NAME_OF( IllegalCharacter );
+      NAME_OF( UnexpectedCharacter );
+      NAME_OF( NoLeadingDigits );
+      NAME_OF( NonCWSAfterTextColon );
+      NAME_OF( NumberOutOfRange );
+      NAME_OF( InvalidUTF8 );
+      NAME_OF( UnfinishedBracketComment );
+      NAME_OF( PrematureEndOfMultiLine );
+      NAME_OF( PrematureEndOfQuotedString );
+      NAME_OF( PrematureEndOfStringList );
+      NAME_OF( PrematureEndOfTestList );
+      NAME_OF( PrematureEndOfBlock );
+      NAME_OF( MissingWhitespace );
+      NAME_OF( MissingSemicolonOrBlock );
+      NAME_OF( ExpectedBlockOrSemicolon );
+      NAME_OF( ExpectedCommand );
+      NAME_OF( ConsecutiveCommasInStringList );
+      NAME_OF( ConsecutiveCommasInTestList );
+      NAME_OF( MissingCommaInTestList );
+      NAME_OF( MissingCommaInStringList );
+      NAME_OF( NonStringInStringList );
+      NAME_OF( NonCommandInCommandList );
+      NAME_OF( NonTestInTestList );
+
+      NAME_OF( RequireNotFirst );
+      NAME_OF( RequireMissingForCommand );
+      NAME_OF( RequireMissingForTest );
+      NAME_OF( RequireMissingForComparator );
+      NAME_OF( UnsupportedCommand );
+      NAME_OF( UnsupportedTest );
+      NAME_OF( UnsupportedComparator );
+      NAME_OF( TestNestingTooDeep );
+      NAME_OF( BlockNestingTooDeep );
+      NAME_OF( InvalidArgument );
+      NAME_OF( ConflictingArguments );
+      NAME_OF( ArgumentsRepeated );
+      NAME_OF( CommandOrderingConstraintViolation );
+
+      NAME_OF( IncompatibleActionsRequested );
+      NAME_OF( MailLoopDetected );
+      NAME_OF( TooManyActions );
+#undef NAME_OF
+    default: break;
+    }
+    return name;
+  }
+
+  QString Error::asString() const {
+    // Returns a translated, human-readable description of this error:
+    // a null string for None, and mStringOne verbatim for Custom.
+    QString err;
+    switch( type() ) {
+    case None:
+      return QString::null;
+    case Custom:
+      return mStringOne;
+
+      // Parse errors:
+    case CRWithoutLF:
+      err = i18n("Parse error: Carriage Return (CR) without Line Feed (LF)");
+      break;
+    case SlashWithoutAsterisk:
+      err = i18n("Parse error: Unquoted Slash ('/') without Asterisk ('*'). "
+                 "Broken Comment?");
+      break;
+    case IllegalCharacter:
+      err = i18n("Parse error: Illegal Character");
+      break;
+    case UnexpectedCharacter:
+      err = i18n("Parse error: Unexpected Character, probably a missing space?");
+      break;
+    case NoLeadingDigits:
+      err = i18n("Parse error: Tag Name has leading Digits");
+      break;
+    case NonCWSAfterTextColon:
+      err = i18n("Parse error: Only whitespace and #comments may "
+                 "follow \"text:\" on the same line");
+      break;
+    case NumberOutOfRange: // the parser range-checks numbers against ULONG_MAX
+      err = i18n("Parse error: Number out of Range (must be smaller than %1)").arg(ULONG_MAX);
+      break;
+    case InvalidUTF8:
+      err = i18n("Parse error: Invalid UTF-8 sequence");
+      break;
+    case UnfinishedBracketComment:
+      err = i18n("Parse error: Unfinished Bracket Comment (missing closing '*/')");
+      break;
+    case PrematureEndOfMultiLine:
+      err = i18n("Parse error: Premature end of Multiline String (did you forget the '.'?)");
+      break;
+    case PrematureEndOfQuotedString:
+      err = i18n("Parse error: Premature end of Quoted String (missing closing '\"')");
+      break;
+    case PrematureEndOfStringList:
+      err = i18n("Parse error: Premature end of String List (missing closing ']')");
+      break;
+    case PrematureEndOfTestList:
+      err = i18n("Parse error: Premature end of Test List (missing closing ')')");
+      break;
+    case PrematureEndOfBlock:
+      err = i18n("Parse error: Premature end of Block (missing closing '}')");
+      break;
+    case MissingWhitespace:
+      err = i18n("Parse error: Missing Whitespace");
+      break;
+    case MissingSemicolonOrBlock:
+      err = i18n("Parse error: Missing ';' or Block");
+      break;
+    case ExpectedBlockOrSemicolon:
+      err = i18n("Parse error: Expected ';' or '{', got something else");
+      break;
+    case ExpectedCommand:
+    case NonCommandInCommandList: // shares its message with ExpectedCommand
+      err = i18n("Parse error: Expected Command, got something else");
+      break;
+    case ConsecutiveCommasInStringList:
+      err = i18n("Parse error: Trailing, Leading or Duplicate Commas in String List");
+      break;
+    case ConsecutiveCommasInTestList:
+      err = i18n("Parse error: Trailing, Leading or Duplicate Commas in Test List");
+      break;
+    case MissingCommaInStringList:
+      err = i18n("Parse error: Missing ',' between Strings in String List");
+      break;
+    case MissingCommaInTestList:
+      err = i18n("Parse error: Missing ',' between Tests in Test List");
+      break;
+    case NonStringInStringList:
+      err = i18n("Parse error: Only Strings allowed in String Lists");
+      break;
+    case NonTestInTestList:
+      err = i18n("Parse error: Only Tests allowed in Test Lists");
+      break;
+
+      // validity errors:
+    case RequireNotFirst:
+      err = i18n("\"require\" must be first command");
+      break;
+    case RequireMissingForCommand:
+      err = i18n("\"require\" missing for command \"%1\"").arg(mStringOne);
+      break;
+    case RequireMissingForTest:
+      err = i18n("\"require\" missing for test \"%1\"").arg(mStringOne);
+      break;
+    case RequireMissingForComparator:
+      err = i18n("\"require\" missing for comparator \"%1\"").arg(mStringOne);
+      break;
+    case UnsupportedCommand:
+      err = i18n("Command \"%1\" not supported").arg(mStringOne);
+      break;
+    case UnsupportedTest:
+      err = i18n("Test \"%1\" not supported").arg(mStringOne);
+      break;
+    case UnsupportedComparator:
+      err = i18n("Comparator \"%1\" not supported").arg(mStringOne);
+      break;
+    case TestNestingTooDeep:
+      err = i18n("Site Policy Limit Violation: Test nesting too deep (max. %1)").arg( mStringOne.toUInt() );
+      break;
+    case BlockNestingTooDeep:
+      err = i18n("Site Policy Limit Violation: Block nesting too deep (max. %1)").arg( mStringOne.toUInt() );
+      break;
+    case InvalidArgument:
+      err = i18n("Invalid Argument \"%1\" to \"%2\"").arg(mStringOne).arg(mStringTwo);
+      break;
+    case ConflictingArguments:
+      err = i18n("Conflicting Arguments: \"%1\" and \"%2\"").arg(mStringOne).arg(mStringTwo);
+      break;
+    case ArgumentsRepeated:
+      err = i18n("Argument \"%1\" Repeated").arg(mStringOne);
+      break;
+    case CommandOrderingConstraintViolation:
+      err = i18n("Command \"%1\" violates command ordering constraints").arg(mStringOne);
+      break;
+
+      // runtime errors:
+    case IncompatibleActionsRequested:
+      err = i18n("Incompatible Actions \"%1\" and \"%2\" requested").arg(mStringOne).arg(mStringTwo);
+      break;
+    case MailLoopDetected:
+      err = i18n("Mail Loop detected");
+      break;
+    case TooManyActions:
+      err = i18n("Site Policy Limit Violation: Too many Actions requested (max. %1)").arg( mStringOne.toUInt() );
+      break;
+    default:
+      err = i18n("Unknown error");
+    }
+    return err;
+  }
+  
+
+} // namespace KSieve
+
diff --git a/libksieve/tests/Makefile.am b/libksieve/tests/Makefile.am
new file mode 100644
index 000000000..36b538408
--- /dev/null
+++ b/libksieve/tests/Makefile.am
@@ -0,0 +1,13 @@
+
+INCLUDES = -I$(top_srcdir)/libksieve $(all_includes)
+LDADD = ../libksieve.la
+
+# test programs:
+check_PROGRAMS = \
+	lexertest \
+	parsertest
+
+TESTS = $(check_PROGRAMS)
+
+lexertest_SOURCES = lexertest.cpp
+parsertest_SOURCES = parsertest.cpp
diff --git a/libksieve/tests/lexertest.cpp b/libksieve/tests/lexertest.cpp
new file mode 100644
index 000000000..461499501
--- /dev/null
+++ b/libksieve/tests/lexertest.cpp
@@ -0,0 +1,484 @@
+/*  -*- c++ -*-
+    tests/lexertest.cpp
+
+    This file is part of the testsuite of KSieve,
+    the KDE internet mail/usenet news message filtering library.
+    Copyright (c) 2003 Marc Mutz <[email protected]>
+
+    KSieve is free software; you can redistribute it and/or modify it
+    under the terms of the GNU General Public License, version 2, as
+    published by the Free Software Foundation.
+
+    KSieve is distributed in the hope that it will be useful, but
+    WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+    General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+
+    In addition, as a special exception, the copyright holders give
+    permission to link the code of this program with any edition of
+    the Qt library by Trolltech AS, Norway (or with modified versions
+    of Qt that use the same license as Qt), and distribute linked
+    combinations including the two.  You must obey the GNU General
+    Public License in all respects for all of the code used other than
+    Qt.  If you modify this file, you may extend this exception to
+    your version of the file, but you are not obligated to do so.  If
+    you do not wish to do so, delete this exception statement from
+    your version.
+*/
+#include <config.h>
+#include <ksieve/lexer.h>
+using KSieve::Lexer;
+
+#include <ksieve/error.h>
+using KSieve::Error;
+
+#include <qcstring.h> // qstrlen
+#include <qstring.h>
+
+#include <iostream>
+using std::cout;
+using std::cerr;
+using std::endl;
+
+static const char * token2string( Lexer::Token t ) { // printable name of t, "" if not listed
+#define TOKEN_NAME(x) case Lexer::x: return #x
+  switch ( t ) {
+    TOKEN_NAME( None );
+    TOKEN_NAME( HashComment );
+    TOKEN_NAME( BracketComment );
+    TOKEN_NAME( Identifier );
+    TOKEN_NAME( Tag );
+    TOKEN_NAME( Number );
+    TOKEN_NAME( MultiLineString );
+    TOKEN_NAME( QuotedString );
+    TOKEN_NAME( Special );
+    TOKEN_NAME( LineFeeds );
+  }
+#undef TOKEN_NAME
+  return "";
+}
+
+struct TestCase {
+  // One table-driven lexer test: an input script plus what the lexer should yield.
+  struct Expectation { Lexer::Token token; const char * result; }; // one expected token and its text
+  const char * name;         // label printed in front of the test's result
+  const char * string;       // script handed to the lexer (null in the "Null script" case)
+  Expectation expected[16];  // end with { None, 0 }
+  Error::Type expectedError; // error type compared against the lexer's error
+  int errorLine;             // expected error position: line ...
+  int errorCol;              // ... and column
+};
+
+static const TestCase testcases[] = { // lexer test table; see struct TestCase for the fields
+  //
+  // Whitespace:
+  //
+
+  { "Null script", 0,
+    { { Lexer::None, 0 } },
+    Error::None, 0, 0
+  },
+
+  { "Empty script", "",
+    { { Lexer::None, 0 } },
+    Error::None, 0, 0
+  },
+
+  { "Whitespace-only script", " \t\n\t \n",
+    { { Lexer::LineFeeds, "2" }, { Lexer::None, 0 } },
+    Error::None, 0, 0
+  },
+
+  { "Lone CR", "\r",
+    { { Lexer::None, 0 } },
+    Error::CRWithoutLF, 0, 1
+  },
+
+  { "CR+Space", "\r ",
+    { { Lexer::None, 0 } },
+    Error::CRWithoutLF, 0, 1
+  },
+
+  { "CRLF alone", "\r\n",
+    { { Lexer::LineFeeds, "1" }, { Lexer::None, 0 } },
+    Error::None, 0, 0
+  },
+
+  //
+  // hash comments:
+  //
+
+  { "Basic hash comment (no newline)", "#comment",
+    { { Lexer::HashComment, "comment" }, { Lexer::None, 0 } },
+    Error::None, 0, 0
+  },
+
+  { "Basic hash comment (LF)", "#comment\n",
+    { { Lexer::HashComment, "comment" }, { Lexer::None, 0 } },
+    Error::None, 0, 0
+  },
+
+  { "Basic hash comment (CRLF)", "#comment\r\n",
+    { { Lexer::HashComment, "comment" }, { Lexer::None, 0 } },
+    Error::None, 0, 0
+  },
+
+  { "Basic hash comment (CR)", "#comment\r",
+    { { Lexer::HashComment, 0 } },
+    Error::CRWithoutLF, 0, 9
+  },
+
+  { "Non-UTF-8 in hash comment", "#\xA9 copyright",
+    { { Lexer::HashComment, 0 } },
+    Error::InvalidUTF8, 0, 12
+  },
+
+  //
+  // bracket comments:
+  //
+
+  { "Basic bracket comment", "/* comment */",
+    { { Lexer::BracketComment, " comment " }, { Lexer::None, 0 } },
+    Error::None, 0, 0
+  },
+
+  { "Basic bracket comment - missing trailing slash", "/* comment *",
+    { { Lexer::BracketComment, 0 } },
+    Error::UnfinishedBracketComment, 0, 0
+  },
+
+  { "Basic bracket comment - missing trailing asterisk + slash", "/* comment ",
+    { { Lexer::BracketComment, 0 } },
+    Error::UnfinishedBracketComment, 0, 0
+  },
+
+  { "Basic bracket comment - missing leading slash", "* comment */",
+    { { Lexer::None, 0 } },
+    Error::IllegalCharacter, 0, 0
+  },
+
+  { "Basic bracket comment - missing leading asterisk + slash", "comment */",
+    { { Lexer::Identifier, "comment" }, { Lexer::None, 0 } },
+    Error::IllegalCharacter, 0, 8
+  },
+
+  { "Basic multiline bracket comment (LF)", "/* comment\ncomment */",
+    { { Lexer::BracketComment, " comment\ncomment " }, { Lexer::None, 0 } },
+    Error::None, 0, 0
+  },
+
+  { "Basic multiline bracket comment (CRLF)", "/* comment\r\ncomment */",
+    { { Lexer::BracketComment, " comment\ncomment " }, { Lexer::None, 0 } },
+    Error::None, 0, 0
+  },
+
+  { "Basic multiline bracket comment (CR)", "/* comment\rcomment */",
+    { { Lexer::BracketComment, 0 } },
+    Error::CRWithoutLF, 0, 11
+  },
+
+  { "Non-UTF-8 in bracket comment", "/*\xA9 copyright*/",
+    { { Lexer::BracketComment, 0 } },
+    Error::InvalidUTF8, 0, 14
+  },
+
+  //
+  // numbers:
+  //
+  { "Basic number 1", "1",
+    { { Lexer::Number, "1" }, { Lexer::None, 0 } },
+    Error::None, 0, 0
+  },
+  { "Basic number 01", "01",
+    { { Lexer::Number, "01" }, { Lexer::None, 0 } },
+    Error::None, 0, 0
+  },
+  { "Qualified number 1k", "1k",
+    { { Lexer::Number, "1k" }, { Lexer::None, 0 } },
+    Error::None, 0, 0
+  },
+  { "Qualified number 1M", "1M",
+    { { Lexer::Number, "1M" }, { Lexer::None, 0 } },
+    Error::None, 0, 0
+  },
+  { "Qualified number 1G", "1G",
+    { { Lexer::Number, "1G" }, { Lexer::None, 0 } },
+    Error::None, 0, 0
+  },
+  //
+  // identifiers:
+  //
+  { "Basic identifier \"id\"", "id",
+    { { Lexer::Identifier, "id" }, { Lexer::None, 0 } },
+    Error::None, 0, 0
+  },
+  { "Basic identifier \"_id\"", "_id",
+    { { Lexer::Identifier, "_id" }, { Lexer::None, 0 } },
+    Error::None, 0, 0
+  },
+  //
+  // tags:
+  //
+  { "Basic tag \":tag\"", ":tag",
+    { { Lexer::Tag, "tag" }, { Lexer::None, 0 } },
+    Error::None, 0, 0
+  },
+  { "Basic tag \":_tag\"", ":_tag",
+    { { Lexer::Tag, "_tag" }, { Lexer::None, 0 } },
+    Error::None, 0, 0
+  },
+  //
+  // specials:
+  //
+  { "Basic special \"{}[]();,\"", "{}[]();,",
+    { { Lexer::Special, "{" }, { Lexer::Special, "}" },
+      { Lexer::Special, "[" }, { Lexer::Special, "]" },
+      { Lexer::Special, "(" }, { Lexer::Special, ")" },
+      { Lexer::Special, ";" }, { Lexer::Special, "," }, { Lexer::None, 0 } },
+    Error::None, 0, 0
+  },
+  //
+  // quoted-string:
+  //
+  { "Basic quoted string \"foo\"", "\"foo\"",
+    { { Lexer::QuotedString, "foo" }, { Lexer::None, 0 } },
+    Error::None, 0, 0
+  },
+  { "Basic quoted string, UTF-8", "\"foo\xC3\xB1" "foo\"", // "foo", U+00F1 (n with tilde), "foo"
+    { { Lexer::QuotedString, "foo\xC3\xB1" "foo" }, { Lexer::None, 0 } },
+    Error::None, 0, 0
+  },
+  { "Quoted string, escaped '\"'", "\"foo\\\"bar\"",
+    { { Lexer::QuotedString, "foo\"bar" }, { Lexer::None, 0 } },
+    Error::None, 0, 0
+  },
+  { "Quoted string, escaped '\\'", "\"foo\\\\bar\"",
+    { { Lexer::QuotedString, "foo\\bar" }, { Lexer::None, 0 } },
+    Error::None, 0, 0
+  },
+  { "Quoted string, excessive escapes", "\"\\fo\\o\"",
+    { { Lexer::QuotedString, "foo" }, { Lexer::None, 0 } },
+    Error::None, 0, 0
+  },
+  { "Quoted string across lines (LF)", "\"foo\nbar\"",
+    { { Lexer::QuotedString, "foo\nbar" }, { Lexer::None, 0 } },
+    Error::None, 0, 0
+  },
+  { "Quoted string across lines (CRLF)", "\"foo\r\nbar\"",
+    { { Lexer::QuotedString, "foo\nbar" }, { Lexer::None, 0 } },
+    Error::None, 0, 0
+  },
+  //
+  // multiline strings:
+  //
+  { "Basic multiline string I (LF)", "text:\nfoo\n.",
+    { { Lexer::MultiLineString, "foo" /* "foo\n" ? */ }, { Lexer::None, 0 } },
+    Error::None, 0, 0
+  },
+  { "Basic multiline string I (CRLF)", "text:\r\nfoo\r\n.",
+    { { Lexer::MultiLineString, "foo" /* "foo\n" ? */ }, { Lexer::None, 0 } },
+    Error::None, 0, 0
+  },
+  { "Basic multiline string II (LF)", "text:\nfoo\n.\n",
+    { { Lexer::MultiLineString, "foo" /* "foo\n" ? */ }, { Lexer::None, 0 } },
+    Error::None, 0, 0
+  },
+  { "Basic multiline string II (CRLF)", "text:\r\nfoo\r\n.\r\n",
+    { { Lexer::MultiLineString, "foo" /* "foo\n" ? */ }, { Lexer::None, 0 } },
+    Error::None, 0, 0
+  },
+  { "Dotstuffed multiline string (LF)", "text:\n..foo\n.",
+    { { Lexer::MultiLineString, ".foo" /* ".foo\n" ? */ }, { Lexer::None, 0 } },
+    Error::None, 0, 0
+  },
+  { "Dotstuffed multiline string (CRLF)", "text:\r\n..foo\r\n.",
+    { { Lexer::MultiLineString, ".foo" /* ".foo\n" ? */ }, { Lexer::None, 0 } },
+    Error::None, 0, 0
+  },
+  { "Incompletely dotstuffed multiline string (LF)", "text:\n.foo\n.",
+    { { Lexer::MultiLineString, ".foo" /* ".foo\n" ? */ }, { Lexer::None, 0 } },
+    Error::None, 0, 0
+  },
+  { "Incompletely dotstuffed multiline string (CRLF)", "text:\r\n.foo\r\n.",
+    { { Lexer::MultiLineString, ".foo" /* ".foo\n" ? */ }, { Lexer::None, 0 } },
+    Error::None, 0, 0
+  },
+  { "Mutiline with a line with only one '.'","text:\r\nfoo\r\n..\r\nbar\r\n.",
+    { { Lexer::MultiLineString, "foo\n.\nbar" }, { Lexer::None, 0 } },
+    Error::None, 0, 0
+  },
+
+
+  //
+  // Errors in single tokens:
+  //
+
+  //
+  // numbers:
+  //
+  { "Number, unknown qualifier", "100f",
+    { { Lexer::Number, "100" } },
+    Error::UnexpectedCharacter, 0, 3
+  },
+  { "Negative number", "-100",
+    { { Lexer::None, 0 } },
+    Error::IllegalCharacter, 0, 0
+  },
+  //
+  // identifiers:
+  //
+  { "Identifier, leading digits", "0id",
+    { { Lexer::Number, "0" } },
+    Error::UnexpectedCharacter, 0, 1
+  },
+  { "Identifier, embedded umlaut", "id\xE4" "id", // \xE4: a-umlaut as one raw 8-bit (Latin-1) byte, escaped so the test does not depend on the source encoding
+    { { Lexer::Identifier, "id" } },
+    Error::IllegalCharacter, 0, 2
+  },
+  //
+  // tags:
+  //
+  { "Lone ':' (at end)", ":",
+    { { Lexer::Tag, 0 } },
+    Error::UnexpectedCharacter, 0, 0
+  },
+  { "Lone ':' (in stream)", ": ",
+    { { Lexer::Tag, 0 } },
+    Error::UnexpectedCharacter, 0, 1
+  },
+  { "Tag, leading digits", ":0tag",
+    { { Lexer::Tag, 0 } },
+    Error::NoLeadingDigits, 0, 1
+  },
+  { "Tag, embedded umlaut", ":tag\xE4" "tag", // \xE4: a-umlaut as one raw 8-bit (Latin-1) byte, see above
+    { { Lexer::Tag, "tag" } },
+    Error::IllegalCharacter, 0, 4
+  },
+  //
+  // specials: (none)
+  // quoted string:
+  //
+  { "Premature end of quoted string", "\"foo",
+    { { Lexer::QuotedString, "foo" } },
+    Error::PrematureEndOfQuotedString, 0, 0
+  },
+  { "Invalid UTF-8 in quoted string", "\"foo\xC0\xA0" "foo\"",
+    { { Lexer::QuotedString, "foo" } },
+    Error::InvalidUTF8, 0, 4
+  },
+
+  //
+  // Whitespace / token separation: valid
+  //
+
+  { "Two identifiers with linebreaks", "foo\nbar\n",
+    { { Lexer::Identifier, "foo" },
+      { Lexer::LineFeeds, "1" },
+      { Lexer::Identifier, "bar" },
+      { Lexer::LineFeeds, "1" },
+      { Lexer::None, 0 } },
+    Error::None, 0, 0
+  },
+
+  //
+  // Whitespace / token separation: invalid
+  //
+
+};
+
+static const int numTestCases = sizeof( testcases ) / sizeof( testcases[0] ); // number of entries in testcases[]
+
+int main( int argc, char * argv[]  ) {
+  // Without arguments, runs every entry of testcases[] through the lexer
+  // and returns 1 if any of them fails. With one argument, lexes that
+  // string and prints the tokens (manual test). Any other invocation is
+  // a usage error and also returns 1.
+  if ( argc == 2 ) { // manual test
+    const char * scursor = argv[1];
+    const char * const send = argv[1] + qstrlen( argv[1] );
+    Lexer lexer( scursor, send );
+
+    cout << "Begin" << endl;
+    while ( !lexer.atEnd() ) {
+      QString result;
+      Lexer::Token token = lexer.nextToken( result );
+      if ( lexer.error() ) {
+        cout << "Error " << token2string( token ) << ": \""
+             << lexer.error().asString().latin1() << "\" at ("
+             << lexer.error().line() << "," << lexer.error().column()
+             << ")" << endl;
+        break;
+      } else
+        cout << "Got " << token2string( token ) << ": \""
+             << result.utf8().data() << "\" at ("
+             << lexer.line() << "," << lexer.column() << ")" << endl;
+    }
+    cout << "End" << endl;
+  } else if ( argc == 1 ) { // automated test
+    bool success = true;
+    for ( int i = 0 ; i < numTestCases ; ++i ) {
+      bool ok = true;
+      const TestCase & t = testcases[i];
+      const int maxExpected = sizeof t.expected / sizeof *t.expected ;
+      const char * const send = t.string + qstrlen( t.string );
+      Lexer lexer( t.string, send, Lexer::IncludeComments );
+      cerr << t.name << ":";
+      for ( int j = 0 ; j < maxExpected && !lexer.atEnd() ; ++j ) { // never index past t.expected[]
+        QString result;
+        Lexer::Token token = lexer.nextToken( result );
+        Error error = lexer.error();
+        if ( t.expected[j].token != token ) {
+          ok = false;
+          cerr << " expected token " << token2string( t.expected[j].token )
+               << ", got " << token2string( token );
+        }
+        if ( QString::fromUtf8( t.expected[j].result ) != result ) {
+          ok = false;
+          if ( t.expected[j].result )
+            cerr << " expected string \"" << t.expected[j].result << "\"";
+          else
+            cerr << " expected null string";
+          if ( !result.utf8().isNull() )
+            cerr << ", got \"" << result.utf8().data() << "\"";
+          else
+            cerr << ", got null string";
+        }
+        if ( error && error.type() != t.expectedError ) {
+          ok = false;
+          cerr << " expected error #" << (int)t.expectedError
+               << ", got #" << (int)error.type();
+        }
+        if ( error && ( error.line() != t.errorLine || error.column() != t.errorCol ) ) {
+          ok = false;
+          cerr << " expected position (" << t.errorLine << "," << t.errorCol
+               << "), got (" << error.line() << "," << error.column() << ")";
+        }
+        if ( error )
+          goto ErrorOut;
+        if ( t.expected[j].token == Lexer::None && t.expected[j].result == 0 )
+          break; // sentinel reached
+      }
+      if ( t.expectedError != Error::None ) { // only reached if the lexer reported no error
+        ok = false;
+        cerr << " expected error #" << (int)t.expectedError << ", got none";
+      }
+      if ( !lexer.atEnd() ) {
+        ok = false;
+        cerr << " premature end of expected token list";
+      }
+    ErrorOut:
+      cerr << ( ok ? " ok" : "" ) << endl;
+      success = success && ok;
+    }
+    return success ? 0 : 1;
+  } else { // usage error
+    cerr << "usage: lexertest [ <string> ]" << endl;
+    return 1; // not exit(): no header included directly by this file declares it
+  }
+
+  return 0;
+}
diff --git a/libksieve/tests/parsertest.cpp b/libksieve/tests/parsertest.cpp
new file mode 100644
index 000000000..e2ea0fd39
--- /dev/null
+++ b/libksieve/tests/parsertest.cpp
@@ -0,0 +1,667 @@
+/*  -*- c++ -*-
+    tests/parsertest.cpp
+
+    This file is part of the testsuite of KSieve,
+    the KDE internet mail/usenet news message filtering library.
+    Copyright (c) 2003 Marc Mutz <[email protected]>
+
+    KSieve is free software; you can redistribute it and/or modify it
+    under the terms of the GNU General Public License, version 2, as
+    published by the Free Software Foundation.
+
+    KSieve is distributed in the hope that it will be useful, but
+    WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+    General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+
+    In addition, as a special exception, the copyright holders give
+    permission to link the code of this program with any edition of
+    the Qt library by Trolltech AS, Norway (or with modified versions
+    of Qt that use the same license as Qt), and distribute linked
+    combinations including the two.  You must obey the GNU General
+    Public License in all respects for all of the code used other than
+    Qt.  If you modify this file, you may extend this exception to
+    your version of the file, but you are not obligated to do so.  If
+    you do not wish to do so, delete this exception statement from
+    your version.
+*/
+#include <config.h>
+#include <ksieve/parser.h>
+using KSieve::Parser;
+
+#include <ksieve/error.h>
+#include <ksieve/scriptbuilder.h>
+
+#include <qcstring.h> // qstrlen
+#include <qstring.h>
+
+#include <iostream>
+using std::cout;
+using std::cerr;
+using std::endl;
+
+#include <cassert>
+
+enum BuilderMethod { // identifies the ScriptBuilder callback a Response expects
+  TaggedArgument, // == 0, hence also the method of zero-initialised responses
+  StringArgument,
+  NumberArgument,
+  CommandStart,
+  CommandEnd,
+  TestStart,
+  TestEnd,
+  TestListStart,
+  TestListEnd,
+  BlockStart,
+  BlockEnd,
+  StringListArgumentStart,
+  StringListEntry,
+  StringListArgumentEnd,
+  HashComment,
+  BracketComment,
+  Error, // the expected string is the name of the error type
+  Finished
+};
+
+static const unsigned int MAX_RESPONSES = 100; // capacity of TestCase::responses
+
+struct TestCase {
+  const char * name; // printed in front of the result of the test
+  const char * script; // input handed to the Parser; may be 0
+  struct Response {
+    BuilderMethod method; // callback expected at this position
+    const char * string; // expected string argument (unused by callbacks without one)
+    bool boolean; // expected multi-line flag of string arguments and list entries
+  } responses[MAX_RESPONSES]; // entries not listed below are zero-initialised
+} testCases[] = {
+
+  //
+  // single commands:
+  //
+
+  { "Null script",
+    0,
+    { { Finished, 0, false } }
+  },
+
+  { "Empty script",
+    "",
+    { { Finished, 0, false } }
+  },
+
+  { "WS-only script",
+    " \t\n\r\n",
+    { { Finished, 0, false } }
+  },
+
+  { "Bare hash comment",
+    "#comment",
+    { { HashComment, "comment", false },
+      { Finished, 0, false } }
+  },
+
+  { "Bare bracket comment",
+    "/*comment*/",
+    { { BracketComment, "comment", false },
+      { Finished, 0, false } }
+  },
+
+  { "Bare command",
+    "command;",
+    { { CommandStart, "command", false },
+      { CommandEnd, 0, false },
+      { Finished, 0, false } }
+  },
+
+  { "Bare command - missing semicolon",
+    "command",
+    { { CommandStart, "command", false },
+      { Error, "MissingSemicolonOrBlock", false } }
+  },
+
+  { "surrounded by bracket comments",
+    "/*comment*/command/*comment*/;/*comment*/",
+    { { BracketComment, "comment", false },
+      { CommandStart, "command", false },
+      { BracketComment, "comment", false },
+      { CommandEnd, 0, false },
+      { BracketComment, "comment", false },
+      { Finished, 0, false } }
+  },
+
+  { "surrounded by hash comments",
+    "#comment\ncommand#comment\n;#comment",
+    { { HashComment, "comment", false },
+      { CommandStart, "command", false },
+      { HashComment, "comment", false },
+      { CommandEnd, 0, false },
+      { HashComment, "comment", false },
+      { Finished, 0, false } }
+  },
+
+  { "single tagged argument",
+    "command :tag;",
+    { { CommandStart, "command", false },
+      { TaggedArgument, "tag", false },
+      { CommandEnd, 0, false },
+      { Finished, 0, false } }
+  },
+
+  { "single tagged argument - missing semicolon",
+    "command :tag",
+    { { CommandStart, "command", false },
+      { TaggedArgument, "tag", false },
+      { Error, "MissingSemicolonOrBlock", false } }
+  },
+
+  { "single string argument - quoted string",
+    "command \"string\";",
+    { { CommandStart, "command", false },
+      { StringArgument, "string", false /*quoted*/ },
+      { CommandEnd, 0, false },
+      { Finished, 0, false } }
+  },
+
+  { "single string argument - multi-line string",
+    "command text:\nstring\n.\n;",
+    { { CommandStart, "command", false },
+      { StringArgument, "string", true /*multiline*/ },
+      { CommandEnd, 0, false },
+      { Finished, 0, false } }
+  },
+
+  { "single number argument - 100",
+    "command 100;",
+    { { CommandStart, "command", false },
+      { NumberArgument, "100 ", false }, // number, then the quantifier or a blank if there is none
+      { CommandEnd, 0, false },
+      { Finished, 0, false } }
+  },
+
+  { "single number argument - 100k",
+    "command 100k;",
+    { { CommandStart, "command", false },
+      { NumberArgument, "102400k", false },
+      { CommandEnd, 0, false },
+      { Finished, 0, false } }
+  },
+
+  { "single number argument - 100M",
+    "command 100M;",
+    { { CommandStart, "command", false },
+      { NumberArgument, "104857600M", false },
+      { CommandEnd, 0, false },
+      { Finished, 0, false } }
+  },
+
+  { "single number argument - 2G",
+    "command 2G;",
+    { { CommandStart, "command", false },
+      { NumberArgument, "2147483648G", false },
+      { CommandEnd, 0, false },
+      { Finished, 0, false } }
+  },
+
+#if SIZEOF_UNSIGNED_LONG == 8
+#  define ULONG_MAX_STRING "18446744073709551615"
+#  define ULONG_MAXP1_STRING "18446744073709551616"
+#elif SIZEOF_UNSIGNED_LONG == 4
+#  define ULONG_MAX_STRING "4294967295"
+#  define ULONG_MAXP1_STRING "4G" // 4 * 2^30 == 4294967296 == ULONG_MAX + 1
+#else
+#  error sizeof( unsigned long ) != 4 && sizeof( unsigned long ) != 8 ???
+#endif
+
+  { "single number argument - ULONG_MAX + 1",
+    "command " ULONG_MAXP1_STRING ";",
+    { { CommandStart, "command", false },
+      { Error, "NumberOutOfRange", false } }
+  },
+
+  { "single number argument - ULONG_MAX",
+    "command " ULONG_MAX_STRING ";",
+    { { CommandStart, "command", false },
+      { NumberArgument, ULONG_MAX_STRING " ", false },
+      { CommandEnd, 0, false },
+      { Finished, 0, false } }
+  },
+
+  { "single one-element string list argument - quoted string",
+    "command [\"string\"];",
+    { { CommandStart, "command", false },
+      { StringListArgumentStart, 0, false },
+      { StringListEntry, "string", false /*quoted*/ },
+      { StringListArgumentEnd, 0, false },
+      { CommandEnd, 0, false },
+      { Finished, 0, false } }
+  },
+
+  { "single one-element string list argument - multi-line string",
+    "command [text:\nstring\n.\n];",
+    { { CommandStart, "command", false },
+      { StringListArgumentStart, 0, false },
+      { StringListEntry, "string", true /*multiline*/ },
+      { StringListArgumentEnd, 0, false },
+      { CommandEnd, 0, false },
+      { Finished, 0, false } }
+  },
+
+  { "single two-element string list argument - quoted strings",
+    "command [\"string\",\"string\"];",
+    { { CommandStart, "command", false },
+      { StringListArgumentStart, 0, false },
+      { StringListEntry, "string", false /*quoted*/ },
+      { StringListEntry, "string", false /*quoted*/ },
+      { StringListArgumentEnd, 0, false },
+      { CommandEnd, 0, false },
+      { Finished, 0, false } }
+  },
+
+  { "single two-element string list argument - multi-line strings",
+    "command [text:\nstring\n.\n,text:\nstring\n.\n];",
+    { { CommandStart, "command", false },
+      { StringListArgumentStart, 0, false },
+      { StringListEntry, "string", true /*multiline*/ },
+      { StringListEntry, "string", true /*multiline*/ },
+      { StringListArgumentEnd, 0, false },
+      { CommandEnd, 0, false },
+      { Finished, 0, false } }
+  },
+
+  { "single two-element string list argument - quoted + multi-line strings",
+    "command [\"string\",text:\nstring\n.\n];",
+    { { CommandStart, "command", false },
+      { StringListArgumentStart, 0, false },
+      { StringListEntry, "string", false /*quoted*/ },
+      { StringListEntry, "string", true /*multiline*/ },
+      { StringListArgumentEnd, 0, false },
+      { CommandEnd, 0, false },
+      { Finished, 0, false } }
+  },
+
+  { "single two-element string list argument - multi-line + quoted strings",
+    "command [text:\nstring\n.\n,\"string\"];",
+    { { CommandStart, "command", false },
+      { StringListArgumentStart, 0, false },
+      { StringListEntry, "string", true /*multiline*/ },
+      { StringListEntry, "string", false /*quoted*/ },
+      { StringListArgumentEnd, 0, false },
+      { CommandEnd, 0, false },
+      { Finished, 0, false } }
+  },
+
+  { "single bare test argument",
+    "command test;",
+    { { CommandStart, "command", false },
+      { TestStart, "test", false },
+      { TestEnd, 0, false },
+      { CommandEnd, 0, false },
+      { Finished, 0, false } }
+  },
+
+  { "one-element test list argument",
+    "command(test);",
+    { { CommandStart, "command", false },
+      { TestListStart, 0, false },
+      { TestStart, "test", false },
+      { TestEnd, 0, false },
+      { TestListEnd, 0, false },
+      { CommandEnd, 0, false },
+      { Finished, 0, false } }
+  },
+
+  { "two-element test list argument",
+    "command(test,test);",
+    { { CommandStart, "command", false },
+      { TestListStart, 0, false },
+      { TestStart, "test", false },
+      { TestEnd, 0, false },
+      { TestStart, "test", false },
+      { TestEnd, 0, false },
+      { TestListEnd, 0, false },
+      { CommandEnd, 0, false },
+      { Finished, 0, false } }
+  },
+
+  { "zero-element block",
+    "command{}",
+    { { CommandStart, "command", false },
+      { BlockStart, 0, false },
+      { BlockEnd, 0, false },
+      { CommandEnd, 0, false },
+      { Finished, 0, false } }
+  },
+
+  { "one-element block",
+    "command{command;}",
+    { { CommandStart, "command", false },
+      { BlockStart, 0, false },
+      { CommandStart, "command", false },
+      { CommandEnd, 0, false },
+      { BlockEnd, 0, false },
+      { CommandEnd, 0, false },
+      { Finished, 0, false } }
+  },
+
+  { "two-element block",
+    "command{command;command;}",
+    { { CommandStart, "command", false },
+      { BlockStart, 0, false },
+      { CommandStart, "command", false },
+      { CommandEnd, 0, false },
+      { CommandStart, "command", false },
+      { CommandEnd, 0, false },
+      { BlockEnd, 0, false },
+      { CommandEnd, 0, false },
+      { Finished, 0, false } }
+  },
+
+  { "command with a test with a test with a test",
+    "command test test test;",
+    { { CommandStart, "command", false },
+      { TestStart, "test", false },
+      { TestStart, "test", false },
+      { TestStart, "test", false },
+      { TestEnd, 0, false },
+      { TestEnd, 0, false },
+      { TestEnd, 0, false },
+      { CommandEnd, 0, false },
+      { Finished, 0, false } }
+  },
+
+};
+
+static const int numTestCases = sizeof( testCases ) / sizeof( testCases[0] ); // entries in testCases
+
+// Prints the parser callbacks to stdout as an indented, XML-like tree.
+// For visual inspection (manual tests).
+class PrintingScriptBuilder : public KSieve::ScriptBuilder {
+public:
+  PrintingScriptBuilder()
+    : KSieve::ScriptBuilder(), indent( 0 )
+  {
+    write( "<script type=\"application/sieve\">" );
+    ++indent;
+  }
+  virtual ~PrintingScriptBuilder() {}
+
+  // Arguments are printed as a single <key>value</key> element each.
+  void taggedArgument( const QString & tag ) {
+    write( "tag", tag );
+  }
+  void stringArgument( const QString & string, bool multiLine, const QString & /*fixme*/ ) {
+    write( multiLine ? "string type=\"multiline\"" : "string type=\"quoted\"", string );
+  }
+  void numberArgument( unsigned long number, char quantifier ) {
+    const QString txt = "number" + ( quantifier ? QString(" quantifier=\"%1\"").arg( quantifier ) : QString::null ) ;
+    write( txt.latin1(), QString::number( number ) );
+  }
+  // *Start callbacks open an element and indent, *End callbacks undo that.
+  void commandStart( const QString & identifier ) {
+    write( "<command>" );
+    ++indent;
+    write( "identifier", identifier );
+  }
+  void commandEnd() {
+    --indent;
+    write( "</command>" );
+  }
+  void testStart( const QString & identifier ) {
+    write( "<test>" );
+    ++indent;
+    write( "identifier", identifier );
+  }
+  void testEnd() {
+    --indent;
+    write( "</test>" );
+  }
+  void testListStart() {
+    write( "<testlist>" );
+    ++indent;
+  }
+  void testListEnd() {
+    --indent;
+    write( "</testlist>" );
+  }
+  void blockStart() {
+    write( "<block>" );
+    ++indent;
+  }
+  void blockEnd() {
+    --indent;
+    write( "</block>" );
+  }
+  void stringListArgumentStart() {
+    write( "<stringlist>" );
+    ++indent;
+  }
+  void stringListArgumentEnd() {
+    --indent;
+    write( "</stringlist>" );
+  }
+  void stringListEntry( const QString & string, bool multiline, const QString & hashComment ) {
+    stringArgument( string, multiline, hashComment );
+  }
+  void hashComment( const QString & comment ) { write( "comment type=\"hash\"", comment ); }
+  void bracketComment( const QString & comment ) { write( "comment type=\"bracket\"", comment ); }
+
+  void lineFeed() { write( "<crlf/>" ); }
+
+  void error( const KSieve::Error & error ) {
+    indent = 0; // the error message is printed flush left
+    write( ("Error: " + error.asString()).latin1() );
+  }
+  void finished() {
+    --indent;
+    write( "</script>" );
+  }
+private:
+  int indent; // current nesting depth
+  // Prints msg on a line of its own, preceded by two blanks per nesting level.
+  void write( const char * msg ) {
+    for ( int i = 2*indent ; i > 0 ; --i )
+      cout << " ";
+    cout << msg << endl;
+  }
+  // Prints value enclosed in <key>...</key>, or just <key/> if value is empty.
+  // key may carry attributes ("name attr=\"x\""); the end tag repeats the name only.
+  void write( const QCString & key, const QString & value ) {
+    if ( value.isEmpty() ) {
+      write( "<" + key + "/>" );
+      return;
+    }
+    write( "<" + key + ">" );
+    ++indent;
+    write( value.utf8().data() );
+    --indent;
+    const int space = key.find( ' ' ); // -1 if key has no attributes
+    write( "</" + ( space < 0 ? key : key.left( space ) ) + ">" );
+  }
+};
+
+
+// Verifies that methods get called with expected arguments (and in
+// expected sequence) as specified by the TestCase. For automated
+// tests.
+class VerifyingScriptBuilder : public KSieve::ScriptBuilder {
+public:
+  VerifyingScriptBuilder( const TestCase & testCase )
+    : KSieve::ScriptBuilder(),
+      mNextResponse( 0 ), mTestCase( testCase ), mOk( true )
+  {
+  }
+  virtual ~VerifyingScriptBuilder() {}
+
+  bool ok() const { return mOk; } // false once any check has failed
+  // Callbacks: compare with the current expected response, then advance.
+  void taggedArgument( const QString & tag ) {
+    checkIs( TaggedArgument );
+    checkEquals( tag );
+    ++mNextResponse;
+  }
+  void stringArgument( const QString & string, bool multiline, const QString & /*fixme*/ ) {
+    checkIs( StringArgument );
+    checkEquals( string );
+    checkEquals( multiline );
+    ++mNextResponse;
+  }
+  void numberArgument( unsigned long number, char quantifier ) {
+    checkIs( NumberArgument ); // expected string: number, then quantifier or a blank
+    checkEquals( QString::number( number ) + ( quantifier ? quantifier : ' ' ) );
+    ++mNextResponse;
+  }
+  void commandStart( const QString & identifier ) {
+    checkIs( CommandStart );
+    checkEquals( identifier );
+    ++mNextResponse;
+  }
+  void commandEnd() {
+    checkIs( CommandEnd );
+    ++mNextResponse;
+  }
+  void testStart( const QString & identifier ) {
+    checkIs( TestStart );
+    checkEquals( identifier );
+    ++mNextResponse;
+  }
+  void testEnd() {
+    checkIs( TestEnd );
+    ++mNextResponse;
+  }
+  void testListStart() {
+    checkIs( TestListStart );
+    ++mNextResponse;
+  }
+  void testListEnd() {
+    checkIs( TestListEnd );
+    ++mNextResponse;
+  }
+  void blockStart() {
+    checkIs( BlockStart );
+    ++mNextResponse;
+  }
+  void blockEnd() {
+    checkIs( BlockEnd );
+    ++mNextResponse;
+  }
+  void stringListArgumentStart() {
+    checkIs( StringListArgumentStart );
+    ++mNextResponse;
+  }
+  void stringListEntry( const QString & string, bool multiLine, const QString & /*fixme*/ ) {
+    checkIs( StringListEntry );
+    checkEquals( string );
+    checkEquals( multiLine );
+    ++mNextResponse;
+  }
+  void stringListArgumentEnd() {
+    checkIs( StringListArgumentEnd );
+    ++mNextResponse;
+  }
+  void hashComment( const QString & comment ) {
+    checkIs( HashComment );
+    checkEquals( comment );
+    ++mNextResponse;
+  }
+  void bracketComment( const QString & comment ) {
+    checkIs( BracketComment );
+    checkEquals( comment );
+    ++mNextResponse;
+  }
+  void lineFeed() {
+    // FIXME: line feeds are not verified yet
+  }
+  void error( const KSieve::Error & error ) {
+    checkIs( Error );
+    checkEquals( QString( KSieve::Error::typeToString( error.type() ) ) );
+    ++mNextResponse;
+  }
+  void finished() {
+    checkIs( Finished );
+    // do not advance: Finished is the final expected response
+  }
+
+private:
+  const TestCase::Response & currentResponse() const {
+    assert( mNextResponse < MAX_RESPONSES ); // responses[] has MAX_RESPONSES entries
+    return mTestCase.responses[mNextResponse];
+  }
+  // Records a failure unless m is the callback expected at this position.
+  void checkIs( BuilderMethod m ) {
+    if ( currentResponse().method != m ) {
+      cerr << " expected method " << (int)currentResponse().method
+	   << ", got " << (int)m;
+      mOk = false;
+    }
+  }
+  // Records a failure unless the argument equals the expected one.
+  void checkEquals( const QString & s ) {
+    if ( s != QString::fromUtf8( currentResponse().string ) ) {
+      cerr << " expected string arg \""
+	   << ( currentResponse().string ? currentResponse().string : "<null>" )
+	   << "\", got \"" << ( s.isNull() ? "<null>" : s.utf8().data() ) << "\"";
+      mOk = false;
+    }
+  }
+  void checkEquals( bool b ) {
+    if ( b != currentResponse().boolean ) {
+      cerr << " expected boolean arg <" << currentResponse().boolean
+	   << ">, got <" << b << ">";
+      mOk = false;
+    }
+  }
+
+  unsigned int mNextResponse; // index into mTestCase.responses
+  const TestCase & mTestCase; // not owned; must outlive this builder
+  bool mOk; // cleared by the first failed check, never set again
+};
+
+
+int main( int argc, char * argv[]  ) {
+  // Anything other than zero or one argument is a usage error.
+  if ( argc != 1 && argc != 2 ) {
+    cerr << "usage: parsertest [ <string> ]" << endl;
+    exit( 1 );
+  }
+
+  if ( argc == 2 ) {
+    // Manual test: parse argv[1] and print the resulting callbacks.
+    const char * first = argv[1];
+    const char * const last = first + qstrlen( first );
+    Parser manualParser( first, last );
+    PrintingScriptBuilder printer;
+    manualParser.setScriptBuilder( &printer );
+    cout << ( manualParser.parse() ? "ok" : "bad" ) << endl;
+    return 0;
+  }
+
+  // Automated test: run every entry of testCases through a verifying
+  // builder. A case counts as failed only if the builder saw unexpected
+  // callbacks; a failed parse with the expected callbacks prints "xfail".
+  bool allPassed = true;
+  for ( int i = 0 ; i < numTestCases ; ++i ) {
+    const TestCase & testCase = testCases[i];
+    cerr << testCase.name << ":";
+
+    VerifyingScriptBuilder verifier( testCase );
+    const char * const script = testCase.script;
+    Parser parser( script, script + qstrlen( script ) );
+    parser.setScriptBuilder( &verifier );
+    const bool parsed = parser.parse();
+
+    if ( !verifier.ok() )
+      allPassed = false;
+    else if ( parsed )
+      cerr << " ok";
+    else
+      cerr << " xfail";
+    cerr << endl;
+  }
+  if ( !allPassed )
+    exit( 1 );
+  return 0;
+}