diff options
Diffstat (limited to 'kturtle/src/lexer.cpp')
-rw-r--r-- | kturtle/src/lexer.cpp | 411 |
1 files changed, 411 insertions, 0 deletions
diff --git a/kturtle/src/lexer.cpp b/kturtle/src/lexer.cpp new file mode 100644 index 00000000..1a480641 --- /dev/null +++ b/kturtle/src/lexer.cpp @@ -0,0 +1,411 @@ +/* + Copyright (C) 2003 by Walter Schreppers + Copyright (C) 2004 by Cies Breijs + + This program is free software; you can redistribute it and/or + modify it under the terms of version 2 of the GNU General Public + License as published by the Free Software Foundation. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#include <qdom.h> +#include <qfile.h> + +#include <kdebug.h> +#include <klocale.h> + +#include "settings.h" + +#include "lexer.h" + + +Lexer::Lexer(QTextIStream& iStream) +{ + inputStream = &iStream; + row = 1; + col = 1; + prevCol = 1; + translate = new Translate(); +} + + +Token Lexer::lex() +{ + skipSpaces(); // skips the white space that it quite likely (indentation) infront of the Token + + Token currentToken; + currentToken.type = tokNotSet; // not really needed + currentToken.look = ""; + currentToken.value = 0; + currentToken.start.row = row; + currentToken.start.col = col; + + QChar currentChar = getChar(); + + if ( inputStream->atEnd() ) + { + kdDebug(0)<<"Lexer::lex(), got EOF."<<endl; + currentToken.type = tokEOF; + currentToken.look = "EOF"; + ungetChar(currentChar); // unget the currentChar and fix the row/col values + return currentToken; + } + + if (currentChar == '#') + { + while ( !inputStream->atEnd() && !(currentChar == '\x0a' || currentChar == '\n') ) + currentChar = getChar(); + } + + // if (currentChar.category() == QChar::Separator_Line) somehow doesnt work + if (currentChar == '\x0a' || currentChar == '\n') + { + currentToken.type = tokEOL; + currentToken.look = "EOL"; + } + else if (currentChar.isLetter() || currentChar == '[' || currentChar == ']') + { + ungetChar(currentChar); + // sets currentToken.look by reference, and set the currentToken.type to tokUnknown + currentToken.type = getWord(currentToken.look); + setTokenType(currentToken); // gets the actual tokenType + } + else if ( currentChar.isNumber() ) + { + ungetChar(currentChar); + // set currentToken.value/look by reference, and set the currentToken.type to tokNumber + currentToken.type = getNumber(currentToken.value, currentToken.look); + } + else if (currentChar == '>') + { + currentChar = getChar(); + if (currentChar == '=') + { + currentToken.type = tokGe; + currentToken.look = ">="; + } + else + { + ungetChar(currentChar); + currentToken.type = tokGt; + currentToken.look = ">"; + } + } + else if (currentChar == '<') + { + currentChar = getChar(); + if ( currentChar == '=' ) + { + currentToken.type = tokLe; + currentToken.look = "<="; + } + else + { + ungetChar(currentChar); + currentToken.type = tokLt; + currentToken.look = ">"; + } + } + else if (currentChar == '!') + { + currentChar = getChar(); + if (currentChar == '=') + { + currentToken.type = tokNe; + currentToken.look = "!="; + } + else + { + ungetChar(currentChar); + currentToken.type = tokNot; + currentToken.look = "!"; + } + } + else if (currentChar == '=') + { + currentChar = getChar(); + if (currentChar == '=') + { + currentToken.type = tokEq; + currentToken.look = "=="; + } + else + { + ungetChar(currentChar); + currentToken.type = tokAssign; + currentToken.look = "="; + } + } + else if (currentChar == '(') + { + currentToken.type = tokBraceOpen; + currentToken.look = "("; + } + else if (currentChar == ')') + { + currentToken.type = tokBraceClose; + currentToken.look = ")"; + } + else if (currentChar == '+') + { + currentToken.type = tokPlus; + currentToken.look = "+"; + } + else if (currentChar == '-') + { + currentToken.type = tokMinus; + currentToken.look = "-"; + } + else if (currentChar == '*') + { + currentToken.type = tokMul; + currentToken.look = "*"; + } + else if (currentChar == '/') + { + currentToken.type = tokDev; + currentToken.look = "/"; + } + else if (currentChar == ',') + { + currentToken.type = tokComma; + currentToken.look = ","; + } + else if (currentChar == '"') + { + getString(currentToken); + } + else + { + currentToken.type = tokUnknown; + currentToken.look = currentChar; + } + + currentToken.end.row = row; + currentToken.end.col = col; + return currentToken; +} + + + +// PRIVATEs + +QChar Lexer::getChar() +{ + QChar c; + if ( !putBackChar.isNull() ) + { + c = putBackChar; // use the char that is stored to be put back + // kdDebug(0)<<"Lexer::getChar(), restored: '"<<c<<"' @ ("<<row<<", "<<col<<")"<<endl; + putBackChar = QChar(); // and set putBackChar back to NULL + if (c == '\x0a' || c == '\n') + { + row++; + prevCol = col; + col = 1; + } + else + { + col++; + } + } + else + { + *inputStream >> c; // take a QChar of the inputStream + // kdDebug(0)<<"Lexer::getChar(): '"<<c<<"' @ ("<<row<<", "<<col<<")"<<endl; + if (c == '\x0a' || c == '\n') + { + row++; + prevCol = col; + col = 1; + } + else + { + col++; + } + } + return c; +} + +void Lexer::ungetChar(QChar c) +{ + if (c == '\x0a' || c == '\n') + { + row--; + col = prevCol; + } + else + { + col--; + } + putBackChar = c; + // kdDebug(0)<<"Lexer::ungetChar(), saved char: '"<<c<<"' and steped back to ("<<row<<", "<<col<<")"<<endl; +} + +int Lexer::getWord(QString& word) +{ + // kdDebug(0)<<"Lexer::getWord()"<<endl; + QChar currentChar = getChar(); + if ( currentChar.isLetter() || currentChar == '[' || currentChar == ']' ) { + while ( ( currentChar.isLetterOrNumber() || currentChar == '_' || currentChar == '[' || currentChar == ']' ) && !inputStream->atEnd() ) + { + word += currentChar; + currentChar = getChar(); + } + kdDebug(0)<<"Lexer::getWord(), got NAME: '"<<word<<"'"<<endl; + ungetChar(currentChar); //read one too much + return tokUnknown; // returns tokUnknown, actual token is to be determained later in Lexer::setTokenType + } + else return tokError; +} + +void Lexer::setTokenType(Token& currentToken) +{ + if (currentToken.type == tokUnknown) + { + // make lowercase copy of the word as it was found in the inputStream + QString k = currentToken.look.lower(); + // if the key is an alias translate that alias to a key + if ( !translate->alias2key(k).isEmpty() ) k = translate->alias2key(k); + + if (k == translate->name2key("begin") ) currentToken.type = tokBegin; + else if (k == translate->name2key("end") ) currentToken.type = tokEnd; + else if (k == translate->name2key("while") ) currentToken.type = tokWhile; + else if (k == translate->name2key("if") ) currentToken.type = tokIf; + else if (k == translate->name2key("else") ) currentToken.type = tokElse; + else if (k == translate->name2key("for") ) currentToken.type = tokFor; + else if (k == translate->name2key("to") ) currentToken.type = tokTo; + else if (k == translate->name2key("step") ) currentToken.type = tokStep; + else if (k == translate->name2key("and") ) currentToken.type = tokAnd; + else if (k == translate->name2key("or") ) currentToken.type = tokOr; + else if (k == translate->name2key("not") ) currentToken.type = tokNot; + else if (k == translate->name2key("return") ) currentToken.type = tokReturn; + else if (k == translate->name2key("break") ) currentToken.type = tokBreak; + else if (k == translate->name2key("run") ) currentToken.type = tokRun; + else if (k == translate->name2key("foreach") ) currentToken.type = tokForEach; + else if (k == translate->name2key("in") ) currentToken.type = tokIn; + + else if (k == translate->name2key("learn") ) currentToken.type = tokLearn; + + else if (k == translate->name2key("clear") ) currentToken.type = tokClear; + else if (k == translate->name2key("go") ) currentToken.type = tokGo; + else if (k == translate->name2key("gox") ) currentToken.type = tokGoX; + else if (k == translate->name2key("goy") ) currentToken.type = tokGoY; + else if (k == translate->name2key("forward") ) currentToken.type = tokForward; + else if (k == translate->name2key("backward") ) currentToken.type = tokBackward; + else if (k == translate->name2key("direction") ) currentToken.type = tokDirection; + else if (k == translate->name2key("turnleft") ) currentToken.type = tokTurnLeft; + else if (k == translate->name2key("turnright") ) currentToken.type = tokTurnRight; + else if (k == translate->name2key("center") ) currentToken.type = tokCenter; + else if (k == translate->name2key("setpenwidth") ) currentToken.type = tokSetPenWidth; + else if (k == translate->name2key("penup") ) currentToken.type = tokPenUp; + else if (k == translate->name2key("pendown") ) currentToken.type = tokPenDown; + else if (k == translate->name2key("setfgcolor") ) currentToken.type = tokSetFgColor; + else if (k == translate->name2key("setbgcolor") ) currentToken.type = tokSetBgColor; + else if (k == translate->name2key("resizecanvas") ) currentToken.type = tokResizeCanvas; + else if (k == translate->name2key("spriteshow") ) currentToken.type = tokSpriteShow; + else if (k == translate->name2key("spritehide") ) currentToken.type = tokSpriteHide; + else if (k == translate->name2key("spritepress") ) currentToken.type = tokSpritePress; + else if (k == translate->name2key("spritechange") ) currentToken.type = tokSpriteChange; + + else if (k == translate->name2key("do") ) currentToken.type = tokDo; // dummy commands + + else if (k == translate->name2key("message") ) currentToken.type = tokMessage; + else if (k == translate->name2key("inputwindow") ) currentToken.type = tokInputWindow; + else if (k == translate->name2key("print") ) currentToken.type = tokPrint; + else if (k == translate->name2key("fonttype") ) currentToken.type = tokFontType; + else if (k == translate->name2key("fontsize") ) currentToken.type = tokFontSize; + else if (k == translate->name2key("repeat") ) currentToken.type = tokRepeat; + else if (k == translate->name2key("random") ) currentToken.type = tokRandom; + else if (k == translate->name2key("wait") ) currentToken.type = tokWait; + else if (k == translate->name2key("wrapon") ) currentToken.type = tokWrapOn; + else if (k == translate->name2key("wrapoff") ) currentToken.type = tokWrapOff; + else if (k == translate->name2key("reset") ) currentToken.type = tokReset; + else + { + kdDebug(0)<<"Lexer::setTokenType, found UNKNOWN word @ ("<<currentToken.start.row<<", "<<currentToken.start.col<<"), can be anything"<<endl; + // t.type = tokUnknown; is allready + } + + kdDebug(0)<<"Lexer::setTokenType, found tok-number: '"<<currentToken.type<<"' with the key: '"<<k<<"' @ ("<<currentToken.start.row<<", "<<currentToken.start.col<<")"<<endl; + } +} + + +void Lexer::skipSpaces() +{ + // kdDebug(0)<<"Lexer::skipSpaces(), skipping SPACES."<<endl; + QChar currentChar = getChar(); + // when the Separator_* groups can be identified in the QChar thing would be easier + while ( !inputStream->atEnd() && ( currentChar.isSpace() && !(currentChar == '\x0a' || currentChar == '\n') ) ) + { + currentChar = getChar(); + } + ungetChar(currentChar); // unget the tokEOL we likely just found +} + + +int Lexer::getNumber(Value& num, QString& look) +{ + // by reference the value (Value) and look part are set + // kdDebug(0)<<"Lexer::getNumber()"<<endl; + bool hasPoint = false; + QChar currentChar = getChar(); + if ( currentChar.isNumber() ) + { + while ( ( currentChar.isNumber() || (currentChar == '.' && !hasPoint) ) && !inputStream->atEnd() ) + { + if (currentChar == '.') + { + hasPoint = true; + } + look += currentChar; + currentChar = getChar(); + } + ungetChar(currentChar); //read one too much + num.setNumber( look.toDouble() ); + kdDebug(0)<<"Lexer::getNumber(), got NUMBER: '"<<num.Number()<<"'"<<endl; + return tokNumber; + } + else return tokError; +} + +void Lexer::getString(Token& currentToken) +{ + QString str = "\""; // start with a " cauz it just got lost + QChar currentChar = QChar(); // start empty + while ( currentChar != '"' && !(currentChar == '\x0a' || currentChar == '\n') && !inputStream->atEnd() ) + { + currentChar = getChar(); + if (currentChar == '\\') // escape sequence + { + currentChar = getChar(); + switch (currentChar) + { + case 'n': str += '\n'; break; + case 't': str += '\t'; break; + case 'f': str += '\f'; break; + case '"': str += '"'; break; + } + } + else if (currentChar == '\x0a' || currentChar == '\n') // if the user forgot to delimit the string + { + ungetChar(currentChar); + break; + } + else str += currentChar; + } + currentToken.type = tokString; + currentToken.look = str; + + kdDebug(0)<<"Lexer::getStringConstant, got STRINGCONSTANT: "<<currentToken.look<<"'"<<endl; +} + |