diff options
Diffstat (limited to 'lib/antlr/src/TokenStreamRewriteEngine.cpp')
-rw-r--r-- | lib/antlr/src/TokenStreamRewriteEngine.cpp | 214 |
1 files changed, 214 insertions, 0 deletions
diff --git a/lib/antlr/src/TokenStreamRewriteEngine.cpp b/lib/antlr/src/TokenStreamRewriteEngine.cpp new file mode 100644 index 00000000..2f171eb6 --- /dev/null +++ b/lib/antlr/src/TokenStreamRewriteEngine.cpp @@ -0,0 +1,214 @@ +#include <antlr/config.hpp> + +#include <string> +#include <list> +#include <vector> +#include <map> +#include <utility> +#include <iostream> +#include <iterator> +#include <sstream> +#include <cassert> + +#include <antlr/TokenStream.hpp> +#include <antlr/TokenWithIndex.hpp> +#include <antlr/BitSet.hpp> +#include <antlr/TokenStreamRewriteEngine.hpp> + +#ifdef ANTLR_CXX_SUPPORTS_NAMESPACE +namespace antlr { +#endif + +#ifndef NO_STATIC_CONSTS +const size_t TokenStreamRewriteEngine::MIN_TOKEN_INDEX = 0; +const int TokenStreamRewriteEngine::PROGRAM_INIT_SIZE = 100; +#endif + +const char* TokenStreamRewriteEngine::DEFAULT_PROGRAM_NAME = "default"; + +namespace { + + struct compareOperationIndex { + typedef TokenStreamRewriteEngine::RewriteOperation RewriteOperation; + bool operator() ( const RewriteOperation* a, const RewriteOperation* b ) const + { + return a->getIndex() < b->getIndex(); + } + }; + struct dumpTokenWithIndex { + dumpTokenWithIndex( ANTLR_USE_NAMESPACE(std)ostream& o ) : out(o) {} + void operator() ( const RefTokenWithIndex& t ) { + out << "[txt='" << t->getText() << "' tp=" << t->getType() << " idx=" << t->getIndex() << "]\n"; + } + ANTLR_USE_NAMESPACE(std)ostream& out; + }; +} + +TokenStreamRewriteEngine::TokenStreamRewriteEngine(TokenStream& upstream) +: stream(upstream) +, index(MIN_TOKEN_INDEX) +, tokens() +, programs() +, discardMask() +{ +} + +TokenStreamRewriteEngine::TokenStreamRewriteEngine(TokenStream& upstream, size_t initialSize ) +: stream(upstream) +, index(MIN_TOKEN_INDEX) +, tokens(initialSize) +, programs() +, discardMask() +{ +} + +RefToken TokenStreamRewriteEngine::nextToken( void ) +{ + RefTokenWithIndex t; + // suck tokens until end of stream or we find a non-discarded token + do { + t = RefTokenWithIndex(stream.nextToken()); + if ( t ) + { + t->setIndex(index); // what is t's index in list? + if ( t->getType() != Token::EOF_TYPE ) { + tokens.push_back(t); // track all tokens except EOF + } + index++; // move to next position + } + } while ( t && discardMask.member(t->getType()) ); + return RefToken(t); +} + +void TokenStreamRewriteEngine::rollback( const std::string& programName, + size_t instructionIndex ) +{ + program_map::iterator rewrite = programs.find(programName); + if( rewrite != programs.end() ) + { + operation_list& prog = rewrite->second; + operation_list::iterator + j = prog.begin(), + end = prog.end(); + + std::advance(j,instructionIndex); + if( j != end ) + prog.erase(j, end); + } +} + +void TokenStreamRewriteEngine::originalToStream( std::ostream& out, + size_t start, + size_t end ) const +{ + token_list::const_iterator s = tokens.begin(); + std::advance( s, start ); + token_list::const_iterator e = s; + std::advance( e, end-start ); + std::for_each( s, e, tokenToStream(out) ); +} + +void TokenStreamRewriteEngine::toStream( std::ostream& out, + const std::string& programName, + size_t firstToken, + size_t lastToken ) const +{ + if( tokens.size() == 0 ) + return; + + program_map::const_iterator rewriter = programs.find(programName); + + if ( rewriter == programs.end() ) + return; + + // get the prog and some iterators in it... + const operation_list& prog = rewriter->second; + operation_list::const_iterator + rewriteOpIndex = prog.begin(), + rewriteOpEnd = prog.end(); + + size_t tokenCursor = firstToken; + // make sure we don't run out of the tokens we have... + if( lastToken > (tokens.size() - 1) ) + lastToken = tokens.size() - 1; + + while ( tokenCursor <= lastToken ) + { +// std::cout << "tokenCursor = " << tokenCursor << " first prog index = " << (*rewriteOpIndex)->getIndex() << std::endl; + + if( rewriteOpIndex != rewriteOpEnd ) + { + size_t up_to_here = std::min(lastToken,(*rewriteOpIndex)->getIndex()); + while( tokenCursor < up_to_here ) + out << tokens[tokenCursor++]->getText(); + } + while ( rewriteOpIndex != rewriteOpEnd && + tokenCursor == (*rewriteOpIndex)->getIndex() && + tokenCursor <= lastToken ) + { + tokenCursor = (*rewriteOpIndex)->execute(out); + ++rewriteOpIndex; + } + if( tokenCursor <= lastToken ) + out << tokens[tokenCursor++]->getText(); + } + // std::cout << "Handling tail operations # left = " << std::distance(rewriteOpIndex,rewriteOpEnd) << std::endl; + // now see if there are operations (append) beyond last token index + std::for_each( rewriteOpIndex, rewriteOpEnd, executeOperation(out) ); + rewriteOpIndex = rewriteOpEnd; +} + +void TokenStreamRewriteEngine::toDebugStream( std::ostream& out, + size_t start, + size_t end ) const +{ + token_list::const_iterator s = tokens.begin(); + std::advance( s, start ); + token_list::const_iterator e = s; + std::advance( e, end-start ); + std::for_each( s, e, dumpTokenWithIndex(out) ); +} + +void TokenStreamRewriteEngine::addToSortedRewriteList( const std::string& programName, + RewriteOperation* op ) +{ + program_map::iterator rewrites = programs.find(programName); + // check if we got the program already.. + if ( rewrites == programs.end() ) + { + // no prog make a new one... + operation_list ops; + ops.push_back(op); + programs.insert(std::make_pair(programName,ops)); + return; + } + operation_list& prog = rewrites->second; + + if( prog.empty() ) + { + prog.push_back(op); + return; + } + + operation_list::iterator i, end = prog.end(); + i = end; + --i; + // if at or beyond last op's index, just append + if ( op->getIndex() >= (*i)->getIndex() ) { + prog.push_back(op); // append to list of operations + return; + } + i = prog.begin(); + + if( i != end ) + { + operation_list::iterator pos = std::upper_bound( i, end, op, compareOperationIndex() ); + prog.insert(pos,op); + } + else + prog.push_back(op); +} + +#ifdef ANTLR_CXX_SUPPORTS_NAMESPACE +} +#endif |