poxml/antlr/src/Parser.cpp


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304

/**
 * <b>SOFTWARE RIGHTS</b>
 * <p>
 * ANTLR 2.6.0 MageLang Insitute, 1998
 * <p>
 * We reserve no legal rights to the ANTLR--it is fully in the
 * public domain. An individual or company may do whatever
 * they wish with source code distributed with ANTLR or the
 * code generated by ANTLR, including the incorporation of
 * ANTLR, or its output, into commerical software.
 * <p>
 * We encourage users to develop software with ANTLR. However,
 * we do ask that credit is given to us for developing
 * ANTLR. By "credit", we mean that if you use ANTLR or
 * incorporate any source code into one of your programs
 * (commercial product, research project, or otherwise) that
 * you acknowledge this fact somewhere in the documentation,
 * research report, etc... If you like ANTLR and have
 * developed a nice tool with the output, please mention that
 * you developed it using ANTLR. In addition, we ask that the
 * headers remain intact in our source code. As long as these
 * guidelines are kept, we expect to continue enhancing this
 * system and expect to make other tools available as they are
 * completed.
 * <p>
 * The ANTLR gang:
 * @version ANTLR 2.6.0 MageLang Insitute, 1998
 * @author Terence Parr, <a href=http://www.MageLang.com>MageLang Institute</a>
 * @author <br>John Lilley, <a href=http://www.Empathy.com>Empathy Software</a>
 * @author <br><a href="mailto:[email protected]">Pete Wells</a>
 */

#include "antlr/Parser.h"

#include "antlr/BitSet.h"
#include "antlr/TokenBuffer.h"
#include "antlr/MismatchedTokenException.h"
//#include "antlr/ASTFactory.h"
#include <iostream>
#include <stdlib.h>

ANTLR_BEGIN_NAMESPACE(antlr)
ANTLR_C_USING(exit)

/**A generic ANTLR parser (LL(k) for k>=1) containing a bunch of
 * utility routines useful at any lookahead depth.  We distinguish between
 * the LL(1) and LL(k) parsers because of efficiency.  This may not be
 * necessary in the near future.
 *
 * Each parser object contains the state of the parse including a lookahead
 * cache (the form of which is determined by the subclass), whether or
 * not the parser is in guess mode, where tokens come from, etc...
 *
 * <p>
 * During <b>guess</b> mode, the current lookahead token(s) and token type(s)
 * cache must be saved because the token stream may not have been informed
 * to save the token (via <tt>mark</tt>) before the <tt>try</tt> block.
 * Guessing is started by:
 * <ol>
 * <li>saving the lookahead cache.
 * <li>marking the current position in the TokenBuffer.
 * <li>increasing the guessing level.
 * </ol>
 *
 * After guessing, the parser state is restored by:
 * <ol>
 * <li>restoring the lookahead cache.
 * <li>rewinding the TokenBuffer.
 * <li>decreasing the guessing level.
 * </ol>
 *
 * @see antlr.Token
 * @see antlr.TokenBuffer
 * @see antlr.TokenStream
 * @see antlr.LL1Parser
 * @see antlr.LLkParser
 */

bool DEBUG_PARSER=false;

Parser::Parser(TokenBuffer& input)
: inputState(new ParserInputState(input)), traceDepth(0)
{
}

Parser::Parser(TokenBuffer* input)
: inputState(new ParserInputState(input)), traceDepth(0)
{
}

Parser::Parser(const ParserSharedInputState& state)
: inputState(state), traceDepth(0)
{
}

Parser::~Parser()
{
}

void Parser::setTokenNames(const char** tokenNames_)
{
	while (*tokenNames_) {
		tokenNames.push_back(*(tokenNames_++));
	}
}

/** Consume tokens until one matches the given token */
void Parser::consumeUntil(int tokenType)
{
	while (LA(1) != Token::EOF_TYPE && LA(1) != tokenType)
		consume();
}

/** Consume tokens until one matches the given token set */
void Parser::consumeUntil(const BitSet& set)
{
	while (LA(1) != Token::EOF_TYPE && !set.member(LA(1)))
		consume();
}

/** Get the AST return value squirreled away in the parser */
RefAST Parser::getAST()
{
	return returnAST;
}

ASTFactory& Parser::getASTFactory()
{
	return astFactory;
}

ANTLR_USE_NAMESPACE(std)string Parser::getFilename() const
{
	return inputState->filename;
}

ParserSharedInputState Parser::getInputState() const
{
	return inputState;
}

ANTLR_USE_NAMESPACE(std)string Parser::getTokenName(int num) const
{
	return tokenNames[num];
}

ANTLR_USE_NAMESPACE(std)vector<ANTLR_USE_NAMESPACE(std)string> Parser::getTokenNames() const
{
	return tokenNames;
}

// Forwarded to TokenBuffer
int Parser::mark()
{
	return inputState->getInput().mark();
}

/**Make sure current lookahead symbol matches token type <tt>t</tt>.
 * Throw an exception upon mismatch, which is catch by either the
 * error handler or by the syntactic predicate.
 */
void Parser::match(int t)
{
	if ( DEBUG_PARSER )
	{
		traceIndent();
		ANTLR_USE_NAMESPACE(std)cout << "enter match(" << t << ") with LA(1)=" << LA(1) << ANTLR_USE_NAMESPACE(std)endl;
	}
	if ( LA(1)!=t ) {
		if ( DEBUG_PARSER )
		{
			traceIndent();
			ANTLR_USE_NAMESPACE(std)cout << "token mismatch: " << LA(1) << "!=" << t << ANTLR_USE_NAMESPACE(std)endl;
		}
		throw MismatchedTokenException(tokenNames, LT(1), t, false, getFilename());
	} else {
		// mark token as consumed -- fetch next token deferred until LA/LT
		consume();
	}
}

/**Make sure current lookahead symbol matches the given set
 * Throw an exception upon mismatch, which is catch by either the
 * error handler or by the syntactic predicate.
 */
void Parser::match(const BitSet& b)
{
	if ( DEBUG_PARSER )
	{
		traceIndent();
		ANTLR_USE_NAMESPACE(std)cout << "enter match(" << "bitset" /*b.toString()*/
			  << ") with LA(1)=" << LA(1) << ANTLR_USE_NAMESPACE(std)endl;
	}
	if ( !b.member(LA(1)) ) {
		if ( DEBUG_PARSER )
		{
			traceIndent();
			ANTLR_USE_NAMESPACE(std)cout << "token mismatch: " << LA(1) << " not member of "
				  << "bitset" /*b.toString()*/ << ANTLR_USE_NAMESPACE(std)endl;
		}
		throw MismatchedTokenException(tokenNames, LT(1), b, false, getFilename());
	} else {
		// mark token as consumed -- fetch next token deferred until LA/LT
		consume();
	}
}

void Parser::matchNot(int t)
{
	if ( LA(1)==t ) {
		// Throws inverted-sense exception
		throw MismatchedTokenException(tokenNames, LT(1), t, true, getFilename());
	} else {
		// mark token as consumed -- fetch next token deferred until LA/LT
		consume();
	}
}

void Parser::panic()
{
	ANTLR_USE_NAMESPACE(std)cerr << "Parser: panic" << ANTLR_USE_NAMESPACE(std)endl;
	exit(1);
}

/** Parser error-reporting function can be overridden in subclass */
void Parser::reportError(const RecognitionException& ex)
{
	ANTLR_USE_NAMESPACE(std)cerr << ex.toString().c_str() << ANTLR_USE_NAMESPACE(std)endl;
}

/** Parser error-reporting function can be overridden in subclass */
void Parser::reportError(const ANTLR_USE_NAMESPACE(std)string& s)
{
	if ( getFilename().empty() )
		ANTLR_USE_NAMESPACE(std)cerr << "error: " << s.c_str() << ANTLR_USE_NAMESPACE(std)endl;
	else
		ANTLR_USE_NAMESPACE(std)cerr << getFilename().c_str() << ": error: " << s.c_str() << ANTLR_USE_NAMESPACE(std)endl;
}

/** Parser warning-reporting function can be overridden in subclass */
void Parser::reportWarning(const ANTLR_USE_NAMESPACE(std)string& s)
{
	if ( getFilename().empty() )
		ANTLR_USE_NAMESPACE(std)cerr << "warning: " << s.c_str() << ANTLR_USE_NAMESPACE(std)endl;
	else
		ANTLR_USE_NAMESPACE(std)cerr << getFilename().c_str() << ": warning: " << s.c_str() << ANTLR_USE_NAMESPACE(std)endl;
}

void Parser::rewind(int pos)
{
	inputState->getInput().rewind(pos);
}

/** Set the object used to generate ASTs */
//	void setASTFactory(ASTFactory astFactory_);

/** Specify the type of node to create during tree building */
void Parser::setASTNodeFactory(ASTFactory::factory_type factory)
{
	astFactory.setASTNodeFactory(factory);
}

void Parser::setFilename(const ANTLR_USE_NAMESPACE(std)string& f)
{
	inputState->filename = f;
}

void Parser::setInputState(ParserSharedInputState state)
{
	inputState = state;
}

/** Set or change the input token buffer */
//	void setTokenBuffer(TokenBuffer<Token>* t);

void Parser::traceIndent()
{
	for( int i = 0; i < traceDepth; i++ )
		ANTLR_USE_NAMESPACE(std)cout << " ";
}

void Parser::traceIn(const ANTLR_USE_NAMESPACE(std)string& rname)
{
	traceDepth++;

	for( int i = 0; i < traceDepth; i++ )
		ANTLR_USE_NAMESPACE(std)cout << " ";

	ANTLR_USE_NAMESPACE(std)cout << "> " << rname.c_str() << "; LA(1)==" << LT(1)->getText().c_str() <<
			((inputState->guessing>0)?" [guessing]":"") << ANTLR_USE_NAMESPACE(std)endl;
}

void Parser::traceOut(const ANTLR_USE_NAMESPACE(std)string& rname)
{
	for( int i = 0; i < traceDepth; i++ )
		ANTLR_USE_NAMESPACE(std)cout << " ";

	ANTLR_USE_NAMESPACE(std)cout << "< " << rname.c_str() << "; LA(1)==" << LT(1)->getText().c_str() <<
			((inputState->guessing>0)?" [guessing]":"") << ANTLR_USE_NAMESPACE(std)endl;

	traceDepth--;
}

ANTLR_END_NAMESPACE