Five minute introduction to ANTLR 3
What is ANTLR 3?
ANTLR – - ANother Tool for Language Recognition – - is a tool that is used in the construction of formal language software tools (or just language tools) such as translators, compilers, recognizers and, static/dynamic program analyzers. Developers use ANTLR to reduce the time and effort needed to build and maintain language processing tools. In common terminology, ANTLR is a compiler generator or compiler compiler (in the tradition of tools such as Lex/Flex and Yacc/Bison) and it is used to generate the source code for language recognizers, analyzers and translators from language specifications. ANTLR takes as it's input a grammar - a precise description of a language augmented with semantic actions - and generates source code files and other auxiliary files. The target language of the generated source code (e.g. Java, C/C++, C#, Python, Ruby) is specified in the grammar.
...
Java | Code Block |
---|
grammar SimpleCalc;
tokens {
PLUS = '+' ;
MINUS = '-' ;
MULT = '*' ;
DIV = '/' ;
}
@members {
public static void main(String[] args) throws Exception {
SimpleCalcLexer lex = new SimpleCalcLexer(new ANTLRFileStream(args[0]));
CommonTokenStream tokens = new CommonTokenStream(lex);
SimpleCalcParser parser = new SimpleCalcParser(tokens);
try {
parser.expr();
} catch (RecognitionException e) {
e.printStackTrace();
}
}
}
/*------------------------------------------------------------------
* PARSER RULES
*------------------------------------------------------------------*/
expr : term ( ( PLUS | MINUS ) term )* ;
term : factor ( ( MULT | DIV ) factor )* ;
factor : NUMBER ;
/*------------------------------------------------------------------
* LEXER RULES
*------------------------------------------------------------------*/
NUMBER : (DIGIT)+ ;
WHITESPACE : ( '\t' | ' ' | '\r' | '\n'| '\u000C' )+ { $channel = HIDDEN; } ;
fragment DIGIT : '0'..'9' ;
|
|
---|
C# | Code Block |
---|
grammar SimpleCalc;
options {
language=CSharp;
}
tokens {
PLUS = '+' ;
MINUS = '-' ;
MULT = '*' ;
DIV = '/' ;
}
@members {
public static void Main(string[] args) {
SimpleCalcLexer lex = new SimpleCalcLexer(new ANTLRFileStream(args[0]));
CommonTokenStream tokens = new CommonTokenStream(lex);
SimpleCalcParser parser = new SimpleCalcParser(tokens);
try {
parser.expr();
} catch (RecognitionException e) {
Console.Error.WriteLine(e.StackTrace);
}
}
}
/*------------------------------------------------------------------
* PARSER RULES
*------------------------------------------------------------------*/
expr : term ( ( PLUS | MINUS ) term )* ;
term : factor ( ( MULT | DIV ) factor )* ;
factor : NUMBER ;
/*------------------------------------------------------------------
* LEXER RULES
*------------------------------------------------------------------*/
NUMBER : (DIGIT)+ ;
WHITESPACE : ( '\t' | ' ' | '\r' | '\n'| '\u000C' )+ { $channel = HIDDEN; } ;
fragment DIGIT : '0'..'9' ;
|
|
---|
Objective-C | To be written. Volunteers?
Code Block |
---|
grammar SimpleCalc;
options
{
language=ObjC;
}
OR : '||' ;
|
|
---|
C | Code Block |
---|
grammar SimpleCalc;
options
{
language=C;
}
tokens {
PLUS = '+' ;
MINUS = '-' ;
MULT = '*' ;
DIV = '/' ;
}
@members { int main(int argc, char * argv[])
{ #include "SimpleCalcLexer.h"
pANTLR3_INPUT_STREAM input = antlr3AsciiFileStreamNew(argv[1]);
pSimpleCalcLexer lex = SimpleCalcLexerNew(input);
pANTLR3_COMMON_TOKEN_STREAM tokens = antlr3CommonTokenStreamSourceNew(ANTLR3_SIZE_HINT, lex->pLexer->tokSource);
pSimpleCalcParser parser = SimpleCalcParserNew(tokens);
parser->expr(parser);
// Must manually clean up
parser->free(parser);
tokens->free(tokens);
lex->free(lex);
input->close(input);
return 0;
}
}
/*------------------------------------------------------------------
* PARSER RULES
*------------------------------------------------------------------*/
expr : term ( ( PLUS | MINUS ) term )* ;
term : factor ( ( MULT | DIV ) factor )* ;
factor : NUMBER ;
/*------------------------------------------------------------------
* LEXER RULES
*------------------------------------------------------------------*/
NUMBER : (DIGIT)+ ;
WHITESPACE : ( '\t' | ' ' | '\r' | '\n'| '\u000C' )+ { $channel = HIDDEN; } ;
fragment DIGIT : '0'..'9' ;
|
|
---|
...
You hide the token by setting the token's $channel
flag to the constant HIDDEN
. This requires adding a little code to the lexer, which you do by adding curly brackets:
Code Block |
---|
|
WHITESPACE : ( '\t' | ' ' | '\r' | '\n'| '\u000C' )+ { $channel = HIDDEN; };
|
...