|
Figure 1.1 The token manager converts a sequence of characters to a sequence of Token objects. |
Figure 1.2 The parser analyzes the sequence of tokens. |
< javacc
|
< javac
|
CLASSPATH=$CLASSPATH:/usr/local/javacc2.1/bin/lib/JavaCC.zip export CLASSPATH |
int main() {
|
java.io.StringReader sr = new java.io.StringReader( str ); java.io.Reader r = new java.io.BufferedReader( sr ); XXX parser = new XXX( r ); |
TOKEN : { < PLUS : "+" > } TOKEN : { < ASSIGN : "=" > } TOKEN : { < PLASSIGN : "+=" > } |
TOKEN : { < KWINT : "int" > } TOKEN : { < IDENT : ["a"-"z","A"-"Z", "_"] (["a"-"z","A"-"Z","0"-"9","_"])* > } |
TOKEN : { < A : "x" | "y" > } TOKEN : { < B : "y" | "z" > } |
TOKEN : { < A : a > } TOKEN : { < B : b > } |
Token b() : {Token t ; }{ (t= < A > | t= < B > ) {return t;} } |
|
|
TOKEN : { < C : c′ > } TOKEN : { < A : a′ > } TOKEN : { < B : b′ > } Token a() : {Token t ; } { (t= < C > | t= < A > ) {return t;} } Token b() : {Token t ; } { (t= < C > | t= < B > ) {return t;} } |
TOKEN : { < C : "y" > } TOKEN : { < A : "x" > } TOKEN : { < B : "z" > } Token a() : {Token t ; } { (t= < C > | t= < A > ) {return t;} } Token b() : {Token t ; } { (t= < C > | t= < B > ) {return t;} } |
|
< DEFAULT > TOKEN : { < STARTSCRIPT : "<<" > : SCRIPT } < DEFAULT > TOKEN : { < TEXT : ~[] > } < SCRIPT > TOKEN : { < ENDSCRIPT : ">>" > : DEFAULT } < SCRIPT > ... other TOKEN and SKIP productions for the SCRIPT state |
void start() : {}
{
|
|
|
// When a /** is seen in the DEFAULT state, switch to the
// IN_JAVADOC_COMMENT state
TOKEN : {
|
SKIP : { < "/*"(~["*"])* "*"(~["/"] (~["*"])* "*")* "/" > } |
// When a /* is seen in the DEFAULT state, skip it and switch to the
// IN_COMMENT state
SKIP : {
|
i = j/*p ; |
{ token_source.SwitchTo(name_of_state) ; } |
TOKEN_MGR_DECLS : {
|
// When a /* is seen in the DEFAULT state, skip it and switch to the
// IN_COMMENT state
MORE : {
|
MORE : {
|
SPECIAL_TOKEN : {
|
TOKEN : {
|
|
TOKEN_MGR_DECLS : {
|
SKIP : { "(*" { commentNestingDepth = 1 ; } : COMMENT } |
< COMMENT > SKIP : { "(*" { commentNestingDepth += 1 ; } } |
< COMMENT > SKIP : { "*)"
{
|
< COMMENT > SKIP : { < ~[] > } |
options {
|
TOKEN_MGR_DECLS : {
|
< * > TOKEN :
{
|
InputStream istrm = new FileInputStream( theFileName ) ; Reader rdr = new InputStreamReader( istrm, "UTF-8" ) ; |
SimpleCharStream charStream = new SimpleCharStream( rdr ) ; |
XXXTokenManager tokenMan = new XXXTokenManager(charStream); XXX parser = new XXX( tokenMan ) ; |
If there is a prefix of the input sequence of tokens that matches this nonterminal's definition, then remove such a prefix from the input sequence; otherwise, throw a ParseException. I say only roughly, as the actual prefix matched is not arbitrary, but is determined by the rules of JavaCC.
void A() : {} {
|
void A() {
|
void A() : {} {
|
void A() : {} {
|
void A() : {} { B() | {} } |
void A() : {} { [ B() ] } |
Warning: Choice conflict ... Consider using a lookahead of 2 for ... Read the message carefully. Understand why there is a choice conflict (choice conflicts will be explained shortly) and take appropriate action. The appropriate action, in my experience, is rarely to use a lookahead of 2. So what is a choice conflict? Well, suppose you have a BNF production
void a() : {} {
|
void a() : {} {
|
void a() : {} {
|
void a() : {} {
|
void paramList() : {} {
|
void paramList() : {} {
|
void paramList() : {} {
|
void statement() : {}
{
|
|
void statement() : {}
{
|
void eg() : {}
{
|
void start( ) : { } {
|
void a( ) : { } {
|
void a( ) : { } {
|
|
void a() : {} { LOOKAHEAD(C) A | B } |
|
void typedef_name() : {} {
|
{ typedef int T ; T i ; i = 0 ; return i ; } |
//A regular expression production
TOKEN : { < ABC : "abc" > }
//A BNF production
void nonterm() : {} {
|
//A regular expression production
TOKEN : { < ABC : "abc" > }
TOKEN : { < ANON0 : "def" > }
TOKEN : { < ANON1 : (["0"-"9"])+ > }
TOKEN : { < ANON2 : (["0"-"9"])+ > }
//A BNF production
void nonterm() : {}
{
|
void letter_number_letters() : {
|
TOKEN : { < LETTER : ["a"-"z"]
> }
TOKEN : { < NUMBER : ["0"-"9"]
> }
TOKEN : { < LETTERS : (["a"-"z"])+
> }
void letter_number_letters() : {
|
void abc() : {} {
|
void abc() : {} {
|
void toughChoice() : {}
{
|
TOKEN : { < HTTP : "http" > } TOKEN : { < LABEL : < ALPHANUM > | < ALPHANUM > ( < ALPHANUM > |"-")* < ALPHANUM > > } void httpURL() : {} { < HTTP > ":""//"host() port_path_and_query() } void host() : {} { < LABEL > ("." < LABEL > )* } |
void host() : {} { label() ("."label())* } void label() : {} { < LABEL > | < HTTP > } |
void httpURL() : {} {
|
TOKEN : { < HTTP : "http" > } TOKEN : { < DSLASH : "//" > : LABELEXPECTED } TOKEN : { < DOT : "." > : LABELEXPECTED } < LABELEXPECTED > TOKEN : { < LABEL : < ALPHANUM > | < ALPHANUM > ( < ALPHANUM > |"-")* < ALPHANUM > > :DEFAULT } |
void httpURL() : {} { < HTTP > ":" < DSLASH > host() port_path_and_query() } void host() : {} { < LABEL > ( < DOT > < LABEL > )* } |
class TokenList {
|
TokenList CompilationUnit() : {
|
class TokenList {
|
|
class TokenList {
|
void CompilationUnit() : {
|
|
void eg() : {} {a() (b() [","])* } |
eg : a eg1
;
eg1 : /* empty */
|
|
"/*" (~["*"])* "*" (~["*","/"] (~["*"])* "*" | "*")* "/" |