Versions Compared

Key

  • This line was added.
  • This line was removed.
  • Formatting was changed.
Comment: Switched from global to dynamic scope

...

(tick) This works, but line is getting cluttered. Let's use a global and still return a testing string from field:

Introducing scopes

We've been passing a value back from field to line, but there's another way to pass information between rules: dynamic scopes. This is a good place to see how they work.

Code Block
titleCSV.g with scope
grammar CSV;

@members/* {Old List<String>definitions fields = new ArrayList<String>();
}

commented out:
line returns [List<String> result]
	:@init ({ 	result = new ArrayList<String>(); }
	: (NEWLINE) => NEWLINE
	| (
		fieldResult=field  | field { result.add(fieldResult); }
		( COMMA fieldResult=field {result.add(fieldResult);} )*
	 	NEWLINE
	  )
	;

field returns [String parsedItem]
@init { $resultparsedItem = fields""; }
	;

/** Adds the field to the master result and also returns it for unit testing */
field returns [String parsedItem]
@init { parsedItem = ""; }
	: (: (f=FIELD {$parsedItem=$f.text;}
	  | // nothing
	  )
 	{ fields.add($parsedItem); }
	;

*/
	
// End of a record: a line feed, optionally preceded by a carriage return.
NEWLINE
	:	('\r')? '\n'
	;

// Separator between two fields of the same record.
COMMA
	:	','
	;

// An unquoted field: one or more NONBREAKING characters.
FIELD
	:	(NONBREAKING)+
	;

// Helper for FIELD: any single character other than CR, LF, or comma.
fragment NONBREAKING
	:	~(',' | '\n' | '\r')
	;
// New definitions:
/**
 * Parses one CSV record and returns its fields in order.
 *
 * The fields are collected in the dynamic scope attribute $line::fields, which
 * the field rule appends to while it is invoked from this rule. A record that
 * consists of a bare NEWLINE takes the first alternative and yields an empty list.
 */
line returns [List<String> result]
scope { List<String> fields; }
@init { $line::fields = new ArrayList<String>(); }
	: (
	    (NEWLINE) => NEWLINE
	    | field (COMMA  field)* NEWLINE
	  )
	  // Hand the collected list to the caller once the record is complete.
	  { $result = $line::fields; }
	;

/**
 * Matches a single field (possibly empty) and appends its text to the
 * enclosing line rule's dynamic scope list.
 */
field
	: ( f=FIELD
	  | // nothing
	  )
	// On the empty alternative no token is bound to f, so store "" rather than null.
 	{ $line::fields.add(($f == null) ? "" : $f.text); }
	;

// End of a record: a line feed, optionally preceded by a carriage return.
NEWLINE
	:	('\r')? '\n'
	;

// Separator between two fields of the same record.
COMMA
	:	','
	;

// An unquoted field: one or more NONBREAKING characters.
FIELD
	:	(NONBREAKING)+
	;

// Helper for FIELD: any single character other than CR, LF, or comma.
fragment NONBREAKING
	:	~(',' | '\n' | '\r')
	;

Since field no longer returns a string, we'll need to alter the test to pass the value through line and add a newline to the end of the line:

Code Block
titleCSVTests.java, new field test via line

/** A single unquoted word must come back unchanged as the first field. */
@Test
public void testSingleWord() throws IOException, RecognitionException {
    final String expected = "Red";
    final String actual = parseField(expected);
    assert actual.equals(expected) : "Expected Red, but found " + actual;
}

/**
 * Parses {@code testString} as a one-line record and returns its first field.
 *
 * A newline is appended to the input before it is handed to the parser's
 * {@code line} rule.
 */
private String parseField(String testString) throws IOException, RecognitionException {
    final String oneLineInput = testString + "\n";
    final List<String> parsedFields = createParser(oneLineInput).line();
    return parsedFields.get(0);
}

Quoting, part 1

CSV requires that fields that contain special characters (newline, return, double-quote, comma, space) be surrounded by double quotes.

Code Block
titleCSVTests.java fragment
/** A quoted field keeps its embedded commas and spaces and loses the surrounding quotes. */
@Test
public void testQuotedString() throws IOException, RecognitionException {
    // The field is parsed through the line rule via parseField.
    String result = parseField("\"Red, White, and Blue\"");
    assert result.equals("Red, White, and Blue") : "Expected <<Red, White, and Blue>>, but found <<" + result + ">>";
}

...

Code Block
titleCSV.g
grammar CSV;

/**
 * Parses one CSV record and returns its fields in order.
 *
 * The fields are collected in the dynamic scope attribute $line::fields, which
 * the field rule appends to while it is invoked from this rule. A record that
 * consists of a bare NEWLINE takes the first alternative and yields an empty list.
 */
line returns [List<String> result]
scope { List<String> fields; }
@init { $line::fields = new ArrayList<String>(); }
	: (
	    (NEWLINE) => NEWLINE
	    | field (COMMA field)* NEWLINE
	  )
	  { $result = $line::fields; }
	;

/**
 * Matches a single quoted, unquoted, or empty field and appends its text to
 * the enclosing line rule's dynamic scope list.
 */
field
	: ( f=QUOTED
	  | f=UNQUOTED
	  | // nothing
	  )
	// On the empty alternative no token is bound to f, so store "" rather than null.
 	{ $line::fields.add(($f == null) ? "" : $f.text); }
	;
	
// End of a record: a line feed, optionally preceded by a carriage return.
NEWLINE
	:	('\r')? '\n'
	;

// Separator between two fields of the same record.
COMMA
	:	','
	;

// A double-quoted field; the token text is the content between the quotes.
QUOTED
	:	'"' ( options {greedy=false;} : . )* '"'
		{
			// Drop the opening and the closing double-quote from the matched text.
			String raw = getText();
			int closingQuote = raw.length() - 1;
			setText(raw.substring(1, closingQuote));
		}
	;

// An unquoted field: one or more characters other than CR, LF, comma, space, or double-quote.
UNQUOTED
	:	( ~(' ' | '"' | ',' | '\n' | '\r') )+
	;

...

Code Block
titleCSVTests.java fragment
/** A doubled double-quote inside a quoted field stands for one literal double-quote. */
@Test
public void testQuoteEscaping() throws IOException, RecognitionException {
    // The field is parsed through the line rule via parseField.
    String result = parseField("\"Before\"\"After\"");
    assert result.equals("Before\"After") : "Expected <<Before\"After>>, but found <<" + result + ">>";
}

...

Override this method in the lexer and add the exception to a list (all of the changes are in @lexer::members):

grammar CSV; @members { List<String> fields = new ArrayList<String>(); }
Code Block
titleCSV.g

grammar CSV;

@lexer::members {
/** Every recognition error passed to reportError, in the order reported. */
List<RecognitionException> exceptions = new ArrayList<RecognitionException>();

/** Returns the list of recorded recognition errors. */
public List<RecognitionException> getExceptions() {
  return this.exceptions;
}

/** Reports the error through the superclass first, then records it in the list. */
@Override
public void reportError(RecognitionException e) {
  super.reportError(e);
  this.exceptions.add(e);
}

}

/**
 * Parses one CSV record and returns its fields in order.
 *
 * The fields are collected in the dynamic scope attribute $line::fields, which
 * the field rule appends to while it is invoked from this rule. A record that
 * consists of a bare NEWLINE takes the first alternative and yields an empty list.
 */
line returns [List<String> result]
scope { List<String> fields; }
@init { $line::fields = new ArrayList<String>(); }
	: (
	    (NEWLINE) => NEWLINE
	    | field (COMMA  field)* NEWLINE
	  )
	  { $result = $line::fields; }
	;

/**
 * Matches a single quoted, unquoted, or empty field and appends its text to
 * the enclosing line rule's dynamic scope list.
 */
field
	: ( f=QUOTED
	  | f=UNQUOTED
	  | // nothing
	  )
	// On the empty alternative no token is bound to f, so store "" rather than null.
 	{ $line::fields.add(($f == null) ? "" : $f.text); }
	;
	
NEWLINE	:	'\r'? '\n';

COMMA	:	',';

/**
 * One or more adjacent double-quoted runs, so that a doubled quote inside a
 * field (e.g. "Before""After") stays within a single token. The token text is
 * rewritten to the field's value: outer quotes removed, each "" turned into ".
 */
QUOTED	: ('"' ( options {greedy=false;}: . )+ '"')+
	  {
	  	String txt = getText();
	  	// Remove first and last double-quote
	  	txt = txt.substring(1, txt.length() - 1);
	  	// "" -> "
	  	// String.replace substitutes non-overlapping pairs left to right and never
	  	// rescans its own output, so """" becomes "" instead of collapsing to ".
	  	setText(txt.replace("\"\"", "\""));
	  };

// An unquoted field: one or more characters other than CR, LF, comma, or space.
UNQUOTED
	:	~('\r' | '\n' | ',' | ' ')+;

...