...
This works, but line
is getting cluttered. Let's use a global and still return a testing string from field:
Introducing scopes
We've been passing a value back from `field` to `line`, but there's another way to pass information between rules: dynamic scopes. This is a good place to see how they work.
Code Block |
---|
title | CSV.g with global listscope |
---|
|
grammar CSV;

/* Old definitions commented out:

line returns [List<String> result]
@init { result = new ArrayList<String>(); }
    :   (NEWLINE) => NEWLINE
    |   (   fieldResult=field { result.add(fieldResult); }
            ( COMMA fieldResult=field {result.add(fieldResult);} )*
            NEWLINE
        )
    ;

field returns [String parsedItem]
@init { parsedItem = ""; }
    :   (   f=FIELD {$parsedItem=$f.text;}
        |   // nothing
        )
    ;
*/

// New definitions:

// One CSV record. The dynamic scope gives every invocation of `line` its own
// `fields` list, which rules invoked from `line` reach as $line::fields.
line returns [List<String> result]
scope { List fields; }
@init { $line::fields = new ArrayList(); }
    :   (
            // A bare newline is an empty record: no fields are added.
            (NEWLINE) => NEWLINE
        |   field (COMMA field)* NEWLINE
        )
        { $result = $line::fields; }
    ;

// One field. It no longer returns a value; it appends its text to the list
// owned by the enclosing `line` invocation.
field
    :   (   f=FIELD
        |   // nothing
        )
        // $f is null when the empty alternative matched; store "" rather than
        // null so an empty field is still a String.
        { $line::fields.add(($f == null) ? "" : $f.text); }
    ;

NEWLINE : '\r'? '\n';

COMMA : ',';

FIELD : NONBREAKING+;

// Any character except carriage return, line feed, or comma.
fragment NONBREAKING
    :   ~('\r' | '\n' | ',');
|
Since `field` no longer returns a string, we'll need to alter the test to pass the value through `line` and add a newline to the end of the line:
Code Block |
---|
title | CSVTests.java, new field test via line |
---|
|
/** A single unquoted word must come back unchanged as the first field of the line. */
@Test
public void testSingleWord() throws IOException, RecognitionException {
    final String parsed = parseField("Red");
    final boolean matches = parsed.equals("Red");
    assert matches : "Expected Red, but found " + parsed;
}
/**
 * Parses {@code testString} as a single CSV line and returns its first field.
 *
 * @param testString the field text to parse, without a line terminator
 * @return the first entry of the list produced by the {@code line} rule
 */
private String parseField(String testString) throws IOException, RecognitionException {
    // The line rule only completes on NEWLINE, so terminate the input here.
    final String terminated = testString + "\n";
    final List<String> fields = createParser(terminated).line();
    return fields.get(0);
}
|
Quoting, part 1
CSV requires that fields that contain special characters (newline, return, double-quote, comma, space) be surrounded by double quotes.
Code Block |
---|
title | CSVTests.java fragment |
---|
|
/**
 * A quoted field may contain commas and spaces; the surrounding quotes must be
 * stripped from the parsed value.
 */
@Test
public void testQuotedString() throws IOException, RecognitionException {
    // The field rule no longer returns a String, so go through parseField.
    String result = parseField("\"Red, White, and Blue\"");
    assert result.equals("Red, White, and Blue") : "Expected <<Red, White, and Blue>>, but found <<" + result + ">>";
}
|
...
Code Block |
---|
|
grammar CSV;

// One CSV record. The dynamic scope gives every invocation of `line` its own
// `fields` list, which rules invoked from `line` reach as $line::fields.
line returns [List<String> result]
scope { List fields; }
@init { $line::fields = new ArrayList(); }
    :   (
            // A bare newline is an empty record: no fields are added.
            (NEWLINE) => NEWLINE
        |   field (COMMA field)* NEWLINE
        )
        { $result = $line::fields; }
    ;

// One field, quoted or unquoted, appended to the enclosing line's list.
field
    :   (   f=QUOTED
        |   f=UNQUOTED
        |   // nothing
        )
        // $f is null when the empty alternative matched; store "" in that case.
        { $line::fields.add(($f == null) ? "" : $f.text); }
    ;

NEWLINE : '\r'? '\n';

COMMA : ',';

// Everything from one double quote up to the next one (non-greedy).
QUOTED : '"' ( options {greedy=false;} : . )* '"'
    {
        // Strip the surrounding quotes
        String txt = getText();
        setText(txt.substring(1, txt.length() -1));
    };

// A run of characters containing no CR, LF, comma, space, or double quote.
UNQUOTED : ~('\r' | '\n' | ',' | ' ' | '"')+;
|
...
Code Block |
---|
title | CSVTests.java fragment |
---|
|
/** Inside a quoted field, a doubled double-quote stands for one literal double-quote. */
@Test
public void testQuoteEscaping() throws IOException, RecognitionException {
    // The field rule no longer returns a String, so go through parseField.
    String result = parseField("\"Before\"\"After\"");
    assert result.equals("Before\"After") : "Expected <<Before\"After>>, but found <<" + result + ">>";
}
|
...
Override this method in the lexer and add the exception to a list (all of the changes are in `@lexer::members`):
Code Block |
---|
|
grammar CSV;

@lexer::members {
    // Every error reported by the lexer, in the order it was reported.
    List<RecognitionException> exceptions = new ArrayList<RecognitionException>();

    /** Returns the errors recorded so far by reportError. */
    public List<RecognitionException> getExceptions() {
        return exceptions;
    }

    /** Reports the error as usual, then records it for later inspection. */
    @Override
    public void reportError(RecognitionException e) {
        super.reportError(e);
        exceptions.add(e);
    }
}

// One CSV record. The dynamic scope gives every invocation of `line` its own
// `fields` list, which rules invoked from `line` reach as $line::fields.
line returns [List<String> result]
scope { List fields; }
@init { $line::fields = new ArrayList(); }
    :   (
            // A bare newline is an empty record: no fields are added.
            (NEWLINE) => NEWLINE
        |   field (COMMA field)* NEWLINE
        )
        { $result = $line::fields; }
    ;

// One field, quoted or unquoted, appended to the enclosing line's list.
field
    :   (   f=QUOTED
        |   f=UNQUOTED
        |   // nothing
        )
        // $f is null when the empty alternative matched; store "" in that case.
        { $line::fields.add(($f == null) ? "" : $f.text); }
    ;

NEWLINE : '\r'? '\n';

COMMA : ',';

// One or more back-to-back quoted segments, so "Before""After" is a single token.
// NOTE(review): the inner loop is `+`, so each segment needs at least one
// character between its quotes -- confirm that an empty quoted field ("")
// is not expected to lex here.
QUOTED : ('"' ( options {greedy=false;}: . )+ '"')+
    {
        String txt = getText();
        // Remove first and last double-quote
        txt = txt.substring(1, txt.length() - 1);
        // "" -> "
        // replace() scans left to right without overlapping, so every escaped
        // pair collapses to exactly one quote, even when pairs are adjacent.
        setText(txt.replace("\"\"", "\""));
    };

// A run of characters containing no CR, LF, comma, or space.
UNQUOTED
    :   ~('\r' | '\n' | ',' | ' ')+;
|
...