Versions Compared

Key

  • This line was added.
  • This line was removed.
  • Formatting was changed.
Comment: Migrated to Confluence 5.3

...

Info
titleNotes

Note the use of fragment rules with no body to define the token types that the main FLOATING_POINT_LITERAL uses.
You may feel that this looks like a complicated rule, but in fact it is very simple as all the possible paths through a literal definition are laid out and you can read it directly, without having to infer any decisions that ANTLR tried to make for you.

Code Block
borderStylesolid
titlenumericlex.gborderStylesolid
/*
 * Copyright 2007 Sun Microsystems, Inc.  All Rights Reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
 * CA 95054 USA or visit www.sun.com if you need additional information or
 * have any questions.
 */

//------------------------------------------------------------
// Numeric literals.
// These are handled specially to reduce lexer complexity and
// negate the need to override standard ANTLR lexing methods.
// This improves performance and enhances readability.
// The following fragment rules are to document the types and
// to provide a lexer symbol for the token type. The actual
// parsing is carried out in the FLOATING_POINT_LITERAL rule.
//

// Token type for time literals. The lexer currently recognizes
// these itself; that may change, because doing too much work in
// the lexer can produce lexing errors that are difficult to
// recover from.
//
fragment TIME_LITERAL : ;

// Token type for decimal literals. A decimal literal is integer
// only and may not have a leading zero unless it is exactly the
// constant 0. For more accurate error processing, the numeric
// literals may later be merged into a single rule that overrides
// the type.
//
fragment DECIMAL_LITERAL : ;

// Token type for octal literals: a leading zero followed by one
// or more valid octal digits.
//
fragment OCTAL_LITERAL : ;

// Token type for hexadecimal literals: a 0x or 0X prefix followed
// by one or more valid hex digits.
//
fragment HEX_LITERAL : ;

// Token type for the range operator, written as two dots: '..'
//
fragment DOTDOT : ;

// Token type for the dot operator, written as a single '.'
//
fragment DOT : ;

// ------------------------------------------------------------
// This rule is in fact the proxy rule for all types of numeric
// literals. ANTLR lexers are LL recognizers rather than pattern
// matchers such as flex. Hence we want to hand craft this rule
// to guide it through all the possible combinations of digits and
// dots in the most efficient way.
//
// This rule presents all the decision points in a definite order,
// so the scanner has very little work to do to select the
// correct token to match. The fragment rules above (TIME_LITERAL, DOTDOT
// and so on), are essentially just there to create the token
// types; every alternative below assigns one of them to $type.
//
// The three top level alternatives are:
//   1. a leading '0'      (hex, octal, time, 0.nnn or plain zero)
//   2. a leading '1'..'9' (decimal, floating point or time)
//   3. a leading '.'      (floating point, time, DOTDOT or DOT)
//
FLOATING_POINT_LITERAL

@init
{
    // Indicates out of range digit
    //
    boolean rangeError = false;

    // First character of rule. Used as the position for error
    // reports; re-assigned before a '.' is consumed in the hex and
    // octal paths so that the error points at the dot.
    //
    int     sPos = getCharIndex();

    // Is this going to be a negative numeric?
    // Only the single character immediately before this token is
    // examined. The flag is passed on to checkIntLiteralRange().
    //
    boolean negative = input.LT(-1) == '-';

}
    :
        // A leading zero can either be a decimal literal
        // (if it is the sole component) or introduces
        // an octal or hexadecimal number. Time sequences
        // are also possible for the single '0' digit.
        //
        '0'
            (
                  ('x'|'X')     // Hex literal indicated

                  {
                    // Always set the type, so the parser is not confused
                    //
                    $type = HEX_LITERAL;
                  }
                  (
                        // We consume any letters and digits that follow 0x
                        // and control the error that we issue.
                      (
                          ('0'..'9'|'a'..'f'|'A'..'F')      // Valid Hex
                        | ('g'..'z' |'G'..'Z')          // Invalid hex

                            {
                                rangeError = true;  // Signal at least one bad digit
                            }
                      )+

                      {
                            // Drop the first two characters of the token text
                            // (the 0x/0X prefix), leaving just the digits.
                            //
                            setText(getText().substring(2, getText().length()));
                            if  (rangeError)
                            {
                                // Error - malformed hex constant
                                //
                                log.error(sPos, MsgSym.MESSAGE_JAVAFX_HEX_MALFORMED);
                                setText("0");
                            }
                            else
                            {
                                // Digits were all valid; if the range check fails
                                // the token text is replaced by "0".
                                //
                                if (! checkIntLiteralRange(getText(), getCharIndex(), 16, negative))
                                {
                                    setText("0");
                                }
                            }
                      }

                      (
                            // Hex numbers cannot be floating point, but catch this here
                            // rather than mismatch it.
                            //
                            // The gated predicate only allows the '.' to be consumed
                            // when the character after it is not another '.', so a
                            // following '..' is left in the input.
                            //
                                { input.LA(2) != '.'}?=>

                                    { sPos = getCharIndex(); }

                                    '.' (
                                              ('0'..'9'|'a'..'f'|'A'..'F')      // Valid Hex
                                            | ('g'..'z' |'G'..'Z')          // Invalid hex

                                        )*

                                    {
                                        // Error - hex constant with a fractional part
                                        //
                                        log.error(sPos, MsgSym.MESSAGE_JAVAFX_HEX_FLOAT);
                                        setText("0");
                                    }
                            |

                      )

                    |   // If no digits follow 0x then it is an error
                        //
                        {
                            log.error(getCharIndex()-1, MsgSym.MESSAGE_JAVAFX_HEX_MISSING);
                            setText("0");
                        }

                  )

                |   // Digits indicate an octal sequence
                    // but we allow a match for any standard ASCII digit
                    // and issue a controlled error, rather than allow
                    // the lexer to throw mismatch errors. This is much nicer
                    // for users.
                    //
                    (
                          '0'..'7'  // Valid octal digit

                        | '8'..'9'  // Invalid octal digit

                            {
                                rangeError = true; // Signal that at least one digit was wrong
                            }
                    )+

                    {
                        // Always set the type to octal, so the parser does not see
                        // a lexing error, even though the compiler knows there is an
                        // error.
                        //
                        $type = OCTAL_LITERAL;

                        if  (rangeError)
                        {
                            log.error(sPos, MsgSym.MESSAGE_JAVAFX_OCTAL_MALFORMED);
                            setText("0");
                        }
                        else
                        {
                            // Unlike the hex path, the text is passed to the range
                            // check unmodified (leading '0' included).
                            //
                            if  (! checkIntLiteralRange(getText(), getCharIndex(), 8, negative))
                            {
                                setText("0");
                            }
                        }
                    }
                     (
                            // Octal numbers cannot be floating point, but catch this here
                            // rather than mismatch it.
                            //
                            // Gated so that the '.' is only consumed when it is not
                            // immediately followed by another '.'.
                            //
                            { input.LA(2) != '.'}?=>

                            { sPos = getCharIndex(); }

                            '.' Digits?

                                {
                                    log.error(sPos, MsgSym.MESSAGE_JAVAFX_OCTAL_FLOAT);
                                    setText("0");
                                }
                        |
                      )

                |   // Time sequence specifier means this was 0 length time
                    // in whatever units. Accepted suffixes: m, ms, s, h.
                    //
                    ('m' 's'? | 's' | 'h')

                    { $type = TIME_LITERAL; }


                |   // We can of course have 0.nnnnn
                    // (gated: the '.' must not be followed by another '.')
                    //
                    { input.LA(2) != '.'}?=> '.'
                        (
                              // Decimal, but possibly time
                              //
                              Digits Exponent?

                                (
                                        ('m' 's'? | 's' | 'h')

                                        { $type = TIME_LITERAL; }

                                    |   // Just 0.nnn
                                        //
                                        { $type = FLOATING_POINT_LITERAL; }
                                )

                            |   // Just 0.
                                //
                                { $type = FLOATING_POINT_LITERAL; }
                        )

                |   // If there were no following digits or adornments or range follows
                    // then this was just Zero
                    //
                    {
                        $type = DECIMAL_LITERAL;
                        if  (! checkIntLiteralRange(getText(), getCharIndex(), 10, negative))
                        {
                            setText("0");
                        }
                    }
            )

    |   // Leading non zero digits can only be base 10, but might
        // be a floating point or a time,
        //
        ('1'..'9') Digits?

            // Numeric so far, resolve float and times
            //
            (

                // Fractional part; gated so that the '.' is only consumed
                // when it is not immediately followed by another '.'.
                //
                { input.LA(2) != '.'}?=>

                      '.' Digits? Exponent?

                    (
                          ('m' 's'? | 's' | 'h')

                            { $type = TIME_LITERAL; }

                        |   // Just n.nnn
                                        //
                            { $type = FLOATING_POINT_LITERAL; }
                    )

                |   // No fractional part: integer digits only so far
                    //
                    (
                          ('m' 's'? | 's' | 'h')

                            { $type = TIME_LITERAL; }

                        | Exponent

                            {
                                // Digits followed directly by an exponent
                                //
                                $type = FLOATING_POINT_LITERAL;
                            }

                        |   // Just n, a plain decimal integer
                            //
                            {
                                $type = DECIMAL_LITERAL;
                                if (! checkIntLiteralRange(getText(), getCharIndex(), 10, negative))
                                {
                                    setText("0");
                                }
                            }
                    )
            )

    |
        // Leading dot: .nnn float/time, the '..' range operator, or a lone '.'
        //
        '.'

            (     // Float, but is it a time?
                  //
                  Digits Exponent?

                    (
                         ('m' 's'? | 's' | 'h')

                            { $type = TIME_LITERAL; }

                        |   // Just  floating point
                            //
                            { $type = FLOATING_POINT_LITERAL; }

                    )

                |   // Is it a range specifier?
                    //
                    '.'
                    {
                        $type = DOTDOT; // Yes, it was ..
                    }

                |   // It was just a single .
                    //

                    { $type = DOT; }
            )
    ;


// A run of one or more ASCII decimal digits.
//
fragment Digits
    :   ( '0'..'9' )+
    ;

// Exponent part of a floating point literal: 'e' or 'E', an
// optional sign, then digits. When the digits are missing, an
// error is logged at the last consumed character and the token
// text is set to "0.0".
//
fragment Exponent
    :   ( 'e' | 'E' )
        ( '+' | '-' )?
        (
                Digits

            |   // Nothing usable after the exponent marker
                //
                {
                    log.error(getCharIndex()-1, MsgSym.MESSAGE_JAVAFX_EXPONENT_MALFORMED);
                    setText("0.0");
                }
        )
    ;