// ---------------------------------------------------------------
// Token
// ---------------------------------------------------------------

import java.util.ArrayList;
import java.util.regex.*;

/**
 * A single lexical token of an arithmetic expression, together with a
 * static tokenizer ({@link #getTokens(String)}) that splits a string
 * into such tokens.
 *
 * Recognized tokens are the single-character operators
 * {@code + - * / ^ ( )}, unsigned decimal numbers
 * ({@code 12}, {@code 12.}, {@code 12.5}, {@code .5}) and identifiers
 * (a letter or underscore followed by letters, underscores or digits).
 * Instances are immutable.
 */
public class Token
{
    /** The kinds of token the tokenizer can produce. */
    public enum Type
    {
        PLUS,
        MINUS,
        TIMES,
        DIVIDE,
        POWER,
        LEFT_PARENTHESIS,
        RIGHT_PARENTHESIS,
        NUMBER,
        IDENTIFIER
    }

    /**
     * Creates a token from its source text and type.
     *
     * @param text the exact text of the token
     * @param type the kind of token
     */
    public Token(String text, Type type)
    {
        mText = text;
        mType = type;
    }

    /**
     * Creates a token whose text is a single character.
     *
     * @param c    the character making up the token
     * @param type the kind of token
     */
    public Token(char c, Type type)
    {
        this("" + c, type);
    }

    /** @return the text of this token */
    public String getText()
    {
        return(mText);
    }

    /** @return the type of this token */
    public Type getType()
    {
        return(mType);
    }

    /** @return a description of the form <code>{ text: "...", type: ... }</code> */
    @Override
    public String toString()
    {
        return(
            String.format(
                "{ text: \"%s\", type: %s }",
                mText,
                mType
            )
        );
    }

    /**
     * Tells whether a character may begin an identifier.
     *
     * @param c the character to test
     * @return true if {@code c} is a letter or an underscore
     */
    public static boolean isIdentifierStart(char c)
    {
        return(Character.isLetter(c) || c == '_');
    }

    /**
     * Splits a string into tokens.  Whitespace between tokens is skipped.
     *
     * @param theString the text to tokenize
     * @return the tokens in order of appearance; an empty array if the
     *         string is empty or contains only whitespace
     * @throws Exception if a malformed number, a number immediately
     *         followed by an identifier character or a '.', or an
     *         unrecognized character is found; the message holds the
     *         full text and the offset of the offending token
     */
    public static Token[] getTokens(String theString) throws Exception
    {
        // Typed list: with a raw ArrayList, toArray(T[]) is erased to
        // Object[] and cannot be returned as Token[].
        ArrayList<Token> theTokens = new ArrayList<Token>();

        int length = theString.length();
        int index = 0;

        while(index < length)
        {
            char c = theString.charAt(index);

            // Bypass whitespace between tokens
            if(Character.isWhitespace(c))
            {
                index++;
                continue;
            }

            // Handle single-character tokens first
            Type operatorType = getSingleCharacterType(c);

            if(operatorType != null)
            {
                theTokens.add(new Token(c, operatorType));
                index++;
            }
            else if(Character.isDigit(c) || c == '.')
            {
                // Match a number starting exactly at 'index'.  lookingAt()
                // only requires a prefix match, so whatever follows the
                // number (including line terminators) cannot make the
                // match fail.
                Matcher theMatcher = mPattern.matcher(theString);
                theMatcher.region(index, length);

                if(!theMatcher.lookingAt())
                {
                    throw new Exception(
                        String.format(
                            "text: \"%s\", offset: %d, Not a valid number",
                            theString,
                            index
                        )
                    );
                }

                String matchText = theMatcher.group(1);
                int end = index + matchText.length();

                // Anything after our match?  Reject things like "12abc"
                // and "1.2.3" instead of silently splitting them.
                if(end < length)
                {
                    char next = theString.charAt(end);

                    if(isIdentifierStart(next) || next == '.')
                    {
                        throw new Exception(
                            String.format(
                                "text: \"%s\", offset: %d, Invalid input after number",
                                theString,
                                index
                            )
                        );
                    }
                }

                theTokens.add(new Token(matchText, Type.NUMBER));
                index = end;
            }
            else if(isIdentifierStart(c))
            {
                // Scan to the end of the identifier, then take the text
                // in one substring rather than appending char by char.
                int start = index;

                do
                {
                    index++;
                }
                while(index < length && isIdentifierPart(theString.charAt(index)));

                theTokens.add(
                    new Token(theString.substring(start, index), Type.IDENTIFIER)
                );
            }
            else
            {
                throw new Exception(
                    String.format(
                        "text: \"%s\", offset: %d, Unrecognized token",
                        theString,
                        index
                    )
                );
            }
        }

        return(
            theTokens.toArray(new Token[theTokens.size()])
        );
    }

    /** Maps an operator or parenthesis character to its type, or null if it is neither. */
    private static Type getSingleCharacterType(char c)
    {
        switch(c)
        {
            case '+': return(Type.PLUS);
            case '-': return(Type.MINUS);
            case '*': return(Type.TIMES);
            case '/': return(Type.DIVIDE);
            case '^': return(Type.POWER);
            case '(': return(Type.LEFT_PARENTHESIS);
            case ')': return(Type.RIGHT_PARENTHESIS);
            default:  return(null);
        }
    }

    /** Tells whether a character may appear after the first character of an identifier. */
    private static boolean isIdentifierPart(char c)
    {
        return(isIdentifierStart(c) || Character.isDigit(c));
    }

    private final String mText;
    private final Type mType;

    // Unsigned decimal number: "12.5", "12.", "12" or ".5".  Used with
    // Matcher.lookingAt(), so it is anchored at the start of the region.
    private static final Pattern mPattern =
        Pattern.compile("(\\d+\\.\\d+|\\d+\\.|\\d+|\\.\\d+)");

    /**
     * Joins the command-line arguments with single spaces, tokenizes the
     * result and prints one token per line, or prints the exception if
     * tokenizing fails.
     *
     * @param args the pieces of the expression to tokenize
     */
    public static void main(String[] args)
    {
        StringBuilder joined = new StringBuilder();

        for(String arg : args)
        {
            // Separator only once something has been accumulated
            if(joined.length() > 0)
                joined.append(' ');

            joined.append(arg);
        }

        try
        {
            Token[] list = getTokens(joined.toString());

            for(Token t : list)
                System.out.println(t);
        }
        catch(Exception e)
        {
            System.out.println(e);
        }
    }
}