Tokenizer uses single process step and length-based exhaustion check

Issue: SPR-16032
8 years ago · 03b68286a2
3 changed files with 70 additions and 63 deletions
--- a/spring-expression/src/main/java/org/springframework/expression/spel/standard/InternalSpelExpressionParser.java
+++ b/spring-expression/src/main/java/org/springframework/expression/spel/standard/InternalSpelExpressionParser.java
@ -124,8 +124,7 @@ class InternalSpelExpressionParser extends TemplateAwareExpressionParser {
 		try {
 			this.expressionString = expressionString;
 			Tokenizer tokenizer = new Tokenizer(expressionString);
-			tokenizer.process();
+			this.tokenStream = tokenizer.process();
-			this.tokenStream = tokenizer.getTokens();
 			this.tokenStreamLength = this.tokenStream.size();
 			this.tokenStreamPointer = 0;
 			this.constructedNodes.clear();
--- a/spring-expression/src/main/java/org/springframework/expression/spel/standard/Tokenizer.java
+++ b/spring-expression/src/main/java/org/springframework/expression/spel/standard/Tokenizer.java
@ -28,14 +28,15 @@ import org.springframework.expression.spel.SpelParseException;
 * Lex some input data into a stream of tokens that can then be parsed.
 *
 * @author Andy Clement
 * @author Juergen Hoeller
 * @author Phillip Webb
 * @since 3.0
 */
 class Tokenizer {
-	// if this is changed, it must remain sorted
+	// If this gets changed, it must remain sorted...
-	private static final String[] ALTERNATIVE_OPERATOR_NAMES = { "DIV", "EQ", "GE", "GT",
+	private static final String[] ALTERNATIVE_OPERATOR_NAMES =
-		"LE", "LT", "MOD", "NE", "NOT" };
+			{"DIV", "EQ", "GE", "GT", "LE", "LT", "MOD", "NE", "NOT"};
 	private static final byte FLAGS[] = new byte[256];
@ -64,29 +65,28 @@ class Tokenizer {
 	}
-	String expressionString;
+	private String expressionString;
-	char[] toProcess;
+	private char[] charsToProcess;
-	int pos;
+	private int pos;
-	int max;
+	private int max;
-	List<Token> tokens = new ArrayList<>();
+	private List<Token> tokens = new ArrayList<>();
 	public Tokenizer(String inputData) {
 		this.expressionString = inputData;
-		this.toProcess = (inputData + "\0").toCharArray();
+		this.charsToProcess = (inputData + "\0").toCharArray();
-		this.max = this.toProcess.length;
+		this.max = this.charsToProcess.length;
 		this.pos = 0;
-		process();
 	}
-	public void process() {
+	public List<Token> process() {
 		while (this.pos < this.max) {
-			char ch = this.toProcess[this.pos];
+			char ch = this.charsToProcess[this.pos];
 			if (isAlphabetic(ch)) {
 				lexIdentifier();
 			}
@ -190,9 +190,7 @@ class Tokenizer {
 						break;
 					case '|':
 						if (!isTwoCharToken(TokenKind.SYMBOLIC_OR)) {
-							throw new InternalParseException(new SpelParseException(
+							raiseParseException(this.pos, SpelMessage.MISSING_CHARACTER, "|");
-									this.expressionString, this.pos, SpelMessage.MISSING_CHARACTER,
-									"|"));
 						}
 						pushPairToken(TokenKind.SYMBOLIC_OR);
 						break;
@ -264,38 +262,35 @@ class Tokenizer {
 						this.pos++;  // will take us to the end
 						break;
 					case '\\':
-						throw new InternalParseException(
+						raiseParseException(this.pos, SpelMessage.UNEXPECTED_ESCAPE_CHAR);
-								new SpelParseException(this.expressionString, this.pos, SpelMessage.UNEXPECTED_ESCAPE_CHAR));
+						break;
 					default:
 						throw new IllegalStateException("Cannot handle (" + Integer.valueOf(ch) + ") '" + ch + "'");
 				}
 			}
 		}
-	}
-	public List<Token> getTokens() {
 		return this.tokens;
 	}
 	// STRING_LITERAL: '\''! (APOS|~'\'')* '\''!;
 	private void lexQuotedStringLiteral() {
 		int start = this.pos;
 		boolean terminated = false;
 		while (!terminated) {
 			this.pos++;
-			char ch = this.toProcess[this.pos];
+			char ch = this.charsToProcess[this.pos];
 			if (ch == '\'') {
 				// may not be the end if the char after is also a '
-				if (this.toProcess[this.pos + 1] == '\'') {
+				if (this.charsToProcess[this.pos + 1] == '\'') {
 					this.pos++;  // skip over that too, and continue
 				}
 				else {
 					terminated = true;
 				}
 			}
-			if (ch == 0) {
+			if (isExhausted()) {
-				throw new InternalParseException(new SpelParseException(this.expressionString, start,
+				raiseParseException(start, SpelMessage.NON_TERMINATING_QUOTED_STRING);
-						SpelMessage.NON_TERMINATING_QUOTED_STRING));
 			}
 		}
 		this.pos++;
@ -308,19 +303,18 @@ class Tokenizer {
 		boolean terminated = false;
 		while (!terminated) {
 			this.pos++;
-			char ch = this.toProcess[this.pos];
+			char ch = this.charsToProcess[this.pos];
 			if (ch == '"') {
 				// may not be the end if the char after is also a "
-				if (this.toProcess[this.pos + 1] == '"') {
+				if (this.charsToProcess[this.pos + 1] == '"') {
 					this.pos++;  // skip over that too, and continue
 				}
 				else {
 					terminated = true;
 				}
 			}
-			if (ch == 0) {
+			if (isExhausted()) {
-				throw new InternalParseException(new SpelParseException(this.expressionString,
+				raiseParseException(start, SpelMessage.NON_TERMINATING_DOUBLE_QUOTED_STRING);
-						start, SpelMessage.NON_TERMINATING_DOUBLE_QUOTED_STRING));
 			}
 		}
 		this.pos++;
@ -346,7 +340,7 @@ class Tokenizer {
 	private void lexNumericLiteral(boolean firstCharIsZero) {
 		boolean isReal = false;
 		int start = this.pos;
-		char ch = this.toProcess[this.pos + 1];
+		char ch = this.charsToProcess[this.pos + 1];
 		boolean isHex = ch == 'x' || ch == 'X';
 		// deal with hexadecimal
@ -355,7 +349,7 @@ class Tokenizer {
 			do {
 				this.pos++;
 			}
-			while (isHexadecimalDigit(this.toProcess[this.pos]));
+			while (isHexadecimalDigit(this.charsToProcess[this.pos]));
 			if (isChar('L', 'l')) {
 				pushHexIntToken(subarray(start + 2, this.pos), true, start, this.pos);
 				this.pos++;
@ -372,10 +366,10 @@ class Tokenizer {
 		do {
 			this.pos++;
 		}
-		while (isDigit(this.toProcess[this.pos]));
+		while (isDigit(this.charsToProcess[this.pos]));
 		// a '.' indicates this number is a real
-		ch = this.toProcess[this.pos];
+		ch = this.charsToProcess[this.pos];
 		if (ch == '.') {
 			isReal = true;
 			int dotpos = this.pos;
@ -383,7 +377,7 @@ class Tokenizer {
 			do {
 				this.pos++;
 			}
-			while (isDigit(this.toProcess[this.pos]));
+			while (isDigit(this.charsToProcess[this.pos]));
 			if (this.pos == dotpos + 1) {
 				// the number is something like '3.'. It is really an int but may be
 				// part of something like '3.toString()'. In this case process it as
@ -398,19 +392,18 @@ class Tokenizer {
 		// Now there may or may not be an exponent
-		// is it a long ?
+		// Is it a long ?
 		if (isChar('L', 'l')) {
 			if (isReal) {  // 3.4L - not allowed
-				throw new InternalParseException(new SpelParseException(this.expressionString,
+				raiseParseException(start, SpelMessage.REAL_CANNOT_BE_LONG);
-						start, SpelMessage.REAL_CANNOT_BE_LONG));
 			}
 			pushIntToken(subarray(start, endOfNumber), true, start, endOfNumber);
 			this.pos++;
 		}
-		else if (isExponentChar(this.toProcess[this.pos])) {
+		else if (isExponentChar(this.charsToProcess[this.pos])) {
 			isReal = true;  // if it wasn't before, it is now
 			this.pos++;
-			char possibleSign = this.toProcess[this.pos];
+			char possibleSign = this.charsToProcess[this.pos];
 			if (isSign(possibleSign)) {
 				this.pos++;
 			}
@ -419,19 +412,19 @@ class Tokenizer {
 			do {
 				this.pos++;
 			}
-			while (isDigit(this.toProcess[this.pos]));
+			while (isDigit(this.charsToProcess[this.pos]));
 			boolean isFloat = false;
-			if (isFloatSuffix(this.toProcess[this.pos])) {
+			if (isFloatSuffix(this.charsToProcess[this.pos])) {
 				isFloat = true;
 				endOfNumber = ++this.pos;
 			}
-			else if (isDoubleSuffix(this.toProcess[this.pos])) {
+			else if (isDoubleSuffix(this.charsToProcess[this.pos])) {
 				endOfNumber = ++this.pos;
 			}
 			pushRealToken(subarray(start, this.pos), isFloat, start, this.pos);
 		}
 		else {
-			ch = this.toProcess[this.pos];
+			ch = this.charsToProcess[this.pos];
 			boolean isFloat = false;
 			if (isFloatSuffix(ch)) {
 				isReal = true;
@ -456,7 +449,7 @@ class Tokenizer {
 		do {
 			this.pos++;
 		}
-		while (isIdentifier(this.toProcess[this.pos]));
+		while (isIdentifier(this.charsToProcess[this.pos]));
 		char[] subarray = subarray(start, this.pos);
 		// Check if this is the alternative (textual) representation of an operator (see
@ -484,14 +477,10 @@ class Tokenizer {
 	private void pushHexIntToken(char[] data, boolean isLong, int start, int end) {
 		if (data.length == 0) {
 			if (isLong) {
-				throw new InternalParseException(new SpelParseException(this.expressionString,
+				raiseParseException(start, SpelMessage.NOT_A_LONG, this.expressionString.substring(start, end + 1));
-						start, SpelMessage.NOT_A_LONG, this.expressionString.substring(start,
-								end + 1)));
 			}
 			else {
-				throw new InternalParseException(new SpelParseException(this.expressionString,
+				raiseParseException(start, SpelMessage.NOT_AN_INTEGER, this.expressionString.substring(start, end));
-						start, SpelMessage.NOT_AN_INTEGER, this.expressionString.substring(
-								start, end)));
 			}
 		}
 		if (isLong) {
@ -513,7 +502,7 @@ class Tokenizer {
 	private char[] subarray(int start, int end) {
 		char[] result = new char[end - start];
-		System.arraycopy(this.toProcess, start, result, 0, end - start);
+		System.arraycopy(this.charsToProcess, start, result, 0, end - start);
 		return result;
 	}
@ -522,8 +511,8 @@ class Tokenizer {
 	 */
 	private boolean isTwoCharToken(TokenKind kind) {
 		return (kind.tokenChars.length == 2 &&
-				this.toProcess[this.pos] == kind.tokenChars[0] &&
+				this.charsToProcess[this.pos] == kind.tokenChars[0] &&
-				this.toProcess[this.pos + 1] == kind.tokenChars[1]);
+				this.charsToProcess[this.pos + 1] == kind.tokenChars[1]);
 	}
 	/**
@ -552,7 +541,7 @@ class Tokenizer {
 	}
 	private boolean isChar(char a, char b) {
-		char ch = this.toProcess[this.pos];
+		char ch = this.charsToProcess[this.pos];
 		return ch == a || ch == b;
 	}
@ -593,4 +582,12 @@ class Tokenizer {
 		return (FLAGS[ch] & IS_HEXDIGIT) != 0;
 	}
+	private boolean isExhausted() {
+		return (this.pos == this.max - 1);
+	}
+	private void raiseParseException(int start, SpelMessage msg, Object... inserts) {
+		throw new InternalParseException(new SpelParseException(this.expressionString, start, msg, inserts));
+	}
 }
--- a/spring-expression/src/test/java/org/springframework/expression/spel/SpelReproTests.java
+++ b/spring-expression/src/test/java/org/springframework/expression/spel/SpelReproTests.java
@ -59,6 +59,7 @@ import org.springframework.expression.spel.support.ReflectivePropertyAccessor;
 import org.springframework.expression.spel.support.StandardEvaluationContext;
 import org.springframework.expression.spel.support.StandardTypeLocator;
 import org.springframework.expression.spel.testresources.le.div.mod.reserved.Reserver;
+import org.springframework.util.ObjectUtils;
 import static org.hamcrest.Matchers.*;
 import static org.junit.Assert.*;
@ -2095,6 +2096,16 @@ public class SpelReproTests extends AbstractExpressionTests {
 		assertEquals(StandardCharsets.UTF_8, result);
 	}
+	@Test
+	public void SPR16032() {
+		EvaluationContext context = new StandardEvaluationContext();
+		context.setVariable("str", "a\0b");
+		Expression ex = parser.parseExpression("#str?.split('\0')");
+		Object result = ex.getValue(context);
+		assertTrue(ObjectUtils.nullSafeEquals(result, new String[] {"a", "b"}));
+	}
 	public static class ListOf {