Browse Source

Various UrlParser improvements

- Consistent use of codePointAt instead of charAt.
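- Fix bug in domainToAscii: the "lowercase only" shortcut is now taken only when the domain is pure ASCII (previously it was also applied to non-ASCII domains whenever beStrict was false, skipping Unicode ToASCII).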

See gh-32513
pull/32765/head
Arjen Poutsma 2 years ago
parent
commit
8bca7cd8e7
  1. 176
      spring-web/src/main/java/org/springframework/web/util/UrlParser.java

176
spring-web/src/main/java/org/springframework/web/util/UrlParser.java

@ -179,7 +179,7 @@ final class UrlParser { @@ -179,7 +179,7 @@ final class UrlParser {
void sanitizeInput(boolean removeC0ControlOrSpace) {
boolean strip = true;
for (int i = 0; i < this.input.length(); i++) {
char c = this.input.charAt(i);
int c = this.input.codePointAt(i);
boolean isSpaceOrC0 = c == ' ' || isC0Control(c);
boolean isTabOrNL = c == '\t' || isNewline(c);
if ((strip && isSpaceOrC0) || isTabOrNL) {
@ -204,7 +204,7 @@ final class UrlParser { @@ -204,7 +204,7 @@ final class UrlParser {
}
if (removeC0ControlOrSpace) {
for (int i = this.input.length() - 1; i >= 0; i--) {
char c = this.input.charAt(i);
int c = this.input.codePointAt(i);
if (c == ' ' || isC0Control(c)) {
if (validate()) {
// If input contains any (leading or) trailing C0 control or space, invalid-URL-unit validation error.
@ -224,7 +224,7 @@ final class UrlParser { @@ -224,7 +224,7 @@ final class UrlParser {
if (logger.isTraceEnabled()) {
String c;
if (this.pointer < this.input.length()) {
c = Character.toString(this.input.charAt(this.pointer));
c = Character.toString(this.input.codePointAt(this.pointer));
}
else {
c = "EOF";
@ -265,16 +265,16 @@ final class UrlParser { @@ -265,16 +265,16 @@ final class UrlParser {
private static String domainToAscii(String domain, boolean beStrict) {
// If beStrict is false, domain is an ASCII string, and strictly splitting domain on U+002E (.) does not produce any item that starts with an ASCII case-insensitive match for "xn--", this step is equivalent to ASCII lowercasing domain.
boolean onlyLowerCase = !beStrict;
if (!beStrict && containsOnlyAscii(domain)) {
int dotIdx = domain.indexOf('.');
boolean onlyLowerCase = true;
while (dotIdx != -1) {
if (domain.length() - dotIdx > 4) {
// ASCII case-insensitive match for "xn--"
char ch0 = domain.charAt(dotIdx + 1);
char ch1 = domain.charAt(dotIdx + 2);
char ch2 = domain.charAt(dotIdx + 3);
char ch3 = domain.charAt(dotIdx + 4);
int ch0 = domain.codePointAt(dotIdx + 1);
int ch1 = domain.codePointAt(dotIdx + 2);
int ch2 = domain.codePointAt(dotIdx + 3);
int ch3 = domain.codePointAt(dotIdx + 4);
if ((ch0 == 'x' || ch0 == 'X') &&
(ch1 == 'n' || ch1 == 'N') &&
ch2 == '-' && ch3 == '_') {
@ -284,9 +284,9 @@ final class UrlParser { @@ -284,9 +284,9 @@ final class UrlParser {
}
dotIdx = domain.indexOf('.', dotIdx + 1);
}
}
if (onlyLowerCase) {
return domain.toLowerCase(Locale.ENGLISH);
if (onlyLowerCase) {
return domain.toLowerCase(Locale.ENGLISH);
}
}
// Let result be the result of running Unicode ToASCII (https://www.unicode.org/reports/tr46/#ToASCII) with domain_name set to domain, UseSTD3ASCIIRules set to beStrict, CheckHyphens set to false, CheckBidi set to true, CheckJoiners set to true, Transitional_Processing set to false, and VerifyDnsLength set to beStrict. [UTS46]
int flag = 0;
@ -392,7 +392,7 @@ final class UrlParser { @@ -392,7 +392,7 @@ final class UrlParser {
private static boolean containsOnlyAsciiDigits(CharSequence string) {
for (int i=0; i< string.length(); i++ ) {
char ch = string.charAt(i);
int ch = codePointAt(string, i);
if (!isAsciiDigit(ch)) {
return false;
}
@ -400,9 +400,9 @@ final class UrlParser { @@ -400,9 +400,9 @@ final class UrlParser {
return true;
}
private static boolean containsOnlyAscii(CharSequence string) {
for (int i=0; i< string.length(); i++ ) {
char ch = string.charAt(i);
private static boolean containsOnlyAscii(String string) {
for (int i = 0; i < string.length(); i++) {
int ch = string.codePointAt(i);
if (!isAsciiCodePoint(ch)) {
return false;
}
@ -505,7 +505,7 @@ final class UrlParser { @@ -505,7 +505,7 @@ final class UrlParser {
private int remaining(int deltaPos) {
int pos = this.pointer + deltaPos + 1;
if (pos < this.input.length()) {
return this.input.charAt(pos);
return this.input.codePointAt(pos);
}
else {
return EOF;
@ -571,27 +571,27 @@ final class UrlParser { @@ -571,27 +571,27 @@ final class UrlParser {
int len = b.length();
switch (len) {
case 1 -> {
char ch0 = b.charAt(0);
int ch0 = b.codePointAt(0);
return ch0 == '.';
}
case 2 -> {
char ch0 = b.charAt(0);
char ch1 = b.charAt(1);
int ch0 = b.codePointAt(0);
int ch1 = b.codePointAt(1);
return ch0 == '/' && ch1 == '.';
}
case 3 -> {
// ASCII case-insensitive match for "%2e".
char ch0 = b.charAt(0);
char ch1 = b.charAt(1);
char ch2 = b.charAt(2);
int ch0 = b.codePointAt(0);
int ch1 = b.codePointAt(1);
int ch2 = b.codePointAt(2);
return ch0 == '%' && ch1 == '2' && (ch2 == 'e' || ch2 == 'E');
}
case 4 -> {
// ASCII case-insensitive match for "/%2e".
char ch0 = b.charAt(0);
char ch1 = b.charAt(1);
char ch2 = b.charAt(2);
char ch3 = b.charAt(3);
int ch0 = b.codePointAt(0);
int ch1 = b.codePointAt(1);
int ch2 = b.codePointAt(2);
int ch3 = b.codePointAt(3);
return ch0 == '/' && ch1 == '%' && ch2 == '2' && (ch3 == 'e' || ch3 == 'E');
}
default -> {
@ -607,55 +607,55 @@ final class UrlParser { @@ -607,55 +607,55 @@ final class UrlParser {
int len = b.length();
switch (len) {
case 2 -> {
char ch0 = b.charAt(0);
char ch1 = b.charAt(1);
int ch0 = b.codePointAt(0);
int ch1 = b.codePointAt(1);
return ch0 == '.' && ch1 == '.';
}
case 3 -> {
char ch0 = b.charAt(0);
char ch1 = b.charAt(1);
char ch2 = b.charAt(2);
int ch0 = b.codePointAt(0);
int ch1 = b.codePointAt(1);
int ch2 = b.codePointAt(2);
return ch0 == '/' && ch1 == '.' && ch2 == '.';
}
case 4 -> {
char ch0 = b.charAt(0);
char ch1 = b.charAt(1);
char ch2 = b.charAt(2);
char ch3 = b.charAt(3);
int ch0 = b.codePointAt(0);
int ch1 = b.codePointAt(1);
int ch2 = b.codePointAt(2);
int ch3 = b.codePointAt(3);
// case-insensitive match for ".%2e" or "%2e."
return (ch0 == '.' && ch1 == '%' && ch2 == '2' && (ch3 == 'e' || ch3 == 'E') ||
(ch0 == '%' && ch1 == '2' && (ch2 == 'e' || ch2 == 'E') && ch3 == '.'));
}
case 5 -> {
char ch0 = b.charAt(0);
char ch1 = b.charAt(1);
char ch2 = b.charAt(2);
char ch3 = b.charAt(3);
char ch4 = b.charAt(4);
int ch0 = b.codePointAt(0);
int ch1 = b.codePointAt(1);
int ch2 = b.codePointAt(2);
int ch3 = b.codePointAt(3);
int ch4 = b.codePointAt(4);
// case-insensitive match for "/.%2e" or "/%2e."
return ch0 == '/' &&
(ch1 == '.' && ch2 == '%' && ch3 == '2' && (ch4 == 'e' || ch4 == 'E')
|| (ch1 == '%' && ch2 == '2' && (ch3 == 'e' || ch3 == 'E') && ch4 == '.'));
}
case 6 -> {
char ch0 = b.charAt(0);
char ch1 = b.charAt(1);
char ch2 = b.charAt(2);
char ch3 = b.charAt(3);
char ch4 = b.charAt(4);
char ch5 = b.charAt(5);
int ch0 = b.codePointAt(0);
int ch1 = b.codePointAt(1);
int ch2 = b.codePointAt(2);
int ch3 = b.codePointAt(3);
int ch4 = b.codePointAt(4);
int ch5 = b.codePointAt(5);
// case-insensitive match for "%2e%2e".
return ch0 == '%' && ch1 == '2' && (ch2 == 'e' || ch2 == 'E')
&& ch3 == '%' && ch4 == '2' && (ch5 == 'e' || ch5 == 'E');
}
case 7 -> {
char ch0 = b.charAt(0);
char ch1 = b.charAt(1);
char ch2 = b.charAt(2);
char ch3 = b.charAt(3);
char ch4 = b.charAt(4);
char ch5 = b.charAt(5);
char ch6 = b.charAt(6);
int ch0 = b.codePointAt(0);
int ch1 = b.codePointAt(1);
int ch2 = b.codePointAt(2);
int ch3 = b.codePointAt(3);
int ch4 = b.codePointAt(4);
int ch5 = b.codePointAt(5);
int ch6 = b.codePointAt(6);
// case-insensitive match for "/%2e%2e".
return ch0 == '/' && ch1 == '%' && ch2 == '2' && (ch3 == 'e' || ch3 == 'E')
&& ch4 == '%' && ch5 == '2' && (ch6 == 'e' || ch6 == 'E');
@ -686,7 +686,7 @@ final class UrlParser { @@ -686,7 +686,7 @@ final class UrlParser {
* its first two code points are a Windows drive letter
* its length is 2 or its third code point is U+002F (/), U+005C (\), U+003F (?), or U+0023 (#).
*/
private static boolean startsWithWindowsDriveLetter(CharSequence input) {
private static boolean startsWithWindowsDriveLetter(String input) {
int len = input.length();
if (len < 2) {
return false;
@ -698,18 +698,18 @@ final class UrlParser { @@ -698,18 +698,18 @@ final class UrlParser {
return true;
}
else {
char ch2 = input.charAt(2);
int ch2 = input.codePointAt(2);
return ch2 == '/' || ch2 == '\\' || ch2 == '?' || ch2 == '#';
}
}
private static boolean isWindowsDriveLetterInternal(CharSequence s, boolean normalized) {
char ch0 = s.charAt(0);
int ch0 = codePointAt(s, 0);
if (!isAsciiAlpha(ch0)) {
return false;
}
else {
char ch1 = s.charAt(1);
int ch1 = codePointAt(s, 1);
if (normalized) {
return ch1 == ':';
}
@ -719,6 +719,18 @@ final class UrlParser { @@ -719,6 +719,18 @@ final class UrlParser {
}
}
/**
 * Return the Unicode code point at the given index of the given sequence.
 * <p>Delegates to {@link Character#codePointAt(CharSequence, int)}, so any
 * {@code CharSequence} implementation is supported, not only {@code String}
 * and {@code StringBuilder}. If the char at {@code index} is a high surrogate
 * that is followed by a low surrogate, the supplementary code point formed by
 * the pair is returned; otherwise the char value at {@code index} is returned.
 * @param s the sequence to read from
 * @param index the index of the char (UTF-16 code unit) to start at
 * @return the code point at {@code index}
 * @throws IndexOutOfBoundsException if {@code index} is negative or not less
 * than {@code s.length()}
 */
private static int codePointAt(CharSequence s, int index) {
	return Character.codePointAt(s, index);
}
private enum State {
@ -1500,8 +1512,8 @@ final class UrlParser { @@ -1500,8 +1512,8 @@ final class UrlParser {
// If c is U+0025 (%) and remaining does not start with two ASCII hex digits, invalid-URL-unit validation error.
else if (c == '%' &&
(p.pointer >= p.input.length() - 2 ||
!isAsciiHexDigit(p.input.charAt(p.pointer + 1)) ||
!isAsciiHexDigit(p.input.charAt(p.pointer + 2)))) {
!isAsciiHexDigit(p.input.codePointAt(p.pointer + 1)) ||
!isAsciiHexDigit(p.input.codePointAt(p.pointer + 2)))) {
p.validationError("Invalid URL Unit: \"" + (char) c + "\"");
}
}
@ -1549,8 +1561,8 @@ final class UrlParser { @@ -1549,8 +1561,8 @@ final class UrlParser {
// If c is U+0025 (%) and remaining does not start with two ASCII hex digits, invalid-URL-unit validation error.
else if (c == '%' &&
(p.pointer >= p.input.length() - 2 ||
!isAsciiHexDigit(p.input.charAt(p.pointer + 1)) ||
!isAsciiHexDigit(p.input.charAt(p.pointer + 2)))) {
!isAsciiHexDigit(p.input.codePointAt(p.pointer + 1)) ||
!isAsciiHexDigit(p.input.codePointAt(p.pointer + 2)))) {
p.validationError("Invalid URL Unit: \"" + (char) c + "\"");
}
}
@ -1612,8 +1624,8 @@ final class UrlParser { @@ -1612,8 +1624,8 @@ final class UrlParser {
// If c is U+0025 (%) and remaining does not start with two ASCII hex digits, invalid-URL-unit validation error.
else if (c == '%' &&
(p.pointer >= p.input.length() - 2 ||
!isAsciiHexDigit(p.input.charAt(p.pointer + 1)) ||
!isAsciiHexDigit(p.input.charAt(p.pointer + 2)))) {
!isAsciiHexDigit(p.input.codePointAt(p.pointer + 1)) ||
!isAsciiHexDigit(p.input.codePointAt(p.pointer + 2)))) {
p.validationError("Invalid URL Unit: \"" + (char) c + "\"");
}
}
@ -1635,8 +1647,8 @@ final class UrlParser { @@ -1635,8 +1647,8 @@ final class UrlParser {
// If c is U+0025 (%) and remaining does not start with two ASCII hex digits, invalid-URL-unit validation error.
else if (c == '%' &&
(p.pointer >= p.input.length() - 2 ||
!isAsciiHexDigit(p.input.charAt(p.pointer + 1)) ||
!isAsciiHexDigit(p.input.charAt(p.pointer + 2)))) {
!isAsciiHexDigit(p.input.codePointAt(p.pointer + 1)) ||
!isAsciiHexDigit(p.input.codePointAt(p.pointer + 2)))) {
p.validationError("Invalid URL Unit: \"" + (char) c + "\"");
}
}
@ -2079,10 +2091,10 @@ final class UrlParser { @@ -2079,10 +2091,10 @@ final class UrlParser {
*/
static Host parse(String input, boolean isOpaque, UrlParser p) {
// If input starts with U+005B ([), then:
if (!input.isEmpty() && input.charAt(0) == '[') {
if (!input.isEmpty() && input.codePointAt(0) == '[') {
int last = input.length() - 1;
// If input does not end with U+005D (]), IPv6-unclosed validation error, return failure.
if (input.charAt(last) != ']') {
if (input.codePointAt(last) != ']') {
throw new InvalidUrlException("IPv6 address is missing the closing \"]\").");
}
// Return the result of IPv6 parsing input with its leading U+005B ([) and trailing U+005D (]) removed.
@ -2102,7 +2114,7 @@ final class UrlParser { @@ -2102,7 +2114,7 @@ final class UrlParser {
String asciiDomain = domainToAscii(domain, false);
for (int i=0; i < asciiDomain.length(); i++) {
char ch = asciiDomain.charAt(i);
int ch = asciiDomain.codePointAt(i);
// If asciiDomain contains a forbidden domain code point, domain-invalid-code-point validation error, return failure.
if (isForbiddenDomain(ch)) {
throw new InvalidUrlException("Invalid character \"" + ch + "\" in domain \"" + input + "\"");
@ -2245,7 +2257,7 @@ final class UrlParser { @@ -2245,7 +2257,7 @@ final class UrlParser {
*/
public static OpaqueHost parse(String input, UrlParser p) {
for (int i = 0; i < input.length(); i++) {
char ch = input.charAt(i);
int ch = input.codePointAt(i);
// If input contains a forbidden host code point, host-invalid-code-point validation error, return failure.
if (isForbiddenHost(ch)) {
throw new InvalidUrlException("An opaque host contains a forbidden host code point.");
@ -2255,7 +2267,7 @@ final class UrlParser { @@ -2255,7 +2267,7 @@ final class UrlParser {
p.validationError("Code point \"" + ch + "\" is not a URL unit.");
}
//If input contains a U+0025 (%) and the two code points following it are not ASCII hex digits, invalid-URL-unit validation error.
if (p.validate() && ch == '%' && (input.length() - i < 2 || !isAsciiDigit(input.charAt(i + 1)) || !isAsciiDigit(input.charAt(i + 2)))) {
if (p.validate() && ch == '%' && (input.length() - i < 2 || !isAsciiDigit(input.codePointAt(i + 1)) || !isAsciiDigit(input.codePointAt(i + 2)))) {
p.validationError("Code point \"" + ch + "\" is not a URL unit.");
}
}
@ -2442,8 +2454,8 @@ final class UrlParser { @@ -2442,8 +2454,8 @@ final class UrlParser {
int len = input.length();
// If input contains at least two code points and the first two code points are either "0X" or "0x", then:
if (len >= 2) {
char ch0 = input.charAt(0);
char ch1 = input.charAt(1);
int ch0 = input.codePointAt(0);
int ch1 = input.codePointAt(1);
if (ch0 == '0' && (ch1 == 'X' || ch1 == 'x')) {
// Set validationError to true.
validationError = true;
@ -2535,11 +2547,11 @@ final class UrlParser { @@ -2535,11 +2547,11 @@ final class UrlParser {
// Let pointer be a pointer for input.
int pointer = 0;
int inputLength = input.length();
int c = (inputLength > 0) ? input.charAt(0) : EOF;
int c = (inputLength > 0) ? input.codePointAt(0) : EOF;
// If c is U+003A (:), then:
if (c == ':') {
// If remaining does not start with U+003A (:), IPv6-invalid-compression validation error, return failure.
if (inputLength > 1 && input.charAt(1) != ':') {
if (inputLength > 1 && input.codePointAt(1) != ':') {
throw new InvalidUrlException("IPv6 address begins with improper compression.");
}
// Increase pointer by 2.
@ -2548,7 +2560,7 @@ final class UrlParser { @@ -2548,7 +2560,7 @@ final class UrlParser {
pieceIndex++;
compress = pieceIndex;
}
c = (pointer < inputLength) ? input.charAt(pointer) : EOF;
c = (pointer < inputLength) ? input.codePointAt(pointer) : EOF;
// While c is not the EOF code point:
while (c != EOF) {
// If pieceIndex is 8, IPv6-too-many-pieces validation error, return failure.
@ -2565,7 +2577,7 @@ final class UrlParser { @@ -2565,7 +2577,7 @@ final class UrlParser {
pointer++;
pieceIndex++;
compress = pieceIndex;
c = (pointer < inputLength) ? input.charAt(pointer) : EOF;
c = (pointer < inputLength) ? input.codePointAt(pointer) : EOF;
continue;
}
// Let value and length be 0.
@ -2577,7 +2589,7 @@ final class UrlParser { @@ -2577,7 +2589,7 @@ final class UrlParser {
value = (value * 0x10) + cHex;
pointer++;
length++;
c = (pointer < inputLength) ? input.charAt(pointer) : EOF;
c = (pointer < inputLength) ? input.codePointAt(pointer) : EOF;
}
// If c is U+002E (.), then:
if (c == '.') {
@ -2593,7 +2605,7 @@ final class UrlParser { @@ -2593,7 +2605,7 @@ final class UrlParser {
}
// Let numbersSeen be 0.
int numbersSeen = 0;
c = (pointer < inputLength) ? input.charAt(pointer) : EOF;
c = (pointer < inputLength) ? input.codePointAt(pointer) : EOF;
// While c is not the EOF code point:
while (c != EOF) {
// Let ipv4Piece be null.
@ -2603,7 +2615,7 @@ final class UrlParser { @@ -2603,7 +2615,7 @@ final class UrlParser {
// If c is a U+002E (.) and numbersSeen is less than 4, then increase pointer by 1.
if (c =='.' && numbersSeen < 4) {
pointer++;
c = (pointer < inputLength) ? input.charAt(pointer) : EOF;
c = (pointer < inputLength) ? input.codePointAt(pointer) : EOF;
}
// Otherwise, IPv4-in-IPv6-invalid-code-point validation error, return failure.
else {
@ -2637,7 +2649,7 @@ final class UrlParser { @@ -2637,7 +2649,7 @@ final class UrlParser {
}
// Increase pointer by 1.
pointer++;
c = (pointer < inputLength) ? input.charAt(pointer) : EOF;
c = (pointer < inputLength) ? input.codePointAt(pointer) : EOF;
}
// Set address[pieceIndex] to address[pieceIndex] × 0x100 + ipv4Piece.
address[pieceIndex] = address[pieceIndex] * 0x100 + (ipv4Piece != null ? ipv4Piece : 0);
@ -2647,7 +2659,7 @@ final class UrlParser { @@ -2647,7 +2659,7 @@ final class UrlParser {
if (numbersSeen == 2 || numbersSeen == 4) {
pieceIndex++;
}
c = (pointer < inputLength) ? input.charAt(pointer) : EOF;
c = (pointer < inputLength) ? input.codePointAt(pointer) : EOF;
}
// If numbersSeen is not 4, IPv4-in-IPv6-too-few-parts validation error, return failure.
if (numbersSeen != 4) {
@ -2660,7 +2672,7 @@ final class UrlParser { @@ -2660,7 +2672,7 @@ final class UrlParser {
else if (c == ':') {
// Increase pointer by 1.
pointer++;
c = (pointer < inputLength) ? input.charAt(pointer) : EOF;
c = (pointer < inputLength) ? input.codePointAt(pointer) : EOF;
// If c is the EOF code point, IPv6-invalid-code-point validation error, return failure.
if (c == EOF) {
throw new InvalidUrlException("IPv6 address unexpectedly ends.");

Loading…
Cancel
Save