Various UrlParser improvements

See gh-32513
2 years ago · 3cfa4ed4f7
2 changed files with 515 additions and 107 deletions
--- a/spring-web/src/main/java/org/springframework/web/util/HierarchicalUriComponents.java
+++ b/spring-web/src/main/java/org/springframework/web/util/HierarchicalUriComponents.java
@ -657,12 +657,6 @@ final class HierarchicalUriComponents extends UriComponents {
				@@ -657,12 +657,6 @@ final class HierarchicalUriComponents extends UriComponents {
 			public boolean isAllowed(int c) {
 				return isUnreserved(c);
 			}
-		},
-		C0 {
-			@Override
-			public boolean isAllowed(int c) {
-				return !(c >= 0 && c <= 0x1f) && !(c > '~');
-			}
 		};

 		/**
--- a/spring-web/src/main/java/org/springframework/web/util/UrlParser.java
+++ b/spring-web/src/main/java/org/springframework/web/util/UrlParser.java
@ -23,9 +23,10 @@ import java.util.ArrayList;
				@@ -23,9 +23,10 @@ import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Iterator;
 import java.util.List;
+import java.util.Locale;
 import java.util.Objects;
-import java.util.StringTokenizer;
 import java.util.function.Consumer;
+import java.util.function.IntPredicate;

 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
@ -234,23 +235,70 @@ final class UrlParser {
				@@ -234,23 +235,70 @@ final class UrlParser {
 		this.state = newState;
 	}

-	private static List<String> tokenize(String str, String delimiters) {
-		StringTokenizer st = new StringTokenizer(str, delimiters);
+	private static List<String> strictSplit(String input, int delimiter) {
+		// Let position be a position variable for input, initially pointing at the start of input.
+		int position = 0;
+		// Let tokens be a list of strings, initially empty.
 		List<String> tokens = new ArrayList<>();
-		while (st.hasMoreTokens()) {
-			tokens.add(st.nextToken());
+		// Let token be the result of collecting a sequence of code points that are not equal to delimiter from input, given position.
+		int delIdx = input.indexOf(delimiter, position);
+		String token = (delIdx != EOF) ? input.substring(position, delIdx) : input.substring(position);
+		position = delIdx;
+		// Append token to tokens.
+		tokens.add(token);
+		// While position is not past the end of input:
+		while (position != EOF) {
+			// Assert: the code point at position within input is delimiter.
+			Assert.state(input.codePointAt(position) == delimiter, "Codepoint is not a delimiter");
+			// Advance position by 1.
+			position++;
+			delIdx = input.indexOf(delimiter, position);
+			// Let token be the result of collecting a sequence of code points that are not equal to delimiter from input, given position.
+			token = (delIdx != EOF) ? input.substring(position, delIdx) : input.substring(position);
+			position = delIdx;
+			// Append token to tokens.
+			tokens.add(token);
 		}
 		return tokens;
 	}

 	private static String domainToAscii(String domain, boolean beStrict) {
+		// If beStrict is false, domain is an ASCII string, and strictly splitting domain on U+002E (.) does not produce any item that starts with an ASCII case-insensitive match for "xn--", this step is equivalent to ASCII lowercasing domain.
+		boolean onlyLowerCase = !beStrict;
+		if (!beStrict && containsOnlyAscii(domain)) {
+			int dotIdx = domain.indexOf('.');
+			while (dotIdx != -1) {
+				if (domain.length() - dotIdx > 4) {
+					// ASCII case-insensitive match for "xn--"
+					char ch0 = domain.charAt(dotIdx + 1);
+					char ch1 = domain.charAt(dotIdx + 2);
+					char ch2 = domain.charAt(dotIdx + 3);
+					char ch3 = domain.charAt(dotIdx + 4);
+					if ((ch0 == 'x' || ch0 == 'X') &&
+							(ch1 == 'n' || ch1 == 'N') &&
+							ch2 == '-' && ch3 == '_') {
+						onlyLowerCase = false;
+						break;
+					}
+				}
+				dotIdx = domain.indexOf('.', dotIdx + 1);
+			}
+		}
+		if (onlyLowerCase) {
+			return domain.toLowerCase(Locale.ENGLISH);
+		}
 		// Let result be the result of running Unicode ToASCII (https://www.unicode.org/reports/tr46/#ToASCII) with domain_name set to domain, UseSTD3ASCIIRules set to beStrict, CheckHyphens set to false, CheckBidi set to true, CheckJoiners set to true, Transitional_Processing set to false, and VerifyDnsLength set to beStrict. [UTS46]
 		int flag = 0;
 		if (beStrict) {
 			flag |= IDN.USE_STD3_ASCII_RULES;
 		}
 		// Implementation note: implementing Unicode ToASCII is beyond the scope of this parser, we use java.net.IDN.toASCII
-		return IDN.toASCII(domain, flag);
+		try {
+			return IDN.toASCII(domain, flag);
+		}
+		catch (IllegalArgumentException ex) {
+			throw new InvalidUrlException("Could not convert \"" + domain + "\" to ASCII: " + ex.getMessage(), ex);
+		}
 	}

 	private boolean validate() {
@ -284,6 +332,50 @@ final class UrlParser {
				@@ -284,6 +332,50 @@ final class UrlParser {
 		throw new InvalidUrlException(message.toString());
 	}

+	/**
+	 * The C0 control percent-encode set: the C0 controls plus every code point greater than U+007E (~).
+	 */
+	private static boolean c0ControlPercentEncodeSet(int ch) {
+		if (isC0Control(ch)) {
+			return true;
+		}
+		// Unsigned comparison, so negative values also rank above '~'.
+		return Integer.compareUnsigned(ch, '~') > 0;
+	}
+
+	/**
+	 * The fragment percent-encode set: the C0 control percent-encode set plus U+0020 SPACE, U+0022 ("),
+	 * U+003C (<), U+003E (>), and U+0060 (`).
+	 */
+	private static boolean fragmentPercentEncodeSet(int ch) {
+		return switch (ch) {
+			case ' ', '"', '<', '>', '`' -> true;
+			default -> c0ControlPercentEncodeSet(ch);
+		};
+	}
+
+	/**
+	 * The query percent-encode set: the C0 control percent-encode set plus U+0020 SPACE, U+0022 ("),
+	 * U+0023 (#), U+003C (<), and U+003E (>).
+	 */
+	private static boolean queryPercentEncodeSet(int ch) {
+		return switch (ch) {
+			case ' ', '"', '#', '<', '>' -> true;
+			default -> c0ControlPercentEncodeSet(ch);
+		};
+	}
+
+	/**
+	 * The special-query percent-encode set: the query percent-encode set plus U+0027 (').
+	 */
+	private static boolean specialQueryPercentEncodeSet(int ch) {
+		if (ch == '\'') {
+			return true;
+		}
+		return queryPercentEncodeSet(ch);
+	}
+
+
+	/**
+	 * The path percent-encode set: the query percent-encode set plus U+003F (?), U+0060 (`), U+007B ({),
+	 * and U+007D (}).
+	 */
+	private static boolean pathPercentEncodeSet(int ch) {
+		return switch (ch) {
+			case '?', '`', '{', '}' -> true;
+			default -> queryPercentEncodeSet(ch);
+		};
+	}
+
+	/**
+	 * The userinfo percent-encode set: the path percent-encode set plus U+002F (/), U+003A (:), U+003B (;),
+	 * U+003D (=), U+0040 (@), U+005B ([) to U+005E (^), inclusive, and U+007C (|).
+	 */
+	private static boolean userinfoPercentEncodeSet(int ch) {
+		return switch (ch) {
+			// '[', '\\', ']' and '^' spell out the inclusive range U+005B to U+005E.
+			case '/', ':', ';', '=', '@', '[', '\\', ']', '^', '|' -> true;
+			default -> pathPercentEncodeSet(ch);
+		};
+	}
+
 	private static boolean isC0Control(int ch) {
 		return ch >= 0 && ch <= 0x1F;
 	}
@ -307,6 +399,21 @@ final class UrlParser {
				@@ -307,6 +399,21 @@ final class UrlParser {
 		return true;
 	}

+	/**
+	 * Whether every char of the given sequence is an ASCII code point; {@code true} for an empty sequence.
+	 */
+	private static boolean containsOnlyAscii(CharSequence string) {
+		int index = 0;
+		while (index < string.length()) {
+			if (!isAsciiCodePoint(string.charAt(index))) {
+				return false;
+			}
+			index++;
+		}
+		return true;
+	}
+
+	/**
+	 * An ASCII code point is a code point in the range U+0000 NULL to U+007F DELETE, inclusive.
+	 */
+	private static boolean isAsciiCodePoint(int ch) {
+		return ch >= 0x00 && ch <= 0x7F;
+	}
+
 	private static boolean isAsciiDigit(int ch) {
 		return (ch >= '0' && ch <= '9');
 	}
@ -400,76 +507,151 @@ final class UrlParser {
				@@ -400,76 +507,151 @@ final class UrlParser {
 		}
 	}

-	private String percentEncode(int c, HierarchicalUriComponents.Type type) {
-		return percentEncode(Character.toString(c), type);
+	/**
+	 * Percent-decode the given input as UTF-8 by delegating to {@code UriUtils.decode}.
+	 * @param input the string to decode
+	 * @return the decoded string
+	 * @throws InvalidUrlException if {@code UriUtils.decode} throws an {@code IllegalArgumentException};
+	 * the original exception is kept as the cause
+	 */
+	private static String percentDecode(String input) {
+		try {
+			return UriUtils.decode(input, StandardCharsets.UTF_8);
+		}
+		catch (IllegalArgumentException ex) {
+			throw new InvalidUrlException("Could not decode \"" + input + "\": " + ex.getMessage(), ex);
+		}
+	}
+
+	private String percentEncode(int c, IntPredicate percentEncodeSet) {
+		return percentEncode(Character.toString(c), percentEncodeSet);
 	}

-	private String percentEncode(String source, HierarchicalUriComponents.Type type) {
-		if (this.encoding != null) {
-			return HierarchicalUriComponents.encodeUriComponent(source, this.encoding, type);
+	private String percentEncode(String input, IntPredicate percentEncodeSet) {
+		if (this.encoding == null) {
+			return input;
 		}
 		else {
-			return source;
+			byte[] bytes = input.getBytes(this.encoding);
+			boolean original = true;
+			for (byte b : bytes) {
+				if (percentEncodeSet.test(b)) {
+					original = false;
+					break;
+				}
+			}
+			if (original) {
+				return input;
+			}
+			StringBuilder output = new StringBuilder();
+			for (byte b : bytes) {
+				if (!percentEncodeSet.test(b)) {
+					output.append((char)b);
+				}
+				else {
+					output.append('%');
+					char hex1 = Character.toUpperCase(Character.forDigit((b >> 4) & 0xF, 16));
+					char hex2 = Character.toUpperCase(Character.forDigit(b & 0xF, 16));
+					output.append(hex1);
+					output.append(hex2);
+				}
+			}
+			return output.toString();
 		}
 	}

 	/**
-	 * A single-dot URL path segment is a URL path segment that is "." or an ASCII case-insensitive match for "%2e".
+	 * A single-dot URL path segment is a URL path segment that is "[/]." or an ASCII case-insensitive match for "[/]%2e".
 	 */
 	private static boolean isSingleDotPathSegment(StringBuilder b) {
 		int len = b.length();
-		if (len == 1) {
-			char ch0 = b.charAt(0);
-			return ch0 == '.';
-		}
-		else if (len == 3) {
-			//  ASCII case-insensitive match for "%2e".
-			char ch0 = b.charAt(0);
-			char ch1 = b.charAt(1);
-			char ch2 = b.charAt(2);
-			return ch0 == '%' && ch1 == '2' && (ch2 == 'e' || ch2 == 'E');
-		}
-		else {
-			return false;
+		switch (len) {
+			case 1 -> {
+				char ch0 = b.charAt(0);
+				return ch0 == '.';
+			}
+			case 2 -> {
+				char ch0 = b.charAt(0);
+				char ch1 = b.charAt(1);
+				return ch0 == '/' && ch1 == '.';
+			}
+			case 3 -> {
+				//  ASCII case-insensitive match for "%2e".
+				char ch0 = b.charAt(0);
+				char ch1 = b.charAt(1);
+				char ch2 = b.charAt(2);
+				return ch0 == '%' && ch1 == '2' && (ch2 == 'e' || ch2 == 'E');
+			}
+			case 4 -> {
+				//  ASCII case-insensitive match for "/%2e".
+				char ch0 = b.charAt(0);
+				char ch1 = b.charAt(1);
+				char ch2 = b.charAt(2);
+				char ch3 = b.charAt(3);
+				return ch0 == '/' && ch1 == '%' && ch2 == '2' && (ch3 == 'e' || ch3 == 'E');
+			}
+			default -> {
+				return false;
+			}
 		}
 	}

 	/**
-	 * A double-dot URL path segment is a URL path segment that is "/.." or an ASCII case-insensitive match for "/.%2e", "/%2e.", or "/%2e%2e".
+	 * A double-dot URL path segment is a URL path segment that is "[/].." or an ASCII case-insensitive match for "[/].%2e", "[/]%2e.", or "[/]%2e%2e".
 	 */
 	private static boolean isDoubleDotPathSegment(StringBuilder b) {
 		int len = b.length();
-		if (len == 3) {
-			char ch0 = b.charAt(0);
-			char ch1 = b.charAt(1);
-			char ch2 = b.charAt(2);
-			return ch0 == '/' && ch1 == '.' && ch2 == '.';
-		}
-		else if (len == 5) {
-			char ch0 = b.charAt(0);
-			char ch1 = b.charAt(1);
-			char ch2 = b.charAt(2);
-			char ch3 = b.charAt(3);
-			char ch4 = b.charAt(4);
-			// case-insensitive match for "/.%2e" or "/%2e."
-			return ch0 == '/' &&
-					(ch1 == '.' && ch2 == '%' && ch3 == '2' && (ch4 == 'e' || ch4 == 'E')
-							|| (ch1 == '%' && ch2 == '2' && (ch3 == 'e' || ch3 == 'E') && ch4 == '.'));
-		}
-		else if (len == 7) {
-			char ch0 = b.charAt(0);
-			char ch1 = b.charAt(1);
-			char ch2 = b.charAt(2);
-			char ch3 = b.charAt(3);
-			char ch4 = b.charAt(4);
-			char ch5 = b.charAt(5);
-			char ch6 = b.charAt(6);
-			// case-insensitive match for "/%2e%2e".
-			return ch0 == '/' && ch1 == '%' && ch2 == '2' && (ch3 == 'e' || ch3 == 'E')
-					&& ch4 == '%' && ch5 == '2' && (ch6 == 'e' || ch6 == 'E');
-		}
-		else {
-			return false;
+		switch (len) {
+			case 2 -> {
+				char ch0 = b.charAt(0);
+				char ch1 = b.charAt(1);
+				return ch0 == '.' && ch1 == '.';
+			}
+			case 3 -> {
+				char ch0 = b.charAt(0);
+				char ch1 = b.charAt(1);
+				char ch2 = b.charAt(2);
+				return ch0 == '/' && ch1 == '.' && ch2 == '.';
+			}
+			case 4 -> {
+				char ch0 = b.charAt(0);
+				char ch1 = b.charAt(1);
+				char ch2 = b.charAt(2);
+				char ch3 = b.charAt(3);
+				// case-insensitive match for ".%2e" or "%2e."
+				return (ch0 == '.' && ch1 == '%' && ch2 == '2' && (ch3 == 'e' || ch3 == 'E') ||
+						(ch0 == '%' && ch1 == '2' && (ch2 == 'e' || ch2 == 'E') && ch3 == '.'));
+			}
+			case 5 -> {
+				char ch0 = b.charAt(0);
+				char ch1 = b.charAt(1);
+				char ch2 = b.charAt(2);
+				char ch3 = b.charAt(3);
+				char ch4 = b.charAt(4);
+				// case-insensitive match for "/.%2e" or "/%2e."
+				return ch0 == '/' &&
+						(ch1 == '.' && ch2 == '%' && ch3 == '2' && (ch4 == 'e' || ch4 == 'E')
+								|| (ch1 == '%' && ch2 == '2' && (ch3 == 'e' || ch3 == 'E') && ch4 == '.'));
+			}
+			case 6 -> {
+				char ch0 = b.charAt(0);
+				char ch1 = b.charAt(1);
+				char ch2 = b.charAt(2);
+				char ch3 = b.charAt(3);
+				char ch4 = b.charAt(4);
+				char ch5 = b.charAt(5);
+				// case-insensitive match for "%2e%2e".
+				return ch0 == '%' && ch1 == '2' && (ch2 == 'e' || ch2 == 'E')
+						&& ch3 == '%' && ch4 == '2' && (ch5 == 'e' || ch5 == 'E');
+			}
+			case 7 -> {
+				char ch0 = b.charAt(0);
+				char ch1 = b.charAt(1);
+				char ch2 = b.charAt(2);
+				char ch3 = b.charAt(3);
+				char ch4 = b.charAt(4);
+				char ch5 = b.charAt(5);
+				char ch6 = b.charAt(6);
+				// case-insensitive match for "/%2e%2e".
+				return ch0 == '/' && ch1 == '%' && ch2 == '2' && (ch3 == 'e' || ch3 == 'E')
+						&& ch4 == '%' && ch5 == '2' && (ch6 == 'e' || ch6 == 'E');
+			}
+			default -> {
+				return false;
+			}
 		}
 	}

@ -656,8 +838,8 @@ final class UrlParser {
				@@ -656,8 +838,8 @@ final class UrlParser {
 				// If base is null, or base has an opaque path and c is not U+0023 (#), missing-scheme-non-relative-URL
 				// validation error, return failure.
 				if (p.base == null || p.base.path().isOpaque() && c != '#') {
-					p.failure("The input is missing a scheme, because it does not begin with an ASCII alpha \"" + Character.toString(c) +
-							"\", and no base URL was provided.");
+					p.failure("The input is missing a scheme, because it does not begin with an ASCII alpha \"" +
+							(c != EOF ? Character.toString(c) : "") + "\", and no base URL was provided.");
 				}
 				// Otherwise, if base has an opaque path and c is U+0023 (#), set url’s scheme to base’s scheme, url’s
 				// path to base’s path, url’s query to base’s query, url’s fragment to the empty string, and set state to fragment state.
@ -738,8 +920,8 @@ final class UrlParser {
				@@ -738,8 +920,8 @@ final class UrlParser {
 				else {
 					// Set url’s username to base’s username, url’s password to base’s password, url’s host to base’s host,
 					// url’s port to base’s port, url’s path to a clone of base’s path, and url’s query to base’s query.
-					url.username = p.base.username();
-					url.password = p.base.password();
+					url.username.replace(0, url.username.length(), p.base.username());
+					url.password.replace(0, url.password.length(), p.base.password());
 					url.host = p.base.host();
 					url.port = p.base.port();
 					url.path = p.base.path().clone();
@ -789,8 +971,8 @@ final class UrlParser {
				@@ -789,8 +971,8 @@ final class UrlParser {
 				// to base’s host, url’s port to base’s port, state to path state, and then, decrease pointer by 1.
 				else {
 					Assert.state(p.base != null, "No base URL available");
-					url.username = p.base.username();
-					url.password = p.base.password();
+					url.username.replace(0, url.username.length(), p.base.username());
+					url.password.replace(0, url.password.length(), p.base.password());
 					url.host = p.base.host();
 					url.port = p.base.port();
 					p.setState(PATH);
@ -850,9 +1032,6 @@ final class UrlParser {
				@@ -850,9 +1032,6 @@ final class UrlParser {
 					p.atSignSeen = true;

 					int bufferLen = p.buffer.length();
-					StringBuilder username = new StringBuilder(bufferLen);
-					StringBuilder password = new StringBuilder(bufferLen);
-
 					// For each codePoint in buffer:
 					for (int i = 0; i < bufferLen; i++) {
 						int codePoint = p.buffer.codePointAt(i);
@ -862,18 +1041,16 @@ final class UrlParser {
				@@ -862,18 +1041,16 @@ final class UrlParser {
 							continue;
 						}
 						// Let encodedCodePoints be the result of running UTF-8 percent-encode codePoint using the userinfo percent-encode set.
-						String encodedCodePoints = p.percentEncode(codePoint,HierarchicalUriComponents.Type.USER_INFO);
+						String encodedCodePoints = p.percentEncode(codePoint, UrlParser::userinfoPercentEncodeSet);
 						// If passwordTokenSeen is true, then append encodedCodePoints to url’s password.
 						if (p.passwordTokenSeen) {
-							password.append(encodedCodePoints);
+							url.password.append(encodedCodePoints);
 						}
 						// Otherwise, append encodedCodePoints to url’s username.
 						else {
-							username.append(encodedCodePoints);
+							url.username.append(encodedCodePoints);
 						}
 					}
-					url.username = username.toString();
-					url.password = password.toString();
 					// Set buffer to the empty string.
 					p.emptyBuffer();
 				}
@ -917,7 +1094,7 @@ final class UrlParser {
				@@ -917,7 +1094,7 @@ final class UrlParser {
 						return;
 					}
 					// Let host be the result of host parsing buffer with url is not special.
-					Host host = Host.parse(p.buffer.toString(), false, p.validationErrorHandler);
+					Host host = Host.parse(p.buffer.toString(), !url.isSpecial(), p);
 					// Set url’s host to host, buffer to the empty string, and state to port state.
 					url.host = host;
 					p.emptyBuffer();
@ -943,7 +1120,7 @@ final class UrlParser {
				@@ -943,7 +1120,7 @@ final class UrlParser {
 					// EXTRA: if buffer is not empty
 					if (!p.buffer.isEmpty()) {
 						// Let host be the result of host parsing buffer with url is not special.
-						Host host = Host.parse(p.buffer.toString(), false, p.validationErrorHandler);
+						Host host = Host.parse(p.buffer.toString(), !url.isSpecial(), p);
 						// Set url’s host to host, buffer to the empty string, and state to path start state.
 						url.host = host;
 					}
@ -1005,7 +1182,7 @@ final class UrlParser {
				@@ -1005,7 +1182,7 @@ final class UrlParser {
 								}
 								int defaultPort = defaultPort(url.scheme);
 								// Set url’s port to null, if port is url’s scheme’s default port; otherwise to port.
-								if (defaultPort == -1 || port == defaultPort) {
+								if (defaultPort != -1 && port == defaultPort) {
 									url.port = null;
 								}
 								else {
@ -1160,7 +1337,7 @@ final class UrlParser {
				@@ -1160,7 +1337,7 @@ final class UrlParser {
 					// Otherwise, run these steps:
 					else {
 						// Let host be the result of host parsing buffer with url is not special.
-						Host host = Host.parse(p.buffer.toString(), false, p.validationErrorHandler);
+						Host host = Host.parse(p.buffer.toString(), !url.isSpecial(), p);
 						// If host is "localhost", then set host to the empty string.
 						if (host instanceof Domain domain && domain.domain().equals("localhost")) {
 							host = EmptyHost.INSTANCE;
@ -1308,7 +1485,7 @@ final class UrlParser {
				@@ -1308,7 +1485,7 @@ final class UrlParser {
 						}
 					}
 					// UTF-8 percent-encode c using the path percent-encode set and append the result to buffer.
-					String encoded = p.percentEncode(c, HierarchicalUriComponents.Type.PATH_SEGMENT);
+					String encoded = p.percentEncode(c, UrlParser::pathPercentEncodeSet);
 					p.buffer.append(encoded);
 				}
 			}
@ -1353,7 +1530,7 @@ final class UrlParser {
				@@ -1353,7 +1530,7 @@ final class UrlParser {
 					}
 					// If c is not the EOF code point, UTF-8 percent-encode c using the C0 control percent-encode set and append the result to url’s path.
 					if (c != EOF) {
-						String encoded = p.percentEncode(c, HierarchicalUriComponents.Type.C0);
+						String encoded = p.percentEncode(c, UrlParser::c0ControlPercentEncodeSet);
 						url.path.append(encoded);
 					}
 				}
@ -1376,8 +1553,9 @@ final class UrlParser {
				@@ -1376,8 +1553,9 @@ final class UrlParser {
 				// - c is the EOF code point
 				if ( (p.stateOverride == null && c == '#') || c == EOF) {
 					// Let queryPercentEncodeSet be the special-query percent-encode set if url is special; otherwise the query percent-encode set.
+					IntPredicate queryPercentEncodeSet = url.isSpecial() ? UrlParser::specialQueryPercentEncodeSet : UrlParser::queryPercentEncodeSet;
 					// Percent-encode after encoding, with encoding, buffer, and queryPercentEncodeSet, and append the result to url’s query.
-					String encoded = p.percentEncode(p.buffer.toString(), HierarchicalUriComponents.Type.QUERY);
+					String encoded = p.percentEncode(p.buffer.toString(), queryPercentEncodeSet);
 					Assert.state(url.query != null, "Url's query should not be null");
 					url.query += encoded;
 					// Set buffer to the empty string.
@ -1432,7 +1610,7 @@ final class UrlParser {
				@@ -1432,7 +1610,7 @@ final class UrlParser {
 						}
 					}
 					// UTF-8 percent-encode c using the fragment percent-encode set and append the result to url’s fragment.
-					String encoded = p.percentEncode(c, HierarchicalUriComponents.Type.FRAGMENT);
+					String encoded = p.percentEncode(c, UrlParser::fragmentPercentEncodeSet);
 					Assert.state(url.fragment != null, "Url's fragment should not be null");
 					url.fragment += encoded;
 				}
@ -1472,9 +1650,9 @@ final class UrlParser {
				@@ -1472,9 +1650,9 @@ final class UrlParser {

 		private String scheme = "";

-		private String username = "";
+		private StringBuilder username = new StringBuilder();

-		private String password = "";
+		private StringBuilder password = new StringBuilder();

 		@Nullable
 		private Host host = null;
@ -1517,6 +1695,33 @@ final class UrlParser {
				@@ -1517,6 +1695,33 @@ final class UrlParser {
 		}


+		/**
+		 * Serialize the origin of this URL.
+		 * <p>For the schemes "ftp", "http", "https", "ws" and "wss" the result is the scheme, followed by
+		 * "://", the host, and, if the port is non-null, U+003A COLON (:) and the port.
+		 * For every other scheme the result is the string "null".
+		 */
+		public String origin() {
+			String scheme = scheme();
+			boolean knownScheme = switch (scheme) {
+				case "ftp", "http", "https", "ws", "wss" -> true;
+				default -> false;
+			};
+			if (!knownScheme) {
+				return "null";
+			}
+			StringBuilder result = new StringBuilder(scheme);
+			result.append("://").append(host());
+			Port port = port();
+			if (port != null) {
+				result.append(':').append(port);
+			}
+			return result.toString();
+		}
+
 		/**
 		 * A URL’s scheme is an ASCII string that identifies the type of URL and can be used to dispatch a URL for
 		 * further processing after parsing. It is initially the empty string.
@ -1525,18 +1730,25 @@ final class UrlParser {
				@@ -1525,18 +1730,25 @@ final class UrlParser {
 			return this.scheme;
 		}

+		/**
+		 * The protocol getter: this URL’s scheme, followed by U+003A (:).
+		 */
+		public String protocol() {
+			String scheme = scheme();
+			return scheme + ':';
+		}
+
 		/**
 		 * A URL’s username is an ASCII string identifying a username. It is initially the empty string.
 		 */
 		public String username() {
-			return this.username;
+			return this.username.toString();
 		}

 		/**
 		 * A URL’s password is an ASCII string identifying a password. It is initially the empty string.
 		 */
 		public String password() {
-			return this.password;
+			return this.password.toString();
 		}

 		/**
@ -1547,6 +1759,36 @@ final class UrlParser {
				@@ -1547,6 +1759,36 @@ final class UrlParser {
 			return this.host;
 		}

+		/**
+		 * The host getter steps are:
+		 * If url’s host is null, then return the empty string.
+		 * If url’s port is null, return url’s host, serialized.
+		 * Otherwise return url’s host, serialized, followed by U+003A (:) and url’s port, serialized.
+		 */
+		public String hostString() {
+			if (host() == null) {
+				return "";
+			}
+			StringBuilder result = new StringBuilder(hostname());
+			Port port = port();
+			if (port == null) {
+				return result.toString();
+			}
+			return result.append(':').append(port).toString();
+		}
+
+		/**
+		 * Return the string form of this URL’s host, or the empty string if the host is null.
+		 */
+		public String hostname() {
+			Host host = host();
+			return (host != null) ? host.toString() : "";
+		}
+
 		/**
 		 * A URL’s port is either null, a string representing a 16-bit unsigned integer  that identifies a networking
 		 * port, or a string containing a uri template . It is initially {@code null}.
@ -1556,6 +1798,15 @@ final class UrlParser {
				@@ -1556,6 +1798,15 @@ final class UrlParser {
 			return this.port;
 		}

+		/**
+		 * Return the string form of this URL’s port, or the empty string if the port is null.
+		 */
+		public String portString() {
+			Port port = port();
+			return (port != null) ? port.toString() : "";
+		}
+
 		/**
 		 * A URL’s path is a URL {@linkplain Path path}, usually identifying a location. It is initially {@code « »}.
 		 */
@ -1563,6 +1814,10 @@ final class UrlParser {
				@@ -1563,6 +1814,10 @@ final class UrlParser {
 			return this.path;
 		}

+		/**
+		 * Return the name of this URL’s path, as reported by the path’s {@code name()} method.
+		 */
+		public String pathname() {
+			return path().name();
+		}
+
 		/**
 		 * To shorten a url’s path:
 		 * <ol>
@ -1585,6 +1840,21 @@ final class UrlParser {
				@@ -1585,6 +1840,21 @@ final class UrlParser {
 			return this.query;
 		}

+		/**
+		 * The search getter steps are:
+		 * If this’s URL’s query is either null or the empty string, then return the empty string.
+		 * Return U+003F (?), followed by this’s URL’s query.
+		 */
+		public String search() {
+			String query = query();
+			// An empty query yields "" rather than a bare "?", exactly like a null query.
+			if (query == null || query.isEmpty()) {
+				return "";
+			}
+			else {
+				return "?" + query;
+			}
+		}
+
 		/**
 		 * A URL’s fragment is either {@code null}  or an ASCII string that can be used for further processing on the
 		 * resource the URL’s other components identify. It is initially {@code null}.
@ -1594,6 +1864,77 @@ final class UrlParser {
				@@ -1594,6 +1864,77 @@ final class UrlParser {
 			return this.fragment;
 		}

+		/**
+		 * The hash getter: the empty string if this URL’s fragment is null or empty,
+		 * otherwise U+0023 (#) followed by the fragment.
+		 */
+		public String hash() {
+			String fragment = fragment();
+			boolean absent = (fragment == null || fragment.isEmpty());
+			return absent ? "" : "#" + fragment;
+		}
+
+		/**
+		 * Serialize this URL: the scheme and ":", then (when the host is non-null) "//" with optional
+		 * credentials, host and port, then the path, and finally the query and fragment when they are non-null.
+		 */
+		public String href() {
+			// Let output be url’s scheme and U+003A (:) concatenated.
+			StringBuilder output = new StringBuilder(scheme()).append(':');
+			Host host = host();
+			if (host != null) {
+				// url’s host is non-null: append "//" to output.
+				output.append("//");
+				if (includesCredentials()) {
+					// url includes credentials: username, then ":" and the password unless it is empty, then "@".
+					output.append(username());
+					String password = password();
+					if (!password.isEmpty()) {
+						output.append(':').append(password);
+					}
+					output.append('@');
+				}
+				// Append url’s host, serialized, to output.
+				output.append(hostname());
+				// If url’s port is non-null, append U+003A (:) followed by url’s port, serialized, to output.
+				Port port = port();
+				if (port != null) {
+					output.append(':').append(port());
+				}
+			}
+			// If url’s host is null, url does not have an opaque path, url’s path’s size is greater than 1, and url’s path[0] is the empty string, then append U+002F (/) followed by U+002E (.) to output.
+			else if (!hasOpaquePath() &&
+					path() instanceof PathSegments pathSegments &&
+					pathSegments.size() > 1 &&
+					pathSegments.get(0).isEmpty()) {
+				output.append("/.");
+			}
+			// Append the result of URL path serializing url to output.
+			output.append(pathname());
+			// If url’s query is non-null, append U+003F (?), followed by url’s query, to output.
+			String query = query();
+			if (query != null) {
+				output.append('?').append(query);
+			}
+			// If url’s fragment is non-null, append U+0023 (#), followed by url’s fragment, to output.
+			String fragment = fragment();
+			if (fragment != null) {
+				output.append('#').append(fragment);
+			}
+			// Return output.
+			return output.toString();
+		}
+
 		@Override
 		public boolean equals(Object obj) {
 			if (obj == this) {
@ -1646,7 +1987,7 @@ final class UrlParser {
				@@ -1646,7 +1987,7 @@ final class UrlParser {
 		 * The host parser takes a scalar value string input with an optional
 		 * boolean isOpaque (default false), and then runs these steps. They return failure or a host.
 		 */
-		static Host parse(String input, boolean isOpaque, @Nullable Consumer<String> validationErrorHandler) {
+		static Host parse(String input, boolean isOpaque, UrlParser p) {
 			// If input starts with U+005B ([), then:
 			if (!input.isEmpty() && input.charAt(0) == '[') {
 				int last = input.length() - 1;
@ -1660,13 +2001,13 @@ final class UrlParser {
				@@ -1660,13 +2001,13 @@ final class UrlParser {
 			}
 			// If isOpaque is true, then return the result of opaque-host parsing input.
 			if (isOpaque) {
-				return OpaqueHost.parse(input);
+				return OpaqueHost.parse(input, p);
 			}
 			// Assert: input is not the empty string.
 			Assert.state(!input.isEmpty(), "Input should not be empty");

 			// Let domain be the result of running UTF-8 decode without BOM on the percent-decoding of input.
-			String domain = UriUtils.decode(input, StandardCharsets.UTF_8);
+			String domain = percentDecode(input);
 			// Let asciiDomain be the result of running domain to ASCII with domain and false.
 			String asciiDomain = domainToAscii(domain, false);

@ -1679,7 +2020,7 @@ final class UrlParser {
				@@ -1679,7 +2020,7 @@ final class UrlParser {
 			}
 			// If asciiDomain ends in a number, then return the result of IPv4 parsing asciiDomain.
 			if (endsInNumber(asciiDomain)) {
-				Ipv4Address address = Ipv4Address.parse(asciiDomain, validationErrorHandler);
+				Ipv4Address address = Ipv4Address.parse(asciiDomain, p);
 				return new IpAddressHost(address);
 			}
 			// Return asciiDomain.
@ -1690,8 +2031,11 @@ final class UrlParser {
				@@ -1690,8 +2031,11 @@ final class UrlParser {

 		private static boolean endsInNumber(String input) {
 			// Let parts be the result of strictly splitting input on U+002E (.).
-			List<String> parts = tokenize(input, ".");
+			List<String> parts = strictSplit(input, '.');
 			int lastIdx = parts.size() - 1;
+			if (lastIdx == -1) {
+				return false;
+			}
 			// If the last item in parts is the empty string, then:
 			if (parts.get(lastIdx).isEmpty()) {
 				// If parts’s size is 1, then return false.
@ -1807,11 +2151,62 @@ final class UrlParser {
				@@ -1807,11 +2151,62 @@ final class UrlParser {
 		}
 	}

-	record OpaqueHost(String domain) implements Host {
+	static final class OpaqueHost implements Host {
+
+		private final String host;

-		public static OpaqueHost parse(String input) {
-			throw new UnsupportedOperationException("Not implemented yet");
+		private OpaqueHost(String host) {
+			this.host = host;
 		}
+
+		/**
+		 * Opaque-host parser: throws {@link InvalidUrlException} if {@code input} contains a forbidden host code point,
+		 * reports invalid URL units through {@code p} when validating, and returns the C0-control percent-encoded host.
+		 */
+		public static OpaqueHost parse(String input, UrlParser p) {
+			for (int i = 0; i < input.length(); i++) {
+				char ch = input.charAt(i);
+				// If input contains a forbidden host code point, host-invalid-code-point validation error, return failure.
+				if (isForbiddenHost(ch)) {
+					throw new InvalidUrlException("An opaque host contains a forbidden host code point.");
+				}
+				// If input contains a code point that is not a URL code point and not U+0025 (%), invalid-URL-unit validation error.
+				if (p.validate() && !isUrlCodePoint(ch) && ch != '%') {
+					p.validationError("Code point \"" + ch + "\" is not a URL unit.");
+				}
+				// If input contains a U+0025 (%) not followed by two ASCII hex digits (or cut short by end of input), invalid-URL-unit validation error.
+				if (p.validate() && ch == '%' && (i + 2 >= input.length() || !isHexDigit(input.charAt(i + 1)) || !isHexDigit(input.charAt(i + 2)))) {
+					p.validationError("Code point \"" + ch + "\" is not a URL unit.");
+				}
+			}
+			// Return the result of running UTF-8 percent-encode on input using the C0 control percent-encode set.
+			String encoded = p.percentEncode(input, UrlParser::c0ControlPercentEncodeSet);
+			return new OpaqueHost(encoded);
+		}
+
+		// ASCII hex digit test (0-9, a-f, A-F). NOTE(review): swap for a shared UrlParser helper if one already exists.
+		private static boolean isHexDigit(char c) {
+			return (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F');
+		}
+
+		@Override
+		public boolean equals(Object obj) {
+			if (obj == this) {
+				return true;
+			}
+			return (obj instanceof OpaqueHost other && this.host.equals(other.host));
+		}
+
+		@Override
+		public int hashCode() {
+			return this.host.hashCode();
+		}
+
+		@Override
+		public String toString() {
+			return this.host;
+		}
+
 	}

 	static final class EmptyHost implements Host {
@ -1876,16 +2271,14 @@ final class UrlParser {
				@@ -1876,16 +2271,14 @@ final class UrlParser {
 			return output.toString();
 		}

-		public static Ipv4Address parse(String input, @Nullable Consumer<String> validationErrorHandler) {
+		public static Ipv4Address parse(String input, UrlParser p) {
 			// Let parts be the result of strictly splitting input on U+002E (.).
-			List<String> parts = tokenize(input, ".");
+			List<String> parts = strictSplit(input, '.');
 			int partsSize = parts.size();
 			// If the last item in parts is the empty string, then:
 			if (parts.get(partsSize - 1).isEmpty()) {
 				// IPv4-empty-part validation error.
-				if (validationErrorHandler != null) {
-					validationErrorHandler.accept("IPv4 address ends with \".\"");
-				}
+				p.validationError("IPv4 address ends with \".\"");
 				// If parts’s size is greater than 1, then remove the last item from parts.
 				if (partsSize > 1) {
 					parts.remove(partsSize - 1);
@ -1903,8 +2296,8 @@ final class UrlParser {
				@@ -1903,8 +2296,8 @@ final class UrlParser {
 				String part = parts.get(i);
 				// Let result be the result of parsing part.
 				ParseIpv4NumberResult result = parseIpv4Number(part);
-				if (validationErrorHandler != null && result.validationError()) {
-					validationErrorHandler.accept("The IPv4 address contains numbers expressed using hexadecimal or octal digits.");
+				if (p.validate() && result.validationError()) {
+					p.validationError("The IPv4 address contains numbers expressed using hexadecimal or octal digits.");
 				}
 				// Append result to numbers.
 				numbers.add(result.number());
@ -1912,8 +2305,8 @@ final class UrlParser {
				@@ -1912,8 +2305,8 @@ final class UrlParser {
 			for (Iterator<Integer> iterator = numbers.iterator(); iterator.hasNext(); ) {
 				Integer number = iterator.next();
 				// If any item in numbers is greater than 255, IPv4-out-of-range-part validation error.
-				if (validationErrorHandler != null && number > 255) {
-					validationErrorHandler.accept("An IPv4 address part exceeds 255.");
+				if (p.validate() && number > 255) {
+					p.validationError("An IPv4 address part exceeds 255.");
 				}
 				if (iterator.hasNext()) {
 					// If any but the last item in numbers is greater than 255, then return failure.
@ -2356,6 +2749,8 @@ final class UrlParser {
				@@ -2356,6 +2749,8 @@ final class UrlParser {
 		boolean isOpaque();

 		Path clone();
+
+		String name();
 	}

 	static final class PathSegment implements Path {
@ -2384,6 +2779,15 @@ final class UrlParser {
				@@ -2384,6 +2779,15 @@ final class UrlParser {
 			this.segment.append(s);
 		}

+		/**
+		 * Return this segment's text with one leading {@code /} removed, if present.
+		 */
+		@Override
+		public String name() {
+			String text = segment();
+			return (text.startsWith("/") ? text.substring(1) : text);
+		}
+
 		@Override
 		public boolean isEmpty() {
 			return this.segment.isEmpty();
@ -2483,6 +2887,16 @@ final class UrlParser {
				@@ -2483,6 +2887,16 @@ final class UrlParser {
 			return new PathSegments(this.segments);
 		}

+		@Override
+		public String name() {
+			// Join the segment names, writing a "/" in front of each one.
+			StringBuilder joined = new StringBuilder();
+			for (PathSegment part : this.segments) {
+				joined.append('/').append(part.name());
+			}
+			return joined.toString();
+		}
+
 		@Override
 		public boolean equals(Object o) {
 			if (o == this) {