@ -23,9 +23,10 @@ import java.util.ArrayList;
@@ -23,9 +23,10 @@ import java.util.ArrayList;
import java.util.Arrays ;
import java.util.Iterator ;
import java.util.List ;
import java.util.Locale ;
import java.util.Objects ;
import java.util.StringTokenizer ;
import java.util.function.Consumer ;
import java.util.function.IntPredicate ;
import org.apache.commons.logging.Log ;
import org.apache.commons.logging.LogFactory ;
@ -234,23 +235,70 @@ final class UrlParser {
@@ -234,23 +235,70 @@ final class UrlParser {
this . state = newState ;
}
private static List < String > tokenize ( String str , String delimiters ) {
StringTokenizer st = new StringTokenizer ( str , delimiters ) ;
private static List < String > strictSplit ( String input , int delimiter ) {
// Let position be a position variable for input, initially pointing at the start of input.
int position = 0 ;
// Let tokens be a list of strings, initially empty.
List < String > tokens = new ArrayList < > ( ) ;
while ( st . hasMoreTokens ( ) ) {
tokens . add ( st . nextToken ( ) ) ;
// Let token be the result of collecting a sequence of code points that are not equal to delimiter from input, given position.
int delIdx = input . indexOf ( delimiter , position ) ;
String token = ( delIdx ! = EOF ) ? input . substring ( position , delIdx ) : input . substring ( position ) ;
position = delIdx ;
// Append token to tokens.
tokens . add ( token ) ;
// While position is not past the end of input:
while ( position ! = EOF ) {
// Assert: the code point at position within input is delimiter.
Assert . state ( input . codePointAt ( position ) = = delimiter , "Codepoint is not a delimiter" ) ;
// Advance position by 1.
position + + ;
delIdx = input . indexOf ( delimiter , position ) ;
// Let token be the result of collecting a sequence of code points that are not equal to delimiter from input, given position.
token = ( delIdx ! = EOF ) ? input . substring ( position , delIdx ) : input . substring ( position ) ;
position = delIdx ;
// Append token to tokens.
tokens . add ( token ) ;
}
return tokens ;
}
private static String domainToAscii ( String domain , boolean beStrict ) {
// If beStrict is false, domain is an ASCII string, and strictly splitting domain on U+002E (.) does not produce any item that starts with an ASCII case-insensitive match for "xn--", this step is equivalent to ASCII lowercasing domain.
boolean onlyLowerCase = ! beStrict ;
if ( ! beStrict & & containsOnlyAscii ( domain ) ) {
int dotIdx = domain . indexOf ( '.' ) ;
while ( dotIdx ! = - 1 ) {
if ( domain . length ( ) - dotIdx > 4 ) {
// ASCII case-insensitive match for "xn--"
char ch0 = domain . charAt ( dotIdx + 1 ) ;
char ch1 = domain . charAt ( dotIdx + 2 ) ;
char ch2 = domain . charAt ( dotIdx + 3 ) ;
char ch3 = domain . charAt ( dotIdx + 4 ) ;
if ( ( ch0 = = 'x' | | ch0 = = 'X' ) & &
( ch1 = = 'n' | | ch1 = = 'N' ) & &
ch2 = = '-' & & ch3 = = '_' ) {
onlyLowerCase = false ;
break ;
}
}
dotIdx = domain . indexOf ( '.' , dotIdx + 1 ) ;
}
}
if ( onlyLowerCase ) {
return domain . toLowerCase ( Locale . ENGLISH ) ;
}
// Let result be the result of running Unicode ToASCII (https://www.unicode.org/reports/tr46/#ToASCII) with domain_name set to domain, UseSTD3ASCIIRules set to beStrict, CheckHyphens set to false, CheckBidi set to true, CheckJoiners set to true, Transitional_Processing set to false, and VerifyDnsLength set to beStrict. [UTS46]
int flag = 0 ;
if ( beStrict ) {
flag | = IDN . USE_STD3_ASCII_RULES ;
}
// Implementation note: implementing Unicode ToASCII is beyond the scope of this parser, we use java.net.IDN.toASCII
return IDN . toASCII ( domain , flag ) ;
try {
return IDN . toASCII ( domain , flag ) ;
}
catch ( IllegalArgumentException ex ) {
throw new InvalidUrlException ( "Could not convert \"" + domain + "\" to ASCII: " + ex . getMessage ( ) , ex ) ;
}
}
private boolean validate ( ) {
@ -284,6 +332,50 @@ final class UrlParser {
@@ -284,6 +332,50 @@ final class UrlParser {
throw new InvalidUrlException ( message . toString ( ) ) ;
}
/ * *
* The C0 control percent - encode set are the C0 controls and all code points greater than U + 007E ( ~ ) .
* /
private static boolean c0ControlPercentEncodeSet ( int ch ) {
return isC0Control ( ch ) | | Integer . compareUnsigned ( ch , '~' ) > 0 ;
}
/ * *
* The fragment percent - encode set is the C0 control percent - encode set and U + 0020 SPACE , U + 0022 ( " ) , U + 003C ( < ) , U + 003E ( > ) , and U + 0060 ( ` ) .
* /
private static boolean fragmentPercentEncodeSet ( int ch ) {
return c0ControlPercentEncodeSet ( ch ) | | ch = = ' ' | | ch = = '"' | | ch = = '<' | | ch = = '>' | | ch = = '`' ;
}
/ * *
* The query percent - encode set is the C0 control percent - encode set and U + 0020 SPACE , U + 0022 ( " ) , U + 0023 ( # ) , U + 003C ( < ) , and U + 003E ( > ) .
* /
private static boolean queryPercentEncodeSet ( int ch ) {
return c0ControlPercentEncodeSet ( ch ) | | ch = = ' ' | | ch = = '"' | | ch = = '#' | | ch = = '<' | | ch = = '>' ;
}
/ * *
* The special - query percent - encode set is the query percent - encode set and U + 0027 ( ' ) .
* /
private static boolean specialQueryPercentEncodeSet ( int ch ) {
return queryPercentEncodeSet ( ch ) | | ch = = '\'' ;
}
/ * *
* The path percent - encode set is the query percent - encode set and U + 003F ( ? ) , U + 0060 ( ` ) , U + 007B ( { ) , and U + 007D ( } ) .
* /
private static boolean pathPercentEncodeSet ( int ch ) {
return queryPercentEncodeSet ( ch ) | | ch = = '?' | | ch = = '`' | | ch = = '{' | | ch = = '}' ;
}
/ * *
* The userinfo percent - encode set is the path percent - encode set and U + 002F ( / ) , U + 003A ( : ) , U + 003B ( ; ) , U + 003D ( = ) , U + 0040 ( @ ) , U + 005B ( [ ) to U + 005E ( ^ ) , inclusive , and U + 007C ( | ) .
* /
private static boolean userinfoPercentEncodeSet ( int ch ) {
return pathPercentEncodeSet ( ch ) | | ch = = '/' | | ch = = ':' | | ch = = ';' | | ch = = '=' | | ch = = '@' | |
( Integer . compareUnsigned ( ch , '[' ) > = 0 & & Integer . compareUnsigned ( ch , '^' ) < = 0 ) | | ch = = '|' ;
}
private static boolean isC0Control ( int ch ) {
return ch > = 0 & & ch < = 0x1F ;
}
@ -307,6 +399,21 @@ final class UrlParser {
@@ -307,6 +399,21 @@ final class UrlParser {
return true ;
}
private static boolean containsOnlyAscii ( CharSequence string ) {
for ( int i = 0 ; i < string . length ( ) ; i + + ) {
char ch = string . charAt ( i ) ;
if ( ! isAsciiCodePoint ( ch ) ) {
return false ;
}
}
return true ;
}
private static boolean isAsciiCodePoint ( int ch ) {
// An ASCII code point is a code point in the range U+0000 NULL to U+007F DELETE, inclusive.
return Integer . compareUnsigned ( ch , 0 ) > = 0 & & Integer . compareUnsigned ( ch , 127 ) < = 0 ;
}
private static boolean isAsciiDigit ( int ch ) {
return ( ch > = '0' & & ch < = '9' ) ;
}
@ -400,76 +507,151 @@ final class UrlParser {
@@ -400,76 +507,151 @@ final class UrlParser {
}
}
private String percentEncode ( int c , HierarchicalUriComponents . Type type ) {
return percentEncode ( Character . toString ( c ) , type ) ;
private static String percentDecode ( String input ) {
try {
return UriUtils . decode ( input , StandardCharsets . UTF_8 ) ;
}
catch ( IllegalArgumentException ex ) {
throw new InvalidUrlException ( "Could not decode \"" + input + "\": " + ex . getMessage ( ) , ex ) ;
}
}
private String percentEncode ( int c , IntPredicate percentEncodeSet ) {
return percentEncode ( Character . toString ( c ) , percentEncodeSet ) ;
}
private String percentEncode ( String source , HierarchicalUriComponents . Type type ) {
if ( this . encoding ! = null ) {
return HierarchicalUriComponents . encodeUriComponent ( source , this . encoding , type ) ;
private String percentEncode ( String input , IntPredicate percentEncodeSet ) {
if ( this . encoding = = null ) {
return inp ut ;
}
else {
return source ;
byte [ ] bytes = input . getBytes ( this . encoding ) ;
boolean original = true ;
for ( byte b : bytes ) {
if ( percentEncodeSet . test ( b ) ) {
original = false ;
break ;
}
}
if ( original ) {
return input ;
}
StringBuilder output = new StringBuilder ( ) ;
for ( byte b : bytes ) {
if ( ! percentEncodeSet . test ( b ) ) {
output . append ( ( char ) b ) ;
}
else {
output . append ( '%' ) ;
char hex1 = Character . toUpperCase ( Character . forDigit ( ( b > > 4 ) & 0xF , 16 ) ) ;
char hex2 = Character . toUpperCase ( Character . forDigit ( b & 0xF , 16 ) ) ;
output . append ( hex1 ) ;
output . append ( hex2 ) ;
}
}
return output . toString ( ) ;
}
}
/ * *
* A single - dot URL path segment is a URL path segment that is "." or an ASCII case - insensitive match for "%2e" .
* A single - dot URL path segment is a URL path segment that is "[/] ." or an ASCII case - insensitive match for "[/] %2e" .
* /
private static boolean isSingleDotPathSegment ( StringBuilder b ) {
int len = b . length ( ) ;
if ( len = = 1 ) {
char ch0 = b . charAt ( 0 ) ;
return ch0 = = '.' ;
}
else if ( len = = 3 ) {
// ASCII case-insensitive match for "%2e".
char ch0 = b . charAt ( 0 ) ;
char ch1 = b . charAt ( 1 ) ;
char ch2 = b . charAt ( 2 ) ;
return ch0 = = '%' & & ch1 = = '2' & & ( ch2 = = 'e' | | ch2 = = 'E' ) ;
}
else {
return false ;
switch ( len ) {
case 1 - > {
char ch0 = b . charAt ( 0 ) ;
return ch0 = = '.' ;
}
case 2 - > {
char ch0 = b . charAt ( 0 ) ;
char ch1 = b . charAt ( 1 ) ;
return ch0 = = '/' & & ch1 = = '.' ;
}
case 3 - > {
// ASCII case-insensitive match for "%2e".
char ch0 = b . charAt ( 0 ) ;
char ch1 = b . charAt ( 1 ) ;
char ch2 = b . charAt ( 2 ) ;
return ch0 = = '%' & & ch1 = = '2' & & ( ch2 = = 'e' | | ch2 = = 'E' ) ;
}
case 4 - > {
// ASCII case-insensitive match for "/%2e".
char ch0 = b . charAt ( 0 ) ;
char ch1 = b . charAt ( 1 ) ;
char ch2 = b . charAt ( 2 ) ;
char ch3 = b . charAt ( 3 ) ;
return ch0 = = '/' & & ch1 = = '%' & & ch2 = = '2' & & ( ch3 = = 'e' | | ch3 = = 'E' ) ;
}
default - > {
return false ;
}
}
}
/ * *
* A double - dot URL path segment is a URL path segment that is "/.." or an ASCII case - insensitive match for "/.%2e" , "/%2e." , or "/%2e%2e" .
* A double - dot URL path segment is a URL path segment that is "[ /] .." or an ASCII case - insensitive match for "/.%2e" , "/%2e." , or "/%2e%2e" .
* /
private static boolean isDoubleDotPathSegment ( StringBuilder b ) {
int len = b . length ( ) ;
if ( len = = 3 ) {
char ch0 = b . charAt ( 0 ) ;
char ch1 = b . charAt ( 1 ) ;
char ch2 = b . charAt ( 2 ) ;
return ch0 = = '/' & & ch1 = = '.' & & ch2 = = '.' ;
}
else if ( len = = 5 ) {
char ch0 = b . charAt ( 0 ) ;
char ch1 = b . charAt ( 1 ) ;
char ch2 = b . charAt ( 2 ) ;
char ch3 = b . charAt ( 3 ) ;
char ch4 = b . charAt ( 4 ) ;
// case-insensitive match for "/.%2e" or "/%2e."
return ch0 = = '/' & &
( ch1 = = '.' & & ch2 = = '%' & & ch3 = = '2' & & ( ch4 = = 'e' | | ch4 = = 'E' )
| | ( ch1 = = '%' & & ch2 = = '2' & & ( ch3 = = 'e' | | ch3 = = 'E' ) & & ch4 = = '.' ) ) ;
}
else if ( len = = 7 ) {
char ch0 = b . charAt ( 0 ) ;
char ch1 = b . charAt ( 1 ) ;
char ch2 = b . charAt ( 2 ) ;
char ch3 = b . charAt ( 3 ) ;
char ch4 = b . charAt ( 4 ) ;
char ch5 = b . charAt ( 5 ) ;
char ch6 = b . charAt ( 6 ) ;
// case-insensitive match for "/%2e%2e".
return ch0 = = '/' & & ch1 = = '%' & & ch2 = = '2' & & ( ch3 = = 'e' | | ch3 = = 'E' )
& & ch4 = = '%' & & ch5 = = '2' & & ( ch6 = = 'e' | | ch6 = = 'E' ) ;
}
else {
return false ;
switch ( len ) {
case 2 - > {
char ch0 = b . charAt ( 0 ) ;
char ch1 = b . charAt ( 1 ) ;
return ch0 = = '.' & & ch1 = = '.' ;
}
case 3 - > {
char ch0 = b . charAt ( 0 ) ;
char ch1 = b . charAt ( 1 ) ;
char ch2 = b . charAt ( 2 ) ;
return ch0 = = '/' & & ch1 = = '.' & & ch2 = = '.' ;
}
case 4 - > {
char ch0 = b . charAt ( 0 ) ;
char ch1 = b . charAt ( 1 ) ;
char ch2 = b . charAt ( 2 ) ;
char ch3 = b . charAt ( 3 ) ;
// case-insensitive match for ".%2e" or "%2e."
return ( ch0 = = '.' & & ch1 = = '%' & & ch2 = = '2' & & ( ch3 = = 'e' | | ch3 = = 'E' ) | |
( ch0 = = '%' & & ch1 = = '2' & & ( ch2 = = 'e' | | ch2 = = 'E' ) & & ch3 = = '.' ) ) ;
}
case 5 - > {
char ch0 = b . charAt ( 0 ) ;
char ch1 = b . charAt ( 1 ) ;
char ch2 = b . charAt ( 2 ) ;
char ch3 = b . charAt ( 3 ) ;
char ch4 = b . charAt ( 4 ) ;
// case-insensitive match for "/.%2e" or "/%2e."
return ch0 = = '/' & &
( ch1 = = '.' & & ch2 = = '%' & & ch3 = = '2' & & ( ch4 = = 'e' | | ch4 = = 'E' )
| | ( ch1 = = '%' & & ch2 = = '2' & & ( ch3 = = 'e' | | ch3 = = 'E' ) & & ch4 = = '.' ) ) ;
}
case 6 - > {
char ch0 = b . charAt ( 0 ) ;
char ch1 = b . charAt ( 1 ) ;
char ch2 = b . charAt ( 2 ) ;
char ch3 = b . charAt ( 3 ) ;
char ch4 = b . charAt ( 4 ) ;
char ch5 = b . charAt ( 5 ) ;
// case-insensitive match for "%2e%2e".
return ch0 = = '%' & & ch1 = = '2' & & ( ch2 = = 'e' | | ch2 = = 'E' )
& & ch3 = = '%' & & ch4 = = '2' & & ( ch5 = = 'e' | | ch5 = = 'E' ) ;
}
case 7 - > {
char ch0 = b . charAt ( 0 ) ;
char ch1 = b . charAt ( 1 ) ;
char ch2 = b . charAt ( 2 ) ;
char ch3 = b . charAt ( 3 ) ;
char ch4 = b . charAt ( 4 ) ;
char ch5 = b . charAt ( 5 ) ;
char ch6 = b . charAt ( 6 ) ;
// case-insensitive match for "/%2e%2e".
return ch0 = = '/' & & ch1 = = '%' & & ch2 = = '2' & & ( ch3 = = 'e' | | ch3 = = 'E' )
& & ch4 = = '%' & & ch5 = = '2' & & ( ch6 = = 'e' | | ch6 = = 'E' ) ;
}
default - > {
return false ;
}
}
}
@ -656,8 +838,8 @@ final class UrlParser {
@@ -656,8 +838,8 @@ final class UrlParser {
// If base is null, or base has an opaque path and c is not U+0023 (#), missing-scheme-non-relative-URL
// validation error, return failure.
if ( p . base = = null | | p . base . path ( ) . isOpaque ( ) & & c ! = '#' ) {
p . failure ( "The input is missing a scheme, because it does not begin with an ASCII alpha \"" + Character . toString ( c ) +
"\", and no base URL was provided." ) ;
p . failure ( "The input is missing a scheme, because it does not begin with an ASCII alpha \"" +
( c ! = EOF ? Character . toString ( c ) : "" ) + "\", and no base URL was provided." ) ;
}
// Otherwise, if base has an opaque path and c is U+0023 (#), set url’s scheme to base’s scheme, url’s
// path to base’s path, url’s query to base’s query, url’s fragment to the empty string, and set state to fragment state.
@ -738,8 +920,8 @@ final class UrlParser {
@@ -738,8 +920,8 @@ final class UrlParser {
else {
// Set url’s username to base’s username, url’s password to base’s password, url’s host to base’s host,
// url’s port to base’s port, url’s path to a clone of base’s path, and url’s query to base’s query.
url . username = p . base . username ( ) ;
url . password = p . base . password ( ) ;
url . username . replace ( 0 , url . username . length ( ) , p . base . username ( ) ) ;
url . password . replace ( 0 , url . password . length ( ) , p . base . password ( ) ) ;
url . host = p . base . host ( ) ;
url . port = p . base . port ( ) ;
url . path = p . base . path ( ) . clone ( ) ;
@ -789,8 +971,8 @@ final class UrlParser {
@@ -789,8 +971,8 @@ final class UrlParser {
// to base’s host, url’s port to base’s port, state to path state, and then, decrease pointer by 1.
else {
Assert . state ( p . base ! = null , "No base URL available" ) ;
url . username = p . base . username ( ) ;
url . password = p . base . password ( ) ;
url . username . replace ( 0 , url . username . length ( ) , p . base . username ( ) ) ;
url . password . replace ( 0 , url . password . length ( ) , p . base . password ( ) ) ;
url . host = p . base . host ( ) ;
url . port = p . base . port ( ) ;
p . setState ( PATH ) ;
@ -850,9 +1032,6 @@ final class UrlParser {
@@ -850,9 +1032,6 @@ final class UrlParser {
p . atSignSeen = true ;
int bufferLen = p . buffer . length ( ) ;
StringBuilder username = new StringBuilder ( bufferLen ) ;
StringBuilder password = new StringBuilder ( bufferLen ) ;
// For each codePoint in buffer:
for ( int i = 0 ; i < bufferLen ; i + + ) {
int codePoint = p . buffer . codePointAt ( i ) ;
@ -862,18 +1041,16 @@ final class UrlParser {
@@ -862,18 +1041,16 @@ final class UrlParser {
continue ;
}
// Let encodedCodePoints be the result of running UTF-8 percent-encode codePoint using the userinfo percent-encode set.
String encodedCodePoints = p . percentEncode ( codePoint , HierarchicalUriComponents . Type . USER_INFO ) ;
String encodedCodePoints = p . percentEncode ( codePoint , UrlParser : : userinfoPercentEncodeSet ) ;
// If passwordTokenSeen is true, then append encodedCodePoints to url’s password.
if ( p . passwordTokenSeen ) {
password . append ( encodedCodePoints ) ;
url . password . append ( encodedCodePoints ) ;
}
// Otherwise, append encodedCodePoints to url’s username.
else {
username . append ( encodedCodePoints ) ;
url . u sername . append ( encodedCodePoints ) ;
}
}
url . username = username . toString ( ) ;
url . password = password . toString ( ) ;
// Set buffer to the empty string.
p . emptyBuffer ( ) ;
}
@ -917,7 +1094,7 @@ final class UrlParser {
@@ -917,7 +1094,7 @@ final class UrlParser {
return ;
}
// Let host be the result of host parsing buffer with url is not special.
Host host = Host . parse ( p . buffer . toString ( ) , false , p . validationErrorHandler ) ;
Host host = Host . parse ( p . buffer . toString ( ) , ! url . isSpecial ( ) , p ) ;
// Set url’s host to host, buffer to the empty string, and state to port state.
url . host = host ;
p . emptyBuffer ( ) ;
@ -943,7 +1120,7 @@ final class UrlParser {
@@ -943,7 +1120,7 @@ final class UrlParser {
// EXTRA: if buffer is not empty
if ( ! p . buffer . isEmpty ( ) ) {
// Let host be the result of host parsing buffer with url is not special.
Host host = Host . parse ( p . buffer . toString ( ) , false , p . validationErrorHandler ) ;
Host host = Host . parse ( p . buffer . toString ( ) , ! url . isSpecial ( ) , p ) ;
// Set url’s host to host, buffer to the empty string, and state to path start state.
url . host = host ;
}
@ -1005,7 +1182,7 @@ final class UrlParser {
@@ -1005,7 +1182,7 @@ final class UrlParser {
}
int defaultPort = defaultPort ( url . scheme ) ;
// Set url’s port to null, if port is url’s scheme’s default port; otherwise to port.
if ( defaultPort = = - 1 | | port = = defaultPort ) {
if ( defaultPort ! = - 1 & & port = = defaultPort ) {
url . port = null ;
}
else {
@ -1160,7 +1337,7 @@ final class UrlParser {
@@ -1160,7 +1337,7 @@ final class UrlParser {
// Otherwise, run these steps:
else {
// Let host be the result of host parsing buffer with url is not special.
Host host = Host . parse ( p . buffer . toString ( ) , false , p . validationErrorHandler ) ;
Host host = Host . parse ( p . buffer . toString ( ) , ! url . isSpecial ( ) , p ) ;
// If host is "localhost", then set host to the empty string.
if ( host instanceof Domain domain & & domain . domain ( ) . equals ( "localhost" ) ) {
host = EmptyHost . INSTANCE ;
@ -1308,7 +1485,7 @@ final class UrlParser {
@@ -1308,7 +1485,7 @@ final class UrlParser {
}
}
// UTF-8 percent-encode c using the path percent-encode set and append the result to buffer.
String encoded = p . percentEncode ( c , HierarchicalUriComponents . Type . PATH_SEGMENT ) ;
String encoded = p . percentEncode ( c , UrlParser : : pathPercentEncodeSet ) ;
p . buffer . append ( encoded ) ;
}
}
@ -1353,7 +1530,7 @@ final class UrlParser {
@@ -1353,7 +1530,7 @@ final class UrlParser {
}
// If c is not the EOF code point, UTF-8 percent-encode c using the C0 control percent-encode set and append the result to url’s path.
if ( c ! = EOF ) {
String encoded = p . percentEncode ( c , HierarchicalUriComponents . Type . C0 ) ;
String encoded = p . percentEncode ( c , UrlParser : : c0ControlPercentEncodeSet ) ;
url . path . append ( encoded ) ;
}
}
@ -1376,8 +1553,9 @@ final class UrlParser {
@@ -1376,8 +1553,9 @@ final class UrlParser {
// - c is the EOF code point
if ( ( p . stateOverride = = null & & c = = '#' ) | | c = = EOF ) {
// Let queryPercentEncodeSet be the special-query percent-encode set if url is special; otherwise the query percent-encode set.
IntPredicate queryPercentEncodeSet = url . isSpecial ( ) ? UrlParser : : specialQueryPercentEncodeSet : UrlParser : : queryPercentEncodeSet ;
// Percent-encode after encoding, with encoding, buffer, and queryPercentEncodeSet, and append the result to url’s query.
String encoded = p . percentEncode ( p . buffer . toString ( ) , HierarchicalUriComponents . Type . QUERY ) ;
String encoded = p . percentEncode ( p . buffer . toString ( ) , queryPercentEncodeSet ) ;
Assert . state ( url . query ! = null , "Url's query should not be null" ) ;
url . query + = encoded ;
// Set buffer to the empty string.
@ -1432,7 +1610,7 @@ final class UrlParser {
@@ -1432,7 +1610,7 @@ final class UrlParser {
}
}
// UTF-8 percent-encode c using the fragment percent-encode set and append the result to url’s fragment.
String encoded = p . percentEncode ( c , HierarchicalUriComponents . Type . FRAGMENT ) ;
String encoded = p . percentEncode ( c , UrlParser : : fragmentPercentEncodeSet ) ;
Assert . state ( url . fragment ! = null , "Url's fragment should not be null" ) ;
url . fragment + = encoded ;
}
@ -1472,9 +1650,9 @@ final class UrlParser {
@@ -1472,9 +1650,9 @@ final class UrlParser {
private String scheme = "" ;
private String username = "" ;
private StringBuilder username = new StringBuilder ( ) ;
private String password = "" ;
private StringBuilder password = new StringBuilder ( ) ;
@Nullable
private Host host = null ;
@ -1517,6 +1695,33 @@ final class UrlParser {
@@ -1517,6 +1695,33 @@ final class UrlParser {
}
/ * *
* The serialization of an origin is the string obtained by applying the following algorithm to the given origin origin :
* If origin is an opaque origin , then return "null" .
* Otherwise , let result be origin ' s scheme .
* Append "://" to result .
* Append origin ' s host , serialized , to result .
* If origin ' s port is non - null , append a U + 003A COLON character ( : ) , and origin ' s port , serialized , to result .
* Return result .
* /
public String origin ( ) {
String scheme = scheme ( ) ;
if ( scheme . equals ( "ftp" ) | | scheme . equals ( "http" ) | | scheme . equals ( "https" ) | | scheme . equals ( "ws" ) | | scheme . equals ( "wss" ) ) {
StringBuilder builder = new StringBuilder ( scheme ) ;
builder . append ( "://" ) ;
builder . append ( host ( ) ) ;
Port port = port ( ) ;
if ( port ! = null ) {
builder . append ( ':' ) ;
builder . append ( port ) ;
}
return builder . toString ( ) ;
}
else {
return "null" ;
}
}
/ * *
* A URL ’ s scheme is an ASCII string that identifies the type of URL and can be used to dispatch a URL for
* further processing after parsing . It is initially the empty string .
@ -1525,18 +1730,25 @@ final class UrlParser {
@@ -1525,18 +1730,25 @@ final class UrlParser {
return this . scheme ;
}
/ * *
* The protocol getter steps are to return this ’ s URL ’ s scheme , followed by U + 003A ( : ) .
* /
public String protocol ( ) {
return scheme ( ) + ":" ;
}
/ * *
* A URL ’ s username is an ASCII string identifying a username . It is initially the empty string .
* /
public String username ( ) {
return this . username ;
return this . username . toString ( ) ;
}
/ * *
* A URL ’ s password is an ASCII string identifying a password . It is initially the empty string .
* /
public String password ( ) {
return this . password ;
return this . password . toString ( ) ;
}
/ * *
@ -1547,6 +1759,36 @@ final class UrlParser {
@@ -1547,6 +1759,36 @@ final class UrlParser {
return this . host ;
}
/ * *
* The host getter steps are :
* Let url be this ’ s URL .
* If url ’ s host is null , then return the empty string .
* If url ’ s port is null , return url ’ s host , serialized .
* Return url ’ s host , serialized , followed by U + 003A ( : ) and url ’ s port , serialized .
* /
public String hostString ( ) {
if ( host ( ) = = null ) {
return "" ;
}
StringBuilder builder = new StringBuilder ( hostname ( ) ) ;
Port port = port ( ) ;
if ( port ! = null ) {
builder . append ( ':' ) ;
builder . append ( port ) ;
}
return builder . toString ( ) ;
}
public String hostname ( ) {
Host host = host ( ) ;
if ( host = = null ) {
return "" ;
}
else {
return host . toString ( ) ;
}
}
/ * *
* A URL ’ s port is either null , a string representing a 16 - bit unsigned integer that identifies a networking
* port , or a string containing a uri template . It is initially { @code null } .
@ -1556,6 +1798,15 @@ final class UrlParser {
@@ -1556,6 +1798,15 @@ final class UrlParser {
return this . port ;
}
public String portString ( ) {
if ( port ( ) = = null ) {
return "" ;
}
else {
return port ( ) . toString ( ) ;
}
}
/ * *
* A URL ’ s path is a URL { @linkplain Path path } , usually identifying a location . It is initially { @code « » } .
* /
@ -1563,6 +1814,10 @@ final class UrlParser {
@@ -1563,6 +1814,10 @@ final class UrlParser {
return this . path ;
}
public String pathname ( ) {
return path ( ) . name ( ) ;
}
/ * *
* To shorten a url ’ s path :
* < ol >
@ -1585,6 +1840,21 @@ final class UrlParser {
@@ -1585,6 +1840,21 @@ final class UrlParser {
return this . query ;
}
/ * *
* The search getter steps are :
* If this ’ s URL ’ s query is either null or the empty string , then return the empty string .
* Return U + 003F ( ? ) , followed by this ’ s URL ’ s query .
* /
public String search ( ) {
String query = query ( ) ;
if ( query = = null ) {
return "" ;
}
else {
return "?" + query ;
}
}
/ * *
* A URL ’ s fragment is either { @code null } or an ASCII string that can be used for further processing on the
* resource the URL ’ s other components identify . It is initially { @code null } .
@ -1594,6 +1864,77 @@ final class UrlParser {
@@ -1594,6 +1864,77 @@ final class UrlParser {
return this . fragment ;
}
/ * *
* The hash getter steps are :
* If this ’ s URL ’ s fragment is either null or the empty string , then return the empty string .
* Return U + 0023 ( # ) , followed by this ’ s URL ’ s fragment .
* /
public String hash ( ) {
String fragment = fragment ( ) ;
if ( fragment = = null | | fragment . isEmpty ( ) ) {
return "" ;
}
else {
return "#" + fragment ;
}
}
public String href ( ) {
// Let output be url’s scheme and U+003A (:) concatenated.
StringBuilder output = new StringBuilder ( scheme ( ) ) ;
output . append ( ':' ) ;
Host host = host ( ) ;
// If url’s host is non-null:
if ( host ! = null ) {
// Append "//" to output.
output . append ( "//" ) ;
// If url includes credentials, then:
if ( includesCredentials ( ) ) {
// Append url’s username to output.
output . append ( username ( ) ) ;
String password = password ( ) ;
// If url’s password is not the empty string, then append U+003A (:), followed by url’s password, to output.
if ( ! password . isEmpty ( ) ) {
output . append ( ':' ) ;
output . append ( password ) ;
}
// Append U+0040 (@) to output.
output . append ( '@' ) ;
}
// Append url’s host, serialized, to output.
output . append ( hostname ( ) ) ;
Port port = port ( ) ;
// If url’s port is non-null, append U+003A (:) followed by url’s port, serialized, to output.
if ( port ! = null ) {
output . append ( ':' ) ;
output . append ( port ( ) ) ;
}
}
// If url’s host is null, url does not have an opaque path, url’s path’s size is greater than 1, and url’s path[0] is the empty string, then append U+002F (/) followed by U+002E (.) to output.
else if ( ! hasOpaquePath ( ) & &
path ( ) instanceof PathSegments pathSegments & &
pathSegments . size ( ) > 1 & &
pathSegments . get ( 0 ) . isEmpty ( ) ) {
output . append ( "/." ) ;
}
// Append the result of URL path serializing url to output.
output . append ( pathname ( ) ) ;
// If url’s query is non-null, append U+003F (?), followed by url’s query, to output.
String query = query ( ) ;
if ( query ! = null ) {
output . append ( '?' ) ;
output . append ( query ) ;
}
// If exclude fragment is false and url’s fragment is non-null, then append U+0023 (#), followed by url’s fragment, to output.
String fragment = fragment ( ) ;
if ( fragment ! = null ) {
output . append ( '#' ) ;
output . append ( fragment ) ;
}
// Return output.
return output . toString ( ) ;
}
@Override
public boolean equals ( Object obj ) {
if ( obj = = this ) {
@ -1646,7 +1987,7 @@ final class UrlParser {
@@ -1646,7 +1987,7 @@ final class UrlParser {
* The host parser takes a scalar value string input with an optional
* boolean isOpaque ( default false ) , and then runs these steps . They return failure or a host .
* /
static Host parse ( String input , boolean isOpaque , @Nullable Consumer < String > validationErrorHandler ) {
static Host parse ( String input , boolean isOpaque , UrlParser p ) {
// If input starts with U+005B ([), then:
if ( ! input . isEmpty ( ) & & input . charAt ( 0 ) = = '[' ) {
int last = input . length ( ) - 1 ;
@ -1660,13 +2001,13 @@ final class UrlParser {
@@ -1660,13 +2001,13 @@ final class UrlParser {
}
// If isOpaque is true, then return the result of opaque-host parsing input.
if ( isOpaque ) {
return OpaqueHost . parse ( input ) ;
return OpaqueHost . parse ( input , p ) ;
}
// Assert: input is not the empty string.
Assert . state ( ! input . isEmpty ( ) , "Input should not be empty" ) ;
// Let domain be the result of running UTF-8 decode without BOM on the percent-decoding of input.
String domain = UriUtils . d ecode( input , StandardCharsets . UTF_8 ) ;
String domain = percentD ecode( input ) ;
// Let asciiDomain be the result of running domain to ASCII with domain and false.
String asciiDomain = domainToAscii ( domain , false ) ;
@ -1679,7 +2020,7 @@ final class UrlParser {
@@ -1679,7 +2020,7 @@ final class UrlParser {
}
// If asciiDomain ends in a number, then return the result of IPv4 parsing asciiDomain.
if ( endsInNumber ( asciiDomain ) ) {
Ipv4Address address = Ipv4Address . parse ( asciiDomain , validationErrorHandler ) ;
Ipv4Address address = Ipv4Address . parse ( asciiDomain , p ) ;
return new IpAddressHost ( address ) ;
}
// Return asciiDomain.
@ -1690,8 +2031,11 @@ final class UrlParser {
@@ -1690,8 +2031,11 @@ final class UrlParser {
private static boolean endsInNumber ( String input ) {
// Let parts be the result of strictly splitting input on U+002E (.).
List < String > parts = tokenize ( input , "." ) ;
List < String > parts = strictSplit ( input , '.' ) ;
int lastIdx = parts . size ( ) - 1 ;
if ( lastIdx = = - 1 ) {
return false ;
}
// If the last item in parts is the empty string, then:
if ( parts . get ( lastIdx ) . isEmpty ( ) ) {
// If parts’s size is 1, then return false.
@ -1807,11 +2151,62 @@ final class UrlParser {
@@ -1807,11 +2151,62 @@ final class UrlParser {
}
}
record OpaqueHost ( String domain ) implements Host {
static final class OpaqueHost implements Host {
private final String host ;
public static OpaqueHost parse ( String input ) {
throw new UnsupportedOperationException ( "Not implemented yet" ) ;
private OpaqueHost ( String hos t ) {
this . host = host ;
}
/ * *
* The opaque - host parser takes a scalar value string input , and then runs these steps . They return failure or
* an opaque host .
* /
public static OpaqueHost parse ( String input , UrlParser p ) {
for ( int i = 0 ; i < input . length ( ) ; i + + ) {
char ch = input . charAt ( i ) ;
// If input contains a forbidden host code point, host-invalid-code-point validation error, return failure.
if ( isForbiddenHost ( ch ) ) {
throw new InvalidUrlException ( "An opaque host contains a forbidden host code point." ) ;
}
// If input contains a code point that is not a URL code point and not U+0025 (%), invalid-URL-unit validation error.
if ( p . validate ( ) & & ! isUrlCodePoint ( ch ) & & ch ! = '%' ) {
p . validationError ( "Code point \"" + ch + "\" is not a URL unit." ) ;
}
//If input contains a U+0025 (%) and the two code points following it are not ASCII hex digits, invalid-URL-unit validation error.
if ( p . validate ( ) & & ch = = '%' & & ( input . length ( ) - i < 2 | | ! isAsciiDigit ( input . charAt ( i + 1 ) ) | | ! isAsciiDigit ( input . charAt ( i + 2 ) ) ) ) {
p . validationError ( "Code point \"" + ch + "\" is not a URL unit." ) ;
}
}
//Return the result of running UTF-8 percent-encode on input using the C0 control percent-encode set.
String encoded = p . percentEncode ( input , UrlParser : : c0ControlPercentEncodeSet ) ;
return new OpaqueHost ( encoded ) ;
}
@Override
public boolean equals ( Object obj ) {
if ( obj = = this ) {
return true ;
}
else if ( obj instanceof OpaqueHost other ) {
return this . host . equals ( other . host ) ;
}
else {
return false ;
}
}
@Override
public int hashCode ( ) {
return this . host . hashCode ( ) ;
}
@Override
public String toString ( ) {
return this . host ;
}
}
static final class EmptyHost implements Host {
@ -1876,16 +2271,14 @@ final class UrlParser {
@@ -1876,16 +2271,14 @@ final class UrlParser {
return output . toString ( ) ;
}
public static Ipv4Address parse ( String input , @Nullable Consumer < String > validationErrorHandler ) {
public static Ipv4Address parse ( String input , UrlParser p ) {
// Let parts be the result of strictly splitting input on U+002E (.).
List < String > parts = tokenize ( input , "." ) ;
List < String > parts = strictSplit ( input , '.' ) ;
int partsSize = parts . size ( ) ;
// If the last item in parts is the empty string, then:
if ( parts . get ( partsSize - 1 ) . isEmpty ( ) ) {
// IPv4-empty-part validation error.
if ( validationErrorHandler ! = null ) {
validationErrorHandler . accept ( "IPv4 address ends with \".\"" ) ;
}
p . validationError ( "IPv4 address ends with \".\"" ) ;
// If parts’s size is greater than 1, then remove the last item from parts.
if ( partsSize > 1 ) {
parts . remove ( partsSize - 1 ) ;
@ -1903,8 +2296,8 @@ final class UrlParser {
@@ -1903,8 +2296,8 @@ final class UrlParser {
String part = parts . get ( i ) ;
// Let result be the result of parsing part.
ParseIpv4NumberResult result = parseIpv4Number ( part ) ;
if ( validationErrorHandler ! = null & & result . validationError ( ) ) {
validationErrorHandler . accept ( "The IPv4 address contains numbers expressed using hexadecimal or octal digits." ) ;
if ( p . validate ( ) & & result . validationError ( ) ) {
p . validationError ( "The IPv4 address contains numbers expressed using hexadecimal or octal digits." ) ;
}
// Append result to numbers.
numbers . add ( result . number ( ) ) ;
@ -1912,8 +2305,8 @@ final class UrlParser {
@@ -1912,8 +2305,8 @@ final class UrlParser {
for ( Iterator < Integer > iterator = numbers . iterator ( ) ; iterator . hasNext ( ) ; ) {
Integer number = iterator . next ( ) ;
// If any item in numbers is greater than 255, IPv4-out-of-range-part validation error.
if ( validationErrorHandler ! = null & & number > 255 ) {
validationErrorHandler . accept ( "An IPv4 address part exceeds 255." ) ;
if ( p . validate ( ) & & number > 255 ) {
p . validationError ( "An IPv4 address part exceeds 255." ) ;
}
if ( iterator . hasNext ( ) ) {
// If any but the last item in numbers is greater than 255, then return failure.
@ -2356,6 +2749,8 @@ final class UrlParser {
@@ -2356,6 +2749,8 @@ final class UrlParser {
boolean isOpaque ( ) ;
Path clone ( ) ;
String name ( ) ;
}
static final class PathSegment implements Path {
@ -2384,6 +2779,15 @@ final class UrlParser {
@@ -2384,6 +2779,15 @@ final class UrlParser {
this . segment . append ( s ) ;
}
@Override
public String name ( ) {
String name = segment ( ) ;
if ( name . startsWith ( "/" ) ) {
name = name . substring ( 1 ) ;
}
return name ;
}
@Override
public boolean isEmpty ( ) {
return this . segment . isEmpty ( ) ;
@ -2483,6 +2887,16 @@ final class UrlParser {
@@ -2483,6 +2887,16 @@ final class UrlParser {
return new PathSegments ( this . segments ) ;
}
@Override
public String name ( ) {
StringBuilder output = new StringBuilder ( ) ;
for ( PathSegment segment : this . segments ) {
output . append ( '/' ) ;
output . append ( segment . name ( ) ) ;
}
return output . toString ( ) ;
}
@Override
public boolean equals ( Object o ) {
if ( o = = this ) {