Parse IPv6 URIs (#3841).

This commit is contained in:
Millesimus 2021-09-04 17:33:25 +02:00 committed by Daniel Gultsch
parent 4040d5f647
commit ca08c27eef
1 changed file with 36 additions and 3 deletions

View File

@ -254,6 +254,39 @@ public class Patterns {
+ "[0-9]|[0-1][0-9]{2}|[1-9][0-9]|[1-9]|0)\\.(25[0-5]|2[0-4][0-9]|[0-1]"
+ "[0-9]{2}|[1-9][0-9]|[1-9]|0)\\.(25[0-5]|2[0-4][0-9]|[0-1][0-9]{2}"
+ "|[1-9][0-9]|[0-9]))");
/**
 * Matches an IPv6 address literal enclosed in square brackets, e.g. {@code [2001:db8::1]}.
 * The brackets are part of the match; a bare address without them is not matched.
 *
 * <p>The following forms are covered:
 * <ul>
 * <li>IPv6 addresses</li>
 * <li>zero compressed IPv6 addresses (section 2.2 of rfc5952)</li>
 * <li>link-local IPv6 addresses with zone index (section 11 of rfc4007)</li>
 * <li>IPv4-Embedded IPv6 Address (section 2 of rfc6052)</li>
 * <li>IPv4-mapped IPv6 addresses (section 2.1 of rfc2765)</li>
 * <li>IPv4-translated addresses (section 2.1 of rfc2765)</li>
 * </ul>
 *
 * <p>The expression contains capturing groups, so embedding it into a larger expression adds
 * to that expression's group count.
 *
 * <p>Taken from https://stackoverflow.com/questions/53497/regular-expression-that-matches-valid-ipv6-addresses/17871737#17871737
 */
public static final Pattern IP6_ADDRESS
= Pattern.compile(
// Literal opening bracket.
"\\[" +
// Full form, eight groups: 1:2:3:4:5:6:7:8
"(([0-9a-fA-F]{1,4}:){7,7}[0-9a-fA-F]{1,4}|" +
// Trailing "::" after one to seven groups: 1::  up to  1:2:3:4:5:6:7::
"([0-9a-fA-F]{1,4}:){1,7}:|" +
// "::" followed by exactly one group: 1::8  up to  1:2:3:4:5:6::8
"([0-9a-fA-F]{1,4}:){1,6}:[0-9a-fA-F]{1,4}|" +
// "::" followed by one or two groups: 1::7:8  up to  1:2:3:4:5::7:8
"([0-9a-fA-F]{1,4}:){1,5}(:[0-9a-fA-F]{1,4}){1,2}|" +
// "::" followed by one to three groups: 1::6:7:8  up to  1:2:3:4::6:7:8
"([0-9a-fA-F]{1,4}:){1,4}(:[0-9a-fA-F]{1,4}){1,3}|" +
// "::" followed by one to four groups: 1::5:6:7:8  up to  1:2:3::5:6:7:8
"([0-9a-fA-F]{1,4}:){1,3}(:[0-9a-fA-F]{1,4}){1,4}|" +
// "::" followed by one to five groups: 1::4:5:6:7:8  up to  1:2::4:5:6:7:8
"([0-9a-fA-F]{1,4}:){1,2}(:[0-9a-fA-F]{1,4}){1,5}|" +
// A single leading group, "::", then one to six groups: 1::3:4:5:6:7:8
"[0-9a-fA-F]{1,4}:((:[0-9a-fA-F]{1,4}){1,6})|" +
// Leading "::" followed by one to seven groups, or "::" alone: ::2:3:4:5:6:7:8, ::8, ::
":((:[0-9a-fA-F]{1,4}){1,7}|:)|" +
// Link-local address with zone index: fe80::7:8%eth0
// This alternative is permissive: it allows zero to four ":group" parts whose group may be
// empty, followed by '%' and one or more ASCII letters or digits.
"fe80:(:[0-9a-fA-F]{0,4}){0,4}%[0-9a-zA-Z]{1,}|" +
// "::", optionally "ffff:" or "ffff:0:" (one to four zeros), then a dotted-quad IPv4
// address with octets 0-255: ::255.255.255.255, ::ffff:255.255.255.255, ::ffff:0:255.255.255.255
"::(ffff(:0{1,4}){0,1}:){0,1}" +
"((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\\.){3,3}" +
"(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])|" +
// One to four groups, "::", then a dotted-quad IPv4 address:
// 2001:db8:3:4::192.0.2.33, 64:ff9b::192.0.2.33
"([0-9a-fA-F]{1,4}:){1,4}:" +
"((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\\.){3,3}" +
"(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9]))" +
// Literal closing bracket.
"\\]"
);
/**
* Valid UCS characters defined in RFC 3987. Excludes space characters.
*/
@ -296,7 +329,7 @@ public class Patterns {
/* A TLD: either PUNYCODE_TLD or 2 to 63 characters taken from TLD_CHAR. */
private static final String TLD = "(" + PUNYCODE_TLD + "|" + "[" + TLD_CHAR + "]{2,63}" +")";
/* One or more IRI labels, each followed by a dot, and then a TLD. */
private static final String HOST_NAME = "(" + IRI_LABEL + "\\.)+" + TLD;
/**
 * Matches a host: a {@code HOST_NAME} (dot-separated labels ending in a TLD), an address
 * matched by {@link #IP6_ADDRESS}, or an address matched by {@link #IP_ADDRESS}.
 *
 * <p>The address constants are {@link Pattern} objects; concatenating them into the regex
 * string relies on {@link Pattern#toString()} returning the source expression.
 */
public static final Pattern DOMAIN_NAME
= Pattern.compile("(" + HOST_NAME + "|" + IP_ADDRESS + ")");
= Pattern.compile("(" + HOST_NAME + "|" + IP6_ADDRESS + "|" + IP_ADDRESS +")");
/* Case-insensitive scheme (http, https or rtsp) followed by "://". */
private static final String PROTOCOL = "(?i:http|https|rtsp):\\/\\/";
/* A word boundary or end of input. This is to stop foo.sure from matching as foo.su */
private static final String WORD_BOUNDARY = "(?:\\b|$|^)";
@ -341,12 +374,12 @@ public class Patterns {
* {@link #IP_ADDRESS}
*/
private static final Pattern STRICT_DOMAIN_NAME
= Pattern.compile("(?:" + STRICT_HOST_NAME + "|" + IP_ADDRESS + ")");
= Pattern.compile("(?:" + STRICT_HOST_NAME + "|" + IP_ADDRESS + "|" + IP6_ADDRESS + ")");
/**
 * Regular expression that matches domain names without a TLD: one or more IRI labels, each
 * optionally followed by a dot, where the dot is only accepted when a non-whitespace
 * character follows it. Alternatively matches an address accepted by {@link #IP_ADDRESS}
 * or {@link #IP6_ADDRESS}.
 *
 * <p>The address constants are {@link Pattern} objects; concatenating them into this string
 * relies on {@link Pattern#toString()} returning the source expression.
 */
private static final String RELAXED_DOMAIN_NAME =
"(?:" + "(?:" + IRI_LABEL + "(?:\\.(?=\\S))" +"?)+" + "|" + IP_ADDRESS + ")";
"(?:" + "(?:" + IRI_LABEL + "(?:\\.(?=\\S))" +"?)+" + "|" + IP_ADDRESS + "|" + IP6_ADDRESS + ")";
/**
* Regular expression to match strings that do not start with a supported protocol. The TLDs
* are expected to be one of the known TLDs.