From ca08c27eef6493d799fdd6600ac8cc9e36ba0002 Mon Sep 17 00:00:00 2001 From: Millesimus Date: Sat, 4 Sep 2021 17:33:25 +0200 Subject: [PATCH] Parse IPv6 URIs (#3841). --- .../siacs/conversations/utils/Patterns.java | 39 +++++++++++++++++-- 1 file changed, 36 insertions(+), 3 deletions(-) diff --git a/src/main/java/eu/siacs/conversations/utils/Patterns.java b/src/main/java/eu/siacs/conversations/utils/Patterns.java index 0ee96b74c..02a437345 100644 --- a/src/main/java/eu/siacs/conversations/utils/Patterns.java +++ b/src/main/java/eu/siacs/conversations/utils/Patterns.java @@ -254,6 +254,39 @@ public class Patterns { + "[0-9]|[0-1][0-9]{2}|[1-9][0-9]|[1-9]|0)\\.(25[0-5]|2[0-4][0-9]|[0-1]" + "[0-9]{2}|[1-9][0-9]|[1-9]|0)\\.(25[0-5]|2[0-4][0-9]|[0-1][0-9]{2}" + "|[1-9][0-9]|[0-9]))"); + + /** + * IPv6 address matcher for + * IPv6 addresses + * zero compressed IPv6 addresses (section 2.2 of rfc5952) + * link-local IPv6 addresses with zone index (section 11 of rfc4007) + * IPv4-Embedded IPv6 Address (section 2 of rfc6052) + * IPv4-mapped IPv6 addresses (section 2.1 of rfc2765) + * IPv4-translated addresses (section 2.1 of rfc2765) + * + * Taken from https://stackoverflow.com/questions/53497/regular-expression-that-matches-valid-ipv6-addresses/17871737#17871737 + */ + public static final Pattern IP6_ADDRESS + = Pattern.compile( + "\\[" + + "(([0-9a-fA-F]{1,4}:){7,7}[0-9a-fA-F]{1,4}|" + + "([0-9a-fA-F]{1,4}:){1,7}:|" + + "([0-9a-fA-F]{1,4}:){1,6}:[0-9a-fA-F]{1,4}|" + + "([0-9a-fA-F]{1,4}:){1,5}(:[0-9a-fA-F]{1,4}){1,2}|" + + "([0-9a-fA-F]{1,4}:){1,4}(:[0-9a-fA-F]{1,4}){1,3}|" + + "([0-9a-fA-F]{1,4}:){1,3}(:[0-9a-fA-F]{1,4}){1,4}|" + + "([0-9a-fA-F]{1,4}:){1,2}(:[0-9a-fA-F]{1,4}){1,5}|" + + "[0-9a-fA-F]{1,4}:((:[0-9a-fA-F]{1,4}){1,6})|" + + ":((:[0-9a-fA-F]{1,4}){1,7}|:)|" + + "fe80:(:[0-9a-fA-F]{0,4}){0,4}%[0-9a-zA-Z]{1,}|" + + "::(ffff(:0{1,4}){0,1}:){0,1}" + + "((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\\.){3,3}" + + "(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])|" + + "([0-9a-fA-F]{1,4}:){1,4}:" + + "((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\\.){3,3}" + + "(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9]))" + + "\\]" + ); /** * Valid UCS characters defined in RFC 3987. Excludes space characters. */ @@ -296,7 +329,7 @@ public class Patterns { private static final String TLD = "(" + PUNYCODE_TLD + "|" + "[" + TLD_CHAR + "]{2,63}" +")"; private static final String HOST_NAME = "(" + IRI_LABEL + "\\.)+" + TLD; public static final Pattern DOMAIN_NAME - = Pattern.compile("(" + HOST_NAME + "|" + IP_ADDRESS + ")"); + = Pattern.compile("(" + HOST_NAME + "|" + IP6_ADDRESS + "|" + IP_ADDRESS +")"); private static final String PROTOCOL = "(?i:http|https|rtsp):\\/\\/"; /* A word boundary or end of input. This is to stop foo.sure from matching as foo.su */ private static final String WORD_BOUNDARY = "(?:\\b|$|^)"; @@ -341,12 +374,12 @@ public class Patterns { * {@link #IP_ADDRESS} */ private static final Pattern STRICT_DOMAIN_NAME - = Pattern.compile("(?:" + STRICT_HOST_NAME + "|" + IP_ADDRESS + ")"); + = Pattern.compile("(?:" + STRICT_HOST_NAME + "|" + IP_ADDRESS + "|" + IP6_ADDRESS + ")"); /** * Regular expression that matches domain names without a TLD */ private static final String RELAXED_DOMAIN_NAME = - "(?:" + "(?:" + IRI_LABEL + "(?:\\.(?=\\S))" +"?)+" + "|" + IP_ADDRESS + ")"; + "(?:" + "(?:" + IRI_LABEL + "(?:\\.(?=\\S))" +"?)+" + "|" + IP_ADDRESS + "|" + IP6_ADDRESS + ")"; /** * Regular expression to match strings that do not start with a supported protocol. The TLDs * are expected to be one of the known TLDs.