Improved URL matching

URLs with trailing special characters (e.g. slash, minus, ...) should now be
matched correctly, even when followed by a non-URL character.
This commit is contained in:
klonfish 2018-05-10 18:29:36 +02:00
parent 678fcf8b31
commit 41760b40ef
2 changed files with 18 additions and 3 deletions

View File

@ -123,7 +123,24 @@ public class MessageAdapter extends ArrayAdapter<Message> implements CopyTextVie
} }
} }
/**
 * Match filter for web URL candidates: decides whether the region
 * {@code [start, end)} of {@code cs} is accepted as a link.
 *
 * <p>A candidate is rejected when
 * <ul>
 *   <li>the character directly before it is {@code '@'} or {@code '.'},</li>
 *   <li>the (up to) three characters directly before it are {@code "://"}, or</li>
 *   <li>its last character and the character right after it are both alphabetic.</li>
 * </ul>
 * Every other candidate is accepted.
 */
private static final Linkify.MatchFilter WEBURL_MATCH_FILTER = (cs, start, end) -> {
    if (start > 0) {
        final char previous = cs.charAt(start - 1);
        // CharSequence does not define an equals() contract, so compare by content;
        // calling equals("://") on the sub-sequence is only reliable when cs is a String.
        if (previous == '@' || previous == '.'
                || "://".contentEquals(cs.subSequence(Math.max(0, start - 3), start))) {
            return false;
        }
    }
    if (end < cs.length()) {
        // Reject strings that were probably matched only because they contain a dot followed
        // by some known TLD (see also comment for WORD_BOUNDARY in Patterns.java)
        if (Character.isAlphabetic(cs.charAt(end - 1)) && Character.isAlphabetic(cs.charAt(end))) {
            return false;
        }
    }
    return true;
};
private static final Linkify.MatchFilter XMPPURI_MATCH_FILTER = (s, start, end) -> { private static final Linkify.MatchFilter XMPPURI_MATCH_FILTER = (s, start, end) -> {
XmppUri uri = new XmppUri(s.subSequence(start, end).toString()); XmppUri uri = new XmppUri(s.subSequence(start, end).toString());

View File

@ -353,7 +353,6 @@ public class Patterns {
+ "(?:" + PORT_NUMBER + ")?" + "(?:" + PORT_NUMBER + ")?"
+ ")" + ")"
+ "(?:" + PATH_AND_QUERY + ")?" + "(?:" + PATH_AND_QUERY + ")?"
+ WORD_BOUNDARY
+ ")"; + ")";
/** /**
* Regular expression to match strings that start with a supported protocol. Rules for domain * Regular expression to match strings that start with a supported protocol. Rules for domain
@ -367,7 +366,6 @@ public class Patterns {
+ "(?:" + PORT_NUMBER + ")?" + "(?:" + PORT_NUMBER + ")?"
+ ")" + ")"
+ "(?:" + PATH_AND_QUERY + ")?" + "(?:" + PATH_AND_QUERY + ")?"
+ WORD_BOUNDARY
+ ")"; + ")";
/** /**
* Regular expression pattern to match IRIs. If a string starts with http(s):// the expression * Regular expression pattern to match IRIs. If a string starts with http(s):// the expression