From 2ec1d0cc0942b89d865e75c56c5da439e514cc38 Mon Sep 17 00:00:00 2001
From: Daniel Gultsch
Date: Mon, 16 Sep 2019 15:13:53 +0200
Subject: [PATCH] warn when using _only_ ambiguous cyrillic

---
Reviewer notes (this section is not part of the commit message):
 * NOTE(review): line breaks and generic type arguments of this patch were
   restored by hand. The Character.UnicodeBlock map key type and the tab
   indentation are assumptions - confirm against the target tree before applying.
 * NOTE(review): the author's e-mail address is missing from the From: header.
 * AMBIGUOUS_CYRILLIC listed Latin 'q' (U+0071), so Latin-only words made of 'q'
   were flagged; it now lists Cyrillic qa (U+051B), written as an escape.
 * containsOnlyAmbiguousCyrillic uses Collection.containsAll (same result,
   including true for an empty collection); comments and Javadoc were added.

 .../utils/IrregularUnicodeDetector.java       | 42 +++++++++++++++++--
 1 file changed, 39 insertions(+), 3 deletions(-)

diff --git a/src/main/java/eu/siacs/conversations/utils/IrregularUnicodeDetector.java b/src/main/java/eu/siacs/conversations/utils/IrregularUnicodeDetector.java
index 42329c41a..b2ef794c8 100644
--- a/src/main/java/eu/siacs/conversations/utils/IrregularUnicodeDetector.java
+++ b/src/main/java/eu/siacs/conversations/utils/IrregularUnicodeDetector.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018, Daniel Gultsch All rights reserved.
+ * Copyright (c) 2018-2019, Daniel Gultsch All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without modification,
  * are permitted provided that the following conditions are met:
@@ -40,6 +40,8 @@ import android.text.style.ForegroundColorSpan;
 import android.util.LruCache;
 
 import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
 import java.util.Collections;
 import java.util.HashMap;
 import java.util.HashSet;
@@ -57,6 +59,10 @@ public class IrregularUnicodeDetector {
 
 	private static final Map<Character.UnicodeBlock, Character.UnicodeBlock> NORMALIZATION_MAP;
 	private static final LruCache<Jid, PatternTuple> CACHE = new LruCache<>(4096);
+	// Cyrillic letters that can be mistaken for Latin ones. A word consisting solely of
+	// these is reported as irregular in full (see findIrregularCodePoints).
+	// U+051B (Cyrillic 'qa') is spelled as an escape because it looks identical to Latin 'q'.
+	private static final List<String> AMBIGUOUS_CYRILLIC = Arrays.asList("а","г","е","ѕ","і","\u051B","о","р","с","у");
 
 	static {
 		Map<Character.UnicodeBlock, Character.UnicodeBlock> temp = new HashMap<>();
@@ -185,13 +191,43 @@ public class IrregularUnicodeDetector {
 	private static Set<String> findIrregularCodePoints(String word) {
 		Set<String> codePoints;
 		if (Build.VERSION.SDK_INT < Build.VERSION_CODES.N) {
-			codePoints = eliminateFirstAndGetCodePointsCompat(mapCompat(word));
+			final Map<Character.UnicodeBlock, List<String>> map = mapCompat(word);
+			final Set<String> set = asSet(map);
+			if (containsOnlyAmbiguousCyrillic(set)) {
+				return set;
+			}
+			codePoints = eliminateFirstAndGetCodePointsCompat(map);
 		} else {
-			codePoints = eliminateFirstAndGetCodePoints(map(word));
+			final Map<Character.UnicodeBlock, List<String>> map = map(word);
+			final Set<String> set = asSet(map);
+			if (containsOnlyAmbiguousCyrillic(set)) {
+				return set;
+			}
+			codePoints = eliminateFirstAndGetCodePoints(map);
 		}
 		return codePoints;
 	}
 
+	/**
+	 * Collects the entries of all value lists of {@code map} into a single set.
+	 */
+	private static Set<String> asSet(Map<Character.UnicodeBlock, List<String>> map) {
+		final Set<String> flat = new HashSet<>();
+		for (List<String> value : map.values()) {
+			flat.addAll(value);
+		}
+		return flat;
+	}
+
+	/**
+	 * Returns whether every entry of {@code codePoints} is in {@link #AMBIGUOUS_CYRILLIC};
+	 * such a word is reported in full by {@link #findIrregularCodePoints(String)}.
+	 * An empty collection yields {@code true}.
+	 */
+	private static boolean containsOnlyAmbiguousCyrillic(Collection<String> codePoints) {
+		return AMBIGUOUS_CYRILLIC.containsAll(codePoints);
+	}
+
 	private static PatternTuple find(Jid jid) {
 		synchronized (CACHE) {
 			PatternTuple pattern = CACHE.get(jid);