fix emojis not rendering correctly with trailing variation selector. fixes #3819

This commit is contained in:
Daniel Gultsch 2020-07-09 17:43:30 +02:00
parent 14bb8b0cf1
commit 71a56002fe
1 changed file with 228 additions and 227 deletions

View File

@ -29,280 +29,281 @@
package eu.siacs.conversations.utils; package eu.siacs.conversations.utils;
import android.support.annotation.NonNull;
import android.util.LruCache; import android.util.LruCache;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Arrays; import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet; import java.util.HashSet;
import java.util.List; import java.util.List;
import java.util.regex.Pattern; import java.util.regex.Pattern;
public class Emoticons { public class Emoticons {
private static final UnicodeRange MISC_SYMBOLS_AND_PICTOGRAPHS = new UnicodeRange(0x1F300,0x1F5FF); private static final UnicodeRange MISC_SYMBOLS_AND_PICTOGRAPHS = new UnicodeRange(0x1F300, 0x1F5FF);
private static final UnicodeRange SUPPLEMENTAL_SYMBOLS = new UnicodeRange(0x1F900,0x1F9FF); private static final UnicodeRange SUPPLEMENTAL_SYMBOLS = new UnicodeRange(0x1F900, 0x1F9FF);
private static final UnicodeRange EMOTICONS = new UnicodeRange(0x1F600,0x1F64F); private static final UnicodeRange EMOTICONS = new UnicodeRange(0x1F600, 0x1F64F);
private static final UnicodeRange TRANSPORT_SYMBOLS = new UnicodeRange(0x1F680,0x1F6FF); private static final UnicodeRange TRANSPORT_SYMBOLS = new UnicodeRange(0x1F680, 0x1F6FF);
private static final UnicodeRange MISC_SYMBOLS = new UnicodeRange(0x2600,0x26FF); private static final UnicodeRange MISC_SYMBOLS = new UnicodeRange(0x2600, 0x26FF);
private static final UnicodeRange DINGBATS = new UnicodeRange(0x2700,0x27BF); private static final UnicodeRange DINGBATS = new UnicodeRange(0x2700, 0x27BF);
private static final UnicodeRange ENCLOSED_ALPHANUMERIC_SUPPLEMENT = new UnicodeRange(0x1F100,0x1F1FF); private static final UnicodeRange ENCLOSED_ALPHANUMERIC_SUPPLEMENT = new UnicodeRange(0x1F100, 0x1F1FF);
private static final UnicodeRange ENCLOSED_IDEOGRAPHIC_SUPPLEMENT = new UnicodeRange(0x1F200,0x1F2FF); private static final UnicodeRange ENCLOSED_IDEOGRAPHIC_SUPPLEMENT = new UnicodeRange(0x1F200, 0x1F2FF);
private static final UnicodeRange REGIONAL_INDICATORS = new UnicodeRange(0x1F1E6,0x1F1FF); private static final UnicodeRange REGIONAL_INDICATORS = new UnicodeRange(0x1F1E6, 0x1F1FF);
private static final UnicodeRange GEOMETRIC_SHAPES = new UnicodeRange(0x25A0,0x25FF); private static final UnicodeRange GEOMETRIC_SHAPES = new UnicodeRange(0x25A0, 0x25FF);
private static final UnicodeRange LATIN_SUPPLEMENT = new UnicodeRange(0x80,0xFF); private static final UnicodeRange LATIN_SUPPLEMENT = new UnicodeRange(0x80, 0xFF);
private static final UnicodeRange MISC_TECHNICAL = new UnicodeRange(0x2300,0x23FF); private static final UnicodeRange MISC_TECHNICAL = new UnicodeRange(0x2300, 0x23FF);
private static final UnicodeRange TAGS = new UnicodeRange(0xE0020,0xE007F); private static final UnicodeRange TAGS = new UnicodeRange(0xE0020, 0xE007F);
private static final UnicodeList CYK_SYMBOLS_AND_PUNCTUATION = new UnicodeList(0x3030,0x303D); private static final UnicodeList CYK_SYMBOLS_AND_PUNCTUATION = new UnicodeList(0x3030, 0x303D);
private static final UnicodeList LETTERLIKE_SYMBOLS = new UnicodeList(0x2122,0x2139); private static final UnicodeList LETTERLIKE_SYMBOLS = new UnicodeList(0x2122, 0x2139);
private static final UnicodeBlocks KEYCAP_COMBINEABLE = new UnicodeBlocks(new UnicodeList(0x23),new UnicodeList(0x2A),new UnicodeRange(0x30,0x39)); private static final UnicodeBlocks KEYCAP_COMBINEABLE = new UnicodeBlocks(new UnicodeList(0x23), new UnicodeList(0x2A), new UnicodeRange(0x30, 0x39));
private static final UnicodeBlocks SYMBOLIZE = new UnicodeBlocks( private static final UnicodeBlocks SYMBOLIZE = new UnicodeBlocks(
GEOMETRIC_SHAPES, GEOMETRIC_SHAPES,
LATIN_SUPPLEMENT, LATIN_SUPPLEMENT,
CYK_SYMBOLS_AND_PUNCTUATION, CYK_SYMBOLS_AND_PUNCTUATION,
LETTERLIKE_SYMBOLS, LETTERLIKE_SYMBOLS,
KEYCAP_COMBINEABLE); KEYCAP_COMBINEABLE);
private static final UnicodeBlocks EMOJIS = new UnicodeBlocks( private static final UnicodeBlocks EMOJIS = new UnicodeBlocks(
MISC_SYMBOLS_AND_PICTOGRAPHS, MISC_SYMBOLS_AND_PICTOGRAPHS,
SUPPLEMENTAL_SYMBOLS, SUPPLEMENTAL_SYMBOLS,
EMOTICONS, EMOTICONS,
TRANSPORT_SYMBOLS, TRANSPORT_SYMBOLS,
MISC_SYMBOLS, MISC_SYMBOLS,
DINGBATS, DINGBATS,
ENCLOSED_ALPHANUMERIC_SUPPLEMENT, ENCLOSED_ALPHANUMERIC_SUPPLEMENT,
ENCLOSED_IDEOGRAPHIC_SUPPLEMENT, ENCLOSED_IDEOGRAPHIC_SUPPLEMENT,
MISC_TECHNICAL); MISC_TECHNICAL);
private static final int MAX_EMOIJS = 42; private static final int MAX_EMOIJS = 42;
private static final int ZWJ = 0x200D; private static final int ZWJ = 0x200D;
private static final int VARIATION_16 = 0xFE0F; private static final int VARIATION_16 = 0xFE0F;
private static final int COMBINING_ENCLOSING_KEYCAP = 0x20E3; private static final int COMBINING_ENCLOSING_KEYCAP = 0x20E3;
private static final int BLACK_FLAG = 0x1F3F4; private static final int BLACK_FLAG = 0x1F3F4;
private static final UnicodeRange FITZPATRICK = new UnicodeRange(0x1F3FB,0x1F3FF); private static final UnicodeRange FITZPATRICK = new UnicodeRange(0x1F3FB, 0x1F3FF);
private static final LruCache<CharSequence,Pattern> CACHE = new LruCache<>(256); private static final LruCache<CharSequence, Pattern> CACHE = new LruCache<>(256);
private static List<Symbol> parse(String input) { private static List<Symbol> parse(String input) {
List<Symbol> symbols = new ArrayList<>(); List<Symbol> symbols = new ArrayList<>();
Builder builder = new Builder(); Builder builder = new Builder();
boolean needsFinalBuild = false; boolean needsFinalBuild = false;
for (int cp, i = 0; i < input.length(); i += Character.charCount(cp)) { for (int cp, i = 0; i < input.length(); i += Character.charCount(cp)) {
cp = input.codePointAt(i); cp = input.codePointAt(i);
if (builder.offer(cp)) { if (builder.offer(cp)) {
needsFinalBuild = true; needsFinalBuild = true;
} else { } else {
symbols.add(builder.build()); symbols.add(builder.build());
builder = new Builder(); builder = new Builder();
if (builder.offer(cp)) { if (builder.offer(cp)) {
needsFinalBuild = true; needsFinalBuild = true;
} }
} }
} }
if (needsFinalBuild) { if (needsFinalBuild) {
symbols.add(builder.build()); symbols.add(builder.build());
} }
return symbols; return symbols;
} }
public static Pattern getEmojiPattern(CharSequence input) { public static Pattern getEmojiPattern(final CharSequence input) {
Pattern pattern = CACHE.get(input); Pattern pattern = CACHE.get(input);
if (pattern == null) { if (pattern == null) {
pattern = generatePattern(input); pattern = generatePattern(input);
CACHE.put(input, pattern); CACHE.put(input, pattern);
} }
return pattern; return pattern;
} }
private static Pattern generatePattern(CharSequence input) { private static Pattern generatePattern(CharSequence input) {
final HashSet<String> emojis = new HashSet<>(); final HashSet<String> emojis = new HashSet<>();
int i = 0; int i = 0;
for(Symbol symbol : parse(input.toString())) { for (final Symbol symbol : parse(input.toString())) {
if (symbol instanceof Emoji) { if (symbol instanceof Emoji) {
emojis.add(symbol.toString()); emojis.add(symbol.toString());
if (++i >= MAX_EMOIJS) { if (++i >= MAX_EMOIJS) {
return Pattern.compile(""); return Pattern.compile("");
} }
} }
} }
final StringBuilder pattern = new StringBuilder(); final StringBuilder pattern = new StringBuilder();
for(String emoji : emojis) { for (String emoji : emojis) {
if (pattern.length() != 0) { if (pattern.length() != 0) {
pattern.append('|'); pattern.append('|');
} }
pattern.append(Pattern.quote(emoji)); pattern.append(Pattern.quote(emoji));
} }
return Pattern.compile(pattern.toString()); return Pattern.compile(pattern.toString());
} }
public static boolean isEmoji(String input) { public static boolean isEmoji(String input) {
List<Symbol> symbols = parse(input); List<Symbol> symbols = parse(input);
return symbols.size() == 1 && symbols.get(0).isEmoji(); return symbols.size() == 1 && symbols.get(0).isEmoji();
} }
public static boolean isOnlyEmoji(String input) { public static boolean isOnlyEmoji(String input) {
List<Symbol> symbols = parse(input); List<Symbol> symbols = parse(input);
for(Symbol symbol : symbols) { for (Symbol symbol : symbols) {
if (!symbol.isEmoji()) { if (!symbol.isEmoji()) {
return false; return false;
} }
} }
return symbols.size() > 0; return symbols.size() > 0;
} }
private static abstract class Symbol { private static abstract class Symbol {
private final String value; private final String value;
public Symbol(List<Integer> codepoints) { Symbol(List<Integer> codepoints) {
StringBuilder builder = new StringBuilder(); final StringBuilder builder = new StringBuilder();
for(Integer codepoint : codepoints) { for (final Integer codepoint : codepoints) {
builder.appendCodePoint(codepoint); builder.appendCodePoint(codepoint);
} }
this.value = builder.toString(); this.value = builder.toString();
} }
abstract boolean isEmoji(); abstract boolean isEmoji();
@Override @NonNull
public String toString() { @Override
return value; public String toString() {
} return value;
} }
}
public static class Emoji extends Symbol { public static class Emoji extends Symbol {
public Emoji(List<Integer> codepoints) { Emoji(List<Integer> codepoints) {
super(codepoints); super(codepoints);
} }
@Override @Override
boolean isEmoji() { boolean isEmoji() {
return true; return true;
} }
} }
public static class Other extends Symbol { public static class Other extends Symbol {
public Other(List<Integer> codepoints) { public Other(List<Integer> codepoints) {
super(codepoints); super(codepoints);
} }
@Override @Override
boolean isEmoji() { boolean isEmoji() {
return false; return false;
} }
} }
private static class Builder { private static class Builder {
private final List<Integer> codepoints = new ArrayList<>(); private final List<Integer> codepoints = new ArrayList<>();
public boolean offer(int codepoint) { public boolean offer(int codepoint) {
boolean add = false; boolean add = false;
if (this.codepoints.size() == 0) { if (this.codepoints.size() == 0) {
if (SYMBOLIZE.contains(codepoint)) { if (SYMBOLIZE.contains(codepoint)) {
add = true; add = true;
} else if (REGIONAL_INDICATORS.contains(codepoint)) { } else if (REGIONAL_INDICATORS.contains(codepoint)) {
add = true; add = true;
} else if (EMOJIS.contains(codepoint) && !FITZPATRICK.contains(codepoint) && codepoint != ZWJ) { } else if (EMOJIS.contains(codepoint) && !FITZPATRICK.contains(codepoint) && codepoint != ZWJ) {
add = true; add = true;
} }
} else { } else {
int previous = codepoints.get(codepoints.size() -1); int previous = codepoints.get(codepoints.size() - 1);
if (codepoints.get(0) == BLACK_FLAG) { if (codepoints.get(0) == BLACK_FLAG) {
add = TAGS.contains(codepoint); add = TAGS.contains(codepoint);
} else if (COMBINING_ENCLOSING_KEYCAP == codepoint) { } else if (COMBINING_ENCLOSING_KEYCAP == codepoint) {
add = KEYCAP_COMBINEABLE.contains(previous) || previous == VARIATION_16; add = KEYCAP_COMBINEABLE.contains(previous) || previous == VARIATION_16;
} else if (SYMBOLIZE.contains(previous)) { } else if (SYMBOLIZE.contains(previous)) {
add = codepoint == VARIATION_16; add = codepoint == VARIATION_16;
} else if (REGIONAL_INDICATORS.contains(previous) && REGIONAL_INDICATORS.contains(codepoint)) { } else if (REGIONAL_INDICATORS.contains(previous) && REGIONAL_INDICATORS.contains(codepoint)) {
add = codepoints.size() == 1; add = codepoints.size() == 1;
} else if (previous == VARIATION_16) { } else if (previous == VARIATION_16) {
add = isMerger(codepoint); add = isMerger(codepoint) || codepoint == VARIATION_16;
} else if (FITZPATRICK.contains(previous)) { } else if (FITZPATRICK.contains(previous)) {
add = codepoint == ZWJ; add = codepoint == ZWJ;
} else if (ZWJ == previous) { } else if (ZWJ == previous) {
add = EMOJIS.contains(codepoint); add = EMOJIS.contains(codepoint);
} else if (isMerger(codepoint)) { } else if (isMerger(codepoint)) {
add = true; add = true;
} else if (codepoint == VARIATION_16 && EMOJIS.contains(previous)) { } else if (codepoint == VARIATION_16 && EMOJIS.contains(previous)) {
add = true; add = true;
} }
} }
if (add) { if (add) {
codepoints.add(codepoint); codepoints.add(codepoint);
return true; return true;
} else { } else {
return false; return false;
} }
} }
private static boolean isMerger(int codepoint) { private static boolean isMerger(int codepoint) {
return codepoint == ZWJ || FITZPATRICK.contains(codepoint); return codepoint == ZWJ || FITZPATRICK.contains(codepoint);
} }
public Symbol build() { public Symbol build() {
if (codepoints.size() > 0 && SYMBOLIZE.contains(codepoints.get(codepoints.size() - 1))) { if (codepoints.size() > 0 && SYMBOLIZE.contains(codepoints.get(codepoints.size() - 1))) {
return new Other(codepoints); return new Other(codepoints);
} else if (codepoints.size() > 1 && KEYCAP_COMBINEABLE.contains(codepoints.get(0)) && codepoints.get(codepoints.size() - 1) != COMBINING_ENCLOSING_KEYCAP) { } else if (codepoints.size() > 1 && KEYCAP_COMBINEABLE.contains(codepoints.get(0)) && codepoints.get(codepoints.size() - 1) != COMBINING_ENCLOSING_KEYCAP) {
return new Other(codepoints); return new Other(codepoints);
} }
return codepoints.size() == 0 ? new Other(codepoints): new Emoji(codepoints); return codepoints.size() == 0 ? new Other(codepoints) : new Emoji(codepoints);
} }
} }
public static class UnicodeBlocks implements UnicodeSet { public static class UnicodeBlocks implements UnicodeSet {
final UnicodeSet[] unicodeSets; final UnicodeSet[] unicodeSets;
public UnicodeBlocks(UnicodeSet... sets) { UnicodeBlocks(final UnicodeSet... sets) {
this.unicodeSets = sets; this.unicodeSets = sets;
} }
@Override @Override
public boolean contains(int codepoint) { public boolean contains(int codepoint) {
for(UnicodeSet unicodeSet : unicodeSets) { for (UnicodeSet unicodeSet : unicodeSets) {
if (unicodeSet.contains(codepoint)) { if (unicodeSet.contains(codepoint)) {
return true; return true;
} }
} }
return false; return false;
} }
} }
/** A set of Unicode code points that can be queried for membership. */
public interface UnicodeSet {

    /**
     * @param codepoint the code point to look up
     * @return true if the code point belongs to this set
     */
    boolean contains(int codepoint);
}
public static class UnicodeList implements UnicodeSet { public static class UnicodeList implements UnicodeSet {
private final List<Integer> list; private final List<Integer> list;
public UnicodeList(Integer... codes) { UnicodeList(final Integer... codes) {
this.list = Arrays.asList(codes); this.list = Arrays.asList(codes);
} }
@Override @Override
public boolean contains(int codepoint) { public boolean contains(int codepoint) {
return this.list.contains(codepoint); return this.list.contains(codepoint);
} }
} }
public static class UnicodeRange implements UnicodeSet { public static class UnicodeRange implements UnicodeSet {
private final int lower; private final int lower;
private final int upper; private final int upper;
UnicodeRange(int lower, int upper) { UnicodeRange(int lower, int upper) {
this.lower = lower; this.lower = lower;
this.upper = upper; this.upper = upper;
} }
public boolean contains(int codePoint) { public boolean contains(int codePoint) {
return codePoint >= lower && codePoint <= upper; return codePoint >= lower && codePoint <= upper;
} }
} }
} }