fix emojis not rendering correctly with a trailing variation selector. fixes #3819
This commit is contained in:
parent
14bb8b0cf1
commit
71a56002fe
|
@ -29,280 +29,281 @@
|
||||||
|
|
||||||
package eu.siacs.conversations.utils;
|
package eu.siacs.conversations.utils;
|
||||||
|
|
||||||
|
import android.support.annotation.NonNull;
|
||||||
import android.util.LruCache;
|
import android.util.LruCache;
|
||||||
|
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
import java.util.HashMap;
|
|
||||||
import java.util.HashSet;
|
import java.util.HashSet;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.regex.Pattern;
|
import java.util.regex.Pattern;
|
||||||
|
|
||||||
public class Emoticons {
|
public class Emoticons {
|
||||||
|
|
||||||
private static final UnicodeRange MISC_SYMBOLS_AND_PICTOGRAPHS = new UnicodeRange(0x1F300,0x1F5FF);
|
private static final UnicodeRange MISC_SYMBOLS_AND_PICTOGRAPHS = new UnicodeRange(0x1F300, 0x1F5FF);
|
||||||
private static final UnicodeRange SUPPLEMENTAL_SYMBOLS = new UnicodeRange(0x1F900,0x1F9FF);
|
private static final UnicodeRange SUPPLEMENTAL_SYMBOLS = new UnicodeRange(0x1F900, 0x1F9FF);
|
||||||
private static final UnicodeRange EMOTICONS = new UnicodeRange(0x1F600,0x1F64F);
|
private static final UnicodeRange EMOTICONS = new UnicodeRange(0x1F600, 0x1F64F);
|
||||||
private static final UnicodeRange TRANSPORT_SYMBOLS = new UnicodeRange(0x1F680,0x1F6FF);
|
private static final UnicodeRange TRANSPORT_SYMBOLS = new UnicodeRange(0x1F680, 0x1F6FF);
|
||||||
private static final UnicodeRange MISC_SYMBOLS = new UnicodeRange(0x2600,0x26FF);
|
private static final UnicodeRange MISC_SYMBOLS = new UnicodeRange(0x2600, 0x26FF);
|
||||||
private static final UnicodeRange DINGBATS = new UnicodeRange(0x2700,0x27BF);
|
private static final UnicodeRange DINGBATS = new UnicodeRange(0x2700, 0x27BF);
|
||||||
private static final UnicodeRange ENCLOSED_ALPHANUMERIC_SUPPLEMENT = new UnicodeRange(0x1F100,0x1F1FF);
|
private static final UnicodeRange ENCLOSED_ALPHANUMERIC_SUPPLEMENT = new UnicodeRange(0x1F100, 0x1F1FF);
|
||||||
private static final UnicodeRange ENCLOSED_IDEOGRAPHIC_SUPPLEMENT = new UnicodeRange(0x1F200,0x1F2FF);
|
private static final UnicodeRange ENCLOSED_IDEOGRAPHIC_SUPPLEMENT = new UnicodeRange(0x1F200, 0x1F2FF);
|
||||||
private static final UnicodeRange REGIONAL_INDICATORS = new UnicodeRange(0x1F1E6,0x1F1FF);
|
private static final UnicodeRange REGIONAL_INDICATORS = new UnicodeRange(0x1F1E6, 0x1F1FF);
|
||||||
private static final UnicodeRange GEOMETRIC_SHAPES = new UnicodeRange(0x25A0,0x25FF);
|
private static final UnicodeRange GEOMETRIC_SHAPES = new UnicodeRange(0x25A0, 0x25FF);
|
||||||
private static final UnicodeRange LATIN_SUPPLEMENT = new UnicodeRange(0x80,0xFF);
|
private static final UnicodeRange LATIN_SUPPLEMENT = new UnicodeRange(0x80, 0xFF);
|
||||||
private static final UnicodeRange MISC_TECHNICAL = new UnicodeRange(0x2300,0x23FF);
|
private static final UnicodeRange MISC_TECHNICAL = new UnicodeRange(0x2300, 0x23FF);
|
||||||
private static final UnicodeRange TAGS = new UnicodeRange(0xE0020,0xE007F);
|
private static final UnicodeRange TAGS = new UnicodeRange(0xE0020, 0xE007F);
|
||||||
private static final UnicodeList CYK_SYMBOLS_AND_PUNCTUATION = new UnicodeList(0x3030,0x303D);
|
private static final UnicodeList CYK_SYMBOLS_AND_PUNCTUATION = new UnicodeList(0x3030, 0x303D);
|
||||||
private static final UnicodeList LETTERLIKE_SYMBOLS = new UnicodeList(0x2122,0x2139);
|
private static final UnicodeList LETTERLIKE_SYMBOLS = new UnicodeList(0x2122, 0x2139);
|
||||||
|
|
||||||
private static final UnicodeBlocks KEYCAP_COMBINEABLE = new UnicodeBlocks(new UnicodeList(0x23),new UnicodeList(0x2A),new UnicodeRange(0x30,0x39));
|
private static final UnicodeBlocks KEYCAP_COMBINEABLE = new UnicodeBlocks(new UnicodeList(0x23), new UnicodeList(0x2A), new UnicodeRange(0x30, 0x39));
|
||||||
|
|
||||||
private static final UnicodeBlocks SYMBOLIZE = new UnicodeBlocks(
|
private static final UnicodeBlocks SYMBOLIZE = new UnicodeBlocks(
|
||||||
GEOMETRIC_SHAPES,
|
GEOMETRIC_SHAPES,
|
||||||
LATIN_SUPPLEMENT,
|
LATIN_SUPPLEMENT,
|
||||||
CYK_SYMBOLS_AND_PUNCTUATION,
|
CYK_SYMBOLS_AND_PUNCTUATION,
|
||||||
LETTERLIKE_SYMBOLS,
|
LETTERLIKE_SYMBOLS,
|
||||||
KEYCAP_COMBINEABLE);
|
KEYCAP_COMBINEABLE);
|
||||||
private static final UnicodeBlocks EMOJIS = new UnicodeBlocks(
|
private static final UnicodeBlocks EMOJIS = new UnicodeBlocks(
|
||||||
MISC_SYMBOLS_AND_PICTOGRAPHS,
|
MISC_SYMBOLS_AND_PICTOGRAPHS,
|
||||||
SUPPLEMENTAL_SYMBOLS,
|
SUPPLEMENTAL_SYMBOLS,
|
||||||
EMOTICONS,
|
EMOTICONS,
|
||||||
TRANSPORT_SYMBOLS,
|
TRANSPORT_SYMBOLS,
|
||||||
MISC_SYMBOLS,
|
MISC_SYMBOLS,
|
||||||
DINGBATS,
|
DINGBATS,
|
||||||
ENCLOSED_ALPHANUMERIC_SUPPLEMENT,
|
ENCLOSED_ALPHANUMERIC_SUPPLEMENT,
|
||||||
ENCLOSED_IDEOGRAPHIC_SUPPLEMENT,
|
ENCLOSED_IDEOGRAPHIC_SUPPLEMENT,
|
||||||
MISC_TECHNICAL);
|
MISC_TECHNICAL);
|
||||||
|
|
||||||
private static final int MAX_EMOIJS = 42;
|
private static final int MAX_EMOIJS = 42;
|
||||||
|
|
||||||
private static final int ZWJ = 0x200D;
|
private static final int ZWJ = 0x200D;
|
||||||
private static final int VARIATION_16 = 0xFE0F;
|
private static final int VARIATION_16 = 0xFE0F;
|
||||||
private static final int COMBINING_ENCLOSING_KEYCAP = 0x20E3;
|
private static final int COMBINING_ENCLOSING_KEYCAP = 0x20E3;
|
||||||
private static final int BLACK_FLAG = 0x1F3F4;
|
private static final int BLACK_FLAG = 0x1F3F4;
|
||||||
private static final UnicodeRange FITZPATRICK = new UnicodeRange(0x1F3FB,0x1F3FF);
|
private static final UnicodeRange FITZPATRICK = new UnicodeRange(0x1F3FB, 0x1F3FF);
|
||||||
|
|
||||||
private static final LruCache<CharSequence,Pattern> CACHE = new LruCache<>(256);
|
private static final LruCache<CharSequence, Pattern> CACHE = new LruCache<>(256);
|
||||||
|
|
||||||
private static List<Symbol> parse(String input) {
|
private static List<Symbol> parse(String input) {
|
||||||
List<Symbol> symbols = new ArrayList<>();
|
List<Symbol> symbols = new ArrayList<>();
|
||||||
Builder builder = new Builder();
|
Builder builder = new Builder();
|
||||||
boolean needsFinalBuild = false;
|
boolean needsFinalBuild = false;
|
||||||
for (int cp, i = 0; i < input.length(); i += Character.charCount(cp)) {
|
for (int cp, i = 0; i < input.length(); i += Character.charCount(cp)) {
|
||||||
cp = input.codePointAt(i);
|
cp = input.codePointAt(i);
|
||||||
if (builder.offer(cp)) {
|
if (builder.offer(cp)) {
|
||||||
needsFinalBuild = true;
|
needsFinalBuild = true;
|
||||||
} else {
|
} else {
|
||||||
symbols.add(builder.build());
|
symbols.add(builder.build());
|
||||||
builder = new Builder();
|
builder = new Builder();
|
||||||
if (builder.offer(cp)) {
|
if (builder.offer(cp)) {
|
||||||
needsFinalBuild = true;
|
needsFinalBuild = true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (needsFinalBuild) {
|
if (needsFinalBuild) {
|
||||||
symbols.add(builder.build());
|
symbols.add(builder.build());
|
||||||
}
|
}
|
||||||
return symbols;
|
return symbols;
|
||||||
}
|
}
|
||||||
|
|
||||||
public static Pattern getEmojiPattern(CharSequence input) {
|
public static Pattern getEmojiPattern(final CharSequence input) {
|
||||||
Pattern pattern = CACHE.get(input);
|
Pattern pattern = CACHE.get(input);
|
||||||
if (pattern == null) {
|
if (pattern == null) {
|
||||||
pattern = generatePattern(input);
|
pattern = generatePattern(input);
|
||||||
CACHE.put(input, pattern);
|
CACHE.put(input, pattern);
|
||||||
}
|
}
|
||||||
return pattern;
|
return pattern;
|
||||||
}
|
}
|
||||||
|
|
||||||
private static Pattern generatePattern(CharSequence input) {
|
private static Pattern generatePattern(CharSequence input) {
|
||||||
final HashSet<String> emojis = new HashSet<>();
|
final HashSet<String> emojis = new HashSet<>();
|
||||||
int i = 0;
|
int i = 0;
|
||||||
for(Symbol symbol : parse(input.toString())) {
|
for (final Symbol symbol : parse(input.toString())) {
|
||||||
if (symbol instanceof Emoji) {
|
if (symbol instanceof Emoji) {
|
||||||
emojis.add(symbol.toString());
|
emojis.add(symbol.toString());
|
||||||
if (++i >= MAX_EMOIJS) {
|
if (++i >= MAX_EMOIJS) {
|
||||||
return Pattern.compile("");
|
return Pattern.compile("");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
final StringBuilder pattern = new StringBuilder();
|
final StringBuilder pattern = new StringBuilder();
|
||||||
for(String emoji : emojis) {
|
for (String emoji : emojis) {
|
||||||
if (pattern.length() != 0) {
|
if (pattern.length() != 0) {
|
||||||
pattern.append('|');
|
pattern.append('|');
|
||||||
}
|
}
|
||||||
pattern.append(Pattern.quote(emoji));
|
pattern.append(Pattern.quote(emoji));
|
||||||
}
|
}
|
||||||
return Pattern.compile(pattern.toString());
|
return Pattern.compile(pattern.toString());
|
||||||
}
|
}
|
||||||
|
|
||||||
public static boolean isEmoji(String input) {
|
public static boolean isEmoji(String input) {
|
||||||
List<Symbol> symbols = parse(input);
|
List<Symbol> symbols = parse(input);
|
||||||
return symbols.size() == 1 && symbols.get(0).isEmoji();
|
return symbols.size() == 1 && symbols.get(0).isEmoji();
|
||||||
}
|
}
|
||||||
|
|
||||||
public static boolean isOnlyEmoji(String input) {
|
public static boolean isOnlyEmoji(String input) {
|
||||||
List<Symbol> symbols = parse(input);
|
List<Symbol> symbols = parse(input);
|
||||||
for(Symbol symbol : symbols) {
|
for (Symbol symbol : symbols) {
|
||||||
if (!symbol.isEmoji()) {
|
if (!symbol.isEmoji()) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return symbols.size() > 0;
|
return symbols.size() > 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
private static abstract class Symbol {
|
private static abstract class Symbol {
|
||||||
|
|
||||||
private final String value;
|
private final String value;
|
||||||
|
|
||||||
public Symbol(List<Integer> codepoints) {
|
Symbol(List<Integer> codepoints) {
|
||||||
StringBuilder builder = new StringBuilder();
|
final StringBuilder builder = new StringBuilder();
|
||||||
for(Integer codepoint : codepoints) {
|
for (final Integer codepoint : codepoints) {
|
||||||
builder.appendCodePoint(codepoint);
|
builder.appendCodePoint(codepoint);
|
||||||
}
|
}
|
||||||
this.value = builder.toString();
|
this.value = builder.toString();
|
||||||
}
|
}
|
||||||
|
|
||||||
abstract boolean isEmoji();
|
abstract boolean isEmoji();
|
||||||
|
|
||||||
@Override
|
@NonNull
|
||||||
public String toString() {
|
@Override
|
||||||
return value;
|
public String toString() {
|
||||||
}
|
return value;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
public static class Emoji extends Symbol {
|
public static class Emoji extends Symbol {
|
||||||
|
|
||||||
public Emoji(List<Integer> codepoints) {
|
Emoji(List<Integer> codepoints) {
|
||||||
super(codepoints);
|
super(codepoints);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
boolean isEmoji() {
|
boolean isEmoji() {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public static class Other extends Symbol {
|
public static class Other extends Symbol {
|
||||||
|
|
||||||
public Other(List<Integer> codepoints) {
|
public Other(List<Integer> codepoints) {
|
||||||
super(codepoints);
|
super(codepoints);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
boolean isEmoji() {
|
boolean isEmoji() {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private static class Builder {
|
private static class Builder {
|
||||||
private final List<Integer> codepoints = new ArrayList<>();
|
private final List<Integer> codepoints = new ArrayList<>();
|
||||||
|
|
||||||
|
|
||||||
public boolean offer(int codepoint) {
|
public boolean offer(int codepoint) {
|
||||||
boolean add = false;
|
boolean add = false;
|
||||||
if (this.codepoints.size() == 0) {
|
if (this.codepoints.size() == 0) {
|
||||||
if (SYMBOLIZE.contains(codepoint)) {
|
if (SYMBOLIZE.contains(codepoint)) {
|
||||||
add = true;
|
add = true;
|
||||||
} else if (REGIONAL_INDICATORS.contains(codepoint)) {
|
} else if (REGIONAL_INDICATORS.contains(codepoint)) {
|
||||||
add = true;
|
add = true;
|
||||||
} else if (EMOJIS.contains(codepoint) && !FITZPATRICK.contains(codepoint) && codepoint != ZWJ) {
|
} else if (EMOJIS.contains(codepoint) && !FITZPATRICK.contains(codepoint) && codepoint != ZWJ) {
|
||||||
add = true;
|
add = true;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
int previous = codepoints.get(codepoints.size() -1);
|
int previous = codepoints.get(codepoints.size() - 1);
|
||||||
if (codepoints.get(0) == BLACK_FLAG) {
|
if (codepoints.get(0) == BLACK_FLAG) {
|
||||||
add = TAGS.contains(codepoint);
|
add = TAGS.contains(codepoint);
|
||||||
} else if (COMBINING_ENCLOSING_KEYCAP == codepoint) {
|
} else if (COMBINING_ENCLOSING_KEYCAP == codepoint) {
|
||||||
add = KEYCAP_COMBINEABLE.contains(previous) || previous == VARIATION_16;
|
add = KEYCAP_COMBINEABLE.contains(previous) || previous == VARIATION_16;
|
||||||
} else if (SYMBOLIZE.contains(previous)) {
|
} else if (SYMBOLIZE.contains(previous)) {
|
||||||
add = codepoint == VARIATION_16;
|
add = codepoint == VARIATION_16;
|
||||||
} else if (REGIONAL_INDICATORS.contains(previous) && REGIONAL_INDICATORS.contains(codepoint)) {
|
} else if (REGIONAL_INDICATORS.contains(previous) && REGIONAL_INDICATORS.contains(codepoint)) {
|
||||||
add = codepoints.size() == 1;
|
add = codepoints.size() == 1;
|
||||||
} else if (previous == VARIATION_16) {
|
} else if (previous == VARIATION_16) {
|
||||||
add = isMerger(codepoint);
|
add = isMerger(codepoint) || codepoint == VARIATION_16;
|
||||||
} else if (FITZPATRICK.contains(previous)) {
|
} else if (FITZPATRICK.contains(previous)) {
|
||||||
add = codepoint == ZWJ;
|
add = codepoint == ZWJ;
|
||||||
} else if (ZWJ == previous) {
|
} else if (ZWJ == previous) {
|
||||||
add = EMOJIS.contains(codepoint);
|
add = EMOJIS.contains(codepoint);
|
||||||
} else if (isMerger(codepoint)) {
|
} else if (isMerger(codepoint)) {
|
||||||
add = true;
|
add = true;
|
||||||
} else if (codepoint == VARIATION_16 && EMOJIS.contains(previous)) {
|
} else if (codepoint == VARIATION_16 && EMOJIS.contains(previous)) {
|
||||||
add = true;
|
add = true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (add) {
|
if (add) {
|
||||||
codepoints.add(codepoint);
|
codepoints.add(codepoint);
|
||||||
return true;
|
return true;
|
||||||
} else {
|
} else {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private static boolean isMerger(int codepoint) {
|
private static boolean isMerger(int codepoint) {
|
||||||
return codepoint == ZWJ || FITZPATRICK.contains(codepoint);
|
return codepoint == ZWJ || FITZPATRICK.contains(codepoint);
|
||||||
}
|
}
|
||||||
|
|
||||||
public Symbol build() {
|
public Symbol build() {
|
||||||
if (codepoints.size() > 0 && SYMBOLIZE.contains(codepoints.get(codepoints.size() - 1))) {
|
if (codepoints.size() > 0 && SYMBOLIZE.contains(codepoints.get(codepoints.size() - 1))) {
|
||||||
return new Other(codepoints);
|
return new Other(codepoints);
|
||||||
} else if (codepoints.size() > 1 && KEYCAP_COMBINEABLE.contains(codepoints.get(0)) && codepoints.get(codepoints.size() - 1) != COMBINING_ENCLOSING_KEYCAP) {
|
} else if (codepoints.size() > 1 && KEYCAP_COMBINEABLE.contains(codepoints.get(0)) && codepoints.get(codepoints.size() - 1) != COMBINING_ENCLOSING_KEYCAP) {
|
||||||
return new Other(codepoints);
|
return new Other(codepoints);
|
||||||
}
|
}
|
||||||
return codepoints.size() == 0 ? new Other(codepoints): new Emoji(codepoints);
|
return codepoints.size() == 0 ? new Other(codepoints) : new Emoji(codepoints);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public static class UnicodeBlocks implements UnicodeSet {
|
public static class UnicodeBlocks implements UnicodeSet {
|
||||||
final UnicodeSet[] unicodeSets;
|
final UnicodeSet[] unicodeSets;
|
||||||
|
|
||||||
public UnicodeBlocks(UnicodeSet... sets) {
|
UnicodeBlocks(final UnicodeSet... sets) {
|
||||||
this.unicodeSets = sets;
|
this.unicodeSets = sets;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public boolean contains(int codepoint) {
|
public boolean contains(int codepoint) {
|
||||||
for(UnicodeSet unicodeSet : unicodeSets) {
|
for (UnicodeSet unicodeSet : unicodeSets) {
|
||||||
if (unicodeSet.contains(codepoint)) {
|
if (unicodeSet.contains(codepoint)) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public interface UnicodeSet {
|
public interface UnicodeSet {
|
||||||
boolean contains(int codepoint);
|
boolean contains(int codepoint);
|
||||||
}
|
}
|
||||||
|
|
||||||
public static class UnicodeList implements UnicodeSet {
|
public static class UnicodeList implements UnicodeSet {
|
||||||
|
|
||||||
private final List<Integer> list;
|
private final List<Integer> list;
|
||||||
|
|
||||||
public UnicodeList(Integer... codes) {
|
UnicodeList(final Integer... codes) {
|
||||||
this.list = Arrays.asList(codes);
|
this.list = Arrays.asList(codes);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public boolean contains(int codepoint) {
|
public boolean contains(int codepoint) {
|
||||||
return this.list.contains(codepoint);
|
return this.list.contains(codepoint);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
public static class UnicodeRange implements UnicodeSet {
|
public static class UnicodeRange implements UnicodeSet {
|
||||||
|
|
||||||
private final int lower;
|
private final int lower;
|
||||||
private final int upper;
|
private final int upper;
|
||||||
|
|
||||||
UnicodeRange(int lower, int upper) {
|
UnicodeRange(int lower, int upper) {
|
||||||
this.lower = lower;
|
this.lower = lower;
|
||||||
this.upper = upper;
|
this.upper = upper;
|
||||||
}
|
}
|
||||||
|
|
||||||
public boolean contains(int codePoint) {
|
public boolean contains(int codePoint) {
|
||||||
return codePoint >= lower && codePoint <= upper;
|
return codePoint >= lower && codePoint <= upper;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue