From 1e8488167c26ac1e780e374961d08f1fb1d1e880 Mon Sep 17 00:00:00 2001 From: Andrew Clemons Date: Thu, 9 Mar 2017 21:48:18 +1300 Subject: [PATCH] Add support for "narrow" python to emoji_builder.py The code currently only works with wide builds since it does not take UTF-16 surrogate pairs into account. --- third_party/color_emoji/emoji_builder.py | 41 ++++++++++++++++++++++++++++---- 1 file changed, 37 insertions(+), 4 deletions(-) diff --git a/third_party/color_emoji/emoji_builder.py b/third_party/color_emoji/emoji_builder.py index c118e83..3d3e0c8 100644 --- a/third_party/color_emoji/emoji_builder.py +++ b/third_party/color_emoji/emoji_builder.py @@ -25,10 +25,36 @@ from nototools import font_data +def myunichr(cp): + if sys.maxunicode < 0x10FFFF and cp > 0xFFFF: + return ("\\U" + hex(cp)[2:].zfill(8)).decode("unicode-escape") + return unichr(cp) + +def myord(high, low): + return (ord(high) - 0xD800) * 0x400 + (ord(low) - 0xDC00) + 0x10000 + +def begins_with_surrogate(string): + return sys.maxunicode < 0x10FFFF and len(string) > 1 and (0xD800 <= ord(string[0]) <= 0xDBFF) and (0xDC00 <= ord(string[1]) <= 0xDFFF) + def get_glyph_name_from_gsub (string, font, cmap_dict): ligatures = font['GSUB'].table.LookupList.Lookup[0].SubTable[0].ligatures - first_glyph = cmap_dict[ord (string[0])] - rest_of_glyphs = [cmap_dict[ord (ch)] for ch in string[1:]] + + if begins_with_surrogate(string): + first_glyph = cmap_dict[myord(string[0], string[1])] + string = string[2:] + else: + first_glyph = cmap_dict[ord (string[0])] + string = string[1:] + + rest_of_glyphs = [] + while (len(string) > 0): + if begins_with_surrogate(string): + rest_of_glyphs.append(cmap_dict[myord(string[0], string[1])]) + string = string[2:] + else: + rest_of_glyphs.append(cmap_dict[ord (string[0])]) + string = string[1:] + for ligature in ligatures[first_glyph]: if ligature.Component == rest_of_glyphs: return ligature.LigGlyph @@ -462,13 +488,13 @@ def is_vs(cp): if "_" in codes: pieces = codes.split ("_") cps = [int(code, 16) for code in pieces] - uchars = "".join ([unichr(cp) for cp in cps if not is_vs(cp)]) + uchars = "".join ([myunichr(cp) for cp in cps if not is_vs(cp)]) else: cp = int(codes, 16) if is_vs(cp): print "ignoring unexpected vs input %04x" % cp continue - uchars = unichr(cp) + uchars = myunichr(cp) img_files[uchars] = img_file if not img_files: raise Exception ("No image files found in '%s'." % glb) @@ -483,6 +509,13 @@ def is_vs(cp): except: print "no cmap entry for %x" % ord(uchars) raise ValueError("%x" % ord(uchars)) + elif len (uchars) == 2 and begins_with_surrogate(uchars): + cp = myord(uchars[0], uchars[1]) + try: + glyph_name = unicode_cmap.cmap[cp] + except: + print "no cmap entry for %x" % cp + raise ValueError("%x" % ord(uchars)) else: glyph_name = get_glyph_name_from_gsub (uchars, font, unicode_cmap.cmap) glyph_id = font.getGlyphID (glyph_name)