1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
|
From 1e8488167c26ac1e780e374961d08f1fb1d1e880 Mon Sep 17 00:00:00 2001
From: Andrew Clemons <andrew.clemons@gmail.com>
Date: Thu, 9 Mar 2017 21:48:18 +1300
Subject: [PATCH] Add support for "narrow" python to emoji_builder.py
The code currently only works with wide builds since it does not take
UTF-16 surrogate pairs into account.
---
third_party/color_emoji/emoji_builder.py | 41 ++++++++++++++++++++++++++++----
1 file changed, 37 insertions(+), 4 deletions(-)
diff --git a/third_party/color_emoji/emoji_builder.py b/third_party/color_emoji/emoji_builder.py
index c118e83..3d3e0c8 100644
--- a/third_party/color_emoji/emoji_builder.py
+++ b/third_party/color_emoji/emoji_builder.py
@@ -25,10 +25,36 @@
from nototools import font_data
+def myunichr(cp):
+ if sys.maxunicode < 0x10FFFF and cp > 0xFFFF:
+ return ("\\U" + hex(cp)[2:].zfill(8)).decode("unicode-escape")
+ return unichr(cp)
+
+def myord(high, low):
+ return (ord(high) - 0xD800) * 0x400 + (ord(low) - 0xDC00) + 0x10000
+
+def begins_with_surrogate(string):
+ return sys.maxunicode < 0x10FFFF and len(string) > 1 and (0xD800 <= ord(string[0]) <= 0xDBFF) and (0xDC00 <= ord(string[1]) <= 0xDFFF)
+
def get_glyph_name_from_gsub (string, font, cmap_dict):
ligatures = font['GSUB'].table.LookupList.Lookup[0].SubTable[0].ligatures
- first_glyph = cmap_dict[ord (string[0])]
- rest_of_glyphs = [cmap_dict[ord (ch)] for ch in string[1:]]
+
+ if begins_with_surrogate(string):
+ first_glyph = cmap_dict[myord(string[0], string[1])]
+ string = string[2:]
+ else:
+ first_glyph = cmap_dict[ord (string[0])]
+ string = string[1:]
+
+ rest_of_glyphs = []
+ while (len(string) > 0):
+ if begins_with_surrogate(string):
+ rest_of_glyphs.append(cmap_dict[myord(string[0], string[1])])
+ string = string[2:]
+ else:
+ rest_of_glyphs.append(cmap_dict[ord (string[0])])
+ string = string[1:]
+
for ligature in ligatures[first_glyph]:
if ligature.Component == rest_of_glyphs:
return ligature.LigGlyph
@@ -462,13 +488,13 @@ def is_vs(cp):
if "_" in codes:
pieces = codes.split ("_")
cps = [int(code, 16) for code in pieces]
- uchars = "".join ([unichr(cp) for cp in cps if not is_vs(cp)])
+ uchars = "".join ([myunichr(cp) for cp in cps if not is_vs(cp)])
else:
cp = int(codes, 16)
if is_vs(cp):
print "ignoring unexpected vs input %04x" % cp
continue
- uchars = unichr(cp)
+ uchars = myunichr(cp)
img_files[uchars] = img_file
if not img_files:
raise Exception ("No image files found in '%s'." % glb)
@@ -483,6 +509,13 @@ def is_vs(cp):
except:
print "no cmap entry for %x" % ord(uchars)
raise ValueError("%x" % ord(uchars))
+ elif len (uchars) == 2 and begins_with_surrogate(uchars):  # one code point held as a surrogate pair
+ cp = myord(uchars[0], uchars[1])
+ try:
+ glyph_name = unicode_cmap.cmap[cp]
+ except:
+ print "no cmap entry for %x" % cp
+ raise ValueError("%x" % cp)  # uchars has length 2 here, so ord(uchars) would raise TypeError
else:
glyph_name = get_glyph_name_from_gsub (uchars, font, unicode_cmap.cmap)
glyph_id = font.getGlyphID (glyph_name)
|