mirror of
https://codeberg.org/dnkl/foot.git
synced 2026-04-05 07:15:30 -04:00
scripts: generate-emoji-variation-sequences: don't assume single codepoint sequences
Right now (Unicode 15.1), all valid variation sequences consist of a single Unicode codepoint (followed by either VS-15 or VS-16). Don't assume this will always be the case. We still don't actually handle longer sequences, but now we at least detect them and error out.
This commit is contained in:
parent
aed9c392eb
commit
085c60a334
1 changed files with 11 additions and 4 deletions
|
|
@@ -30,11 +30,18 @@ def main():
|
||||||
if line[0] == '#':
|
if line[0] == '#':
|
||||||
continue
|
continue
|
||||||
|
|
||||||
cp, vs, _ = line.split(' ', maxsplit=2)
|
# Example: "0023 FE0E ; text style; # (1.1) NUMBER SIGN"
|
||||||
cp = int(cp, 16)
|
cps, _ = line.split(';', maxsplit=1) # cps = "0023 FE0F "
|
||||||
vs = int(vs, 16)
|
cps = cps.strip().split(' ') # cps = ["0023", "FE0F"]
|
||||||
|
|
||||||
assert vs == 0xfe0e or vs == 0xfe0f
|
if len(cps) != 2:
|
||||||
|
raise NotImplementedError(f'emoji variation sequences with more than one base codepoint: {cps}')
|
||||||
|
|
||||||
|
cp, vs = cps # cp = "0023", vs = "FE0F"
|
||||||
|
cp = int(cp, 16) # cp = 0x23
|
||||||
|
vs = int(vs, 16) # vs = 0xfe0f
|
||||||
|
|
||||||
|
assert vs in [0xfe0e, 0xfe0f]
|
||||||
|
|
||||||
if cp not in codepoints:
|
if cp not in codepoints:
|
||||||
codepoints[cp] = Codepoint(cp)
|
codepoints[cp] = Codepoint(cp)
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue