mirror of
https://codeberg.org/dnkl/foot.git
synced 2026-02-04 04:06:06 -05:00
scripts: generate-emoji-variation-sequences: don't assume single codepoint sequences
Right now (Unicode 15.1), every valid variation sequence consists of a single Unicode codepoint followed by either VS-15 or VS-16. Don't assume this will always be the case. We still don't actually handle longer sequences, but now we at least detect them and error out.
This commit is contained in:
parent
aed9c392eb
commit
085c60a334
1 changed file with 11 additions and 4 deletions
|
|
@@ -30,11 +30,18 @@ def main():
|
|||
if line[0] == '#':
|
||||
continue
|
||||
|
||||
cp, vs, _ = line.split(' ', maxsplit=2)
|
||||
cp = int(cp, 16)
|
||||
vs = int(vs, 16)
|
||||
# Example: "0023 FE0F ; emoji style; # (1.1) NUMBER SIGN"
|
||||
cps, _ = line.split(';', maxsplit=1) # cps = "0023 FE0F "
|
||||
cps = cps.strip().split(' ') # cps = ["0023", "FE0F"]
|
||||
|
||||
assert vs == 0xfe0e or vs == 0xfe0f
|
||||
if len(cps) != 2:
|
||||
raise NotImplementedError(f'emoji variation sequences with more than one base codepoint: {cps}')
|
||||
|
||||
cp, vs = cps # cp = "0023", vs = "FE0F"
|
||||
cp = int(cp, 16) # cp = 0x23
|
||||
vs = int(vs, 16) # vs = 0xfe0f
|
||||
|
||||
assert vs in [0xfe0e, 0xfe0f]
|
||||
|
||||
if cp not in codepoints:
|
||||
codepoints[cp] = Codepoint(cp)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue