2024-06-24 21:18:37 +02:00
|
|
|
#!/usr/bin/env python3
|
|
|
|
|
|
|
|
|
|
import argparse
import sys
from collections.abc import Iterable
from typing import TextIO
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class Codepoint:
|
2025-12-26 13:13:01 +01:00
|
|
|
def __init__(self, start: int, end: None | int = None) -> None:
|
2024-06-24 21:18:37 +02:00
|
|
|
self.start = start
|
|
|
|
|
self.end = start if end is None else end
|
|
|
|
|
self.vs15 = False
|
|
|
|
|
self.vs16 = False
|
|
|
|
|
|
|
|
|
|
def __repr__(self) -> str:
|
|
|
|
|
return f'{self.start:x}-{self.end:x}, vs15={self.vs15}, vs16={self.vs16}'
|
|
|
|
|
|
|
|
|
|
|
2025-12-26 13:13:01 +01:00
|
|
|
def _parse_variation_sequences(lines: Iterable[str]) -> dict[int, Codepoint]:
    """Collect, per base codepoint, which variation selectors are listed.

    Each data line looks like ``"0023 FE0F ; emoji style; # (1.1) NUMBER
    SIGN"``: a base codepoint and a variation selector (both hex), followed
    by ``;`` and free-form text that is ignored.  Blank lines and lines
    starting with ``#`` are skipped.

    Raises:
        ValueError: a data line has no ``;``, has fewer than two fields
            before it, contains a non-hex field, or names a selector other
            than U+FE0E / U+FE0F.
        NotImplementedError: more than two fields precede the ``;`` (i.e.
            a sequence with more than one base codepoint).
    """
    codepoints: dict[int, Codepoint] = {}

    for lineno, raw in enumerate(lines, start=1):
        line = raw.rstrip()
        if not line or line[0] == '#':
            continue

        # "0023 FE0F ; emoji style; ..." -> fields = "0023 FE0F "
        fields, sep, _ = line.partition(';')
        if not sep:
            raise ValueError(f'line {lineno}: missing ";" separator: {line!r}')

        # split() without an argument tolerates tabs and repeated spaces,
        # which split(' ') would turn into bogus empty fields.
        cps = fields.split()                       # ["0023", "FE0F"]

        if len(cps) > 2:
            raise NotImplementedError(f'emoji variation sequences with more than one base codepoint: {cps}')
        if len(cps) < 2:
            raise ValueError(
                f'line {lineno}: expected "<codepoint> <variation selector>", got: {fields!r}')

        cp = int(cps[0], 16)                       # 0x23
        vs = int(cps[1], 16)                       # 0xfe0f

        # Explicit check rather than `assert`: asserts vanish under -O, and
        # an unknown selector would then silently be recorded as VS-16.
        if vs not in (0xfe0e, 0xfe0f):
            raise ValueError(f'line {lineno}: unexpected variation selector: {vs:#x}')

        entry = codepoints.get(cp)
        if entry is None:
            entry = codepoints[cp] = Codepoint(cp)

        if vs == 0xfe0e:
            entry.vs15 = True
        else:
            entry.vs16 = True

    return codepoints


def _compact_ranges(codepoints: Iterable[Codepoint]) -> list[Codepoint]:
    """Merge adjacent codepoints with identical flags into ranges.

    The input entries must each cover a single codepoint.  They are sorted
    by ``start``; an entry whose ``start`` directly follows the previous
    range's ``end`` and whose ``vs15``/``vs16`` flags match extends that
    range (its ``end`` is updated in place), otherwise it opens a new one.
    """
    compacted: list[Codepoint] = []

    for cp in sorted(codepoints, key=lambda c: c.start):
        # Internal invariant: nothing has been merged into `cp` yet.
        assert cp.end == cp.start

        last = compacted[-1] if compacted else None
        if (last is not None
                and last.end == cp.start - 1
                and last.vs15 == cp.vs15
                and last.vs16 == cp.vs16):
            last.end = cp.start
        else:
            compacted.append(cp)

    return compacted


def _write_header(out: TextIO, ranges: list[Codepoint]) -> None:
    """Write the C header declaring ``struct emoji_vs`` and its table.

    The table itself is wrapped in ``#if defined(FOOT_GRAPHEME_CLUSTERING)``.
    """
    out.write(
        '#pragma once\n'
        '#include <stdint.h>\n'
        '#include <stdbool.h>\n'
        '\n'
        'struct emoji_vs {\n'
        ' uint32_t start:21;\n'
        ' uint32_t end:21;\n'
        ' bool vs15:1;\n'
        ' bool vs16:1;\n'
        '} __attribute__((packed));\n'
        '_Static_assert(sizeof(struct emoji_vs) == 6, "unexpected struct size");\n'
        '\n'
        '#if defined(FOOT_GRAPHEME_CLUSTERING)\n'
        '\n'
    )

    out.write(f'static const struct emoji_vs emoji_vs[{len(ranges)}] = {{\n')

    for entry in ranges:
        # NOTE: .start is emitted in uppercase hex and .end in lowercase;
        # kept as-is so the generated file does not change.
        out.write(
            ' {\n'
            f' .start = 0x{entry.start:X},\n'
            f' .end = 0x{entry.end:x},\n'
            f' .vs15 = {"true" if entry.vs15 else "false"},\n'
            f' .vs16 = {"true" if entry.vs16 else "false"},\n'
            ' },\n'
        )

    out.write(
        '};\n'
        '\n'
        '#endif /* FOOT_GRAPHEME_CLUSTERING */\n'
    )


def main() -> None:
    """Generate a C header from an emoji variation sequences data file.

    Command line: ``<input> <output>``.  The input is parsed into
    per-codepoint selector flags, adjacent codepoints with equal flags are
    merged into ranges, and the resulting table is written to the output
    as a C header.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('input', type=argparse.FileType('r'))
    parser.add_argument('output', type=argparse.FileType('w'))
    opts = parser.parse_args()

    try:
        codepoints = _parse_variation_sequences(opts.input)
        compacted = _compact_ranges(codepoints.values())
        _write_header(opts.output, compacted)
    finally:
        # argparse.FileType never closes what it opens.  Close explicitly so
        # that a failing flush of the generated header surfaces here instead
        # of being lost at interpreter shutdown.  "-" maps to the standard
        # streams, which are not ours to close.
        for stream in (opts.input, opts.output):
            if stream is not sys.stdin and stream is not sys.stdout:
                stream.close()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Script entry point: generate the header only when this file is executed
# directly, not when it is imported as a module.
if __name__ == '__main__':
    main()
|