From a24a3520c7e0230b0b3b853db0cb2871427d1ab4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Daniel=20Ekl=C3=B6f?=
Date: Sat, 2 May 2020 18:25:15 +0200
Subject: [PATCH] script: generate-unicode-precompose: generate a complete header file

---
NOTE(review): this patch was reconstructed from a whitespace-collapsed copy.
The here-document operator (`<<EOF >`), the `<wchar.h>` include target and
the indentation widths were restored by inference; verify them against the
upstream commit before applying. Text in this notes area is ignored by
`git am`.

 scripts/generate-unicode-precompose.sh | 29 +++++++++++++++++++++++++--
 1 file changed, 27 insertions(+), 2 deletions(-)

diff --git a/scripts/generate-unicode-precompose.sh b/scripts/generate-unicode-precompose.sh
index 830e9379..49e12650 100755
--- a/scripts/generate-unicode-precompose.sh
+++ b/scripts/generate-unicode-precompose.sh
@@ -1,8 +1,33 @@
 #!/usr/bin/sh
 
-cut - -d ";" -f 1,6 |
+unicodedata_txt="${1}"
+output="${2}"
+
+cat <<EOF > "${output}"
+#pragma once
+
+#include <wchar.h>
+
+static const struct {
+    wchar_t replacement;
+    wchar_t base;
+    wchar_t comb;
+} precompose_table[] = {
+EOF
+
+# extract canonical decomposition data from UnicodeData.txt,
+# - pad hex values to 5 digits,
+# - sort numerically on base character, then combining character,
+# - then reduce to 4 digits again where possible
+#
+# "borrowed" from xterm/unicode/make-precompose.sh
+
+cut "${unicodedata_txt}" -d ";" -f 1,6 |
     grep ";[0-9,A-F]" | grep " " |
     sed -e "s/ /, 0x/;s/^/{ 0x/;s/;/, 0x/;s/$/},/" |
     sed -e "s,0x\(....\)\([^0-9A-Fa-f]\),0x0\1\2,g" |
     (sort -k 3 || sort +2) |
-    sed -e "s,0x0\(...[0-9A-Fa-f]\),0x\1,g"
+    sed -e "s,0x0\(...[0-9A-Fa-f]\),0x\1,g" |
+    sed 's/^/    /' >> "${output}"
+
+echo "};" >> "${output}"