unicode-precompose: use fcft's precompose functionality

This gives us more options when deciding whether or not to use a
pre-composed character:

We now only use the pre-composed character if it's from the primary
font, or if at least one of the base or combining characters is from
a fallback font.

I.e. use glyphs from the primary font if possible. But, if one or more
of the decomposed glyphs are from a fallback font, use the
pre-composed character anyway.
This commit is contained in:
Daniel Eklöf 2020-05-08 23:36:33 +02:00
parent c090a0664f
commit b1b32152c1
No known key found for this signature in database
GPG key ID: 5BBD4992C116573F
4 changed files with 26 additions and 33878 deletions

File diff suppressed because it is too large Load diff

View file

@ -92,17 +92,6 @@ foreach prot : [
command: [wscanner_prog, 'private-code', '@INPUT@', '@OUTPUT@'])
endforeach
# Optional build step: when the 'unicode-precompose' option is enabled, run
# scripts/generate-unicode-precompose.sh on UnicodeData.txt to generate the
# unicode-compose-table.h header. When the option is disabled, unicode_data
# is an empty list, so it can still be passed to the executable's sources.
if get_option('unicode-precompose')
generate_unicode_precompose_sh = files('scripts/generate-unicode-precompose.sh')
unicode_data = custom_target(
'unicode-data',
input: 'UnicodeData.txt',
output: 'unicode-compose-table.h',
command: [generate_unicode_precompose_sh, '@INPUT@', '@OUTPUT@'])
else
unicode_data = []
endif
generate_version_sh = files('generate-version.sh')
version = custom_target(
'generate_version',
@ -138,7 +127,7 @@ executable(
'tokenize.c', 'tokenize.h',
'vt.c', 'vt.h',
'wayland.c', 'wayland.h',
wl_proto_src + wl_proto_headers, version, unicode_data,
wl_proto_src + wl_proto_headers, version,
dependencies: [math, threads, pixman, wayland_client, wayland_cursor, xkb, tllist, fcft],
install: true)

View file

@ -1,33 +0,0 @@
#!/bin/sh
#
# Generates a C header that defines precompose_table, an array of
# { replacement, base, comb } entries built from the two-character
# decompositions listed in UnicodeData.txt.
#
# Arguments:
#   $1  path to UnicodeData.txt (input)
#   $2  path of the header to write (output, overwritten)

set -eu

if [ "$#" -ne 2 ]; then
    echo "usage: ${0} <UnicodeData.txt> <output-header>" >&2
    exit 1
fi

unicodedata_txt="${1}"
output="${2}"

# Fail loudly instead of silently generating an empty table.
if [ ! -r "${unicodedata_txt}" ]; then
    echo "${0}: cannot read ${unicodedata_txt}" >&2
    exit 1
fi

# Everything below is written through a single redirection, so the header
# is produced in one pass rather than being truncated and then appended to.
{
cat <<EOF
#pragma once
#include <wchar.h>
static const struct {
wchar_t replacement;
wchar_t base;
wchar_t comb;
} precompose_table[] = {
EOF

# extract canonical decomposition data from UnicodeData.txt,
# - keep only fields 1 and 6, and only lines where field 6 starts with
#   a hex digit and contains a space (i.e. has two parts),
# - rewrite "REPL;BASE COMB" as "{ 0xREPL, 0xBASE, 0xCOMB},",
# - pad hex values to 5 digits,
# - sort numerically on base character, then combining character
#   (C locale, so the order is plain byte order regardless of the
#   user's environment),
# - then reduce to 4 digits again where possible
#
# "borrowed" from xterm/unicode/make-precompose.sh
#
# Note: options are given before the file operand; POSIX cut does not
# accept them after it.
cut -d ";" -f 1,6 "${unicodedata_txt}" |
    grep ";[0-9,A-F]" | grep " " |
    sed -e "s/ /, 0x/;s/^/{ 0x/;s/;/, 0x/;s/$/},/" |
    sed -e "s,0x\(....\)\([^0-9A-Fa-f]\),0x0\1\2,g" |
    (LC_ALL=C sort -k 3 || LC_ALL=C sort +2) |
    sed -e "s,0x0\(...[0-9A-Fa-f]\),0x\1,g" |
    sed 's/^/ /'

echo "};"
} > "${output}"

61
vt.c
View file

@ -14,10 +14,6 @@
#include "osc.h"
#include "util.h"
#if FOOT_UNICODE_PRECOMPOSE
#include "unicode-compose-table.h"
#endif
#define UNHANDLED() LOG_DBG("unhandled: %s", esc_as_string(term, final))
/* https://vt100.net/emu/dec_ansi_parser */
@ -527,36 +523,6 @@ action_utf8_4_entry(struct terminal *term, uint8_t c)
term->vt.utf8.data[term->vt.utf8.idx++] = c;
}
#if FOOT_UNICODE_PRECOMPOSE
/*
 * Looks up the pre-composed form of a base character followed by a
 * combining character.
 *
 * precompose_table is binary-searched using the 64-bit key
 * (base << 32 | comb); the search therefore requires the table to be
 * sorted in ascending order on base first and comb second.
 *
 * Returns the table's replacement character for the pair, or
 * (wchar_t)-1 when the pair has no entry in the table.
 */
static wchar_t
precompose(wchar_t base, wchar_t comb)
{
    static_assert(2 * sizeof(wchar_t) <= sizeof(uint64_t),
                  "two wchars does not fit in an uint64_t");

    const uint64_t match = (uint64_t)base << 32 | comb;

    /*
     * All indices are signed: 'end' must be able to become -1 (empty
     * table, or a match that sorts before the first entry) so that the
     * loop condition terminates the search.
     */
    ssize_t start = 0;
    ssize_t end = (ssize_t)ALEN(precompose_table) - 1;

    while (start <= end) {
        /* Midpoint computed without adding the two bounds together */
        const ssize_t middle = start + (end - start) / 2;

        const uint64_t maybe =
            (uint64_t)precompose_table[middle].base << 32 |
            precompose_table[middle].comb;

        if (maybe < match)
            start = middle + 1;
        else if (maybe > match)
            end = middle - 1;
        else
            return precompose_table[middle].replacement;
    }

    return (wchar_t)-1;
}
#endif
static void
action_utf8_print(struct terminal *term, uint8_t c)
{
@ -631,9 +597,32 @@ action_utf8_print(struct terminal *term, uint8_t c)
#if FOOT_UNICODE_PRECOMPOSE
if (composed == NULL) {
wchar_t precomposed = precompose(base, wc);
bool base_from_primary;
bool comb_from_primary;
bool pre_from_primary;
wchar_t precomposed = fcft_precompose(
term->fonts[0], base, wc, &base_from_primary,
&comb_from_primary, &pre_from_primary);
int precomposed_width = wcwidth(precomposed);
if (precomposed != (wchar_t)-1 && precomposed_width == base_width) {
/*
* Only use the pre-composed character if:
*
* 1. we *have* a pre-composed character
* 2. the width matches the base characters width
* 3. it's in the primary font, OR one of the base or
* combining characters are *not* from the primary
* font
*/
if (precomposed != (wchar_t)-1 &&
precomposed_width == base_width &&
(pre_from_primary ||
!base_from_primary ||
!comb_from_primary))
{
term_print(term, precomposed, precomposed_width);
return;
}