Merge branch 'fcft-precompose'

2026-02-05 04:06:08 -05:00 · 2020-05-13 13:03:20 +02:00 · 2020-05-13 13:03:20 +02:00 · 980606233b
commit 980606233b
parent 6d0f8e52cb 00df12f1a3
8 changed files with 37 additions and 33920 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@ -22,9 +22,7 @@
 * Right mouse button extends the current selection.
 * `CSI Ps ; Ps ; Ps t` escape sequences for the following parameters:
  `11t`, `13t`, `13;2t`, `14t`, `14;2t`, `15t`, `19t`.
-* Unicode combining characters. Parts of this feature are compile time
-  optional. See [README.md](README.md#user-content-unicode-combining)
-  for details.
+* Unicode combining characters.

 ### Changed

--- a/2
+++ b/2
@ -5,7 +5,7 @@ arch=('x86_64')
 url=https://codeberg.org/dnkl/foot
 license=(mit)
 makedepends=('meson' 'ninja' 'scdoc' 'python' 'ncurses' 'wayland-protocols' 'tllist>=1.0.1')
-depends=('libxkbcommon' 'wayland' 'pixman' 'fcft>=2.0.0')
+depends=('libxkbcommon' 'wayland' 'pixman' 'fcft>=2.1.0')
 source=()

 pkgver() {
--- a/README.md
+++ b/README.md
@ -17,7 +17,6 @@ The fast, lightweight and minimalistic Wayland terminal emulator.
 1. [Backspace](#backspace)
 1. [DPI and font size](#dpi-and-font-size)
 1. [Supported OSCs](#supported-oscs)
-1. [Unicode combining](#unicode-combining)
 1. [Requirements](#requirements)
   1. [Running](#running)
   1. [Building](#building)
@ -287,28 +286,6 @@ with the terminal emulator itself. Foot implements the following OSCs:
 * `OSC 555` - flash screen (**foot specific**)


-## Unicode combining
-
-When the client prints Unicode combining characters, e.g `a\\u0308`
-('a' + `COMBINING DIAERESIS`), foot will be default try to create a
-pre-composed character. For example, `\\u0061\\u0308` (`a\\u0308`)
-will be transformed into `\\u00e5` (`å`).
-
-This is to improve the looks of the rendered grapheme. When rendering
-a decomposed string, `a\\u0308`, the glyphs for `a` and `\\u0308` are
-rendered independently, on top off each other. The result if often not
-optimal, with e.g. diacritics looking a bit out of place. If we are
-really unlucky, the base character and the combining characters may be
-picked from different fonts, making the result look even more awkward.
-
-When rendering a pre-composed character, we are rendering a single
-glyph only and thus it is guaranteed to look the way the font designer
-intended it to.
-
-Still, if you do not want this, you can disable pre-composing at
-**compile time** with `-Dunicode-precompose=false`.
-
-
 ## Requirements

 ### Running
--- a/UnicodeData.txt
+++ b/UnicodeData.txt
--- a/meson.build
+++ b/meson.build
@ -13,7 +13,8 @@ is_debug_build = get_option('buildtype').startswith('debug')
 cc = meson.get_compiler('c')

 add_project_arguments(
-  ['-D_GNU_SOURCE=200809L'] + (is_debug_build ? ['-D_DEBUG'] : []) +
+  ['-D_POSIX_C_SOURCE=200809L', '-D_GNU_SOURCE=200809L'] +
+  (is_debug_build ? ['-D_DEBUG'] : []) +
  cc.get_supported_arguments(
    ['-fstrict-aliasing',
     '-Wstrict-aliasing',
@ -57,11 +58,8 @@ wayland_client = dependency('wayland-client')
 wayland_cursor = dependency('wayland-cursor')
 xkb = dependency('xkbcommon')

-add_project_arguments('-DFOOT_UNICODE_PRECOMPOSE=@0@'.format(
-  get_option('unicode-precompose')), language: 'c')
-
 tllist = dependency('tllist', version: '>=1.0.1', fallback: 'tllist')
-fcft = dependency('fcft', version: ['>=2.0.0', '<2.1.0'], fallback: 'fcft')
+fcft = dependency('fcft', version: ['>=2.1.0', '<3.0.0'], fallback: 'fcft')

 wayland_protocols_datadir = wayland_protocols.get_pkgconfig_variable('pkgdatadir')

@ -92,17 +90,6 @@ foreach prot : [
    command: [wscanner_prog, 'private-code', '@INPUT@', '@OUTPUT@'])
 endforeach

-if get_option('unicode-precompose')
-  generate_unicode_precompose_sh = files('scripts/generate-unicode-precompose.sh')
-  unicode_data = custom_target(
-    'unicode-data',
-    input: 'UnicodeData.txt',
-    output: 'unicode-compose-table.h',
-    command: [generate_unicode_precompose_sh, '@INPUT@', '@OUTPUT@'])
-else
-  unicode_data = []
-endif
-
 generate_version_sh = files('generate-version.sh')
 version = custom_target(
  'generate_version',
@ -138,7 +125,7 @@ executable(
  'tokenize.c', 'tokenize.h',
  'vt.c', 'vt.h',
  'wayland.c', 'wayland.h',
-  wl_proto_src + wl_proto_headers, version, unicode_data,
+  wl_proto_src + wl_proto_headers, version,
  dependencies: [math, threads, pixman, wayland_client, wayland_cursor, xkb, tllist, fcft],
  install: true)

@ -163,9 +150,9 @@ install_data('footrc', install_dir: join_paths(get_option('datadir'), 'foot'))
 subdir('completions')
 subdir('doc')

-summary(
-  {
-    'Unicode precompose': get_option('unicode-precompose'),
-  },
-  bool_yn: true
-)
+# summary(
+#   {
+#     '<feature>': false,
+#   },
+#   bool_yn: true
+# )
--- a/meson_options.txt
+++ b/meson_options.txt
@ -1,2 +0,0 @@
-option('unicode-precompose', type: 'boolean', value: true,
-       description: 'Convert decomposed characters to precomposed. Ignored if "unicode-combining" has been disabled')
--- a/scripts/generate-unicode-precompose.sh
+++ b/scripts/generate-unicode-precompose.sh
@ -1,33 +0,0 @@
-#!/bin/sh
-
-unicodedata_txt="${1}"
-output="${2}"
-
-cat <<EOF > "${output}"
-#pragma once
-
-#include <wchar.h>
-
-static const struct {
-    wchar_t replacement;
-    wchar_t base;
-    wchar_t comb;
-} precompose_table[] = {
-EOF
-
-# extract canonical decomposition data from UnicodeData.txt,
-# - pad hex values to 5 digits,
-# - sort numerically on base character, then combining character,
-# - then reduce to 4 digits again where possible
-#
-# "borrowed" from xterm/unicode/make-precompose.sh
-
-cut -d ";" -f 1,6 "${unicodedata_txt}" |
-    grep ";[0-9,A-F]" | grep " " |
-    sed -e "s/ /, 0x/;s/^/{ 0x/;s/;/, 0x/;s/$/},/" |
-    sed -e "s,0x\(....\)\([^0-9A-Fa-f]\),0x0\1\2,g" |
-    (sort -k 3 || sort +2) |
-    sed -e "s,0x0\(...[0-9A-Fa-f]\),0x\1,g" |
-    sed 's/^/    /' >> "${output}"
-
-echo "};" >> "${output}"
--- a/vt.c
+++ b/vt.c
@ -14,10 +14,6 @@
 #include "osc.h"
 #include "util.h"

-#if FOOT_UNICODE_PRECOMPOSE
- #include "unicode-compose-table.h"
-#endif
-
 #define UNHANDLED() LOG_DBG("unhandled: %s", esc_as_string(term, final))

 /* https://vt100.net/emu/dec_ansi_parser */
@ -527,36 +523,6 @@ action_utf8_4_entry(struct terminal *term, uint8_t c)
    term->vt.utf8.data[term->vt.utf8.idx++] = c;
 }

-#if FOOT_UNICODE_PRECOMPOSE
-static wchar_t
-precompose(wchar_t base, wchar_t comb)
-{
-    static_assert(2 * sizeof(wchar_t) <= sizeof(uint64_t),
-                  "two wchars does not fit in an uint64_t");
-
-    const uint64_t match = (uint64_t)base << 32 | comb;
-
-    ssize_t start = 0;
-    ssize_t end = ALEN(precompose_table) - 1;
-
-    while (start <= end) {
-        size_t middle = (start + end) / 2;
-
-        const uint64_t maybe =
-            (uint64_t)precompose_table[middle].base << 32 | precompose_table[middle].comb;
-
-        if (maybe < match)
-            start = middle + 1;
-        else if (maybe > match)
-            end = middle - 1;
-        else
-            return precompose_table[middle].replacement;
-    }
-
-    return (wchar_t)-1;
-}
-#endif
-
 static void
 action_utf8_print(struct terminal *term, uint8_t c)
 {
@ -629,16 +595,37 @@ action_utf8_print(struct terminal *term, uint8_t c)
            term->grid->cursor.point.col = base_col;
            term->grid->cursor.lcf = false;

-#if FOOT_UNICODE_PRECOMPOSE
            if (composed == NULL) {
-                wchar_t precomposed = precompose(base, wc);
+                bool base_from_primary;
+                bool comb_from_primary;
+                bool pre_from_primary;
+
+                wchar_t precomposed = fcft_precompose(
+                    term->fonts[0], base, wc, &base_from_primary,
+                    &comb_from_primary, &pre_from_primary);
+
                int precomposed_width = wcwidth(precomposed);
-                if (precomposed != (wchar_t)-1 && precomposed_width == base_width) {
+
+                /*
+                 * Only use the pre-composed character if:
+                 *
+                 *  1. we *have* a pre-composed character
+                 *  2. the width matches the base character's width
+                 *  3. it's in the primary font, OR one of the base or
+                 *     combining characters is *not* from the primary
+                 *     font
+                 */
+
+                if (precomposed != (wchar_t)-1 &&
+                    precomposed_width == base_width &&
+                    (pre_from_primary ||
+                     !base_from_primary ||
+                     !comb_from_primary))
+                {
                    term_print(term, precomposed, precomposed_width);
                    return;
                }
            }
-#endif

            size_t wanted_count = composed != NULL ? composed->count + 1 : 1;
            if (wanted_count > ALEN(composed->combining)) {