term+vt: refactor: move "utf8" char processing to term_process_and_print_non_ascii()

This function "prints" any non-ASCII character (i.e. any character that ends up in the action_utf8_print() function in vt.c) to the grid. This includes grapheme cluster processing, etc. action_utf8_print() now simply calls this function. This allows us to reuse the same functionality from other places (like the text-sizing protocol).
2026-02-04 04:06:06 -05:00 · 2025-01-26 07:33:53 +01:00 · 2025-01-26 07:33:53 +01:00 · d3f692990e
commit d3f692990e
parent 7a8d2b5e01
4 changed files with 261 additions and 251 deletions
--- a/osc.c
+++ b/osc.c
@ -1207,7 +1207,10 @@ kitty_text_size(struct terminal *term, char *string)
        free(wchars);
    }

-    term_print(term, CELL_COMB_CHARS_LO + composed->key, composed->forced_width > 0 ? composed->forced_width : composed->width);
+    term_print(
+        term, CELL_COMB_CHARS_LO + composed->key,
+        composed->forced_width > 0 ? composed->forced_width : composed->width,
+        false);
 }

 void
--- a/terminal.c
+++ b/terminal.c
@ -27,6 +27,7 @@
 #include "commands.h"
 #include "config.h"
 #include "debug.h"
+#include "emoji-variation-sequences.h"
 #include "extract.h"
 #include "grid.h"
 #include "ime.h"
@ -4073,6 +4074,260 @@ term_single_shift(struct terminal *term, enum charset_designator idx)
    term->ascii_printer = &ascii_printer_single_shift;
 }

+#if defined(FOOT_GRAPHEME_CLUSTERING)
+static int
+emoji_vs_compare(const void *_key, const void *_entry)  /* bsearch() comparator over emoji_vs ranges */
+{
+    const struct emoji_vs *key = _key;      /* lookup key; only its .start is read */
+    const struct emoji_vs *entry = _entry;  /* table entry covering [start, end] */
+
+    uint32_t cp = key->start;  /* the code point being looked up */
+
+    if (cp < entry->start)
+        return -1;  /* cp lies before this entry's range */
+    else if (cp > entry->end)
+        return 1;   /* cp lies after this entry's range */
+    else
+        return 0;   /* cp is within [start, end] */
+}
+
+UNITTEST
+{
+    /* Verify the emoji_vs list is sorted: ascending, non-overlapping ranges (the bsearch() lookup relies on this) */
+    int64_t last_end = -1;  /* end of the previous range; -1 before the first entry */
+
+    for (size_t i = 0; i < sizeof(emoji_vs) / sizeof(emoji_vs[0]); i++) {
+        const struct emoji_vs *vs = &emoji_vs[i];
+        xassert(vs->start <= vs->end);    /* range is well-formed */
+        xassert(vs->start > last_end);    /* strictly after the previous range */
+        xassert(vs->vs15 || vs->vs16);    /* every entry must set at least one of vs15/vs16 */
+        last_end = vs->end;
+    }
+}
+#endif
+
+void
+term_process_and_print_non_ascii(struct terminal *term, char32_t wc)
+{
+    int width = c32width(wc);  /* replaced below when wc is precomposed or merged into a cluster */
+    bool insert_mode_disable = false;  /* set once wc is merged with the previous cell; forwarded to term_print() */
+    const bool grapheme_clustering = term->grapheme_shaping;
+
+#if !defined(FOOT_GRAPHEME_CLUSTERING)
+    xassert(!grapheme_clustering);
+#endif
+
+    if (term->grid->cursor.point.col > 0 &&
+        (grapheme_clustering ||
+         (!grapheme_clustering && width == 0 && wc >= 0x300)))  /* without clustering: only zero-width chars >= 0x300 */
+    {
+        int col = term->grid->cursor.point.col;
+        if (!term->grid->cursor.lcf)
+            col--;
+
+        /* Skip past spacers (cells with wc >= CELL_SPACER) to reach the base cell */
+        struct row *row = term->grid->cur_row;
+        while (row->cells[col].wc >= CELL_SPACER && col > 0)
+            col--;
+
+        xassert(col >= 0 && col < term->cols);
+        char32_t base = row->cells[col].wc;
+        char32_t UNUSED last = base;  /* only read by the FOOT_GRAPHEME_CLUSTERING break check below */
+
+        /* Is base cell already a cluster? */
+        const struct composed *composed =
+            (base >= CELL_COMB_CHARS_LO && base <= CELL_COMB_CHARS_HI)
+            ? composed_lookup(term->composed, base - CELL_COMB_CHARS_LO)
+            : NULL;
+
+        uint32_t key;
+
+        if (composed != NULL) {
+            base = composed->chars[0];
+            last = composed->chars[composed->count - 1];
+            key = composed_key_from_key(composed->key, wc);
+        } else
+            key = composed_key_from_key(base, wc);
+
+#if defined(FOOT_GRAPHEME_CLUSTERING)
+        if (grapheme_clustering) {
+            /* Check if we're on a grapheme cluster break; if so, wc is printed on its own at 'out' */
+            if (utf8proc_grapheme_break_stateful(
+                    last, wc, &term->vt.grapheme_state))
+            {
+                term_reset_grapheme_state(term);
+                goto out;
+            }
+        }
+#endif
+
+        int base_width = c32width(base);
+        if (base_width > 0) {
+            term->grid->cursor.point.col = col;  /* rewind onto the base cell; term_print() at 'out' runs with the cursor there */
+            term->grid->cursor.lcf = false;
+            insert_mode_disable = true;
+
+            if (composed == NULL) {
+                bool base_from_primary;
+                bool comb_from_primary;
+                bool pre_from_primary;
+
+                char32_t precomposed = term->fonts[0] != NULL
+                    ? fcft_precompose(
+                        term->fonts[0], base, wc, &base_from_primary,
+                        &comb_from_primary, &pre_from_primary)
+                    : (char32_t)-1;
+
+                int precomposed_width = c32width(precomposed);
+
+                /*
+                 * Only use the pre-composed character if:
+                 *
+                 *  1. we *have* a pre-composed character
+                 *  2. the width matches the base characters width
+                 *  3. it's in the primary font, OR one of the base or
+                 *     combining characters are *not* from the primary
+                 *     font
+                 */
+
+                if (precomposed != (char32_t)-1 &&
+                    precomposed_width == base_width &&
+                    (pre_from_primary ||
+                     !base_from_primary ||
+                     !comb_from_primary))
+                {
+                    wc = precomposed;
+                    width = precomposed_width;
+                    term_reset_grapheme_state(term);
+                    goto out;
+                }
+            }
+
+            size_t wanted_count = composed != NULL ? composed->count + 1 : 2;
+            if (wanted_count > 255) {
+                xassert(composed != NULL);
+
+#if defined(LOG_ENABLE_DBG) && LOG_ENABLE_DBG
+                LOG_WARN("combining character overflow:");
+                LOG_WARN("  base: 0x%04x", composed->chars[0]);
+                for (size_t i = 1; i < composed->count; i++)
+                    LOG_WARN("    cc: 0x%04x", composed->chars[i]);
+                LOG_ERR("   new: 0x%04x", wc);  /* NOTE(review): LOG_ERR here vs. LOG_WARN above - confirm intended */
+#endif
+                /* This is going to break anyway... the old chain's last character is not copied below; wc takes its slot */
+                wanted_count--;
+            }
+
+            xassert(wanted_count <= 255);
+
+            /* Check if we already have a match for the entire compose chain */
+            const struct composed *cc =
+                composed_lookup_without_collision(
+                    term->composed, &key,
+                    composed != NULL ? composed->chars : &(char32_t){base},
+                    composed != NULL ? composed->count : 1,
+                    wc, 0);
+
+            if (cc != NULL) {
+                /* We *do* have a match! */
+                wc = CELL_COMB_CHARS_LO + cc->key;
+                width = cc->width;
+                goto out;
+            } else {
+                /* No match - allocate a new chain below */
+            }
+
+            if (unlikely(term->composed_count >=
+                         (CELL_COMB_CHARS_HI - CELL_COMB_CHARS_LO)))
+            {
+                /* We reached our maximum number of allowed composed
+                 * character chains. Skip the chain allocation; wc is
+                 * handed to term_print() at 'out' only if its width is > 0 */
+                LOG_WARN("maximum number of composed characters reached");
+                term_reset_grapheme_state(term);
+                goto out;
+            }
+
+            /* Allocate new chain: base + any existing combining chars + wc */
+            struct composed *new_cc = xmalloc(sizeof(*new_cc));
+            new_cc->chars = xmalloc(wanted_count * sizeof(new_cc->chars[0]));
+            new_cc->key = key;
+            new_cc->count = wanted_count;
+            new_cc->chars[0] = base;
+            new_cc->chars[wanted_count - 1] = wc;
+            new_cc->forced_width = 0;
+
+            if (composed != NULL) {
+                memcpy(&new_cc->chars[1], &composed->chars[1],
+                       (wanted_count - 2) * sizeof(new_cc->chars[0]));  /* chars between base and wc */
+            }
+
+            const int grapheme_width =
+                composed != NULL ? composed->width : base_width;  /* width of the cluster before wc is appended */
+
+            switch (term->conf->tweak.grapheme_width_method) {  /* NOTE(review): no default; new_cc->width is only assigned in these cases */
+            case GRAPHEME_WIDTH_MAX:
+                new_cc->width = max(grapheme_width, width);
+                break;
+
+            case GRAPHEME_WIDTH_DOUBLE:
+                new_cc->width = min(grapheme_width + width, 2);
+
+#if defined(FOOT_GRAPHEME_CLUSTERING)
+                /* Handle VS-15 and VS-16 variation selectors (only when wc directly follows the base) */
+                if (unlikely(grapheme_clustering &&
+                             (wc == 0xfe0e || wc == 0xfe0f) &&
+                             new_cc->count == 2))
+                {
+                    const struct emoji_vs *vs =
+                        bsearch(
+                            &(struct emoji_vs){.start = new_cc->chars[0]},
+                            emoji_vs, sizeof(emoji_vs) / sizeof(emoji_vs[0]),
+                            sizeof(struct emoji_vs),
+                            &emoji_vs_compare);
+
+                    if (vs != NULL) {
+                        xassert(new_cc->chars[0] >= vs->start &&
+                                new_cc->chars[0] <= vs->end);
+
+                        /* Force a grapheme width of 1 for VS-15, and 2 for VS-16 (if the entry allows that selector) */
+                        if (wc == 0xfe0e) {
+                            if (vs->vs15)
+                                new_cc->width = 1;
+                        } else if (wc == 0xfe0f) {
+                            if (vs->vs16)
+                                new_cc->width = 2;
+                        }
+                    }
+                }
+#endif
+
+                break;
+
+            case GRAPHEME_WIDTH_WCSWIDTH:
+                new_cc->width = grapheme_width + width;
+                break;
+            }
+
+            term->composed_count++;
+            composed_insert(&term->composed, new_cc);
+
+            wc = CELL_COMB_CHARS_LO + new_cc->key;  /* the cell stores a reference to the chain, not a code point */
+            width = new_cc->width;
+
+            xassert(wc >= CELL_COMB_CHARS_LO);
+            xassert(wc <= CELL_COMB_CHARS_HI);
+            goto out;
+        }
+    } else
+        term_reset_grapheme_state(term);
+
+
+out:
+    if (width > 0)  /* nothing is printed when the final width is 0 */
+        term_print(term, wc, width, insert_mode_disable);
+}
+
 enum term_surface
 term_surface_kind(const struct terminal *term, const struct wl_surface *surface)
 {
--- a/terminal.h
+++ b/terminal.h
@ -894,6 +894,7 @@ void term_cursor_up(struct terminal *term, int count);
 void term_cursor_down(struct terminal *term, int count);
 void term_cursor_blink_update(struct terminal *term);

+void term_process_and_print_non_ascii(struct terminal *term, char32_t wc);
 void term_print(struct terminal *term, char32_t wc, int width,
                bool insert_mode_disable);
 void term_fill(struct terminal *term, int row, int col, uint8_t c, size_t count,
--- a/vt.c
+++ b/vt.c
@ -16,7 +16,6 @@
 #include "csi.h"
 #include "dcs.h"
 #include "debug.h"
-#include "emoji-variation-sequences.h"
 #include "osc.h"
 #include "sixel.h"
 #include "util.h"
@ -647,258 +646,10 @@ action_put(struct terminal *term, uint8_t c)
    dcs_put(term, c);
 }

-#if defined(FOOT_GRAPHEME_CLUSTERING)
-static int
-emoji_vs_compare(const void *_key, const void *_entry)
-{
-    const struct emoji_vs *key = _key;
-    const struct emoji_vs *entry = _entry;
-
-    uint32_t cp = key->start;
-
-    if (cp < entry->start)
-        return -1;
-    else if (cp > entry->end)
-        return 1;
-    else
-        return 0;
-}
-
-UNITTEST
-{
-    /* Verify the emoji_vs list is sorted */
-    int64_t last_end = -1;
-
-    for (size_t i = 0; i < sizeof(emoji_vs) / sizeof(emoji_vs[0]); i++) {
-        const struct emoji_vs *vs = &emoji_vs[i];
-        xassert(vs->start <= vs->end);
-        xassert(vs->start > last_end);
-        xassert(vs->vs15 || vs->vs16);
-        last_end = vs->end;
-    }
-}
-#endif
-
 static void
 action_utf8_print(struct terminal *term, char32_t wc)
 {
-    int width = c32width(wc);
-    bool insert_mode_disable = false;
-    const bool grapheme_clustering = term->grapheme_shaping;
-
-#if !defined(FOOT_GRAPHEME_CLUSTERING)
-    xassert(!grapheme_clustering);
-#endif
-
-    if (term->grid->cursor.point.col > 0 &&
-        (grapheme_clustering ||
-         (!grapheme_clustering && width == 0 && wc >= 0x300)))
-    {
-        int col = term->grid->cursor.point.col;
-        if (!term->grid->cursor.lcf)
-            col--;
-
-        /* Skip past spacers */
-        struct row *row = term->grid->cur_row;
-        while (row->cells[col].wc >= CELL_SPACER && col > 0)
-            col--;
-
-        xassert(col >= 0 && col < term->cols);
-        char32_t base = row->cells[col].wc;
-        char32_t UNUSED last = base;
-
-        /* Is base cell already a cluster? */
-        const struct composed *composed =
-            (base >= CELL_COMB_CHARS_LO && base <= CELL_COMB_CHARS_HI)
-            ? composed_lookup(term->composed, base - CELL_COMB_CHARS_LO)
-            : NULL;
-
-        uint32_t key;
-
-        if (composed != NULL) {
-            base = composed->chars[0];
-            last = composed->chars[composed->count - 1];
-            key = composed_key_from_key(composed->key, wc);
-        } else
-            key = composed_key_from_key(base, wc);
-
-#if defined(FOOT_GRAPHEME_CLUSTERING)
-        if (grapheme_clustering) {
-            /* Check if we're on a grapheme cluster break */
-            if (utf8proc_grapheme_break_stateful(
-                    last, wc, &term->vt.grapheme_state))
-            {
-                term_reset_grapheme_state(term);
-                goto out;
-            }
-        }
-#endif
-
-        int base_width = c32width(base);
-        if (base_width > 0) {
-            term->grid->cursor.point.col = col;
-            term->grid->cursor.lcf = false;
-            insert_mode_disable = true;
-
-            if (composed == NULL) {
-                bool base_from_primary;
-                bool comb_from_primary;
-                bool pre_from_primary;
-
-                char32_t precomposed = term->fonts[0] != NULL
-                    ? fcft_precompose(
-                        term->fonts[0], base, wc, &base_from_primary,
-                        &comb_from_primary, &pre_from_primary)
-                    : (char32_t)-1;
-
-                int precomposed_width = c32width(precomposed);
-
-                /*
-                 * Only use the pre-composed character if:
-                 *
-                 *  1. we *have* a pre-composed character
-                 *  2. the width matches the base characters width
-                 *  3. it's in the primary font, OR one of the base or
-                 *     combining characters are *not* from the primary
-                 *     font
-                 */
-
-                if (precomposed != (char32_t)-1 &&
-                    precomposed_width == base_width &&
-                    (pre_from_primary ||
-                     !base_from_primary ||
-                     !comb_from_primary))
-                {
-                    wc = precomposed;
-                    width = precomposed_width;
-                    term_reset_grapheme_state(term);
-                    goto out;
-                }
-            }
-
-            size_t wanted_count = composed != NULL ? composed->count + 1 : 2;
-            if (wanted_count > 255) {
-                xassert(composed != NULL);
-
-#if defined(LOG_ENABLE_DBG) && LOG_ENABLE_DBG
-                LOG_WARN("combining character overflow:");
-                LOG_WARN("  base: 0x%04x", composed->chars[0]);
-                for (size_t i = 1; i < composed->count; i++)
-                    LOG_WARN("    cc: 0x%04x", composed->chars[i]);
-                LOG_ERR("   new: 0x%04x", wc);
-#endif
-                /* This is going to break anyway... */
-                wanted_count--;
-            }
-
-            xassert(wanted_count <= 255);
-
-            /* Check if we already have a match for the entire compose chain */
-            const struct composed *cc =
-                composed_lookup_without_collision(
-                    term->composed, &key,
-                    composed != NULL ? composed->chars : &(char32_t){base},
-                    composed != NULL ? composed->count : 1,
-                    wc, 0);
-
-            if (cc != NULL) {
-                /* We *do* have a match! */
-                wc = CELL_COMB_CHARS_LO + cc->key;
-                width = cc->width;
-                goto out;
-            } else {
-                /* No match - allocate a new chain below */
-            }
-
-            if (unlikely(term->composed_count >=
-                         (CELL_COMB_CHARS_HI - CELL_COMB_CHARS_LO)))
-            {
-                /* We reached our maximum number of allowed composed
-                 * character chains. Fall through here and print the
-                 * current zero-width character to the current cell */
-                LOG_WARN("maximum number of composed characters reached");
-                term_reset_grapheme_state(term);
-                goto out;
-            }
-
-            /* Allocate new chain */
-            struct composed *new_cc = xmalloc(sizeof(*new_cc));
-            new_cc->chars = xmalloc(wanted_count * sizeof(new_cc->chars[0]));
-            new_cc->key = key;
-            new_cc->count = wanted_count;
-            new_cc->chars[0] = base;
-            new_cc->chars[wanted_count - 1] = wc;
-            new_cc->forced_width = 0;
-
-            if (composed != NULL) {
-                memcpy(&new_cc->chars[1], &composed->chars[1],
-                       (wanted_count - 2) * sizeof(new_cc->chars[0]));
-            }
-
-            const int grapheme_width =
-                composed != NULL ? composed->width : base_width;
-
-            switch (term->conf->tweak.grapheme_width_method) {
-            case GRAPHEME_WIDTH_MAX:
-                new_cc->width = max(grapheme_width, width);
-                break;
-
-            case GRAPHEME_WIDTH_DOUBLE:
-                new_cc->width = min(grapheme_width + width, 2);
-
-#if defined(FOOT_GRAPHEME_CLUSTERING)
-                /* Handle VS-15 and VS-16 variation selectors */
-                if (unlikely(grapheme_clustering &&
-                             (wc == 0xfe0e || wc == 0xfe0f) &&
-                             new_cc->count == 2))
-                {
-                    const struct emoji_vs *vs =
-                        bsearch(
-                            &(struct emoji_vs){.start = new_cc->chars[0]},
-                            emoji_vs, sizeof(emoji_vs) / sizeof(emoji_vs[0]),
-                            sizeof(struct emoji_vs),
-                            &emoji_vs_compare);
-
-                    if (vs != NULL) {
-                        xassert(new_cc->chars[0] >= vs->start &&
-                                new_cc->chars[0] <= vs->end);
-
-                        /* Force a grapheme width of 1 for VS-15, and 2 for VS-16 */
-                        if (wc == 0xfe0e) {
-                            if (vs->vs15)
-                                new_cc->width = 1;
-                        } else if (wc == 0xfe0f) {
-                            if (vs->vs16)
-                                new_cc->width = 2;
-                        }
-                    }
-                }
-#endif
-
-                break;
-
-            case GRAPHEME_WIDTH_WCSWIDTH:
-                new_cc->width = grapheme_width + width;
-                break;
-            }
-
-            term->composed_count++;
-            composed_insert(&term->composed, new_cc);
-
-            wc = CELL_COMB_CHARS_LO + new_cc->key;
-            width = new_cc->width;
-
-            xassert(wc >= CELL_COMB_CHARS_LO);
-            xassert(wc <= CELL_COMB_CHARS_HI);
-            goto out;
-        }
-    } else
-        term_reset_grapheme_state(term);
-
-
-out:
-    if (width > 0)
-        term_print(term, wc, width, insert_mode_disable);
+    term_process_and_print_non_ascii(term, wc);
 }

 static void