Merge branch 'text-width-protocol'

2026-02-05 04:06:08 -05:00 · 2025-02-06 14:03:33 +01:00 · 2025-02-06 14:03:33 +01:00 · d84b0d4c6a
commit d84b0d4c6a
parent 88dcde3ed8 8d20b82721
10 changed files with 514 additions and 333 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@ -65,8 +65,9 @@
 * Support for the new Wayland protocol `xdg-system-bell-v1` protocol
  (added in wayland-protocols 1.38), via the new config option
  `bell.system=no|yes` (defaults to `yes`).
-* Added support for custom regex matching ([#1386][1386],
+* Support for custom regex matching ([#1386][1386],
  [#1872][1872])
+* Support for kitty's text-sizing protocol, OSC-66 (`w`/width only).

 [1386]: https://codeberg.org/dnkl/foot/issues/1386
 [1872]: https://codeberg.org/dnkl/foot/issues/1872
--- a/composed.c
+++ b/composed.c
@ -4,8 +4,54 @@
 #include <stdbool.h>

 #include "debug.h"
+#include "terminal.h"

-struct composed *
+/*
+ * Computes the composed-table key for a sequence of 'count'
+ * codepoints: the first codepoint seeds the key, and each following
+ * codepoint is folded in with composed_key_from_key(). An empty
+ * sequence yields key 0.
+ */
+uint32_t
+composed_key_from_chars(const uint32_t chars[], size_t count)
+{
+    if (count == 0)
+        return 0;
+
+    const uint32_t *const end = chars + count;
+    uint32_t hash = *chars;
+
+    for (const uint32_t *cp = chars + 1; cp != end; cp++)
+        hash = composed_key_from_key(hash, *cp);
+
+    return hash;
+}
+
+/*
+ * Extends a composed-table key with one more codepoint.
+ *
+ * The previous key is rotated by 8 bits, xor:ed with the new
+ * codepoint, multiplied with a hash constant, and finally masked with
+ * CELL_COMB_CHARS_HI - CELL_COMB_CHARS_LO so that the result stays
+ * within the range reserved for composed characters.
+ */
+uint32_t
+composed_key_from_key(uint32_t prev_key, uint32_t next_char)
+{
+    const uint32_t key_mask = CELL_COMB_CHARS_HI - CELL_COMB_CHARS_LO;
+    const unsigned key_bits = 32 - __builtin_clz(key_mask);
+
+    const uint32_t rotated =
+        (prev_key << 8) | (prev_key >> (key_bits - 8));
+    const uint32_t mixed = rotated ^ next_char;
+
+    /* The product is deliberately truncated back to 32 bits */
+    const uint32_t scrambled = mixed * 2654435761ul;
+
+    return scrambled & key_mask;
+}
+
+UNITTEST
+{
+    /*
+     * Building a key incrementally, one codepoint at a time, must
+     * give the same result as hashing the whole prefix in one go.
+     */
+    const char32_t text[] = U"abcdef";
+
+    uint32_t chained = composed_key_from_key(text[0], text[1]);
+    xassert(chained == composed_key_from_chars(text, 2));
+
+    chained = composed_key_from_key(chained, text[2]);
+    xassert(chained == composed_key_from_chars(text, 3));
+}
+
+const struct composed *
 composed_lookup(struct composed *root, uint32_t key)
 {
    struct composed *node = root;
@ -20,6 +66,41 @@ composed_lookup(struct composed *root, uint32_t key)
    return NULL;
 }

+/*
+ * Looks up the composed chain made up of the 'prefix_len' codepoints
+ * in 'prefix_text', followed by 'wc', and having the given
+ * 'forced_width'.
+ *
+ * '*key' is the caller's initial hash for the chain. Keys may
+ * collide; when the entry stored at '*key' is a different chain,
+ * '*key' is bumped (wrapping within the composed key range) and the
+ * lookup is retried.
+ *
+ * Returns the matching entry, with '*key' equal to its key. Returns
+ * NULL if there is no such chain:
+ *
+ *  - normally, '*key' is then the first unused key in the probe
+ *    sequence, i.e. the key a new entry should be inserted with.
+ *
+ *  - if every single key is occupied by other chains, '*key' has
+ *    wrapped back to its initial (occupied) value. Callers must
+ *    therefore verify the table is not full before inserting.
+ */
+const struct composed *
+composed_lookup_without_collision(struct composed *root, uint32_t *key,
+                                  const char32_t *prefix_text, size_t prefix_len,
+                                  char32_t wc, int forced_width)
+{
+    const uint32_t key_mask = CELL_COMB_CHARS_HI - CELL_COMB_CHARS_LO;
+
+    /*
+     * Visit each possible key at most once; an unbounded loop would
+     * never terminate if the composed table is completely full.
+     */
+    for (uint64_t probes = 0; probes <= key_mask; probes++) {
+        const struct composed *cc = composed_lookup(root, *key);
+        if (cc == NULL)
+            return NULL;
+
+        /* Cheap checks first; also guards the chars[prefix_len] access */
+        bool match = cc->count == prefix_len + 1 &&
+                     cc->forced_width == forced_width &&
+                     cc->chars[prefix_len] == wc;
+
+        for (size_t i = 0; match && i < prefix_len; i++)
+            match = cc->chars[i] == prefix_text[i];
+
+        if (match)
+            return cc;
+
+        /* Key collision: try the next key */
+        *key = (*key + 1) & key_mask;
+    }
+
+    return NULL;
+}
+
 void
 composed_insert(struct composed **root, struct composed *node)
 {
--- a/composed.h
+++ b/composed.h
@ -10,9 +10,16 @@ struct composed {
    uint32_t key;
    uint8_t count;
    uint8_t width;
+    uint8_t forced_width;
 };

-struct composed *composed_lookup(struct composed *root, uint32_t key);
+/*
+ * Key (hash) helpers. composed_key_from_chars() hashes a whole
+ * codepoint sequence; composed_key_from_key() extends an existing key
+ * with one more codepoint.
+ */
+uint32_t composed_key_from_chars(const uint32_t chars[], size_t count);
+uint32_t composed_key_from_key(uint32_t prev_key, uint32_t next_char);
+
+/*
+ * composed_lookup() returns the entry stored under 'key', if any.
+ *
+ * composed_lookup_without_collision() additionally verifies that the
+ * entry really is 'prefix' + 'wc' with the given 'forced_width',
+ * bumping '*key' and retrying for as long as the key is occupied by
+ * a different chain.
+ */
+const struct composed *composed_lookup(struct composed *root, uint32_t key);
+const struct composed *composed_lookup_without_collision(
+    struct composed *root, uint32_t *key,
+    const char32_t *prefix, size_t prefix_len, char32_t wc, int forced_width);
 void composed_insert(struct composed **root, struct composed *node);

 void composed_free(struct composed *root);
--- a/doc/foot-ctlseqs.7.scd
+++ b/doc/foot-ctlseqs.7.scd
@ -729,7 +729,10 @@ All _OSC_ sequences begin with *\\E]*, sometimes abbreviated _OSC_.
 :  Copy _Pd_ (base64 encoded text) to the clipboard. _Pc_ denotes the
   target: *c* targets the clipboard and *s* and *p* the primary
   selection.
-|  \\E] 99 ; _params_ ; _payload_ \\E\\
+|  \\E] 66 ; _params_ ; text \\E\\ 
+:  kitty
+:  Text sizing protocol (only 'w', width, supported)
+|  \\E] 99 ; _params_ ; _payload_ \\E\\ 
 :  kitty
 :  Desktop notification; uses *desktop-notifications.command* in
   *foot.ini*(5).
--- a/grid.c
+++ b/grid.c
@ -1052,16 +1052,16 @@ grid_resize_and_reflow(
                     */
                    while (
                        unlikely(
-                            amount > 1 &&
+                            amount > 0 &&
                            from + amount < old_cols &&
                            old_row->cells[from + amount].wc >= CELL_SPACER + 1))
                    {
+                        spacers = old_row->cells[from + amount].wc - CELL_SPACER + 1;
                        amount--;
-                        spacers++;
                    }

                    xassert(
-                        amount == 1 ||
+                        amount <= 1 ||
                        old_row->cells[from + amount - 1].wc <= CELL_SPACER + 1);
                }

@ -1084,11 +1084,9 @@ grid_resize_and_reflow(
                if (unlikely(spacers > 0)) {
                    xassert(new_col_idx + spacers == new_cols);

-                    const struct cell *cell = &old_row->cells[from - 1];
-
                    for (int i = 0; i < spacers; i++, new_col_idx++) {
                        new_row->cells[new_col_idx].wc = CELL_SPACER;
-                        new_row->cells[new_col_idx].attrs = cell->attrs;
+                        new_row->cells[new_col_idx].attrs = (struct attributes){0};
                    }
                }
            }
--- a/osc.c
+++ b/osc.c
@ -610,7 +610,6 @@ verify_kitty_id_is_valid(const char *id)
 }
 UNIGNORE_WARNINGS

-
 static void
 kitty_notification(struct terminal *term, char *string)
 {
@ -1135,6 +1134,134 @@ out:
    free(sound_name);
 }

+/*
+ * OSC-66: kitty's text-sizing protocol.
+ *
+ * 'string' is on the form "params;text", where params is a colon
+ * separated list of X=value pairs. Only 'w' (width, 0-7) is
+ * supported; 's', 'n', 'd' and 'v' are recognized but ignored, with a
+ * warning.
+ *
+ * With w=0 (or no 'w' at all), the text is printed exactly as if it
+ * had been received outside of the OSC. Otherwise, the whole text is
+ * stored as a single composed character, and printed to a cell that
+ * is 'w' columns wide.
+ *
+ * Note: 'string' is modified in place.
+ */
+static void
+kitty_text_size(struct terminal *term, char *string)
+{
+    char *text = strchr(string, ';');
+    if (text == NULL)
+        return;
+
+    char *parameters = string;
+    *text = '\0';
+    text++;
+
+    char32_t *wchars = ambstoc32(text);
+    if (wchars == NULL)
+        return;
+
+    int forced_width = 0;
+
+    char *ctx = NULL;
+    for (char *param = strtok_r(parameters, ":", &ctx);
+         param != NULL;
+         param = strtok_r(NULL, ":", &ctx))
+    {
+        /* All parameters are on the form X=value, where X is always
+           exactly one character */
+        if (param[0] == '\0' || param[1] != '=')
+            continue;
+
+        char *value = &param[2];
+
+        switch (param[0]) {
+        case 'w': {
+            errno = 0;
+            char *end = NULL;
+            unsigned long w = strtoul(value, &end, 10);
+
+            /* end == value means there were no digits at all ("w=") */
+            if (end != value && *end == '\0' && errno == 0 && w <= 7)
+                forced_width = (int)w;
+            else
+                LOG_ERR("OSC-66: invalid 'w' value, ignoring");
+            break;
+        }
+
+        case 's':
+        case 'n':
+        case 'd':
+        case 'v':
+            LOG_WARN("OSC-66: unsupported: '%c' parameter, ignoring", param[0]);
+            break;
+
+        default:
+            break;
+        }
+    }
+
+    size_t len = c32len(wchars);
+
+    if (len == 0) {
+        /* Nothing to print (and wchars[len - 1] below would be
+           out-of-bounds) */
+        free(wchars);
+        return;
+    }
+
+    if (forced_width == 0) {
+        /*
+         * w=0 means we split the text up as we'd normally do... Since
+         * we don't support any other parameters of the text-sizing
+         * protocol, that means we just process the string as if it
+         * has been printed without this OSC.
+         */
+        for (size_t i = 0; i < len; i++)
+            term_process_and_print_non_ascii(term, wchars[i]);
+        free(wchars);
+        return;
+    }
+
+    if (len > 255) {
+        /* struct composed stores its codepoint count in an uint8_t */
+        LOG_WARN("OSC-66: text too long (%zu codepoints), truncating to 255",
+                 len);
+        len = 255;
+        wchars[len] = 0;
+    }
+
+    /*
+     * The width we would have used without the OSC. Currently only
+     * used for debug logging, since the forced width always wins.
+     */
+    size_t max_cp_width = 0;
+    size_t all_cp_width = 0;
+
+    for (size_t i = 0; i < len; i++) {
+        /* c32width() is signed; ignore zero-width/unprintable codepoints */
+        const int cp_width = c32width(wchars[i]);
+        if (cp_width <= 0)
+            continue;
+
+        all_cp_width += cp_width;
+        max_cp_width = max(max_cp_width, (size_t)cp_width);
+    }
+
+    size_t calculated_width = 0;
+    switch (term->conf->tweak.grapheme_width_method) {
+    case GRAPHEME_WIDTH_WCSWIDTH: calculated_width = all_cp_width; break;
+    case GRAPHEME_WIDTH_MAX:      calculated_width = max_cp_width; break;
+    case GRAPHEME_WIDTH_DOUBLE:   calculated_width = min(max_cp_width, 2); break;
+    }
+
+    /* forced_width is always non-zero at this point */
+    const size_t width = forced_width;
+
+    LOG_DBG("len=%zu, forced=%d, calculated=%zu, using=%zu",
+            len, forced_width, calculated_width, width);
+
+    uint32_t key = composed_key_from_chars(wchars, len);
+
+    const struct composed *composed = composed_lookup_without_collision(
+        term->composed, &key, wchars, len - 1, wchars[len - 1], forced_width);
+
+    if (composed == NULL) {
+        if (unlikely(term->composed_count >=
+                     (CELL_COMB_CHARS_HI - CELL_COMB_CHARS_LO)))
+        {
+            /* Same limit as for regular grapheme clusters; always
+               leaves at least one key unused, which guarantees the
+               collision probing above terminates */
+            LOG_WARN("maximum number of composed characters reached");
+            free(wchars);
+            return;
+        }
+
+        /* The new entry takes ownership of wchars */
+        struct composed *new_cc = xmalloc(sizeof(*new_cc));
+        new_cc->chars = wchars;
+        new_cc->count = len;
+        new_cc->key = key;
+        new_cc->width = width;
+        new_cc->forced_width = forced_width;
+
+        term->composed_count++;
+        composed_insert(&term->composed, new_cc);
+        composed = new_cc;
+    } else {
+        /* Existing entry has its own copy of the text */
+        free(wchars);
+    }
+
+    term_print(
+        term, CELL_COMB_CHARS_LO + composed->key,
+        composed->forced_width > 0 ? composed->forced_width : composed->width,
+        false);
+}
+
 void
 osc_dispatch(struct terminal *term)
 {
@ -1371,6 +1498,10 @@ osc_dispatch(struct terminal *term)
        osc_selection(term, string);
        break;

+    case 66:  /* text-size protocol (kitty) */
+        kitty_text_size(term, string);
+        break;
+
    case 99:  /* Kitty notifications */
        kitty_notification(term, string);
        break;
--- a/render.c
+++ b/render.c
@ -869,11 +869,16 @@ render_cell(struct terminal *term, pixman_image_t *pix, pixman_region32_t *damag
            }

            if (grapheme != NULL) {
-                cell_cols = composed->width;
+                const int forced_width = composed->forced_width;
+
+                cell_cols = forced_width > 0 ? forced_width : composed->width;

                composed = NULL;
                glyphs = grapheme->glyphs;
                glyph_count = grapheme->count;
+
+                if (forced_width > 0)
+                    glyph_count = min(glyph_count, forced_width);
            }
        }

@ -890,7 +895,9 @@ render_cell(struct terminal *term, pixman_image_t *pix, pixman_region32_t *damag
                } else {
                    glyph_count = 1;
                    glyphs = &single;
-                    cell_cols = single->cols;
+
+                    const size_t forced_width = composed != NULL ? composed->forced_width : 0;
+                    cell_cols = forced_width > 0 ? forced_width : single->cols;
                }
            }
        }
@ -972,7 +979,7 @@ render_cell(struct terminal *term, pixman_image_t *pix, pixman_region32_t *damag
        int g_x = glyph->x;
        int g_y = glyph->y;

-        if (i > 0 && glyph->x >= 0)
+        if (i > 0 && glyph->x >= 0 && cell_cols == 1)
            g_x -= term->cell_width;

        if (unlikely(pixman_image_get_format(glyph->pix) == PIXMAN_a8r8g8b8)) {
@ -993,9 +1000,9 @@ render_cell(struct terminal *term, pixman_image_t *pix, pixman_region32_t *damag
            if (composed != NULL) {
                assert(glyph_count == 1);

-                for (size_t i = 1; i < composed->count; i++) {
+                for (size_t j = 1; j < composed->count; j++) {
                    const struct fcft_glyph *g = fcft_rasterize_char_utf32(
-                        font, composed->chars[i], term->font_subpixel);
+                        font, composed->chars[j], term->font_subpixel);

                    if (g == NULL)
                        continue;
@ -1017,22 +1024,26 @@ render_cell(struct terminal *term, pixman_image_t *pix, pixman_region32_t *damag
                     * somewhat deal with double-width glyphs we use
                     * an offset of *one* cell.
                     */
-                    int x_ofs = g->x < 0
-                        ? cell_cols * term->cell_width
-                        : (cell_cols - 1) * term->cell_width;
+                    int x_ofs = cell_cols == 1
+                        ? g->x < 0
+                            ? cell_cols * term->cell_width
+                            : (cell_cols - 1) * term->cell_width
+                        : 0;
+
+                    if (cell_cols > 1)
+                        pen_x += term->cell_width;

                    pixman_image_composite32(
                        PIXMAN_OP_OVER, clr_pix, g->pix, pix, 0, 0, 0, 0,
                        /* Some fonts use a negative offset, while others use a
                         * "normal" offset */
-                        pen_x + x_ofs + g->x,
-                        y + term->font_baseline - g->y,
-                        g->width, g->height);
+                        pen_x + letter_x_ofs + x_ofs + g->x,
+                        y + term->font_baseline - g->y, g->width, g->height);
                }
            }
        }

-        pen_x += glyph->advance.x;
+        pen_x += cell_cols > 1 ? term->cell_width : glyph->advance.x;
    }

    pixman_image_unref(clr_pix);
@ -4398,7 +4409,7 @@ render_resize(struct terminal *term, int width, int height, uint8_t opts)
    }

    /* Don't shrink grid too much */
-    const int min_cols = 2;
+    const int min_cols = 7;  /* See OSC-66 */
    const int min_rows = 1;

    /* Minimum window size (must be divisible by the scaling factor)*/
--- a/terminal.c
+++ b/terminal.c
@ -27,6 +27,7 @@
 #include "commands.h"
 #include "config.h"
 #include "debug.h"
+#include "emoji-variation-sequences.h"
 #include "extract.h"
 #include "grid.h"
 #include "ime.h"
@ -3826,7 +3827,7 @@ print_spacer(struct terminal *term, int col, int remaining)
    struct cell *cell = &row->cells[col];

    cell->wc = CELL_SPACER + remaining;
-    cell->attrs = term->vt.attrs;
+    cell->attrs = (struct attributes){0};
 }

 /*
@ -4073,6 +4074,260 @@ term_single_shift(struct terminal *term, enum charset_designator idx)
    term->ascii_printer = &ascii_printer_single_shift;
 }

+#if defined(FOOT_GRAPHEME_CLUSTERING)
+/*
+ * bsearch() comparator for the emoji_vs table. The key is a struct
+ * emoji_vs where only 'start' is significant: it holds the codepoint
+ * being searched for. An entry matches when the codepoint lies within
+ * its [start, end] range.
+ */
+static int
+emoji_vs_compare(const void *_key, const void *_entry)
+{
+    const struct emoji_vs *needle = _key;
+    const struct emoji_vs *range = _entry;
+    const uint32_t codepoint = needle->start;
+
+    if (codepoint > range->end)
+        return 1;
+
+    return codepoint < range->start ? -1 : 0;
+}
+
+UNITTEST
+{
+    /*
+     * emoji_vs is searched with bsearch(), which requires the ranges
+     * to be well-formed, strictly increasing and non-overlapping.
+     * Every entry must also apply to at least one of VS-15 and VS-16.
+     */
+    const size_t entry_count = sizeof(emoji_vs) / sizeof(emoji_vs[0]);
+    int64_t prev_end = -1;
+
+    for (const struct emoji_vs *entry = emoji_vs;
+         entry < emoji_vs + entry_count;
+         entry++)
+    {
+        xassert(entry->start <= entry->end);
+        xassert(entry->start > prev_end);
+        xassert(entry->vs15 || entry->vs16);
+        prev_end = entry->end;
+    }
+}
+#endif
+
+/*
+ * Prints a single non-ASCII codepoint at the cursor, combining it
+ * with the preceding cell when applicable.
+ *
+ * If grapheme clustering is enabled, or if 'wc' is a zero-width
+ * combining character, 'wc' is appended to the character (or
+ * cluster) in the preceding non-spacer cell:
+ *
+ *  - if the font has a suitable pre-composed glyph, that codepoint
+ *    replaces the base character.
+ *  - otherwise the chain is looked up in, or added to, the table of
+ *    composed characters, and the cell is re-printed with the
+ *    chain's key.
+ *
+ * A chain built on top of a cluster with a forced width (OSC-66)
+ * inherits that forced width.
+ *
+ * In all other cases 'wc' is printed as is, provided its width is
+ * greater than zero.
+ */
+void
+term_process_and_print_non_ascii(struct terminal *term, char32_t wc)
+{
+    int width = c32width(wc);
+    bool insert_mode_disable = false;
+    const bool grapheme_clustering = term->grapheme_shaping;
+
+#if !defined(FOOT_GRAPHEME_CLUSTERING)
+    xassert(!grapheme_clustering);
+#endif
+
+    if (term->grid->cursor.point.col > 0 &&
+        (grapheme_clustering || (width == 0 && wc >= 0x300)))
+    {
+        int col = term->grid->cursor.point.col;
+        if (!term->grid->cursor.lcf)
+            col--;
+
+        /* Skip past spacers */
+        struct row *row = term->grid->cur_row;
+        while (row->cells[col].wc >= CELL_SPACER && col > 0)
+            col--;
+
+        xassert(col >= 0 && col < term->cols);
+        char32_t base = row->cells[col].wc;
+        char32_t UNUSED last = base;
+
+        /* Is base cell already a cluster? */
+        const struct composed *composed =
+            (base >= CELL_COMB_CHARS_LO && base <= CELL_COMB_CHARS_HI)
+            ? composed_lookup(term->composed, base - CELL_COMB_CHARS_LO)
+            : NULL;
+
+        uint32_t key;
+
+        if (composed != NULL) {
+            base = composed->chars[0];
+            last = composed->chars[composed->count - 1];
+            key = composed_key_from_key(composed->key, wc);
+        } else
+            key = composed_key_from_key(base, wc);
+
+#if defined(FOOT_GRAPHEME_CLUSTERING)
+        if (grapheme_clustering) {
+            /* Check if we're on a grapheme cluster break */
+            if (utf8proc_grapheme_break_stateful(
+                    last, wc, &term->vt.grapheme_state))
+            {
+                term_reset_grapheme_state(term);
+                goto out;
+            }
+        }
+#endif
+
+        int base_width = c32width(base);
+        if (base_width > 0) {
+            term->grid->cursor.point.col = col;
+            term->grid->cursor.lcf = false;
+            insert_mode_disable = true;
+
+            if (composed == NULL) {
+                bool base_from_primary;
+                bool comb_from_primary;
+                bool pre_from_primary;
+
+                char32_t precomposed = term->fonts[0] != NULL
+                    ? fcft_precompose(
+                        term->fonts[0], base, wc, &base_from_primary,
+                        &comb_from_primary, &pre_from_primary)
+                    : (char32_t)-1;
+
+                int precomposed_width = c32width(precomposed);
+
+                /*
+                 * Only use the pre-composed character if:
+                 *
+                 *  1. we *have* a pre-composed character
+                 *  2. the width matches the base characters width
+                 *  3. it's in the primary font, OR one of the base or
+                 *     combining characters are *not* from the primary
+                 *     font
+                 */
+
+                if (precomposed != (char32_t)-1 &&
+                    precomposed_width == base_width &&
+                    (pre_from_primary ||
+                     !base_from_primary ||
+                     !comb_from_primary))
+                {
+                    wc = precomposed;
+                    width = precomposed_width;
+                    term_reset_grapheme_state(term);
+                    goto out;
+                }
+            }
+
+            size_t wanted_count = composed != NULL ? composed->count + 1 : 2;
+            if (wanted_count > 255) {
+                xassert(composed != NULL);
+
+#if defined(LOG_ENABLE_DBG) && LOG_ENABLE_DBG
+                LOG_WARN("combining character overflow:");
+                LOG_WARN("  base: 0x%04x", composed->chars[0]);
+                for (size_t i = 1; i < composed->count; i++)
+                    LOG_WARN("    cc: 0x%04x", composed->chars[i]);
+                LOG_ERR("   new: 0x%04x", wc);
+#endif
+                /* This is going to break anyway... */
+                wanted_count--;
+            }
+
+            xassert(wanted_count <= 255);
+
+            /* A chain extending a forced-width cluster keeps that
+             * forced width */
+            const int forced_width =
+                composed != NULL ? composed->forced_width : 0;
+
+            /*
+             * Check if we already have a match for the entire compose
+             * chain.
+             *
+             * The prefix is everything but the last codepoint of the
+             * chain we'd otherwise allocate below (on overflow, this
+             * is one less than the existing chain's length), and the
+             * forced width must be the one the new chain would get -
+             * or we would never find chains we've added earlier, and
+             * instead allocate a duplicate each time.
+             */
+            const struct composed *cc =
+                composed_lookup_without_collision(
+                    term->composed, &key,
+                    composed != NULL ? composed->chars : &(char32_t){base},
+                    wanted_count - 1,
+                    wc, forced_width);
+
+            if (cc != NULL) {
+                /* We *do* have a match! */
+                wc = CELL_COMB_CHARS_LO + cc->key;
+                width = cc->forced_width > 0 ? cc->forced_width : cc->width;
+                goto out;
+            }
+
+            /* No match - allocate a new chain below */
+
+            if (unlikely(term->composed_count >=
+                         (CELL_COMB_CHARS_HI - CELL_COMB_CHARS_LO)))
+            {
+                /* We reached our maximum number of allowed composed
+                 * character chains. Fall through here and print the
+                 * current zero-width character to the current cell */
+                LOG_WARN("maximum number of composed characters reached");
+                term_reset_grapheme_state(term);
+                goto out;
+            }
+
+            /* Allocate new chain */
+            struct composed *new_cc = xmalloc(sizeof(*new_cc));
+            new_cc->chars = xmalloc(wanted_count * sizeof(new_cc->chars[0]));
+            new_cc->key = key;
+            new_cc->count = wanted_count;
+            new_cc->chars[0] = base;
+            new_cc->chars[wanted_count - 1] = wc;
+            new_cc->forced_width = forced_width;
+
+            if (composed != NULL) {
+                memcpy(&new_cc->chars[1], &composed->chars[1],
+                       (wanted_count - 2) * sizeof(new_cc->chars[0]));
+            }
+
+            const int grapheme_width =
+                composed != NULL ? composed->width : base_width;
+
+            switch (term->conf->tweak.grapheme_width_method) {
+            case GRAPHEME_WIDTH_MAX:
+                new_cc->width = max(grapheme_width, width);
+                break;
+
+            case GRAPHEME_WIDTH_DOUBLE:
+                new_cc->width = min(grapheme_width + width, 2);
+
+#if defined(FOOT_GRAPHEME_CLUSTERING)
+                /* Handle VS-15 and VS-16 variation selectors */
+                if (unlikely(grapheme_clustering &&
+                             (wc == 0xfe0e || wc == 0xfe0f) &&
+                             new_cc->count == 2))
+                {
+                    const struct emoji_vs *vs =
+                        bsearch(
+                            &(struct emoji_vs){.start = new_cc->chars[0]},
+                            emoji_vs, sizeof(emoji_vs) / sizeof(emoji_vs[0]),
+                            sizeof(struct emoji_vs),
+                            &emoji_vs_compare);
+
+                    if (vs != NULL) {
+                        xassert(new_cc->chars[0] >= vs->start &&
+                                new_cc->chars[0] <= vs->end);
+
+                        /* Force a grapheme width of 1 for VS-15, and 2 for VS-16 */
+                        if (wc == 0xfe0e) {
+                            if (vs->vs15)
+                                new_cc->width = 1;
+                        } else if (wc == 0xfe0f) {
+                            if (vs->vs16)
+                                new_cc->width = 2;
+                        }
+                    }
+                }
+#endif
+
+                break;
+
+            case GRAPHEME_WIDTH_WCSWIDTH:
+                new_cc->width = grapheme_width + width;
+                break;
+            }
+
+            term->composed_count++;
+            composed_insert(&term->composed, new_cc);
+
+            wc = CELL_COMB_CHARS_LO + new_cc->key;
+            width = new_cc->forced_width > 0 ? new_cc->forced_width : new_cc->width;
+
+            xassert(wc >= CELL_COMB_CHARS_LO);
+            xassert(wc <= CELL_COMB_CHARS_HI);
+            goto out;
+        }
+    } else
+        term_reset_grapheme_state(term);
+
+
+out:
+    if (width > 0)
+        term_print(term, wc, width, insert_mode_disable);
+}
+
 enum term_surface
 term_surface_kind(const struct terminal *term, const struct wl_surface *surface)
 {
--- a/terminal.h
+++ b/terminal.h
@ -894,6 +894,7 @@ void term_cursor_up(struct terminal *term, int count);
 void term_cursor_down(struct terminal *term, int count);
 void term_cursor_blink_update(struct terminal *term);

+void term_process_and_print_non_ascii(struct terminal *term, char32_t wc);
 void term_print(struct terminal *term, char32_t wc, int width,
                bool insert_mode_disable);
 void term_fill(struct terminal *term, int row, int col, uint8_t c, size_t count,
--- a/vt.c
+++ b/vt.c
@ -16,7 +16,6 @@
 #include "csi.h"
 #include "dcs.h"
 #include "debug.h"
-#include "emoji-variation-sequences.h"
 #include "osc.h"
 #include "sixel.h"
 #include "util.h"
@ -647,316 +646,10 @@ action_put(struct terminal *term, uint8_t c)
    dcs_put(term, c);
 }

-static inline uint32_t
-chain_key(uint32_t old_key, uint32_t new_wc)
-{
-    unsigned bits = 32 - __builtin_clz(CELL_COMB_CHARS_HI - CELL_COMB_CHARS_LO);
-
-    /* Rotate old key 8 bits */
-    uint32_t new_key = (old_key << 8) | (old_key >> (bits - 8));
-
-    /* xor with new char */
-    new_key ^= new_wc;
-
-    /* Multiply with magic hash constant */
-    new_key *= 2654435761ul;
-
-    /* And mask, to ensure the new value is within range */
-    new_key &= CELL_COMB_CHARS_HI - CELL_COMB_CHARS_LO;
-
-    return new_key;
-}
-
-#if defined(FOOT_GRAPHEME_CLUSTERING)
-static int
-emoji_vs_compare(const void *_key, const void *_entry)
-{
-    const struct emoji_vs *key = _key;
-    const struct emoji_vs *entry = _entry;
-
-    uint32_t cp = key->start;
-
-    if (cp < entry->start)
-        return -1;
-    else if (cp > entry->end)
-        return 1;
-    else
-        return 0;
-}
-
-UNITTEST
-{
-    /* Verify the emoji_vs list is sorted */
-    int64_t last_end = -1;
-
-    for (size_t i = 0; i < sizeof(emoji_vs) / sizeof(emoji_vs[0]); i++) {
-        const struct emoji_vs *vs = &emoji_vs[i];
-        xassert(vs->start <= vs->end);
-        xassert(vs->start > last_end);
-        xassert(vs->vs15 || vs->vs16);
-        last_end = vs->end;
-    }
-}
-#endif
-
 static void
 action_utf8_print(struct terminal *term, char32_t wc)
 {
-    int width = c32width(wc);
-    bool insert_mode_disable = false;
-    const bool grapheme_clustering = term->grapheme_shaping;
-
-#if !defined(FOOT_GRAPHEME_CLUSTERING)
-    xassert(!grapheme_clustering);
-#endif
-
-    if (term->grid->cursor.point.col > 0 &&
-        (grapheme_clustering ||
-         (!grapheme_clustering && width == 0 && wc >= 0x300)))
-    {
-        int col = term->grid->cursor.point.col;
-        if (!term->grid->cursor.lcf)
-            col--;
-
-        /* Skip past spacers */
-        struct row *row = term->grid->cur_row;
-        while (row->cells[col].wc >= CELL_SPACER && col > 0)
-            col--;
-
-        xassert(col >= 0 && col < term->cols);
-        char32_t base = row->cells[col].wc;
-        char32_t UNUSED last = base;
-
-        /* Is base cell already a cluster? */
-        const struct composed *composed =
-            (base >= CELL_COMB_CHARS_LO && base <= CELL_COMB_CHARS_HI)
-            ? composed_lookup(term->composed, base - CELL_COMB_CHARS_LO)
-            : NULL;
-
-        uint32_t key;
-
-        if (composed != NULL) {
-            base = composed->chars[0];
-            last = composed->chars[composed->count - 1];
-            key = chain_key(composed->key, wc);
-        } else
-            key = chain_key(base, wc);
-
-#if defined(FOOT_GRAPHEME_CLUSTERING)
-        if (grapheme_clustering) {
-            /* Check if we're on a grapheme cluster break */
-            if (utf8proc_grapheme_break_stateful(
-                    last, wc, &term->vt.grapheme_state))
-            {
-                term_reset_grapheme_state(term);
-                goto out;
-            }
-        }
-#endif
-
-        int base_width = c32width(base);
-        if (base_width > 0) {
-            term->grid->cursor.point.col = col;
-            term->grid->cursor.lcf = false;
-            insert_mode_disable = true;
-
-            if (composed == NULL) {
-                bool base_from_primary;
-                bool comb_from_primary;
-                bool pre_from_primary;
-
-                char32_t precomposed = term->fonts[0] != NULL
-                    ? fcft_precompose(
-                        term->fonts[0], base, wc, &base_from_primary,
-                        &comb_from_primary, &pre_from_primary)
-                    : (char32_t)-1;
-
-                int precomposed_width = c32width(precomposed);
-
-                /*
-                 * Only use the pre-composed character if:
-                 *
-                 *  1. we *have* a pre-composed character
-                 *  2. the width matches the base characters width
-                 *  3. it's in the primary font, OR one of the base or
-                 *     combining characters are *not* from the primary
-                 *     font
-                 */
-
-                if (precomposed != (char32_t)-1 &&
-                    precomposed_width == base_width &&
-                    (pre_from_primary ||
-                     !base_from_primary ||
-                     !comb_from_primary))
-                {
-                    wc = precomposed;
-                    width = precomposed_width;
-                    term_reset_grapheme_state(term);
-                    goto out;
-                }
-            }
-
-            size_t wanted_count = composed != NULL ? composed->count + 1 : 2;
-            if (wanted_count > 255) {
-                xassert(composed != NULL);
-
-#if defined(LOG_ENABLE_DBG) && LOG_ENABLE_DBG
-                LOG_WARN("combining character overflow:");
-                LOG_WARN("  base: 0x%04x", composed->chars[0]);
-                for (size_t i = 1; i < composed->count; i++)
-                    LOG_WARN("    cc: 0x%04x", composed->chars[i]);
-                LOG_ERR("   new: 0x%04x", wc);
-#endif
-                /* This is going to break anyway... */
-                wanted_count--;
-            }
-
-            xassert(wanted_count <= 255);
-
-            size_t collision_count = 0;
-
-            /* Look for existing combining chain */
-            while (true) {
-                if (unlikely(collision_count > 128)) {
-                    static bool have_warned = false;
-                    if (!have_warned) {
-                        have_warned = true;
-                        LOG_WARN("ignoring composed character: "
-                                 "too many collisions in hash table");
-                    }
-                    return;
-                }
-
-                const struct composed *cc = composed_lookup(term->composed, key);
-                if (cc == NULL)
-                    break;
-
-                /*
-                 * We may have a key collisison, so need to check that
-                 * it's a true match. If not, bump the key and try
-                 * again.
-                 */
-
-                xassert(key == cc->key);
-                if (cc->chars[0] != base ||
-                    cc->count != wanted_count ||
-                    cc->chars[wanted_count - 1] != wc)
-                {
-#if 0
-                    LOG_WARN("COLLISION: base: %04x/%04x, count: %d/%zu, last: %04x/%04x",
-                             cc->chars[0], base, cc->count, wanted_count, cc->chars[wanted_count - 1], wc);
-#endif
-                    key++;
-                    key &= CELL_COMB_CHARS_HI - CELL_COMB_CHARS_LO;
-                    collision_count++;
-                    continue;
-                }
-
-                bool match = composed != NULL
-                    ? memcmp(&cc->chars[1], &composed->chars[1],
-                             (wanted_count - 2) * sizeof(cc->chars[0])) == 0
-                    : true;
-
-                if (!match) {
-                    key++;
-                    key &= CELL_COMB_CHARS_HI - CELL_COMB_CHARS_LO;
-                    collision_count++;
-                    continue;
-                }
-
-                wc = CELL_COMB_CHARS_LO + cc->key;
-                width = cc->width;
-                goto out;
-            }
-
-            if (unlikely(term->composed_count >=
-                         (CELL_COMB_CHARS_HI - CELL_COMB_CHARS_LO)))
-            {
-                /* We reached our maximum number of allowed composed
-                 * character chains. Fall through here and print the
-                 * current zero-width character to the current cell */
-                LOG_WARN("maximum number of composed characters reached");
-                term_reset_grapheme_state(term);
-                goto out;
-            }
-
-            /* Allocate new chain */
-            struct composed *new_cc = xmalloc(sizeof(*new_cc));
-            new_cc->chars = xmalloc(wanted_count * sizeof(new_cc->chars[0]));
-            new_cc->key = key;
-            new_cc->count = wanted_count;
-            new_cc->chars[0] = base;
-            new_cc->chars[wanted_count - 1] = wc;
-
-            if (composed != NULL) {
-                memcpy(&new_cc->chars[1], &composed->chars[1],
-                       (wanted_count - 2) * sizeof(new_cc->chars[0]));
-            }
-
-            const int grapheme_width =
-                composed != NULL ? composed->width : base_width;
-
-            switch (term->conf->tweak.grapheme_width_method) {
-            case GRAPHEME_WIDTH_MAX:
-                new_cc->width = max(grapheme_width, width);
-                break;
-
-            case GRAPHEME_WIDTH_DOUBLE:
-                new_cc->width = min(grapheme_width + width, 2);
-
-#if defined(FOOT_GRAPHEME_CLUSTERING)
-                /* Handle VS-15 and VS-16 variation selectors */
-                if (unlikely(grapheme_clustering &&
-                             (wc == 0xfe0e || wc == 0xfe0f) &&
-                             new_cc->count == 2))
-                {
-                    const struct emoji_vs *vs =
-                        bsearch(
-                            &(struct emoji_vs){.start = new_cc->chars[0]},
-                            emoji_vs, sizeof(emoji_vs) / sizeof(emoji_vs[0]),
-                            sizeof(struct emoji_vs),
-                            &emoji_vs_compare);
-
-                    if (vs != NULL) {
-                        xassert(new_cc->chars[0] >= vs->start &&
-                                new_cc->chars[0] <= vs->end);
-
-                        /* Force a grapheme width of 1 for VS-15, and 2 for VS-16 */
-                        if (wc == 0xfe0e) {
-                            if (vs->vs15)
-                                new_cc->width = 1;
-                        } else if (wc == 0xfe0f) {
-                            if (vs->vs16)
-                                new_cc->width = 2;
-                        }
-                    }
-                }
-#endif
-
-                break;
-
-            case GRAPHEME_WIDTH_WCSWIDTH:
-                new_cc->width = grapheme_width + width;
-                break;
-            }
-
-            term->composed_count++;
-            composed_insert(&term->composed, new_cc);
-
-            wc = CELL_COMB_CHARS_LO + key;
-            width = new_cc->width;
-
-            xassert(wc >= CELL_COMB_CHARS_LO);
-            xassert(wc <= CELL_COMB_CHARS_HI);
-            goto out;
-        }
-    } else
-        term_reset_grapheme_state(term);
-
-
-out:
-    if (width > 0)
-        term_print(term, wc, width, insert_mode_disable);
+    term_process_and_print_non_ascii(term, wc);
 }

 static void