From 1181f74d19f6f9e881b539ed9fdb8cc17d03f7bf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Ekl=C3=B6f?= Date: Fri, 24 Jan 2025 09:52:57 +0100 Subject: [PATCH 01/13] composed: re-factor: break out key calculation from vt.c --- composed.c | 46 ++++++++++++++++++++++++++++++++++++++++++++++ composed.h | 3 +++ vt.c | 24 ++---------------------- 3 files changed, 51 insertions(+), 22 deletions(-) diff --git a/composed.c b/composed.c index 442325ea..7a36275e 100644 --- a/composed.c +++ b/composed.c @@ -4,6 +4,52 @@ #include #include "debug.h" +#include "terminal.h" + +uint32_t +composed_key_from_chars(const uint32_t chars[], size_t count) +{ + if (count == 0) + return 0; + + uint32_t key = chars[0]; + for (size_t i = 1; i < count; i++) + key = composed_key_from_key(key, chars[i]); + + return key; +} + +uint32_t +composed_key_from_key(uint32_t prev_key, uint32_t next_char) +{ + unsigned bits = 32 - __builtin_clz(CELL_COMB_CHARS_HI - CELL_COMB_CHARS_LO); + + /* Rotate old key 8 bits */ + uint32_t new_key = (prev_key << 8) | (prev_key >> (bits - 8)); + + /* xor with new char */ + new_key ^= next_char; + + /* Multiply with magic hash constant */ + new_key *= 2654435761ul; + + /* And mask, to ensure the new value is within range */ + new_key &= CELL_COMB_CHARS_HI - CELL_COMB_CHARS_LO; + return new_key; +} + +UNITTEST +{ + const char32_t chars[] = U"abcdef"; + + uint32_t k1 = composed_key_from_key(chars[0], chars[1]); + uint32_t k2 = composed_key_from_chars(chars, 2); + xassert(k1 == k2); + + uint32_t k3 = composed_key_from_key(k2, chars[2]); + uint32_t k4 = composed_key_from_chars(chars, 3); + xassert(k3 == k4); +} struct composed * composed_lookup(struct composed *root, uint32_t key) diff --git a/composed.h b/composed.h index 17158407..fcaf87d4 100644 --- a/composed.h +++ b/composed.h @@ -12,6 +12,9 @@ struct composed { uint8_t width; }; +uint32_t composed_key_from_chars(const uint32_t chars[], size_t count); +uint32_t composed_key_from_key(uint32_t prev_key, uint32_t 
next_char); + struct composed *composed_lookup(struct composed *root, uint32_t key); void composed_insert(struct composed **root, struct composed *node); diff --git a/vt.c b/vt.c index bd1cf4ca..8f5d27d9 100644 --- a/vt.c +++ b/vt.c @@ -647,26 +647,6 @@ action_put(struct terminal *term, uint8_t c) dcs_put(term, c); } -static inline uint32_t -chain_key(uint32_t old_key, uint32_t new_wc) -{ - unsigned bits = 32 - __builtin_clz(CELL_COMB_CHARS_HI - CELL_COMB_CHARS_LO); - - /* Rotate old key 8 bits */ - uint32_t new_key = (old_key << 8) | (old_key >> (bits - 8)); - - /* xor with new char */ - new_key ^= new_wc; - - /* Multiply with magic hash constant */ - new_key *= 2654435761ul; - - /* And mask, to ensure the new value is within range */ - new_key &= CELL_COMB_CHARS_HI - CELL_COMB_CHARS_LO; - - return new_key; -} - #if defined(FOOT_GRAPHEME_CLUSTERING) static int emoji_vs_compare(const void *_key, const void *_entry) @@ -738,9 +718,9 @@ action_utf8_print(struct terminal *term, char32_t wc) if (composed != NULL) { base = composed->chars[0]; last = composed->chars[composed->count - 1]; - key = chain_key(composed->key, wc); + key = composed_key_from_key(composed->key, wc); } else - key = chain_key(base, wc); + key = composed_key_from_key(base, wc); #if defined(FOOT_GRAPHEME_CLUSTERING) if (grapheme_clustering) { From e248e73753d61bfd24f1af8e824231434db63c53 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Ekl=C3=B6f?= Date: Fri, 24 Jan 2025 14:15:01 +0100 Subject: [PATCH 02/13] composed: refactor: break out lookup with collision detection --- composed.c | 37 ++++++++++++++++++++++++++++++- composed.h | 6 ++++- vt.c | 64 +++++++++++------------------------------------------- 3 files changed, 54 insertions(+), 53 deletions(-) diff --git a/composed.c b/composed.c index 7a36275e..2d9ed47d 100644 --- a/composed.c +++ b/composed.c @@ -51,7 +51,7 @@ UNITTEST xassert(k3 == k4); } -struct composed * +const struct composed * composed_lookup(struct composed *root, uint32_t key) { 
struct composed *node = root; @@ -66,6 +66,41 @@ composed_lookup(struct composed *root, uint32_t key) return NULL; } +const struct composed * +composed_lookup_without_collision(struct composed *root, uint32_t *key, + const char32_t *prefix_text, size_t prefix_len, + char32_t wc, int forced_width) +{ + while (true) { + const struct composed *cc = composed_lookup(root, *key); + if (cc == NULL) + return NULL; + + bool match = cc->count == prefix_len + 1 && + cc->forced_width == forced_width && + cc->chars[prefix_len] == wc; + + if (match) { + for (size_t i = 0; i < prefix_len; i++) { + if (cc->chars[i] != prefix_text[i]) { + match = false; + break; + } + } + } + + if (match) + return cc; + + (*key)++; + *key &= CELL_COMB_CHARS_HI - CELL_COMB_CHARS_LO; + + /* TODO: this will loop infinitly if the composed table is full */ + } + + return NULL; +} + void composed_insert(struct composed **root, struct composed *node) { diff --git a/composed.h b/composed.h index fcaf87d4..18afb146 100644 --- a/composed.h +++ b/composed.h @@ -10,12 +10,16 @@ struct composed { uint32_t key; uint8_t count; uint8_t width; + uint8_t forced_width; }; uint32_t composed_key_from_chars(const uint32_t chars[], size_t count); uint32_t composed_key_from_key(uint32_t prev_key, uint32_t next_char); -struct composed *composed_lookup(struct composed *root, uint32_t key); +const struct composed *composed_lookup(struct composed *root, uint32_t key); +const struct composed *composed_lookup_without_collision( + struct composed *root, uint32_t *key, + const char32_t *prefix, size_t prefix_len, char32_t wc, int forced_width); void composed_insert(struct composed **root, struct composed *node); void composed_free(struct composed *root); diff --git a/vt.c b/vt.c index 8f5d27d9..5447493a 100644 --- a/vt.c +++ b/vt.c @@ -793,60 +793,21 @@ action_utf8_print(struct terminal *term, char32_t wc) xassert(wanted_count <= 255); - size_t collision_count = 0; - - /* Look for existing combining chain */ - while (true) { - if 
(unlikely(collision_count > 128)) { - static bool have_warned = false; - if (!have_warned) { - have_warned = true; - LOG_WARN("ignoring composed character: " - "too many collisions in hash table"); - } - return; - } - - const struct composed *cc = composed_lookup(term->composed, key); - if (cc == NULL) - break; - - /* - * We may have a key collisison, so need to check that - * it's a true match. If not, bump the key and try - * again. - */ - - xassert(key == cc->key); - if (cc->chars[0] != base || - cc->count != wanted_count || - cc->chars[wanted_count - 1] != wc) - { -#if 0 - LOG_WARN("COLLISION: base: %04x/%04x, count: %d/%zu, last: %04x/%04x", - cc->chars[0], base, cc->count, wanted_count, cc->chars[wanted_count - 1], wc); -#endif - key++; - key &= CELL_COMB_CHARS_HI - CELL_COMB_CHARS_LO; - collision_count++; - continue; - } - - bool match = composed != NULL - ? memcmp(&cc->chars[1], &composed->chars[1], - (wanted_count - 2) * sizeof(cc->chars[0])) == 0 - : true; - - if (!match) { - key++; - key &= CELL_COMB_CHARS_HI - CELL_COMB_CHARS_LO; - collision_count++; - continue; - } + /* Check if we already have a match for the entire compose chain */ + const struct composed *cc = + composed_lookup_without_collision( + term->composed, &key, + composed != NULL ? composed->chars : &(char32_t){base}, + composed != NULL ? composed->count : 1, + wc, 0); + if (cc != NULL) { + /* We *do* have a match! 
*/ wc = CELL_COMB_CHARS_LO + cc->key; width = cc->width; goto out; + } else { + /* No match - allocate a new chain below */ } if (unlikely(term->composed_count >= @@ -867,6 +828,7 @@ action_utf8_print(struct terminal *term, char32_t wc) new_cc->count = wanted_count; new_cc->chars[0] = base; new_cc->chars[wanted_count - 1] = wc; + new_cc->forced_width = 0; if (composed != NULL) { memcpy(&new_cc->chars[1], &composed->chars[1], @@ -923,7 +885,7 @@ action_utf8_print(struct terminal *term, char32_t wc) term->composed_count++; composed_insert(&term->composed, new_cc); - wc = CELL_COMB_CHARS_LO + key; + wc = CELL_COMB_CHARS_LO + new_cc->key; width = new_cc->width; xassert(wc >= CELL_COMB_CHARS_LO); From 1111f7e918a3b41512d11023ef5bf9585fa30eb4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Ekl=C3=B6f?= Date: Sat, 25 Jan 2025 14:06:30 +0100 Subject: [PATCH 03/13] grid: reflow: handle composed characters longer than 2 cells The logic that tries to ensure we don't break a line in the middle of a multi-cell character was flawed when the number of cells was larger than 2. In particular, if the number of cells to copy was limited by the number of cells left on the current (new) line, and was less than the length of the multi-cell character, then we failed to insert the correct number of spacers, and also ended up misplacing the multi-cell character; instead of pushing it to the next line, it was inserted on the current line, even though it doesn't fit. Also change how trailing SPACER cells are rendered (cells that are "fillers" at the end of a line, when a multi-column character was pushed over to the next line): don't copy the previous cell's attributes (which may be wrong anyway), use default attributes instead. 
--- grid.c | 8 +++----- terminal.c | 2 +- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/grid.c b/grid.c index b7c0447c..eb24869d 100644 --- a/grid.c +++ b/grid.c @@ -1052,7 +1052,7 @@ grid_resize_and_reflow( */ while ( unlikely( - amount > 1 && + amount > 0 && from + amount < old_cols && old_row->cells[from + amount].wc >= CELL_SPACER + 1)) { @@ -1061,7 +1061,7 @@ grid_resize_and_reflow( } xassert( - amount == 1 || + amount <= 1 || old_row->cells[from + amount - 1].wc <= CELL_SPACER + 1); } @@ -1084,11 +1084,9 @@ grid_resize_and_reflow( if (unlikely(spacers > 0)) { xassert(new_col_idx + spacers == new_cols); - const struct cell *cell = &old_row->cells[from - 1]; - for (int i = 0; i < spacers; i++, new_col_idx++) { new_row->cells[new_col_idx].wc = CELL_SPACER; - new_row->cells[new_col_idx].attrs = cell->attrs; + new_row->cells[new_col_idx].attrs = (struct attributes){0}; } } } diff --git a/terminal.c b/terminal.c index b88a794e..bf70a37e 100644 --- a/terminal.c +++ b/terminal.c @@ -3826,7 +3826,7 @@ print_spacer(struct terminal *term, int col, int remaining) struct cell *cell = &row->cells[col]; cell->wc = CELL_SPACER + remaining; - cell->attrs = term->vt.attrs; + cell->attrs = (struct attributes){0}; } /* From 7a8d2b5e012636def9545075e01cdf9a6f309355 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Ekl=C3=B6f?= Date: Sat, 25 Jan 2025 14:09:35 +0100 Subject: [PATCH 04/13] osc: wip: kitty text size protocol This brings initial support for the new kitty text-sizing protocol. Note that only the width-parameter ('w') is supported. That is, no font scaling, and no multi-line cells. For now, only explicit widths are supported. That is, w=0 does not yet work. There are a couple of changes to the renderer, to handle e.g. OSC 66 ; w=6 ; foobar ST There are two ways this can get rendered, depending on whether grapheme shaping has been enabled. We either shape it, and get an array of glyphs back that we render. 
Or, we rasterize each codepoint ourselves, and render each resulting glyph. The two cases end up in two different renderer loops that worked somewhat differently. In particular, the first case has probably never been tested/used at all... With this patch, both are changed, and now use a heuristic to differentiate between multi-cell text strings (like in the example above) and single-cell combining characters. The difference is mainly in which offset to use for the secondary glyphs. In a multi-cell string, each glyph is mapped to its own cell, while in the combining case, we try to map all glyphs to the same cell. --- osc.c | 81 +++++++++++++++++++++++++++++++++++++++++++++++++++++++- render.c | 37 +++++++++++++++++--------- 2 files changed, 104 insertions(+), 14 deletions(-) diff --git a/osc.c b/osc.c index e335dc61..6d8bb40c 100644 --- a/osc.c +++ b/osc.c @@ -610,7 +610,6 @@ verify_kitty_id_is_valid(const char *id) } UNIGNORE_WARNINGS - static void kitty_notification(struct terminal *term, char *string) { @@ -1135,6 +1134,82 @@ out: free(sound_name); } +static void +kitty_text_size(struct terminal *term, char *string) +{ + char *text = strchr(string, ';'); + if (text == NULL) + return; + + char *parameters = string; + *text = '\0'; + text++; + + char32_t *wchars = ambstoc32(text); + if (wchars == NULL) + return; + + int width = 0; + + char *ctx = NULL; + for (char *param = strtok_r(parameters, ":", &ctx); + param != NULL; + param = strtok_r(NULL, ":", &ctx)) + { + /* All parameters are on the form X=value, where X is always + exactly one character */ + if (param[0] == '\0' || param[1] != '=') + continue; + + char *value = &param[2]; + + switch (param[0]) { + case 'w': { + errno = 0; + char *end = NULL; + unsigned long w = strtoul(value, &end, 10); + + if (*end == '\0' && errno == 0 && w <= 7) { + width = (int)w; + break; + } else + LOG_ERR("OSC-66: invalid 'w' value, ignoring"); + break; + } + + case 's': + case 'n': + case 'd': + case 'v': + LOG_WARN("OSC-66: 
unsupported: '%c' parameter, ignoring", param[0]); + break; + } + } + + const size_t len = c32len(wchars); + uint32_t key = composed_key_from_chars(wchars, len); + + const struct composed *composed = composed_lookup_without_collision( + term->composed, &key, wchars, len - 1, wchars[len - 1], width); + + if (composed == NULL) { + struct composed *new_cc = xmalloc(sizeof(*new_cc)); + new_cc->chars = wchars; + new_cc->count = len; + new_cc->key = key; + new_cc->width = width; + new_cc->forced_width = width; + + term->composed_count++; + composed_insert(&term->composed, new_cc); + composed = new_cc; + } else if (composed->width == width) { + free(wchars); + } + + term_print(term, CELL_COMB_CHARS_LO + composed->key, composed->forced_width > 0 ? composed->forced_width : composed->width); +} + void osc_dispatch(struct terminal *term) { @@ -1371,6 +1446,10 @@ osc_dispatch(struct terminal *term) osc_selection(term, string); break; + case 66: /* text-size protocol (kitty) */ + kitty_text_size(term, string); + break; + case 99: /* Kitty notifications */ kitty_notification(term, string); break; diff --git a/render.c b/render.c index 0cca0643..13e9d708 100644 --- a/render.c +++ b/render.c @@ -869,11 +869,16 @@ render_cell(struct terminal *term, pixman_image_t *pix, pixman_region32_t *damag } if (grapheme != NULL) { - cell_cols = composed->width; + const int forced_width = composed->forced_width; + + cell_cols = forced_width > 0 ? forced_width : composed->width; composed = NULL; glyphs = grapheme->glyphs; glyph_count = grapheme->count; + + if (forced_width > 0) + glyph_count = min(glyph_count, forced_width); } } @@ -890,7 +895,9 @@ render_cell(struct terminal *term, pixman_image_t *pix, pixman_region32_t *damag } else { glyph_count = 1; glyphs = &single; - cell_cols = single->cols; + + const size_t forced_width = composed != NULL ? composed->forced_width : 0; + cell_cols = forced_width > 0 ? 
forced_width : single->cols; } } } @@ -972,7 +979,7 @@ render_cell(struct terminal *term, pixman_image_t *pix, pixman_region32_t *damag int g_x = glyph->x; int g_y = glyph->y; - if (i > 0 && glyph->x >= 0) + if (i > 0 && glyph->x >= 0 && cell_cols == 1) g_x -= term->cell_width; if (unlikely(pixman_image_get_format(glyph->pix) == PIXMAN_a8r8g8b8)) { @@ -993,9 +1000,9 @@ render_cell(struct terminal *term, pixman_image_t *pix, pixman_region32_t *damag if (composed != NULL) { assert(glyph_count == 1); - for (size_t i = 1; i < composed->count; i++) { + for (size_t j = 1; j < composed->count; j++) { const struct fcft_glyph *g = fcft_rasterize_char_utf32( - font, composed->chars[i], term->font_subpixel); + font, composed->chars[j], term->font_subpixel); if (g == NULL) continue; @@ -1017,22 +1024,26 @@ render_cell(struct terminal *term, pixman_image_t *pix, pixman_region32_t *damag * somewhat deal with double-width glyphs we use * an offset of *one* cell. */ - int x_ofs = g->x < 0 - ? cell_cols * term->cell_width - : (cell_cols - 1) * term->cell_width; + int x_ofs = cell_cols == 1 + ? g->x < 0 + ? cell_cols * term->cell_width + : (cell_cols - 1) * term->cell_width + : 0; + + if (cell_cols > 1) + pen_x += term->cell_width; pixman_image_composite32( PIXMAN_OP_OVER, clr_pix, g->pix, pix, 0, 0, 0, 0, /* Some fonts use a negative offset, while others use a * "normal" offset */ - pen_x + x_ofs + g->x, - y + term->font_baseline - g->y, - g->width, g->height); + pen_x + letter_x_ofs + x_ofs + g->x, + y + term->font_baseline - g->y, g->width, g->height); } } } - pen_x += glyph->advance.x; + pen_x += cell_cols > 1 ? 
term->cell_width : glyph->advance.x; } pixman_image_unref(clr_pix); @@ -4398,7 +4409,7 @@ render_resize(struct terminal *term, int width, int height, uint8_t opts) } /* Don't shrink grid too much */ - const int min_cols = 2; + const int min_cols = 7; const int min_rows = 1; /* Minimum window size (must be divisible by the scaling factor)*/ From d3f692990ef66f550bb3a0ade2e84107cfbeca47 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Ekl=C3=B6f?= Date: Sun, 26 Jan 2025 07:33:53 +0100 Subject: [PATCH 05/13] term+vt: refactor: move "utf8" char processing to term_process_and_print_non_ascii() This function "prints" any non-ascii character (i.e. any character that ends up in the action_utf8_print() function in vt.c) to the grid. This includes grapheme cluster processing etc. action_utf8_print() now simply calls this function. This allows us to re-use the same functionality from other places (like the text-sizing protocol). --- osc.c | 5 +- terminal.c | 255 +++++++++++++++++++++++++++++++++++++++++++++++++++++ terminal.h | 1 + vt.c | 251 +--------------------------------------------------- 4 files changed, 261 insertions(+), 251 deletions(-) diff --git a/osc.c b/osc.c index 6d8bb40c..49bdba67 100644 --- a/osc.c +++ b/osc.c @@ -1207,7 +1207,10 @@ kitty_text_size(struct terminal *term, char *string) free(wchars); } - term_print(term, CELL_COMB_CHARS_LO + composed->key, composed->forced_width > 0 ? composed->forced_width : composed->width); + term_print( + term, CELL_COMB_CHARS_LO + composed->key, + composed->forced_width > 0 ? 
composed->forced_width : composed->width, + false); } void diff --git a/terminal.c b/terminal.c index bf70a37e..96a215ba 100644 --- a/terminal.c +++ b/terminal.c @@ -27,6 +27,7 @@ #include "commands.h" #include "config.h" #include "debug.h" +#include "emoji-variation-sequences.h" #include "extract.h" #include "grid.h" #include "ime.h" @@ -4073,6 +4074,260 @@ term_single_shift(struct terminal *term, enum charset_designator idx) term->ascii_printer = &ascii_printer_single_shift; } +#if defined(FOOT_GRAPHEME_CLUSTERING) +static int +emoji_vs_compare(const void *_key, const void *_entry) +{ + const struct emoji_vs *key = _key; + const struct emoji_vs *entry = _entry; + + uint32_t cp = key->start; + + if (cp < entry->start) + return -1; + else if (cp > entry->end) + return 1; + else + return 0; +} + +UNITTEST +{ + /* Verify the emoji_vs list is sorted */ + int64_t last_end = -1; + + for (size_t i = 0; i < sizeof(emoji_vs) / sizeof(emoji_vs[0]); i++) { + const struct emoji_vs *vs = &emoji_vs[i]; + xassert(vs->start <= vs->end); + xassert(vs->start > last_end); + xassert(vs->vs15 || vs->vs16); + last_end = vs->end; + } +} +#endif + +void +term_process_and_print_non_ascii(struct terminal *term, char32_t wc) +{ + int width = c32width(wc); + bool insert_mode_disable = false; + const bool grapheme_clustering = term->grapheme_shaping; + +#if !defined(FOOT_GRAPHEME_CLUSTERING) + xassert(!grapheme_clustering); +#endif + + if (term->grid->cursor.point.col > 0 && + (grapheme_clustering || + (!grapheme_clustering && width == 0 && wc >= 0x300))) + { + int col = term->grid->cursor.point.col; + if (!term->grid->cursor.lcf) + col--; + + /* Skip past spacers */ + struct row *row = term->grid->cur_row; + while (row->cells[col].wc >= CELL_SPACER && col > 0) + col--; + + xassert(col >= 0 && col < term->cols); + char32_t base = row->cells[col].wc; + char32_t UNUSED last = base; + + /* Is base cell already a cluster? 
*/ + const struct composed *composed = + (base >= CELL_COMB_CHARS_LO && base <= CELL_COMB_CHARS_HI) + ? composed_lookup(term->composed, base - CELL_COMB_CHARS_LO) + : NULL; + + uint32_t key; + + if (composed != NULL) { + base = composed->chars[0]; + last = composed->chars[composed->count - 1]; + key = composed_key_from_key(composed->key, wc); + } else + key = composed_key_from_key(base, wc); + +#if defined(FOOT_GRAPHEME_CLUSTERING) + if (grapheme_clustering) { + /* Check if we're on a grapheme cluster break */ + if (utf8proc_grapheme_break_stateful( + last, wc, &term->vt.grapheme_state)) + { + term_reset_grapheme_state(term); + goto out; + } + } +#endif + + int base_width = c32width(base); + if (base_width > 0) { + term->grid->cursor.point.col = col; + term->grid->cursor.lcf = false; + insert_mode_disable = true; + + if (composed == NULL) { + bool base_from_primary; + bool comb_from_primary; + bool pre_from_primary; + + char32_t precomposed = term->fonts[0] != NULL + ? fcft_precompose( + term->fonts[0], base, wc, &base_from_primary, + &comb_from_primary, &pre_from_primary) + : (char32_t)-1; + + int precomposed_width = c32width(precomposed); + + /* + * Only use the pre-composed character if: + * + * 1. we *have* a pre-composed character + * 2. the width matches the base characters width + * 3. it's in the primary font, OR one of the base or + * combining characters are *not* from the primary + * font + */ + + if (precomposed != (char32_t)-1 && + precomposed_width == base_width && + (pre_from_primary || + !base_from_primary || + !comb_from_primary)) + { + wc = precomposed; + width = precomposed_width; + term_reset_grapheme_state(term); + goto out; + } + } + + size_t wanted_count = composed != NULL ? 
composed->count + 1 : 2; + if (wanted_count > 255) { + xassert(composed != NULL); + +#if defined(LOG_ENABLE_DBG) && LOG_ENABLE_DBG + LOG_WARN("combining character overflow:"); + LOG_WARN(" base: 0x%04x", composed->chars[0]); + for (size_t i = 1; i < composed->count; i++) + LOG_WARN(" cc: 0x%04x", composed->chars[i]); + LOG_ERR(" new: 0x%04x", wc); +#endif + /* This is going to break anyway... */ + wanted_count--; + } + + xassert(wanted_count <= 255); + + /* Check if we already have a match for the entire compose chain */ + const struct composed *cc = + composed_lookup_without_collision( + term->composed, &key, + composed != NULL ? composed->chars : &(char32_t){base}, + composed != NULL ? composed->count : 1, + wc, 0); + + if (cc != NULL) { + /* We *do* have a match! */ + wc = CELL_COMB_CHARS_LO + cc->key; + width = cc->width; + goto out; + } else { + /* No match - allocate a new chain below */ + } + + if (unlikely(term->composed_count >= + (CELL_COMB_CHARS_HI - CELL_COMB_CHARS_LO))) + { + /* We reached our maximum number of allowed composed + * character chains. Fall through here and print the + * current zero-width character to the current cell */ + LOG_WARN("maximum number of composed characters reached"); + term_reset_grapheme_state(term); + goto out; + } + + /* Allocate new chain */ + struct composed *new_cc = xmalloc(sizeof(*new_cc)); + new_cc->chars = xmalloc(wanted_count * sizeof(new_cc->chars[0])); + new_cc->key = key; + new_cc->count = wanted_count; + new_cc->chars[0] = base; + new_cc->chars[wanted_count - 1] = wc; + new_cc->forced_width = 0; + + if (composed != NULL) { + memcpy(&new_cc->chars[1], &composed->chars[1], + (wanted_count - 2) * sizeof(new_cc->chars[0])); + } + + const int grapheme_width = + composed != NULL ? 
composed->width : base_width; + + switch (term->conf->tweak.grapheme_width_method) { + case GRAPHEME_WIDTH_MAX: + new_cc->width = max(grapheme_width, width); + break; + + case GRAPHEME_WIDTH_DOUBLE: + new_cc->width = min(grapheme_width + width, 2); + +#if defined(FOOT_GRAPHEME_CLUSTERING) + /* Handle VS-15 and VS-16 variation selectors */ + if (unlikely(grapheme_clustering && + (wc == 0xfe0e || wc == 0xfe0f) && + new_cc->count == 2)) + { + const struct emoji_vs *vs = + bsearch( + &(struct emoji_vs){.start = new_cc->chars[0]}, + emoji_vs, sizeof(emoji_vs) / sizeof(emoji_vs[0]), + sizeof(struct emoji_vs), + &emoji_vs_compare); + + if (vs != NULL) { + xassert(new_cc->chars[0] >= vs->start && + new_cc->chars[0] <= vs->end); + + /* Force a grapheme width of 1 for VS-15, and 2 for VS-16 */ + if (wc == 0xfe0e) { + if (vs->vs15) + new_cc->width = 1; + } else if (wc == 0xfe0f) { + if (vs->vs16) + new_cc->width = 2; + } + } + } +#endif + + break; + + case GRAPHEME_WIDTH_WCSWIDTH: + new_cc->width = grapheme_width + width; + break; + } + + term->composed_count++; + composed_insert(&term->composed, new_cc); + + wc = CELL_COMB_CHARS_LO + new_cc->key; + width = new_cc->width; + + xassert(wc >= CELL_COMB_CHARS_LO); + xassert(wc <= CELL_COMB_CHARS_HI); + goto out; + } + } else + term_reset_grapheme_state(term); + + +out: + if (width > 0) + term_print(term, wc, width, insert_mode_disable); +} + enum term_surface term_surface_kind(const struct terminal *term, const struct wl_surface *surface) { diff --git a/terminal.h b/terminal.h index d8e7cf94..a69a8d0f 100644 --- a/terminal.h +++ b/terminal.h @@ -894,6 +894,7 @@ void term_cursor_up(struct terminal *term, int count); void term_cursor_down(struct terminal *term, int count); void term_cursor_blink_update(struct terminal *term); +void term_process_and_print_non_ascii(struct terminal *term, char32_t wc); void term_print(struct terminal *term, char32_t wc, int width, bool insert_mode_disable); void term_fill(struct terminal *term, int 
row, int col, uint8_t c, size_t count, diff --git a/vt.c b/vt.c index 5447493a..9c758c55 100644 --- a/vt.c +++ b/vt.c @@ -16,7 +16,6 @@ #include "csi.h" #include "dcs.h" #include "debug.h" -#include "emoji-variation-sequences.h" #include "osc.h" #include "sixel.h" #include "util.h" @@ -647,258 +646,10 @@ action_put(struct terminal *term, uint8_t c) dcs_put(term, c); } -#if defined(FOOT_GRAPHEME_CLUSTERING) -static int -emoji_vs_compare(const void *_key, const void *_entry) -{ - const struct emoji_vs *key = _key; - const struct emoji_vs *entry = _entry; - - uint32_t cp = key->start; - - if (cp < entry->start) - return -1; - else if (cp > entry->end) - return 1; - else - return 0; -} - -UNITTEST -{ - /* Verify the emoji_vs list is sorted */ - int64_t last_end = -1; - - for (size_t i = 0; i < sizeof(emoji_vs) / sizeof(emoji_vs[0]); i++) { - const struct emoji_vs *vs = &emoji_vs[i]; - xassert(vs->start <= vs->end); - xassert(vs->start > last_end); - xassert(vs->vs15 || vs->vs16); - last_end = vs->end; - } -} -#endif - static void action_utf8_print(struct terminal *term, char32_t wc) { - int width = c32width(wc); - bool insert_mode_disable = false; - const bool grapheme_clustering = term->grapheme_shaping; - -#if !defined(FOOT_GRAPHEME_CLUSTERING) - xassert(!grapheme_clustering); -#endif - - if (term->grid->cursor.point.col > 0 && - (grapheme_clustering || - (!grapheme_clustering && width == 0 && wc >= 0x300))) - { - int col = term->grid->cursor.point.col; - if (!term->grid->cursor.lcf) - col--; - - /* Skip past spacers */ - struct row *row = term->grid->cur_row; - while (row->cells[col].wc >= CELL_SPACER && col > 0) - col--; - - xassert(col >= 0 && col < term->cols); - char32_t base = row->cells[col].wc; - char32_t UNUSED last = base; - - /* Is base cell already a cluster? */ - const struct composed *composed = - (base >= CELL_COMB_CHARS_LO && base <= CELL_COMB_CHARS_HI) - ? 
composed_lookup(term->composed, base - CELL_COMB_CHARS_LO) - : NULL; - - uint32_t key; - - if (composed != NULL) { - base = composed->chars[0]; - last = composed->chars[composed->count - 1]; - key = composed_key_from_key(composed->key, wc); - } else - key = composed_key_from_key(base, wc); - -#if defined(FOOT_GRAPHEME_CLUSTERING) - if (grapheme_clustering) { - /* Check if we're on a grapheme cluster break */ - if (utf8proc_grapheme_break_stateful( - last, wc, &term->vt.grapheme_state)) - { - term_reset_grapheme_state(term); - goto out; - } - } -#endif - - int base_width = c32width(base); - if (base_width > 0) { - term->grid->cursor.point.col = col; - term->grid->cursor.lcf = false; - insert_mode_disable = true; - - if (composed == NULL) { - bool base_from_primary; - bool comb_from_primary; - bool pre_from_primary; - - char32_t precomposed = term->fonts[0] != NULL - ? fcft_precompose( - term->fonts[0], base, wc, &base_from_primary, - &comb_from_primary, &pre_from_primary) - : (char32_t)-1; - - int precomposed_width = c32width(precomposed); - - /* - * Only use the pre-composed character if: - * - * 1. we *have* a pre-composed character - * 2. the width matches the base characters width - * 3. it's in the primary font, OR one of the base or - * combining characters are *not* from the primary - * font - */ - - if (precomposed != (char32_t)-1 && - precomposed_width == base_width && - (pre_from_primary || - !base_from_primary || - !comb_from_primary)) - { - wc = precomposed; - width = precomposed_width; - term_reset_grapheme_state(term); - goto out; - } - } - - size_t wanted_count = composed != NULL ? 
composed->count + 1 : 2; - if (wanted_count > 255) { - xassert(composed != NULL); - -#if defined(LOG_ENABLE_DBG) && LOG_ENABLE_DBG - LOG_WARN("combining character overflow:"); - LOG_WARN(" base: 0x%04x", composed->chars[0]); - for (size_t i = 1; i < composed->count; i++) - LOG_WARN(" cc: 0x%04x", composed->chars[i]); - LOG_ERR(" new: 0x%04x", wc); -#endif - /* This is going to break anyway... */ - wanted_count--; - } - - xassert(wanted_count <= 255); - - /* Check if we already have a match for the entire compose chain */ - const struct composed *cc = - composed_lookup_without_collision( - term->composed, &key, - composed != NULL ? composed->chars : &(char32_t){base}, - composed != NULL ? composed->count : 1, - wc, 0); - - if (cc != NULL) { - /* We *do* have a match! */ - wc = CELL_COMB_CHARS_LO + cc->key; - width = cc->width; - goto out; - } else { - /* No match - allocate a new chain below */ - } - - if (unlikely(term->composed_count >= - (CELL_COMB_CHARS_HI - CELL_COMB_CHARS_LO))) - { - /* We reached our maximum number of allowed composed - * character chains. Fall through here and print the - * current zero-width character to the current cell */ - LOG_WARN("maximum number of composed characters reached"); - term_reset_grapheme_state(term); - goto out; - } - - /* Allocate new chain */ - struct composed *new_cc = xmalloc(sizeof(*new_cc)); - new_cc->chars = xmalloc(wanted_count * sizeof(new_cc->chars[0])); - new_cc->key = key; - new_cc->count = wanted_count; - new_cc->chars[0] = base; - new_cc->chars[wanted_count - 1] = wc; - new_cc->forced_width = 0; - - if (composed != NULL) { - memcpy(&new_cc->chars[1], &composed->chars[1], - (wanted_count - 2) * sizeof(new_cc->chars[0])); - } - - const int grapheme_width = - composed != NULL ? 
composed->width : base_width; - - switch (term->conf->tweak.grapheme_width_method) { - case GRAPHEME_WIDTH_MAX: - new_cc->width = max(grapheme_width, width); - break; - - case GRAPHEME_WIDTH_DOUBLE: - new_cc->width = min(grapheme_width + width, 2); - -#if defined(FOOT_GRAPHEME_CLUSTERING) - /* Handle VS-15 and VS-16 variation selectors */ - if (unlikely(grapheme_clustering && - (wc == 0xfe0e || wc == 0xfe0f) && - new_cc->count == 2)) - { - const struct emoji_vs *vs = - bsearch( - &(struct emoji_vs){.start = new_cc->chars[0]}, - emoji_vs, sizeof(emoji_vs) / sizeof(emoji_vs[0]), - sizeof(struct emoji_vs), - &emoji_vs_compare); - - if (vs != NULL) { - xassert(new_cc->chars[0] >= vs->start && - new_cc->chars[0] <= vs->end); - - /* Force a grapheme width of 1 for VS-15, and 2 for VS-16 */ - if (wc == 0xfe0e) { - if (vs->vs15) - new_cc->width = 1; - } else if (wc == 0xfe0f) { - if (vs->vs16) - new_cc->width = 2; - } - } - } -#endif - - break; - - case GRAPHEME_WIDTH_WCSWIDTH: - new_cc->width = grapheme_width + width; - break; - } - - term->composed_count++; - composed_insert(&term->composed, new_cc); - - wc = CELL_COMB_CHARS_LO + new_cc->key; - width = new_cc->width; - - xassert(wc >= CELL_COMB_CHARS_LO); - xassert(wc <= CELL_COMB_CHARS_HI); - goto out; - } - } else - term_reset_grapheme_state(term); - - -out: - if (width > 0) - term_print(term, wc, width, insert_mode_disable); + term_process_and_print_non_ascii(term, wc); } static void From 1260004330359619113aac3cb3ea1a7fe2fddb2b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Ekl=C3=B6f?= Date: Sun, 26 Jan 2025 07:36:11 +0100 Subject: [PATCH 06/13] osc: text-sizing: implement w=0, plus optimize single-codepoint cases If there's a single codepoint in the text portion of the OSC sequence, and its calculated width matches the forced width, print it directly to the grid instead of emitting a combining character. When w=0, we split up the text string "as we normally would". 
Since we don't support any other text-sizing parameters, this means simply printing each codepoint to the grid. --- osc.c | 55 +++++++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 51 insertions(+), 4 deletions(-) diff --git a/osc.c b/osc.c index 49bdba67..f6398165 100644 --- a/osc.c +++ b/osc.c @@ -1149,7 +1149,7 @@ kitty_text_size(struct terminal *term, char *string) if (wchars == NULL) return; - int width = 0; + int forced_width = 0; char *ctx = NULL; for (char *param = strtok_r(parameters, ":", &ctx); @@ -1170,7 +1170,7 @@ kitty_text_size(struct terminal *term, char *string) unsigned long w = strtoul(value, &end, 10); if (*end == '\0' && errno == 0 && w <= 7) { - width = (int)w; + forced_width = (int)w; break; } else LOG_ERR("OSC-66: invalid 'w' value, ignoring"); @@ -1187,10 +1187,57 @@ kitty_text_size(struct terminal *term, char *string) } const size_t len = c32len(wchars); + + if (forced_width == 0) { + /* + * w=0 means we split the text up as we'd normally do... Since + * we don't support any other parameters of the text-sizing + * protocol, that means we just process the string as if it + * has been printed without this OSC. + */ + for (size_t i = 0; i < len; i++) + term_process_and_print_non_ascii(term, wchars[i]); + free(wchars); + return; + } + + size_t max_cp_width = 0; + size_t all_cp_width = 0; + + for (size_t i = 0; i < len; i++) { + const size_t cp_width = c32width(wchars[i]); + all_cp_width += cp_width; + max_cp_width = max(max_cp_width, cp_width); + } + + size_t calculated_width = 0; + switch (term->conf->tweak.grapheme_width_method) { + case GRAPHEME_WIDTH_WCSWIDTH: calculated_width = all_cp_width; break; + case GRAPHEME_WIDTH_MAX: calculated_width = max_cp_width; break; + case GRAPHEME_WIDTH_DOUBLE: calculated_width = min(max_cp_width, 2); break; + } + + const size_t width = forced_width == 0 ? 
calculated_width : forced_width; + + LOG_DBG("len=%zu, forced=%d, calculated=%zu, using=%zu", + len, forced_width, calculated_width, width); + + if (len == 1 && calculated_width == forced_width) { + /* + * Optimization: if there's a single codepoint, and either + * w=0, or the 'w' matches the calculated width, print + * codepoint directly instead of creating a combining + * character. + */ + term_print(term, wchars[0], width); + free(wchars); + return; + } + uint32_t key = composed_key_from_chars(wchars, len); const struct composed *composed = composed_lookup_without_collision( - term->composed, &key, wchars, len - 1, wchars[len - 1], width); + term->composed, &key, wchars, len - 1, wchars[len - 1], forced_width); if (composed == NULL) { struct composed *new_cc = xmalloc(sizeof(*new_cc)); @@ -1198,7 +1245,7 @@ kitty_text_size(struct terminal *term, char *string) new_cc->count = len; new_cc->key = key; new_cc->width = width; - new_cc->forced_width = width; + new_cc->forced_width = forced_width; term->composed_count++; composed_insert(&term->composed, new_cc); From 3998f8570caaf7d92cecc3b29b76c3351087930b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Ekl=C3=B6f?= Date: Mon, 27 Jan 2025 07:35:10 +0100 Subject: [PATCH 07/13] composed: codespell: infinitely --- composed.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/composed.c b/composed.c index 2d9ed47d..fc7dfa00 100644 --- a/composed.c +++ b/composed.c @@ -95,7 +95,7 @@ composed_lookup_without_collision(struct composed *root, uint32_t *key, (*key)++; *key &= CELL_COMB_CHARS_HI - CELL_COMB_CHARS_LO; - /* TODO: this will loop infinitly if the composed table is full */ + /* TODO: this will loop infinitely if the composed table is full */ } return NULL; From ed35a238d62473d77729748ca6d39ab3f3d42602 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Ekl=C3=B6f?= Date: Mon, 27 Jan 2025 10:12:26 +0100 Subject: [PATCH 08/13] doc: ctlseq: add OSC 66 (kitty text sizing) --- doc/foot-ctlseqs.7.scd | 5 
++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/doc/foot-ctlseqs.7.scd b/doc/foot-ctlseqs.7.scd index f8eb1222..6c702738 100644 --- a/doc/foot-ctlseqs.7.scd +++ b/doc/foot-ctlseqs.7.scd @@ -729,7 +729,10 @@ All _OSC_ sequences begin with *\\E]*, sometimes abbreviated _OSC_. : Copy _Pd_ (base64 encoded text) to the clipboard. _Pc_ denotes the target: *c* targets the clipboard and *s* and *p* the primary selection. -| \\E] 99 ; _params_ ; _payload_ \\E\\ +| \\E] 66 ; _params_ ; text \\E\\ +: kitty +: Text sizing protocol (only 'w', width, supported) +| \\E] 99 ; _params_ ; _payload_ \\E\\ : kitty : Desktop notification; uses *desktop-notifications.command* in *foot.ini*(5). From 0f93766614bde055ce61e1b7ffc8f6b5aeef91d3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Ekl=C3=B6f?= Date: Mon, 3 Feb 2025 15:30:00 +0100 Subject: [PATCH 09/13] osc: text-size: disable optimization The optimization prevents the forced-width from being set on the new combining character, causing issues when followed by more zero-width codepoints. --- osc.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/osc.c b/osc.c index f6398165..eaf6e33e 100644 --- a/osc.c +++ b/osc.c @@ -1222,6 +1222,7 @@ kitty_text_size(struct terminal *term, char *string) LOG_DBG("len=%zu, forced=%d, calculated=%zu, using=%zu", len, forced_width, calculated_width, width); +#if 0 if (len == 1 && calculated_width == forced_width) { /* * Optimization: if there's a single codepoint, and either @@ -1233,6 +1234,7 @@ kitty_text_size(struct terminal *term, char *string) free(wchars); return; } +#endif uint32_t key = composed_key_from_chars(wchars, len); From 98402040977435b0029dbe4952c083e65eb8ef69 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Ekl=C3=B6f?= Date: Mon, 3 Feb 2025 15:31:03 +0100 Subject: [PATCH 10/13] term: print-non-ascii: propagate existing forced-width When appending to an existing composed character, "inherit" its forced width, if set. 
Also make sure to actually _use_ the forced width, if set, rather than the calculated width. This fixes an issue when appending zero-width codepoints to a forced-width combining character. --- terminal.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/terminal.c b/terminal.c index 96a215ba..c8e49663 100644 --- a/terminal.c +++ b/terminal.c @@ -4255,7 +4255,7 @@ term_process_and_print_non_ascii(struct terminal *term, char32_t wc) new_cc->count = wanted_count; new_cc->chars[0] = base; new_cc->chars[wanted_count - 1] = wc; - new_cc->forced_width = 0; + new_cc->forced_width = composed != NULL ? composed->forced_width : 0; if (composed != NULL) { memcpy(&new_cc->chars[1], &composed->chars[1], @@ -4313,7 +4313,7 @@ term_process_and_print_non_ascii(struct terminal *term, char32_t wc) composed_insert(&term->composed, new_cc); wc = CELL_COMB_CHARS_LO + new_cc->key; - width = new_cc->width; + width = new_cc->forced_width > 0 ? new_cc->forced_width : new_cc->width; xassert(wc >= CELL_COMB_CHARS_LO); xassert(wc <= CELL_COMB_CHARS_HI); From d7e8f29ee24365a5aeeca4460afa228a32308e49 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Ekl=C3=B6f?= Date: Wed, 5 Feb 2025 11:36:53 +0100 Subject: [PATCH 11/13] grid: reflow: get number of spacers to insert from the old grid When checking if we're breaking in the middle of a multi-column character, we counted spacers starting from the break point. But, the character may be wider than that. Use the fact that the spacer cells encode how many *more* there are after them; when we get to the first one, we know exactly how wide the character is. 
--- grid.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/grid.c b/grid.c index eb24869d..2dc4fcd5 100644 --- a/grid.c +++ b/grid.c @@ -1056,8 +1056,8 @@ grid_resize_and_reflow( from + amount < old_cols && old_row->cells[from + amount].wc >= CELL_SPACER + 1)) { + spacers = old_row->cells[from + amount].wc - CELL_SPACER + 1; amount--; - spacers++; } xassert( From a3a404a2570b72636da0583492d9cf87b700ef6b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Ekl=C3=B6f?= Date: Wed, 5 Feb 2025 11:38:29 +0100 Subject: [PATCH 12/13] render: resize: note why min_cols=7 --- render.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/render.c b/render.c index 13e9d708..9ff9c681 100644 --- a/render.c +++ b/render.c @@ -4409,7 +4409,7 @@ render_resize(struct terminal *term, int width, int height, uint8_t opts) } /* Don't shrink grid too much */ - const int min_cols = 7; + const int min_cols = 7; /* See OSC-66 */ const int min_rows = 1; /* Minimum window size (must be divisible by the scaling factor)*/ From 8d20b82721ac95cea89a3ec87c8ec32e00f224e1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Ekl=C3=B6f?= Date: Thu, 6 Feb 2025 14:02:04 +0100 Subject: [PATCH 13/13] changelog: text-sizing protocol --- CHANGELOG.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b9b89bf3..c707d5a6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -65,8 +65,9 @@ * Support for the new Wayland protocol `xdg-system-bell-v1` protocol (added in wayland-protocols 1.38), via the new config option `bell.system=no|yes` (defaults to `yes`). -* Added support for custom regex matching ([#1386][1386], +* Support for custom regex matching ([#1386][1386], [#1872][1872]) +* Support for kitty's text-sizing protocol (`w`, width, only), OSC-66. [1386]: https://codeberg.org/dnkl/foot/issues/1386 [1872]: https://codeberg.org/dnkl/foot/issues/1872