From d3f692990ef66f550bb3a0ade2e84107cfbeca47 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Ekl=C3=B6f?= Date: Sun, 26 Jan 2025 07:33:53 +0100 Subject: [PATCH] term+vt: refactor: move "utf8" char processing to term_process_and_print_non_ascii() This function "prints" any non-ascii character (i.e. any character that ends up in the action_utf8_print() function in vt.c) to the grid. This includes grapheme cluster processing etc. action_utf8_print() now simply calls this function. This allows us to re-use the same functionality from other places (like the text-sizing protocol). --- osc.c | 5 +- terminal.c | 255 +++++++++++++++++++++++++++++++++++++++++++++++++++++ terminal.h | 1 + vt.c | 251 +--------------------------------------------------- 4 files changed, 261 insertions(+), 251 deletions(-) diff --git a/osc.c b/osc.c index 6d8bb40c..49bdba67 100644 --- a/osc.c +++ b/osc.c @@ -1207,7 +1207,10 @@ kitty_text_size(struct terminal *term, char *string) free(wchars); } - term_print(term, CELL_COMB_CHARS_LO + composed->key, composed->forced_width > 0 ? composed->forced_width : composed->width); + term_print( + term, CELL_COMB_CHARS_LO + composed->key, + composed->forced_width > 0 ? composed->forced_width : composed->width, + false); } void diff --git a/terminal.c b/terminal.c index bf70a37e..96a215ba 100644 --- a/terminal.c +++ b/terminal.c @@ -27,6 +27,7 @@ #include "commands.h" #include "config.h" #include "debug.h" +#include "emoji-variation-sequences.h" #include "extract.h" #include "grid.h" #include "ime.h" @@ -4073,6 +4074,260 @@ term_single_shift(struct terminal *term, enum charset_designator idx) term->ascii_printer = &ascii_printer_single_shift; } +#if defined(FOOT_GRAPHEME_CLUSTERING) +static int +emoji_vs_compare(const void *_key, const void *_entry) +{ + const struct emoji_vs *key = _key; + const struct emoji_vs *entry = _entry; + + uint32_t cp = key->start; + + if (cp < entry->start) + return -1; + else if (cp > entry->end) + return 1; + else + return 0; +} + +UNITTEST +{ + /* Verify the emoji_vs list is sorted */ + int64_t last_end = -1; + + for (size_t i = 0; i < sizeof(emoji_vs) / sizeof(emoji_vs[0]); i++) { + const struct emoji_vs *vs = &emoji_vs[i]; + xassert(vs->start <= vs->end); + xassert(vs->start > last_end); + xassert(vs->vs15 || vs->vs16); + last_end = vs->end; + } +} +#endif + +void +term_process_and_print_non_ascii(struct terminal *term, char32_t wc) +{ + int width = c32width(wc); + bool insert_mode_disable = false; + const bool grapheme_clustering = term->grapheme_shaping; + +#if !defined(FOOT_GRAPHEME_CLUSTERING) + xassert(!grapheme_clustering); +#endif + + if (term->grid->cursor.point.col > 0 && + (grapheme_clustering || + (!grapheme_clustering && width == 0 && wc >= 0x300))) + { + int col = term->grid->cursor.point.col; + if (!term->grid->cursor.lcf) + col--; + + /* Skip past spacers */ + struct row *row = term->grid->cur_row; + while (row->cells[col].wc >= CELL_SPACER && col > 0) + col--; + + xassert(col >= 0 && col < term->cols); + char32_t base = row->cells[col].wc; + char32_t UNUSED last = base; + + /* Is base cell already a cluster? */ + const struct composed *composed = + (base >= CELL_COMB_CHARS_LO && base <= CELL_COMB_CHARS_HI) + ? composed_lookup(term->composed, base - CELL_COMB_CHARS_LO) + : NULL; + + uint32_t key; + + if (composed != NULL) { + base = composed->chars[0]; + last = composed->chars[composed->count - 1]; + key = composed_key_from_key(composed->key, wc); + } else + key = composed_key_from_key(base, wc); + +#if defined(FOOT_GRAPHEME_CLUSTERING) + if (grapheme_clustering) { + /* Check if we're on a grapheme cluster break */ + if (utf8proc_grapheme_break_stateful( + last, wc, &term->vt.grapheme_state)) + { + term_reset_grapheme_state(term); + goto out; + } + } +#endif + + int base_width = c32width(base); + if (base_width > 0) { + term->grid->cursor.point.col = col; + term->grid->cursor.lcf = false; + insert_mode_disable = true; + + if (composed == NULL) { + bool base_from_primary; + bool comb_from_primary; + bool pre_from_primary; + + char32_t precomposed = term->fonts[0] != NULL + ? fcft_precompose( + term->fonts[0], base, wc, &base_from_primary, + &comb_from_primary, &pre_from_primary) + : (char32_t)-1; + + int precomposed_width = c32width(precomposed); + + /* + * Only use the pre-composed character if: + * + * 1. we *have* a pre-composed character + * 2. the width matches the base characters width + * 3. it's in the primary font, OR one of the base or + * combining characters are *not* from the primary + * font + */ + + if (precomposed != (char32_t)-1 && + precomposed_width == base_width && + (pre_from_primary || + !base_from_primary || + !comb_from_primary)) + { + wc = precomposed; + width = precomposed_width; + term_reset_grapheme_state(term); + goto out; + } + } + + size_t wanted_count = composed != NULL ? composed->count + 1 : 2; + if (wanted_count > 255) { + xassert(composed != NULL); + +#if defined(LOG_ENABLE_DBG) && LOG_ENABLE_DBG + LOG_WARN("combining character overflow:"); + LOG_WARN(" base: 0x%04x", composed->chars[0]); + for (size_t i = 1; i < composed->count; i++) + LOG_WARN(" cc: 0x%04x", composed->chars[i]); + LOG_ERR(" new: 0x%04x", wc); +#endif + /* This is going to break anyway... */ + wanted_count--; + } + + xassert(wanted_count <= 255); + + /* Check if we already have a match for the entire compose chain */ + const struct composed *cc = + composed_lookup_without_collision( + term->composed, &key, + composed != NULL ? composed->chars : &(char32_t){base}, + composed != NULL ? composed->count : 1, + wc, 0); + + if (cc != NULL) { + /* We *do* have a match! */ + wc = CELL_COMB_CHARS_LO + cc->key; + width = cc->width; + goto out; + } else { + /* No match - allocate a new chain below */ + } + + if (unlikely(term->composed_count >= + (CELL_COMB_CHARS_HI - CELL_COMB_CHARS_LO))) + { + /* We reached our maximum number of allowed composed + * character chains. Fall through here and print the + * current zero-width character to the current cell */ + LOG_WARN("maximum number of composed characters reached"); + term_reset_grapheme_state(term); + goto out; + } + + /* Allocate new chain */ + struct composed *new_cc = xmalloc(sizeof(*new_cc)); + new_cc->chars = xmalloc(wanted_count * sizeof(new_cc->chars[0])); + new_cc->key = key; + new_cc->count = wanted_count; + new_cc->chars[0] = base; + new_cc->chars[wanted_count - 1] = wc; + new_cc->forced_width = 0; + + if (composed != NULL) { + memcpy(&new_cc->chars[1], &composed->chars[1], + (wanted_count - 2) * sizeof(new_cc->chars[0])); + } + + const int grapheme_width = + composed != NULL ? composed->width : base_width; + + switch (term->conf->tweak.grapheme_width_method) { + case GRAPHEME_WIDTH_MAX: + new_cc->width = max(grapheme_width, width); + break; + + case GRAPHEME_WIDTH_DOUBLE: + new_cc->width = min(grapheme_width + width, 2); + +#if defined(FOOT_GRAPHEME_CLUSTERING) + /* Handle VS-15 and VS-16 variation selectors */ + if (unlikely(grapheme_clustering && + (wc == 0xfe0e || wc == 0xfe0f) && + new_cc->count == 2)) + { + const struct emoji_vs *vs = + bsearch( + &(struct emoji_vs){.start = new_cc->chars[0]}, + emoji_vs, sizeof(emoji_vs) / sizeof(emoji_vs[0]), + sizeof(struct emoji_vs), + &emoji_vs_compare); + + if (vs != NULL) { + xassert(new_cc->chars[0] >= vs->start && + new_cc->chars[0] <= vs->end); + + /* Force a grapheme width of 1 for VS-15, and 2 for VS-16 */ + if (wc == 0xfe0e) { + if (vs->vs15) + new_cc->width = 1; + } else if (wc == 0xfe0f) { + if (vs->vs16) + new_cc->width = 2; + } + } + } +#endif + + break; + + case GRAPHEME_WIDTH_WCSWIDTH: + new_cc->width = grapheme_width + width; + break; + } + + term->composed_count++; + composed_insert(&term->composed, new_cc); + + wc = CELL_COMB_CHARS_LO + new_cc->key; + width = new_cc->width; + + xassert(wc >= CELL_COMB_CHARS_LO); + xassert(wc <= CELL_COMB_CHARS_HI); + goto out; + } + } else + term_reset_grapheme_state(term); + + +out: + if (width > 0) + term_print(term, wc, width, insert_mode_disable); +} + enum term_surface term_surface_kind(const struct terminal *term, const struct wl_surface *surface) { diff --git a/terminal.h b/terminal.h index d8e7cf94..a69a8d0f 100644 --- a/terminal.h +++ b/terminal.h @@ -894,6 +894,7 @@ void term_cursor_up(struct terminal *term, int count); void term_cursor_down(struct terminal *term, int count); void term_cursor_blink_update(struct terminal *term); +void term_process_and_print_non_ascii(struct terminal *term, char32_t wc); void term_print(struct terminal *term, char32_t wc, int width, bool insert_mode_disable); void term_fill(struct terminal *term, int row, int col, uint8_t c, size_t count, diff --git a/vt.c b/vt.c index 5447493a..9c758c55 100644 --- a/vt.c +++ b/vt.c @@ -16,7 +16,6 @@ #include "csi.h" #include "dcs.h" #include "debug.h" -#include "emoji-variation-sequences.h" #include "osc.h" #include "sixel.h" #include "util.h" @@ -647,258 +646,10 @@ action_put(struct terminal *term, uint8_t c) dcs_put(term, c); } -#if defined(FOOT_GRAPHEME_CLUSTERING) -static int -emoji_vs_compare(const void *_key, const void *_entry) -{ - const struct emoji_vs *key = _key; - const struct emoji_vs *entry = _entry; - - uint32_t cp = key->start; - - if (cp < entry->start) - return -1; - else if (cp > entry->end) - return 1; - else - return 0; -} - -UNITTEST -{ - /* Verify the emoji_vs list is sorted */ - int64_t last_end = -1; - - for (size_t i = 0; i < sizeof(emoji_vs) / sizeof(emoji_vs[0]); i++) { - const struct emoji_vs *vs = &emoji_vs[i]; - xassert(vs->start <= vs->end); - xassert(vs->start > last_end); - xassert(vs->vs15 || vs->vs16); - last_end = vs->end; - } -} -#endif - static void action_utf8_print(struct terminal *term, char32_t wc) { - int width = c32width(wc); - bool insert_mode_disable = false; - const bool grapheme_clustering = term->grapheme_shaping; - -#if !defined(FOOT_GRAPHEME_CLUSTERING) - xassert(!grapheme_clustering); -#endif - - if (term->grid->cursor.point.col > 0 && - (grapheme_clustering || - (!grapheme_clustering && width == 0 && wc >= 0x300))) - { - int col = term->grid->cursor.point.col; - if (!term->grid->cursor.lcf) - col--; - - /* Skip past spacers */ - struct row *row = term->grid->cur_row; - while (row->cells[col].wc >= CELL_SPACER && col > 0) - col--; - - xassert(col >= 0 && col < term->cols); - char32_t base = row->cells[col].wc; - char32_t UNUSED last = base; - - /* Is base cell already a cluster? */ - const struct composed *composed = - (base >= CELL_COMB_CHARS_LO && base <= CELL_COMB_CHARS_HI) - ? composed_lookup(term->composed, base - CELL_COMB_CHARS_LO) - : NULL; - - uint32_t key; - - if (composed != NULL) { - base = composed->chars[0]; - last = composed->chars[composed->count - 1]; - key = composed_key_from_key(composed->key, wc); - } else - key = composed_key_from_key(base, wc); - -#if defined(FOOT_GRAPHEME_CLUSTERING) - if (grapheme_clustering) { - /* Check if we're on a grapheme cluster break */ - if (utf8proc_grapheme_break_stateful( - last, wc, &term->vt.grapheme_state)) - { - term_reset_grapheme_state(term); - goto out; - } - } -#endif - - int base_width = c32width(base); - if (base_width > 0) { - term->grid->cursor.point.col = col; - term->grid->cursor.lcf = false; - insert_mode_disable = true; - - if (composed == NULL) { - bool base_from_primary; - bool comb_from_primary; - bool pre_from_primary; - - char32_t precomposed = term->fonts[0] != NULL - ? fcft_precompose( - term->fonts[0], base, wc, &base_from_primary, - &comb_from_primary, &pre_from_primary) - : (char32_t)-1; - - int precomposed_width = c32width(precomposed); - - /* - * Only use the pre-composed character if: - * - * 1. we *have* a pre-composed character - * 2. the width matches the base characters width - * 3. it's in the primary font, OR one of the base or - * combining characters are *not* from the primary - * font - */ - - if (precomposed != (char32_t)-1 && - precomposed_width == base_width && - (pre_from_primary || - !base_from_primary || - !comb_from_primary)) - { - wc = precomposed; - width = precomposed_width; - term_reset_grapheme_state(term); - goto out; - } - } - - size_t wanted_count = composed != NULL ? composed->count + 1 : 2; - if (wanted_count > 255) { - xassert(composed != NULL); - -#if defined(LOG_ENABLE_DBG) && LOG_ENABLE_DBG - LOG_WARN("combining character overflow:"); - LOG_WARN(" base: 0x%04x", composed->chars[0]); - for (size_t i = 1; i < composed->count; i++) - LOG_WARN(" cc: 0x%04x", composed->chars[i]); - LOG_ERR(" new: 0x%04x", wc); -#endif - /* This is going to break anyway... */ - wanted_count--; - } - - xassert(wanted_count <= 255); - - /* Check if we already have a match for the entire compose chain */ - const struct composed *cc = - composed_lookup_without_collision( - term->composed, &key, - composed != NULL ? composed->chars : &(char32_t){base}, - composed != NULL ? composed->count : 1, - wc, 0); - - if (cc != NULL) { - /* We *do* have a match! */ - wc = CELL_COMB_CHARS_LO + cc->key; - width = cc->width; - goto out; - } else { - /* No match - allocate a new chain below */ - } - - if (unlikely(term->composed_count >= - (CELL_COMB_CHARS_HI - CELL_COMB_CHARS_LO))) - { - /* We reached our maximum number of allowed composed - * character chains. Fall through here and print the - * current zero-width character to the current cell */ - LOG_WARN("maximum number of composed characters reached"); - term_reset_grapheme_state(term); - goto out; - } - - /* Allocate new chain */ - struct composed *new_cc = xmalloc(sizeof(*new_cc)); - new_cc->chars = xmalloc(wanted_count * sizeof(new_cc->chars[0])); - new_cc->key = key; - new_cc->count = wanted_count; - new_cc->chars[0] = base; - new_cc->chars[wanted_count - 1] = wc; - new_cc->forced_width = 0; - - if (composed != NULL) { - memcpy(&new_cc->chars[1], &composed->chars[1], - (wanted_count - 2) * sizeof(new_cc->chars[0])); - } - - const int grapheme_width = - composed != NULL ? composed->width : base_width; - - switch (term->conf->tweak.grapheme_width_method) { - case GRAPHEME_WIDTH_MAX: - new_cc->width = max(grapheme_width, width); - break; - - case GRAPHEME_WIDTH_DOUBLE: - new_cc->width = min(grapheme_width + width, 2); - -#if defined(FOOT_GRAPHEME_CLUSTERING) - /* Handle VS-15 and VS-16 variation selectors */ - if (unlikely(grapheme_clustering && - (wc == 0xfe0e || wc == 0xfe0f) && - new_cc->count == 2)) - { - const struct emoji_vs *vs = - bsearch( - &(struct emoji_vs){.start = new_cc->chars[0]}, - emoji_vs, sizeof(emoji_vs) / sizeof(emoji_vs[0]), - sizeof(struct emoji_vs), - &emoji_vs_compare); - - if (vs != NULL) { - xassert(new_cc->chars[0] >= vs->start && - new_cc->chars[0] <= vs->end); - - /* Force a grapheme width of 1 for VS-15, and 2 for VS-16 */ - if (wc == 0xfe0e) { - if (vs->vs15) - new_cc->width = 1; - } else if (wc == 0xfe0f) { - if (vs->vs16) - new_cc->width = 2; - } - } - } -#endif - - break; - - case GRAPHEME_WIDTH_WCSWIDTH: - new_cc->width = grapheme_width + width; - break; - } - - term->composed_count++; - composed_insert(&term->composed, new_cc); - - wc = CELL_COMB_CHARS_LO + new_cc->key; - width = new_cc->width; - - xassert(wc >= CELL_COMB_CHARS_LO); - xassert(wc <= CELL_COMB_CHARS_HI); - goto out; - } - } else - term_reset_grapheme_state(term); - - -out: - if (width > 0) - term_print(term, wc, width, insert_mode_disable); + term_process_and_print_non_ascii(term, wc); } static void