term+vt: refactor: move "utf8" char processing to term_process_and_print_non_ascii()

This function "prints" any non-ascii character (i.e. any character
that ends up in the action_utf8_print() function in vt.c) to the
grid. This includes grapheme cluster processing etc.

action_utf8_print() now simply calls this function.

This allows us to re-use the same functionality from other
places (like the text-sizing protocol).
This commit is contained in:
Daniel Eklöf 2025-01-26 07:33:53 +01:00
parent 7a8d2b5e01
commit d3f692990e
No known key found for this signature in database
GPG key ID: 5BBD4992C116573F
4 changed files with 261 additions and 251 deletions

251
vt.c
View file

@ -16,7 +16,6 @@
#include "csi.h"
#include "dcs.h"
#include "debug.h"
#include "emoji-variation-sequences.h"
#include "osc.h"
#include "sixel.h"
#include "util.h"
@ -647,258 +646,10 @@ action_put(struct terminal *term, uint8_t c)
dcs_put(term, c);
}
#if defined(FOOT_GRAPHEME_CLUSTERING)
/*
 * bsearch() comparator for the emoji_vs table.
 *
 * The key is a 'struct emoji_vs' of which only the 'start' member is
 * consulted; it holds the codepoint being looked up. Each table entry
 * describes the inclusive range [start, end].
 *
 * Returns a negative value if the codepoint sorts before the entry's
 * range, a positive value if it sorts after it, and 0 if the
 * codepoint lies inside the range.
 */
static int
emoji_vs_compare(const void *_key, const void *_entry)
{
    const struct emoji_vs *needle = _key;
    const struct emoji_vs *range = _entry;
    const uint32_t codepoint = needle->start;

    if (codepoint < range->start)
        return -1;

    if (codepoint > range->end)
        return 1;

    return 0;
}
UNITTEST
{
    /*
     * emoji_vs is searched with bsearch() (see emoji_vs_compare()),
     * so its entries must be well-formed, strictly ascending and
     * non-overlapping. Each entry must also enable at least one of
     * the two variation selectors.
     */
    const size_t count = sizeof(emoji_vs) / sizeof(emoji_vs[0]);
    int64_t last_end = -1;

    for (const struct emoji_vs *vs = emoji_vs; vs < emoji_vs + count; vs++) {
        xassert(vs->start <= vs->end);
        xassert(vs->start > last_end);
        xassert(vs->vs15 || vs->vs16);

        last_end = vs->end;
    }
}
#endif
/*
 * Print a decoded non-ASCII character 'wc' to the grid.
 *
 * When the cursor is not in the first column, the character may be
 * merged with the preceding cell instead of being printed on its own:
 * either by substituting a pre-composed codepoint (fcft_precompose()),
 * or by storing base + combining characters as a "composed" chain and
 * printing the chain's cell value (CELL_COMB_CHARS_LO + key).
 *
 * All paths end up at the 'out' label, where the resulting 'wc' is
 * handed to term_print() if its width is greater than zero.
 *
 * NOTE(review): the final call to term_process_and_print_non_ascii()
 * appears to be the post-refactor replacement for everything above it
 * (this text looks like a diff with its +/- markers stripped) -
 * confirm against the real vt.c before relying on this body.
 */
static void
action_utf8_print(struct terminal *term, char32_t wc)
{
int width = c32width(wc);
bool insert_mode_disable = false;
const bool grapheme_clustering = term->grapheme_shaping;
#if !defined(FOOT_GRAPHEME_CLUSTERING)
/* Grapheme shaping cannot be enabled without compile-time support */
xassert(!grapheme_clustering);
#endif
/*
 * Only attempt to combine with the previous cell when there *is* a
 * previous cell, and either grapheme clustering is enabled, or 'wc'
 * is a zero-width character at or above U+0300.
 */
if (term->grid->cursor.point.col > 0 &&
(grapheme_clustering ||
(!grapheme_clustering && width == 0 && wc >= 0x300)))
{
int col = term->grid->cursor.point.col;
/* NOTE(review): presumably 'lcf' means the cursor is still parked on
 * the last printed cell (deferred wrap); otherwise the previous cell
 * is one column to the left - confirm */
if (!term->grid->cursor.lcf)
col--;
/* Skip past spacers */
struct row *row = term->grid->cur_row;
while (row->cells[col].wc >= CELL_SPACER && col > 0)
col--;
xassert(col >= 0 && col < term->cols);
/* 'base' is the first codepoint of the previous cell, 'last' its
 * final codepoint; they differ only when the cell is already a chain */
char32_t base = row->cells[col].wc;
char32_t UNUSED last = base;
/* Is base cell already a cluster? */
const struct composed *composed =
(base >= CELL_COMB_CHARS_LO && base <= CELL_COMB_CHARS_HI)
? composed_lookup(term->composed, base - CELL_COMB_CHARS_LO)
: NULL;
/* Lookup key for the chain "previous cell + wc" */
uint32_t key;
if (composed != NULL) {
base = composed->chars[0];
last = composed->chars[composed->count - 1];
key = composed_key_from_key(composed->key, wc);
} else
key = composed_key_from_key(base, wc);
#if defined(FOOT_GRAPHEME_CLUSTERING)
if (grapheme_clustering) {
/* Check if we're on a grapheme cluster break */
if (utf8proc_grapheme_break_stateful(
last, wc, &term->vt.grapheme_state))
{
/* Break: 'wc' starts a new cluster, print it as-is */
term_reset_grapheme_state(term);
goto out;
}
}
#endif
/* Only combine onto a base character that itself occupies cells */
int base_width = c32width(base);
if (base_width > 0) {
/* Move the cursor back onto the base cell, so that the print at
 * 'out' lands in that column */
term->grid->cursor.point.col = col;
term->grid->cursor.lcf = false;
insert_mode_disable = true;
if (composed == NULL) {
/* Plain base + one combining character: try a pre-composed
 * codepoint first. Skipped when there is no primary font. */
bool base_from_primary;
bool comb_from_primary;
bool pre_from_primary;
char32_t precomposed = term->fonts[0] != NULL
? fcft_precompose(
term->fonts[0], base, wc, &base_from_primary,
&comb_from_primary, &pre_from_primary)
: (char32_t)-1;
int precomposed_width = c32width(precomposed);
/*
* Only use the pre-composed character if:
*
* 1. we *have* a pre-composed character
* 2. the width matches the base characters width
* 3. it's in the primary font, OR one of the base or
* combining characters are *not* from the primary
* font
*/
if (precomposed != (char32_t)-1 &&
precomposed_width == base_width &&
(pre_from_primary ||
!base_from_primary ||
!comb_from_primary))
{
wc = precomposed;
width = precomposed_width;
term_reset_grapheme_state(term);
goto out;
}
}
/* Length of the new chain: the existing chain (or the lone base)
 * plus 'wc' */
size_t wanted_count = composed != NULL ? composed->count + 1 : 2;
if (wanted_count > 255) {
xassert(composed != NULL);
#if defined(LOG_ENABLE_DBG) && LOG_ENABLE_DBG
LOG_WARN("combining character overflow:");
LOG_WARN(" base: 0x%04x", composed->chars[0]);
for (size_t i = 1; i < composed->count; i++)
LOG_WARN(" cc: 0x%04x", composed->chars[i]);
LOG_ERR(" new: 0x%04x", wc);
#endif
/* This is going to break anyway... */
/* Keep the chain length unchanged; 'wc' then takes the last slot
 * in place of the existing final combining character (see the
 * memcpy() length below) */
wanted_count--;
}
xassert(wanted_count <= 255);
/* Check if we already have a match for the entire compose chain */
const struct composed *cc =
composed_lookup_without_collision(
term->composed, &key,
composed != NULL ? composed->chars : &(char32_t){base},
composed != NULL ? composed->count : 1,
wc, 0);
if (cc != NULL) {
/* We *do* have a match! */
wc = CELL_COMB_CHARS_LO + cc->key;
width = cc->width;
goto out;
} else {
/* No match - allocate a new chain below */
}
if (unlikely(term->composed_count >=
(CELL_COMB_CHARS_HI - CELL_COMB_CHARS_LO)))
{
/* We reached our maximum number of allowed composed
* character chains. Fall through here and print the
* current zero-width character to the current cell */
LOG_WARN("maximum number of composed characters reached");
term_reset_grapheme_state(term);
goto out;
}
/* Allocate new chain */
struct composed *new_cc = xmalloc(sizeof(*new_cc));
new_cc->chars = xmalloc(wanted_count * sizeof(new_cc->chars[0]));
new_cc->key = key;
new_cc->count = wanted_count;
new_cc->chars[0] = base;
new_cc->chars[wanted_count - 1] = wc;
new_cc->forced_width = 0;
if (composed != NULL) {
/* Copy the existing combining characters that sit between the
 * base and the newly appended 'wc' */
memcpy(&new_cc->chars[1], &composed->chars[1],
(wanted_count - 2) * sizeof(new_cc->chars[0]));
}
/* Width of the cluster before 'wc' was appended */
const int grapheme_width =
composed != NULL ? composed->width : base_width;
switch (term->conf->tweak.grapheme_width_method) {
case GRAPHEME_WIDTH_MAX:
/* Widest of the existing cluster and the new character */
new_cc->width = max(grapheme_width, width);
break;
case GRAPHEME_WIDTH_DOUBLE:
/* Sum of the widths, capped at two cells */
new_cc->width = min(grapheme_width + width, 2);
#if defined(FOOT_GRAPHEME_CLUSTERING)
/* Handle VS-15 and VS-16 variation selectors */
if (unlikely(grapheme_clustering &&
(wc == 0xfe0e || wc == 0xfe0f) &&
new_cc->count == 2))
{
/* Look up the base codepoint in the (sorted) emoji_vs range table */
const struct emoji_vs *vs =
bsearch(
&(struct emoji_vs){.start = new_cc->chars[0]},
emoji_vs, sizeof(emoji_vs) / sizeof(emoji_vs[0]),
sizeof(struct emoji_vs),
&emoji_vs_compare);
if (vs != NULL) {
xassert(new_cc->chars[0] >= vs->start &&
new_cc->chars[0] <= vs->end);
/* Force a grapheme width of 1 for VS-15, and 2 for VS-16 */
if (wc == 0xfe0e) {
if (vs->vs15)
new_cc->width = 1;
} else if (wc == 0xfe0f) {
if (vs->vs16)
new_cc->width = 2;
}
}
}
#endif
break;
case GRAPHEME_WIDTH_WCSWIDTH:
/* Plain sum of the widths, uncapped */
new_cc->width = grapheme_width + width;
break;
}
/* Register the new chain, and print the cell value that refers to it */
term->composed_count++;
composed_insert(&term->composed, new_cc);
wc = CELL_COMB_CHARS_LO + new_cc->key;
width = new_cc->width;
xassert(wc >= CELL_COMB_CHARS_LO);
xassert(wc <= CELL_COMB_CHARS_HI);
goto out;
}
} else
term_reset_grapheme_state(term);
out:
/* Characters that are still zero-width at this point are not printed */
if (width > 0)
term_print(term, wc, width, insert_mode_disable);
/* NOTE(review): per the commit message, this call is meant to be the
 * function's entire body after the refactor; together with the
 * term_print() above it would process 'wc' twice - confirm */
term_process_and_print_non_ascii(term, wc);
}
static void