mirror of
https://codeberg.org/dnkl/foot.git
synced 2026-02-04 04:06:06 -05:00
term+vt: refactor: move "utf8" char processing to term_process_and_print_non_ascii()
This function "prints" any non-ASCII character (i.e. any character that ends up in the action_utf8_print() function in vt.c) to the grid. This includes grapheme cluster processing etc. action_utf8_print() now simply calls this function. This allows us to reuse the same functionality from other places (like the text-sizing protocol).
This commit is contained in:
parent
7a8d2b5e01
commit
d3f692990e
4 changed files with 261 additions and 251 deletions
5
osc.c
5
osc.c
|
|
@ -1207,7 +1207,10 @@ kitty_text_size(struct terminal *term, char *string)
|
|||
free(wchars);
|
||||
}
|
||||
|
||||
term_print(term, CELL_COMB_CHARS_LO + composed->key, composed->forced_width > 0 ? composed->forced_width : composed->width);
|
||||
term_print(
|
||||
term, CELL_COMB_CHARS_LO + composed->key,
|
||||
composed->forced_width > 0 ? composed->forced_width : composed->width,
|
||||
false);
|
||||
}
|
||||
|
||||
void
|
||||
|
|
|
|||
255
terminal.c
255
terminal.c
|
|
@ -27,6 +27,7 @@
|
|||
#include "commands.h"
|
||||
#include "config.h"
|
||||
#include "debug.h"
|
||||
#include "emoji-variation-sequences.h"
|
||||
#include "extract.h"
|
||||
#include "grid.h"
|
||||
#include "ime.h"
|
||||
|
|
@ -4073,6 +4074,260 @@ term_single_shift(struct terminal *term, enum charset_designator idx)
|
|||
term->ascii_printer = &ascii_printer_single_shift;
|
||||
}
|
||||
|
||||
#if defined(FOOT_GRAPHEME_CLUSTERING)
|
||||
static int
|
||||
emoji_vs_compare(const void *_key, const void *_entry)
|
||||
{
|
||||
const struct emoji_vs *key = _key;
|
||||
const struct emoji_vs *entry = _entry;
|
||||
|
||||
uint32_t cp = key->start;
|
||||
|
||||
if (cp < entry->start)
|
||||
return -1;
|
||||
else if (cp > entry->end)
|
||||
return 1;
|
||||
else
|
||||
return 0;
|
||||
}
|
||||
|
||||
UNITTEST
|
||||
{
|
||||
/* Verify the emoji_vs list is sorted */
|
||||
int64_t last_end = -1;
|
||||
|
||||
for (size_t i = 0; i < sizeof(emoji_vs) / sizeof(emoji_vs[0]); i++) {
|
||||
const struct emoji_vs *vs = &emoji_vs[i];
|
||||
xassert(vs->start <= vs->end);
|
||||
xassert(vs->start > last_end);
|
||||
xassert(vs->vs15 || vs->vs16);
|
||||
last_end = vs->end;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
void
|
||||
term_process_and_print_non_ascii(struct terminal *term, char32_t wc)
|
||||
{
|
||||
int width = c32width(wc);
|
||||
bool insert_mode_disable = false;
|
||||
const bool grapheme_clustering = term->grapheme_shaping;
|
||||
|
||||
#if !defined(FOOT_GRAPHEME_CLUSTERING)
|
||||
xassert(!grapheme_clustering);
|
||||
#endif
|
||||
|
||||
if (term->grid->cursor.point.col > 0 &&
|
||||
(grapheme_clustering ||
|
||||
(!grapheme_clustering && width == 0 && wc >= 0x300)))
|
||||
{
|
||||
int col = term->grid->cursor.point.col;
|
||||
if (!term->grid->cursor.lcf)
|
||||
col--;
|
||||
|
||||
/* Skip past spacers */
|
||||
struct row *row = term->grid->cur_row;
|
||||
while (row->cells[col].wc >= CELL_SPACER && col > 0)
|
||||
col--;
|
||||
|
||||
xassert(col >= 0 && col < term->cols);
|
||||
char32_t base = row->cells[col].wc;
|
||||
char32_t UNUSED last = base;
|
||||
|
||||
/* Is base cell already a cluster? */
|
||||
const struct composed *composed =
|
||||
(base >= CELL_COMB_CHARS_LO && base <= CELL_COMB_CHARS_HI)
|
||||
? composed_lookup(term->composed, base - CELL_COMB_CHARS_LO)
|
||||
: NULL;
|
||||
|
||||
uint32_t key;
|
||||
|
||||
if (composed != NULL) {
|
||||
base = composed->chars[0];
|
||||
last = composed->chars[composed->count - 1];
|
||||
key = composed_key_from_key(composed->key, wc);
|
||||
} else
|
||||
key = composed_key_from_key(base, wc);
|
||||
|
||||
#if defined(FOOT_GRAPHEME_CLUSTERING)
|
||||
if (grapheme_clustering) {
|
||||
/* Check if we're on a grapheme cluster break */
|
||||
if (utf8proc_grapheme_break_stateful(
|
||||
last, wc, &term->vt.grapheme_state))
|
||||
{
|
||||
term_reset_grapheme_state(term);
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
int base_width = c32width(base);
|
||||
if (base_width > 0) {
|
||||
term->grid->cursor.point.col = col;
|
||||
term->grid->cursor.lcf = false;
|
||||
insert_mode_disable = true;
|
||||
|
||||
if (composed == NULL) {
|
||||
bool base_from_primary;
|
||||
bool comb_from_primary;
|
||||
bool pre_from_primary;
|
||||
|
||||
char32_t precomposed = term->fonts[0] != NULL
|
||||
? fcft_precompose(
|
||||
term->fonts[0], base, wc, &base_from_primary,
|
||||
&comb_from_primary, &pre_from_primary)
|
||||
: (char32_t)-1;
|
||||
|
||||
int precomposed_width = c32width(precomposed);
|
||||
|
||||
/*
|
||||
* Only use the pre-composed character if:
|
||||
*
|
||||
* 1. we *have* a pre-composed character
|
||||
* 2. the width matches the base characters width
|
||||
* 3. it's in the primary font, OR one of the base or
|
||||
* combining characters are *not* from the primary
|
||||
* font
|
||||
*/
|
||||
|
||||
if (precomposed != (char32_t)-1 &&
|
||||
precomposed_width == base_width &&
|
||||
(pre_from_primary ||
|
||||
!base_from_primary ||
|
||||
!comb_from_primary))
|
||||
{
|
||||
wc = precomposed;
|
||||
width = precomposed_width;
|
||||
term_reset_grapheme_state(term);
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
||||
size_t wanted_count = composed != NULL ? composed->count + 1 : 2;
|
||||
if (wanted_count > 255) {
|
||||
xassert(composed != NULL);
|
||||
|
||||
#if defined(LOG_ENABLE_DBG) && LOG_ENABLE_DBG
|
||||
LOG_WARN("combining character overflow:");
|
||||
LOG_WARN(" base: 0x%04x", composed->chars[0]);
|
||||
for (size_t i = 1; i < composed->count; i++)
|
||||
LOG_WARN(" cc: 0x%04x", composed->chars[i]);
|
||||
LOG_ERR(" new: 0x%04x", wc);
|
||||
#endif
|
||||
/* This is going to break anyway... */
|
||||
wanted_count--;
|
||||
}
|
||||
|
||||
xassert(wanted_count <= 255);
|
||||
|
||||
/* Check if we already have a match for the entire compose chain */
|
||||
const struct composed *cc =
|
||||
composed_lookup_without_collision(
|
||||
term->composed, &key,
|
||||
composed != NULL ? composed->chars : &(char32_t){base},
|
||||
composed != NULL ? composed->count : 1,
|
||||
wc, 0);
|
||||
|
||||
if (cc != NULL) {
|
||||
/* We *do* have a match! */
|
||||
wc = CELL_COMB_CHARS_LO + cc->key;
|
||||
width = cc->width;
|
||||
goto out;
|
||||
} else {
|
||||
/* No match - allocate a new chain below */
|
||||
}
|
||||
|
||||
if (unlikely(term->composed_count >=
|
||||
(CELL_COMB_CHARS_HI - CELL_COMB_CHARS_LO)))
|
||||
{
|
||||
/* We reached our maximum number of allowed composed
|
||||
* character chains. Fall through here and print the
|
||||
* current zero-width character to the current cell */
|
||||
LOG_WARN("maximum number of composed characters reached");
|
||||
term_reset_grapheme_state(term);
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* Allocate new chain */
|
||||
struct composed *new_cc = xmalloc(sizeof(*new_cc));
|
||||
new_cc->chars = xmalloc(wanted_count * sizeof(new_cc->chars[0]));
|
||||
new_cc->key = key;
|
||||
new_cc->count = wanted_count;
|
||||
new_cc->chars[0] = base;
|
||||
new_cc->chars[wanted_count - 1] = wc;
|
||||
new_cc->forced_width = 0;
|
||||
|
||||
if (composed != NULL) {
|
||||
memcpy(&new_cc->chars[1], &composed->chars[1],
|
||||
(wanted_count - 2) * sizeof(new_cc->chars[0]));
|
||||
}
|
||||
|
||||
const int grapheme_width =
|
||||
composed != NULL ? composed->width : base_width;
|
||||
|
||||
switch (term->conf->tweak.grapheme_width_method) {
|
||||
case GRAPHEME_WIDTH_MAX:
|
||||
new_cc->width = max(grapheme_width, width);
|
||||
break;
|
||||
|
||||
case GRAPHEME_WIDTH_DOUBLE:
|
||||
new_cc->width = min(grapheme_width + width, 2);
|
||||
|
||||
#if defined(FOOT_GRAPHEME_CLUSTERING)
|
||||
/* Handle VS-15 and VS-16 variation selectors */
|
||||
if (unlikely(grapheme_clustering &&
|
||||
(wc == 0xfe0e || wc == 0xfe0f) &&
|
||||
new_cc->count == 2))
|
||||
{
|
||||
const struct emoji_vs *vs =
|
||||
bsearch(
|
||||
&(struct emoji_vs){.start = new_cc->chars[0]},
|
||||
emoji_vs, sizeof(emoji_vs) / sizeof(emoji_vs[0]),
|
||||
sizeof(struct emoji_vs),
|
||||
&emoji_vs_compare);
|
||||
|
||||
if (vs != NULL) {
|
||||
xassert(new_cc->chars[0] >= vs->start &&
|
||||
new_cc->chars[0] <= vs->end);
|
||||
|
||||
/* Force a grapheme width of 1 for VS-15, and 2 for VS-16 */
|
||||
if (wc == 0xfe0e) {
|
||||
if (vs->vs15)
|
||||
new_cc->width = 1;
|
||||
} else if (wc == 0xfe0f) {
|
||||
if (vs->vs16)
|
||||
new_cc->width = 2;
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
break;
|
||||
|
||||
case GRAPHEME_WIDTH_WCSWIDTH:
|
||||
new_cc->width = grapheme_width + width;
|
||||
break;
|
||||
}
|
||||
|
||||
term->composed_count++;
|
||||
composed_insert(&term->composed, new_cc);
|
||||
|
||||
wc = CELL_COMB_CHARS_LO + new_cc->key;
|
||||
width = new_cc->width;
|
||||
|
||||
xassert(wc >= CELL_COMB_CHARS_LO);
|
||||
xassert(wc <= CELL_COMB_CHARS_HI);
|
||||
goto out;
|
||||
}
|
||||
} else
|
||||
term_reset_grapheme_state(term);
|
||||
|
||||
|
||||
out:
|
||||
if (width > 0)
|
||||
term_print(term, wc, width, insert_mode_disable);
|
||||
}
|
||||
|
||||
enum term_surface
|
||||
term_surface_kind(const struct terminal *term, const struct wl_surface *surface)
|
||||
{
|
||||
|
|
|
|||
|
|
@ -894,6 +894,7 @@ void term_cursor_up(struct terminal *term, int count);
|
|||
void term_cursor_down(struct terminal *term, int count);
|
||||
void term_cursor_blink_update(struct terminal *term);
|
||||
|
||||
void term_process_and_print_non_ascii(struct terminal *term, char32_t wc);
|
||||
void term_print(struct terminal *term, char32_t wc, int width,
|
||||
bool insert_mode_disable);
|
||||
void term_fill(struct terminal *term, int row, int col, uint8_t c, size_t count,
|
||||
|
|
|
|||
251
vt.c
251
vt.c
|
|
@ -16,7 +16,6 @@
|
|||
#include "csi.h"
|
||||
#include "dcs.h"
|
||||
#include "debug.h"
|
||||
#include "emoji-variation-sequences.h"
|
||||
#include "osc.h"
|
||||
#include "sixel.h"
|
||||
#include "util.h"
|
||||
|
|
@ -647,258 +646,10 @@ action_put(struct terminal *term, uint8_t c)
|
|||
dcs_put(term, c);
|
||||
}
|
||||
|
||||
#if defined(FOOT_GRAPHEME_CLUSTERING)
|
||||
static int
|
||||
emoji_vs_compare(const void *_key, const void *_entry)
|
||||
{
|
||||
const struct emoji_vs *key = _key;
|
||||
const struct emoji_vs *entry = _entry;
|
||||
|
||||
uint32_t cp = key->start;
|
||||
|
||||
if (cp < entry->start)
|
||||
return -1;
|
||||
else if (cp > entry->end)
|
||||
return 1;
|
||||
else
|
||||
return 0;
|
||||
}
|
||||
|
||||
UNITTEST
|
||||
{
|
||||
/* Verify the emoji_vs list is sorted */
|
||||
int64_t last_end = -1;
|
||||
|
||||
for (size_t i = 0; i < sizeof(emoji_vs) / sizeof(emoji_vs[0]); i++) {
|
||||
const struct emoji_vs *vs = &emoji_vs[i];
|
||||
xassert(vs->start <= vs->end);
|
||||
xassert(vs->start > last_end);
|
||||
xassert(vs->vs15 || vs->vs16);
|
||||
last_end = vs->end;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
 * Prints a non-ASCII character.
 *
 * All processing (combining characters, grapheme clustering, width
 * calculation) and the actual printing to the grid is done by
 * term_process_and_print_non_ascii(). This function must not do any
 * of that itself, or the character would be processed and printed
 * twice.
 */
static void
action_utf8_print(struct terminal *term, char32_t wc)
{
    term_process_and_print_non_ascii(term, wc);
}
|
||||
|
||||
static void
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue