unicode-combining: store seen combining chains "globally" in the term struct

Instead of storing combining data per cell, take advantage of the fact
that most combinations recur and that there is lots of unused space
left in the Unicode range: store the base+combining chains seen so far
in a per-terminal array.

When we encounter a combining character, we first try to pre-compose,
like before. If that fails, we then search for the current
base+combining combo in the list of previously seen combinations. If
not found there either, we allocate a new combo and add it to the
list. Regardless, the result is an index into this array. We store
this index, offset by COMB_CHARS_LO=0x40000000ul, in the cell.

When rendering, we need to check if the cell character is a plain
character, or if it's a composed character (identified by checking if
the cell character is >= COMB_CHARS_LO).

Then we render the grapheme pretty much like before.
This commit is contained in:
Daniel Eklöf 2020-05-03 11:03:22 +02:00
parent ae7383189a
commit 62e0774319
No known key found for this signature in database
GPG key ID: 5BBD4992C116573F
8 changed files with 97 additions and 92 deletions

23
grid.c
View file

@ -34,17 +34,10 @@ grid_row_alloc(int cols, bool initialize)
if (initialize) { if (initialize) {
row->cells = calloc(cols, sizeof(row->cells[0])); row->cells = calloc(cols, sizeof(row->cells[0]));
#if FOOT_UNICODE_MAX_COMBINING_CHARS > 0
row->comb_chars = calloc(cols, sizeof(row->comb_chars[0]));
#endif
for (size_t c = 0; c < cols; c++) for (size_t c = 0; c < cols; c++)
row->cells[c].attrs.clean = 1; row->cells[c].attrs.clean = 1;
} else { } else
row->cells = malloc(cols * sizeof(row->cells[0])); row->cells = malloc(cols * sizeof(row->cells[0]));
#if FOOT_UNICODE_MAX_COMBINING_CHARS > 0
row->comb_chars = malloc(cols * sizeof(row->comb_chars[0]));
#endif
}
return row; return row;
} }
@ -55,9 +48,6 @@ grid_row_free(struct row *row)
if (row == NULL) if (row == NULL)
return; return;
#if FOOT_UNICODE_MAX_COMBINING_CHARS > 0
free(row->comb_chars);
#endif
free(row->cells); free(row->cells);
free(row); free(row);
} }
@ -214,17 +204,6 @@ grid_reflow(struct grid *grid, int new_rows, int new_cols,
new_row->cells[new_col_idx] = *old_cell; new_row->cells[new_col_idx] = *old_cell;
new_row->cells[new_col_idx].attrs.clean = 1; new_row->cells[new_col_idx].attrs.clean = 1;
#if FOOT_UNICODE_MAX_COMBINING_CHARS > 0
struct combining_chars *old_comb_chars
= &old_row->comb_chars[c - empty_count + i];
struct combining_chars *new_comb_chars
= &new_row->comb_chars[new_col_idx];
new_comb_chars->count = old_comb_chars->count;
for (size_t j = 0; j < ALEN(new_comb_chars->chars); j++)
new_comb_chars->chars[j] = old_comb_chars->chars[j];
#endif
/* Translate tracking point(s) */ /* Translate tracking point(s) */
if (is_tracking_point && i >= empty_count) { if (is_tracking_point && i >= empty_count) {
tll_foreach(tracking_points, it) { tll_foreach(tracking_points, it) {

View file

@ -57,11 +57,8 @@ wayland_client = dependency('wayland-client')
wayland_cursor = dependency('wayland-cursor') wayland_cursor = dependency('wayland-cursor')
xkb = dependency('xkbcommon') xkb = dependency('xkbcommon')
add_project_arguments('-DFOOT_UNICODE_MAX_COMBINING_CHARS=@0@'.format(
get_option('unicode-max-combining-chars')), language: 'c')
add_project_arguments('-DFOOT_UNICODE_PRECOMPOSE=@0@'.format( add_project_arguments('-DFOOT_UNICODE_PRECOMPOSE=@0@'.format(
get_option('unicode-max-combining-chars') > 0 and get_option('unicode-precompose')), get_option('unicode-precompose')), language: 'c')
language: 'c')
tllist = dependency('tllist', version: '>=1.0.1', fallback: 'tllist') tllist = dependency('tllist', version: '>=1.0.1', fallback: 'tllist')
fcft = dependency('fcft', version: ['>=2.0.0', '<2.1.0'], fallback: 'fcft') fcft = dependency('fcft', version: ['>=2.0.0', '<2.1.0'], fallback: 'fcft')
@ -95,7 +92,7 @@ foreach prot : [
command: [wscanner_prog, 'private-code', '@INPUT@', '@OUTPUT@']) command: [wscanner_prog, 'private-code', '@INPUT@', '@OUTPUT@'])
endforeach endforeach
if get_option('unicode-max-combining-chars') > 0 and get_option('unicode-precompose') if get_option('unicode-precompose')
generate_unicode_precompose_sh = files('scripts/generate-unicode-precompose.sh') generate_unicode_precompose_sh = files('scripts/generate-unicode-precompose.sh')
unicode_data = custom_target( unicode_data = custom_target(
'unicode-data', 'unicode-data',
@ -167,8 +164,7 @@ subdir('doc')
summary( summary(
{ {
'Unicode max combining chars': get_option('unicode-max-combining-chars'), 'Unicode precompose': get_option('unicode-precompose'),
'Unicode precompose': get_option('unicode-max-combining-chars') > 0 and get_option('unicode-precompose'),
}, },
bool_yn: true bool_yn: true
) )

View file

@ -1,5 +1,2 @@
option('unicode-max-combining-chars', type: 'integer', value: 2,
description: 'Maximum number of combining characters to track per cell. A value of 0 completely disables unicode combining (this reduces the runtime memory footprint)')
option('unicode-precompose', type: 'boolean', value: true, option('unicode-precompose', type: 'boolean', value: true,
description: 'Convert decomposed characters to precomposed. Ignored if "unicode-combining" has been disabled') description: 'Convert decomposed characters to precomposed. Ignored if "unicode-combining" has been disabled')

View file

@ -402,9 +402,20 @@ render_cell(struct terminal *term, pixman_image_t *pix,
struct fcft_font *font = attrs_to_font(term, &cell->attrs); struct fcft_font *font = attrs_to_font(term, &cell->attrs);
const struct fcft_glyph *glyph = NULL; const struct fcft_glyph *glyph = NULL;
const struct composed *composed = NULL;
if (cell->wc != 0) if (cell->wc != 0) {
glyph = fcft_glyph_rasterize(font, cell->wc, term->font_subpixel); wchar_t base = cell->wc;
if (base >= COMB_CHARS_LO &&
base < (COMB_CHARS_LO + term->composed_count))
{
composed = &term->composed[base - COMB_CHARS_LO];
base = composed->base;
}
glyph = fcft_glyph_rasterize(font, base, term->font_subpixel);
}
int cell_cols = glyph != NULL ? max(1, glyph->cols) : 1; int cell_cols = glyph != NULL ? max(1, glyph->cols) : 1;
@ -442,25 +453,25 @@ render_cell(struct terminal *term, pixman_image_t *pix,
} }
} }
#if FOOT_UNICODE_MAX_COMBINING_CHARS > 0
/* Combining characters */ /* Combining characters */
const struct combining_chars *comb_chars = &row->comb_chars[col]; if (composed != NULL) {
for (size_t i = 0; i < comb_chars->count; i++) { for (size_t i = 0; i < composed->count; i++) {
const struct fcft_glyph *g = fcft_glyph_rasterize( const struct fcft_glyph *g = fcft_glyph_rasterize(
font, comb_chars->chars[i], term->font_subpixel); font, composed->combining[i], term->font_subpixel);
if (g == NULL) if (g == NULL)
continue; continue;
pixman_image_composite32( pixman_image_composite32(
PIXMAN_OP_OVER, clr_pix, g->pix, pix, 0, 0, 0, 0, PIXMAN_OP_OVER, clr_pix, g->pix, pix, 0, 0, 0, 0,
/* Some fonts use a negative offset, while others use a /* Some fonts use a negative offset, while others use a
* "normal" offset */ * "normal" offset */
x + (g->x < 0 ? term->cell_width : 0) + g->x, x + (g->x < 0 ? term->cell_width : 0) + g->x,
y + font_baseline(term) - g->y, y + font_baseline(term) - g->y,
g->width, g->height); g->width, g->height);
}
} }
#endif
pixman_image_unref(clr_pix); pixman_image_unref(clr_pix);
/* Underline */ /* Underline */

View file

@ -142,12 +142,7 @@ min_bufsize_for_extraction(const struct terminal *term)
{ {
const struct coord *start = &term->selection.start; const struct coord *start = &term->selection.start;
const struct coord *end = &term->selection.end; const struct coord *end = &term->selection.end;
const size_t chars_per_cell = const size_t chars_per_cell = 1 + ALEN(term->composed[0].combining);
#if FOOT_UNICODE_MAX_COMBINING_CHARS > 0
1 + ALEN(term->grid->cur_row->comb_chars[0].chars);
#else
1;
#endif
switch (term->selection.kind) { switch (term->selection.kind) {
case SELECTION_NONE: case SELECTION_NONE:
@ -239,16 +234,17 @@ extract_one(struct terminal *term, struct row *row, struct cell *cell,
ctx->empty_count = 0; ctx->empty_count = 0;
assert(ctx->idx + 1 <= ctx->size); assert(ctx->idx + 1 <= ctx->size);
ctx->buf[ctx->idx++] = cell->wc;
#if FOOT_UNICODE_MAX_COMBINING_CHARS > 0 if (cell->wc >= COMB_CHARS_LO && cell->wc < (COMB_CHARS_LO + term->composed_count)) {
const struct combining_chars *comb_chars = &row->comb_chars[col]; const struct composed *composed = &term->composed[cell->wc - COMB_CHARS_LO];
assert(cell->wc != 0); ctx->buf[ctx->idx++] = composed->base;
assert(ctx->idx + comb_chars->count <= ctx->size);
for (size_t i = 0; i < comb_chars->count; i++) assert(ctx->idx + composed->count <= ctx->size);
ctx->buf[ctx->idx++] = comb_chars->chars[i]; for (size_t i = 0; i < composed->count; i++)
#endif ctx->buf[ctx->idx++] = composed->combining[i];
} else
ctx->buf[ctx->idx++] = cell->wc;
ctx->last_row = row; ctx->last_row = row;
ctx->last_cell = cell; ctx->last_cell = cell;

View file

@ -822,6 +822,8 @@ term_init(const struct config *conf, struct fdm *fdm, struct wayland *wayl,
.normal = {.damage = tll_init(), .scroll_damage = tll_init(), .sixel_images = tll_init()}, .normal = {.damage = tll_init(), .scroll_damage = tll_init(), .sixel_images = tll_init()},
.alt = {.damage = tll_init(), .scroll_damage = tll_init(), .sixel_images = tll_init()}, .alt = {.damage = tll_init(), .scroll_damage = tll_init(), .sixel_images = tll_init()},
.grid = &term->normal, .grid = &term->normal,
.composed_count = 0,
.composed = NULL,
.meta = { .meta = {
.esc_prefix = true, .esc_prefix = true,
.eight_bit = true, .eight_bit = true,
@ -1086,6 +1088,8 @@ term_destroy(struct terminal *term)
tll_free(term->normal.scroll_damage); tll_free(term->normal.scroll_damage);
tll_free(term->alt.scroll_damage); tll_free(term->alt.scroll_damage);
free(term->composed);
free(term->window_title); free(term->window_title);
tll_free_and_free(term->window_title_stack, free); tll_free_and_free(term->window_title_stack, free);
@ -2295,10 +2299,6 @@ term_print(struct terminal *term, wchar_t wc, int width)
cell->wc = term->vt.last_printed = wc; cell->wc = term->vt.last_printed = wc;
cell->attrs = term->vt.attrs; cell->attrs = term->vt.attrs;
#if FOOT_UNICODE_MAX_COMBINING_CHARS > 0
row->comb_chars[term->grid->cursor.point.col].count = 0;
#endif
row->dirty = true; row->dirty = true;
cell->attrs.clean = 0; cell->attrs.clean = 0;

View file

@ -77,21 +77,16 @@ struct damage {
int lines; int lines;
}; };
#if FOOT_UNICODE_MAX_COMBINING_CHARS > 0 struct composed {
struct combining_chars { wchar_t base;
wchar_t combining[5];
uint8_t count; uint8_t count;
wchar_t chars[FOOT_UNICODE_MAX_COMBINING_CHARS]; };
} __attribute__((packed));
#endif
struct row { struct row {
struct cell *cells; struct cell *cells;
bool dirty; bool dirty;
bool linebreak; bool linebreak;
#if FOOT_UNICODE_MAX_COMBINING_CHARS > 0
struct combining_chars *comb_chars;
#endif
}; };
struct sixel { struct sixel {
@ -221,6 +216,10 @@ struct terminal {
struct grid alt; struct grid alt;
struct grid *grid; struct grid *grid;
#define COMB_CHARS_LO 0x40000000ul
size_t composed_count;
struct composed *composed;
struct fcft_font *fonts[4]; struct fcft_font *fonts[4];
int font_dpi; int font_dpi;
int font_adjustments; int font_adjustments;

59
vt.c
View file

@ -571,8 +571,6 @@ action_utf8_print(struct terminal *term, uint8_t c)
int width = wcwidth(wc); int width = wcwidth(wc);
#if FOOT_UNICODE_MAX_COMBINING_CHARS > 0
/* /*
* Is this a combining character? The basic assumption is that if * Is this a combining character? The basic assumption is that if
* wcwidth() returns 0, then it *is* a combining character. * wcwidth() returns 0, then it *is* a combining character.
@ -606,6 +604,15 @@ action_utf8_print(struct terminal *term, uint8_t c)
base = row->cells[base_col].wc; base = row->cells[base_col].wc;
} }
const struct composed *composed =
(base >= COMB_CHARS_LO &&
base < (COMB_CHARS_LO + term->composed_count))
? &term->composed[base - COMB_CHARS_LO]
: NULL;
if (composed != NULL)
base = composed->base;
int base_width = wcwidth(base); int base_width = wcwidth(base);
if (base != 0 && base_width > 0) { if (base != 0 && base_width > 0) {
@ -619,35 +626,55 @@ action_utf8_print(struct terminal *term, uint8_t c)
* pre-composed character, as that is likely to produce a * pre-composed character, as that is likely to produce a
* better looking result. * better looking result.
*/ */
term->grid->cursor.point.col = base_col;
struct combining_chars *comb_chars = &row->comb_chars[base_col]; term->grid->cursor.lcf = false;
#if FOOT_UNICODE_PRECOMPOSE #if FOOT_UNICODE_PRECOMPOSE
if (comb_chars->count == 0) { if (composed == NULL) {
wchar_t precomposed = precompose(base, wc); wchar_t precomposed = precompose(base, wc);
int precomposed_width = wcwidth(precomposed); int precomposed_width = wcwidth(precomposed);
if (precomposed != (wchar_t)-1 && precomposed_width == base_width) { if (precomposed != (wchar_t)-1 && precomposed_width == base_width) {
term->grid->cursor.point.col = base_col;
term->grid->cursor.lcf = false;
term_print(term, precomposed, precomposed_width); term_print(term, precomposed, precomposed_width);
return; return;
} }
} }
#endif #endif
if (comb_chars->count < ALEN(comb_chars->chars)) size_t wanted_count = composed != NULL ? composed->count + 1 : 1;
comb_chars->chars[comb_chars->count++] = wc;
else { /* Look for existing combining chain */
LOG_WARN("combining character overflow:"); for (size_t i = 0; i < term->composed_count; i++) {
LOG_WARN(" 0x%04x", base); const struct composed *cc = &term->composed[i];
for (size_t i = 0; i < comb_chars->count; i++) if (cc->base != base)
LOG_WARN(" 0x%04x", comb_chars->chars[i]); continue;
LOG_ERR(" 0x%04x", wc);
if (cc->count != wanted_count)
continue;
if (cc->combining[wanted_count - 1] != wc)
continue;
term_print(term, COMB_CHARS_LO + i, base_width);
return;
} }
/* Allocate new chain */
struct composed new_cc;
new_cc.base = base;
new_cc.count = wanted_count;
for (size_t i = 0; i < wanted_count - 1; i++)
new_cc.combining[i] = composed->combining[i];
new_cc.combining[wanted_count - 1] = wc;
term->composed_count++;
term->composed = realloc(term->composed, term->composed_count * sizeof(term->composed[0]));
term->composed[term->composed_count - 1] = new_cc;
term_print(term, COMB_CHARS_LO + term->composed_count - 1, base_width);
return; return;
} }
} }
#endif /* FOOT_UNICODE_MAX_COMBINING_CHARS > 0 */
term_print(term, wc, width); term_print(term, wc, width);
} }