From 62e07743194c8051334ed9b23be90ce31ab80773 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Ekl=C3=B6f?= Date: Sun, 3 May 2020 11:03:22 +0200 Subject: [PATCH] unicode-combining: store seen combining chains "globally" in the term struct Instead of storing combining data per cell, realize that most combinations are recurring and that there's lots of available space left in the Unicode range, and store seen base+combining combination chains in a per-terminal array. When we encounter a combining character, we first try to pre-compose, like before. If that fails, we then search for the current base+combining combo in the list of previously seen combinations. If not found there either, we allocate a new combo and add it to the list. Regardless, the result is an index into this array. We store this index, offset by COMB_CHARS_LO=0x40000000ul, in the cell. When rendering, we need to check if the cell character is a plain character, or if it's a composed character (identified by checking if the cell character is >= COMB_CHARS_LO). Then we render the grapheme pretty much like before. 
--- grid.c | 23 +----------------- meson.build | 10 +++----- meson_options.txt | 3 --- render.c | 45 ++++++++++++++++++++++-------------- selection.c | 24 ++++++++----------- terminal.c | 8 +++---- terminal.h | 17 +++++++------- vt.c | 59 ++++++++++++++++++++++++++++++++++------------- 8 files changed, 97 insertions(+), 92 deletions(-) diff --git a/grid.c b/grid.c index 522df101..51d76191 100644 --- a/grid.c +++ b/grid.c @@ -34,17 +34,10 @@ grid_row_alloc(int cols, bool initialize) if (initialize) { row->cells = calloc(cols, sizeof(row->cells[0])); -#if FOOT_UNICODE_MAX_COMBINING_CHARS > 0 - row->comb_chars = calloc(cols, sizeof(row->comb_chars[0])); -#endif for (size_t c = 0; c < cols; c++) row->cells[c].attrs.clean = 1; - } else { + } else row->cells = malloc(cols * sizeof(row->cells[0])); -#if FOOT_UNICODE_MAX_COMBINING_CHARS > 0 - row->comb_chars = malloc(cols * sizeof(row->comb_chars[0])); -#endif - } return row; } @@ -55,9 +48,6 @@ grid_row_free(struct row *row) if (row == NULL) return; -#if FOOT_UNICODE_MAX_COMBINING_CHARS > 0 - free(row->comb_chars); -#endif free(row->cells); free(row); } @@ -214,17 +204,6 @@ grid_reflow(struct grid *grid, int new_rows, int new_cols, new_row->cells[new_col_idx] = *old_cell; new_row->cells[new_col_idx].attrs.clean = 1; -#if FOOT_UNICODE_MAX_COMBINING_CHARS > 0 - struct combining_chars *old_comb_chars - = &old_row->comb_chars[c - empty_count + i]; - struct combining_chars *new_comb_chars - = &new_row->comb_chars[new_col_idx]; - - new_comb_chars->count = old_comb_chars->count; - for (size_t j = 0; j < ALEN(new_comb_chars->chars); j++) - new_comb_chars->chars[j] = old_comb_chars->chars[j]; -#endif - /* Translate tracking point(s) */ if (is_tracking_point && i >= empty_count) { tll_foreach(tracking_points, it) { diff --git a/meson.build b/meson.build index eca6e0fd..f34a5e60 100644 --- a/meson.build +++ b/meson.build @@ -57,11 +57,8 @@ wayland_client = dependency('wayland-client') wayland_cursor = dependency('wayland-cursor') 
xkb = dependency('xkbcommon') -add_project_arguments('-DFOOT_UNICODE_MAX_COMBINING_CHARS=@0@'.format( - get_option('unicode-max-combining-chars')), language: 'c') add_project_arguments('-DFOOT_UNICODE_PRECOMPOSE=@0@'.format( - get_option('unicode-max-combining-chars') > 0 and get_option('unicode-precompose')), - language: 'c') + get_option('unicode-precompose')), language: 'c') tllist = dependency('tllist', version: '>=1.0.1', fallback: 'tllist') fcft = dependency('fcft', version: ['>=2.0.0', '<2.1.0'], fallback: 'fcft') @@ -95,7 +92,7 @@ foreach prot : [ command: [wscanner_prog, 'private-code', '@INPUT@', '@OUTPUT@']) endforeach -if get_option('unicode-max-combining-chars') > 0 and get_option('unicode-precompose') +if get_option('unicode-precompose') generate_unicode_precompose_sh = files('scripts/generate-unicode-precompose.sh') unicode_data = custom_target( 'unicode-data', @@ -167,8 +164,7 @@ subdir('doc') summary( { - 'Unicode max combining chars': get_option('unicode-max-combining-chars'), - 'Unicode precompose': get_option('unicode-max-combining-chars') > 0 and get_option('unicode-precompose'), + 'Unicode precompose': get_option('unicode-precompose'), }, bool_yn: true ) diff --git a/meson_options.txt b/meson_options.txt index 42a0663d..5052f66c 100644 --- a/meson_options.txt +++ b/meson_options.txt @@ -1,5 +1,2 @@ -option('unicode-max-combining-chars', type: 'integer', value: 2, - description: 'Maximum number of combining characters to track per cell. A value of 0 completely disables unicode combining (this reduces the runtime memory footprint)') - option('unicode-precompose', type: 'boolean', value: true, description: 'Convert decomposed characters to precomposed. 
Ignored if "unicode-combining" has been disabled') diff --git a/render.c b/render.c index 328f793b..2af605c8 100644 --- a/render.c +++ b/render.c @@ -402,9 +402,20 @@ render_cell(struct terminal *term, pixman_image_t *pix, struct fcft_font *font = attrs_to_font(term, &cell->attrs); const struct fcft_glyph *glyph = NULL; + const struct composed *composed = NULL; - if (cell->wc != 0) - glyph = fcft_glyph_rasterize(font, cell->wc, term->font_subpixel); + if (cell->wc != 0) { + wchar_t base = cell->wc; + + if (base >= COMB_CHARS_LO && + base < (COMB_CHARS_LO + term->composed_count)) + { + composed = &term->composed[base - COMB_CHARS_LO]; + base = composed->base; + } + + glyph = fcft_glyph_rasterize(font, base, term->font_subpixel); + } int cell_cols = glyph != NULL ? max(1, glyph->cols) : 1; @@ -442,25 +453,25 @@ render_cell(struct terminal *term, pixman_image_t *pix, } } -#if FOOT_UNICODE_MAX_COMBINING_CHARS > 0 /* Combining characters */ - const struct combining_chars *comb_chars = &row->comb_chars[col]; - for (size_t i = 0; i < comb_chars->count; i++) { - const struct fcft_glyph *g = fcft_glyph_rasterize( - font, comb_chars->chars[i], term->font_subpixel); + if (composed != NULL) { + for (size_t i = 0; i < composed->count; i++) { + const struct fcft_glyph *g = fcft_glyph_rasterize( + font, composed->combining[i], term->font_subpixel); - if (g == NULL) - continue; + if (g == NULL) + continue; - pixman_image_composite32( - PIXMAN_OP_OVER, clr_pix, g->pix, pix, 0, 0, 0, 0, - /* Some fonts use a negative offset, while others use a - * "normal" offset */ - x + (g->x < 0 ? term->cell_width : 0) + g->x, - y + font_baseline(term) - g->y, - g->width, g->height); + pixman_image_composite32( + PIXMAN_OP_OVER, clr_pix, g->pix, pix, 0, 0, 0, 0, + /* Some fonts use a negative offset, while others use a + * "normal" offset */ + x + (g->x < 0 ? 
term->cell_width : 0) + g->x, + y + font_baseline(term) - g->y, + g->width, g->height); + } } -#endif + pixman_image_unref(clr_pix); /* Underline */ diff --git a/selection.c b/selection.c index 2fa1c50a..af52e4b5 100644 --- a/selection.c +++ b/selection.c @@ -142,12 +142,7 @@ min_bufsize_for_extraction(const struct terminal *term) { const struct coord *start = &term->selection.start; const struct coord *end = &term->selection.end; - const size_t chars_per_cell = -#if FOOT_UNICODE_MAX_COMBINING_CHARS > 0 - 1 + ALEN(term->grid->cur_row->comb_chars[0].chars); -#else - 1; -#endif + const size_t chars_per_cell = 1 + ALEN(term->composed[0].combining); switch (term->selection.kind) { case SELECTION_NONE: @@ -239,16 +234,17 @@ extract_one(struct terminal *term, struct row *row, struct cell *cell, ctx->empty_count = 0; assert(ctx->idx + 1 <= ctx->size); - ctx->buf[ctx->idx++] = cell->wc; -#if FOOT_UNICODE_MAX_COMBINING_CHARS > 0 - const struct combining_chars *comb_chars = &row->comb_chars[col]; + if (cell->wc >= COMB_CHARS_LO && cell->wc < (COMB_CHARS_LO + term->composed_count)) { + const struct composed *composed = &term->composed[cell->wc - COMB_CHARS_LO]; - assert(cell->wc != 0); - assert(ctx->idx + comb_chars->count <= ctx->size); - for (size_t i = 0; i < comb_chars->count; i++) - ctx->buf[ctx->idx++] = comb_chars->chars[i]; -#endif + ctx->buf[ctx->idx++] = composed->base; + + assert(ctx->idx + composed->count <= ctx->size); + for (size_t i = 0; i < composed->count; i++) + ctx->buf[ctx->idx++] = composed->combining[i]; + } else + ctx->buf[ctx->idx++] = cell->wc; ctx->last_row = row; ctx->last_cell = cell; diff --git a/terminal.c b/terminal.c index 60dff8a6..b5cb6590 100644 --- a/terminal.c +++ b/terminal.c @@ -822,6 +822,8 @@ term_init(const struct config *conf, struct fdm *fdm, struct wayland *wayl, .normal = {.damage = tll_init(), .scroll_damage = tll_init(), .sixel_images = tll_init()}, .alt = {.damage = tll_init(), .scroll_damage = tll_init(), .sixel_images = 
tll_init()}, .grid = &term->normal, + .composed_count = 0, + .composed = NULL, .meta = { .esc_prefix = true, .eight_bit = true, @@ -1086,6 +1088,8 @@ term_destroy(struct terminal *term) tll_free(term->normal.scroll_damage); tll_free(term->alt.scroll_damage); + free(term->composed); + free(term->window_title); tll_free_and_free(term->window_title_stack, free); @@ -2295,10 +2299,6 @@ term_print(struct terminal *term, wchar_t wc, int width) cell->wc = term->vt.last_printed = wc; cell->attrs = term->vt.attrs; -#if FOOT_UNICODE_MAX_COMBINING_CHARS > 0 - row->comb_chars[term->grid->cursor.point.col].count = 0; -#endif - row->dirty = true; cell->attrs.clean = 0; diff --git a/terminal.h b/terminal.h index 2292e927..2767060b 100644 --- a/terminal.h +++ b/terminal.h @@ -77,21 +77,16 @@ struct damage { int lines; }; -#if FOOT_UNICODE_MAX_COMBINING_CHARS > 0 -struct combining_chars { +struct composed { + wchar_t base; + wchar_t combining[5]; uint8_t count; - wchar_t chars[FOOT_UNICODE_MAX_COMBINING_CHARS]; -} __attribute__((packed)); -#endif +}; struct row { struct cell *cells; bool dirty; bool linebreak; - -#if FOOT_UNICODE_MAX_COMBINING_CHARS > 0 - struct combining_chars *comb_chars; -#endif }; struct sixel { @@ -221,6 +216,10 @@ struct terminal { struct grid alt; struct grid *grid; + #define COMB_CHARS_LO 0x40000000ul + size_t composed_count; + struct composed *composed; + struct fcft_font *fonts[4]; int font_dpi; int font_adjustments; diff --git a/vt.c b/vt.c index 7678711e..74110eb6 100644 --- a/vt.c +++ b/vt.c @@ -571,8 +571,6 @@ action_utf8_print(struct terminal *term, uint8_t c) int width = wcwidth(wc); -#if FOOT_UNICODE_MAX_COMBINING_CHARS > 0 - /* * Is this is combining character? The basic assumption is that if * wcwdith() returns 0, then it *is* a combining character. 
@@ -606,6 +604,15 @@ action_utf8_print(struct terminal *term, uint8_t c) base = row->cells[base_col].wc; } + const struct composed *composed = + (base >= COMB_CHARS_LO && + base < (COMB_CHARS_LO + term->composed_count)) + ? &term->composed[base - COMB_CHARS_LO] + : NULL; + + if (composed != NULL) + base = composed->base; + int base_width = wcwidth(base); if (base != 0 && base_width > 0) { @@ -619,35 +626,55 @@ action_utf8_print(struct terminal *term, uint8_t c) * pre-composed character, as that is likely to produce a * better looking result. */ - - struct combining_chars *comb_chars = &row->comb_chars[base_col]; + term->grid->cursor.point.col = base_col; + term->grid->cursor.lcf = false; #if FOOT_UNICODE_PRECOMPOSE - if (comb_chars->count == 0) { + if (composed == NULL) { wchar_t precomposed = precompose(base, wc); int precomposed_width = wcwidth(precomposed); if (precomposed != (wchar_t)-1 && precomposed_width == base_width) { - term->grid->cursor.point.col = base_col; - term->grid->cursor.lcf = false; term_print(term, precomposed, precomposed_width); return; } } #endif - if (comb_chars->count < ALEN(comb_chars->chars)) - comb_chars->chars[comb_chars->count++] = wc; - else { - LOG_WARN("combining character overflow:"); - LOG_WARN(" 0x%04x", base); - for (size_t i = 0; i < comb_chars->count; i++) - LOG_WARN(" 0x%04x", comb_chars->chars[i]); - LOG_ERR(" 0x%04x", wc); + size_t wanted_count = composed != NULL ? 
composed->count + 1 : 1; + + /* Look for existing combining chain */ + for (size_t i = 0; i < term->composed_count; i++) { + const struct composed *cc = &term->composed[i]; + if (cc->base != base) + continue; + + if (cc->count != wanted_count) + continue; + + if (cc->combining[wanted_count - 1] != wc) + continue; + + term_print(term, COMB_CHARS_LO + i, base_width); + return; } + + /* Allocate new chain */ + + struct composed new_cc; + new_cc.base = base; + new_cc.count = wanted_count; + for (size_t i = 0; i < wanted_count - 1; i++) + new_cc.combining[i] = composed->combining[i]; + new_cc.combining[wanted_count - 1] = wc; + + term->composed_count++; + term->composed = realloc(term->composed, term->composed_count * sizeof(term->composed[0])); + term->composed[term->composed_count - 1] = new_cc; + + term_print(term, COMB_CHARS_LO + term->composed_count - 1, base_width); return; } } -#endif /* FOOT_UNICODE_MAX_COMBINING_CHARS > 0 */ term_print(term, wc, width); }