Merge branch 'space-optimize-combining-chars'

2026-02-06 04:06:06 -05:00 · 2020-05-03 11:36:20 +02:00 · 2020-05-03 11:36:20 +02:00 · 1d1eb89925
commit 1d1eb89925
parent ae7383189a 4d4df92f66
10 changed files with 137 additions and 115 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@ -22,8 +22,9 @@
 * Right mouse button extends the current selection.
 * `CSI Ps ; Ps ; Ps t` escape sequences for the following parameters:
  `11t`, `13t`, `13;2t`, `14t`, `14;2t`, `15t`, `19t`.
-* Unicode combining characters. This feature is compile time
-  optional. See [README.md](README.md#user-content-unicode-combining) for details.
+* Unicode combining characters. Parts of this feature are compile time
+  optional. See [README.md](README.md#user-content-unicode-combining)
+  for details.

 ### Changed

--- a/README.md
+++ b/README.md
@ -271,30 +271,24 @@ with the terminal emulator itself. Foot implements the following OSCs:

 ## Unicode combining

-In order to handle combining characters (typically diacritics), foot
-must store additional data for each cell. By default, foot stores at
-most 2 combining characters per cell. This adds 9 bytes of additional
-space to each cell, or 75% more space than without combining
-characters).
+When the client prints Unicode combining characters, e.g. `a\\u0308`
+('a' + `COMBINING DIAERESIS`), foot will by default try to create a
+pre-composed character. For example, `\\u0061\\u0308` (`a\\u0308`)
+will be transformed into `\\u00e4` (`ä`).

-You can configure the maximum number of characters to store for each
-cell at **compile time** with
-`-Dunicode-max-combining-chars=<int>`. Setting this to `0`
-**disables** unicode combining completely - **no** additional data is
-stored.
+This is to improve the looks of the rendered grapheme. When rendering
+a decomposed string, `a\\u0308`, the glyphs for `a` and `\\u0308` are
+rendered independently, on top of each other. The result is often not
+optimal, with e.g. diacritics looking a bit out of place. If we are
+really unlucky, the base character and the combining characters may be
+picked from different fonts, making the result look even more awkward.

-Furthermore, in order to improve the looks of the rendered combined
-character,, foot will by default try to convert the base and combining
-characters to a pre-composed character.
+When rendering a pre-composed character, we are rendering a single
+glyph only and thus it is guaranteed to look the way the font designer
+intended it to.

-This will typically look better since we can now render a single
-glyph, the way the font designer intended it to be rendered. When
-pre-composing fails, foot will fallback to storing the combining
-character(s) separate from the base character, and will render the
-final grapheme by rendering the base and combining glyphs separately.
-
-You can disable pre-composing at **compile time** with
-`-Dunicode-precompose=false`.
+Still, if you do not want this, you can disable pre-composing at
+**compile time** with `-Dunicode-precompose=false`.


 ## Requirements
--- a/grid.c
+++ b/grid.c
@ -34,17 +34,10 @@ grid_row_alloc(int cols, bool initialize)

    if (initialize) {
        row->cells = calloc(cols, sizeof(row->cells[0]));
-#if FOOT_UNICODE_MAX_COMBINING_CHARS > 0
-        row->comb_chars = calloc(cols, sizeof(row->comb_chars[0]));
-#endif
        for (size_t c = 0; c < cols; c++)
            row->cells[c].attrs.clean = 1;
-    } else {
+    } else
        row->cells = malloc(cols * sizeof(row->cells[0]));
-#if FOOT_UNICODE_MAX_COMBINING_CHARS > 0
-        row->comb_chars = malloc(cols * sizeof(row->comb_chars[0]));
-#endif
-    }

    return row;
 }
@ -55,9 +48,6 @@ grid_row_free(struct row *row)
    if (row == NULL)
        return;

-#if FOOT_UNICODE_MAX_COMBINING_CHARS > 0
-    free(row->comb_chars);
-#endif
    free(row->cells);
    free(row);
 }
@ -214,17 +204,6 @@ grid_reflow(struct grid *grid, int new_rows, int new_cols,
                new_row->cells[new_col_idx] = *old_cell;
                new_row->cells[new_col_idx].attrs.clean = 1;

-#if FOOT_UNICODE_MAX_COMBINING_CHARS > 0
-                struct combining_chars *old_comb_chars
-                    = &old_row->comb_chars[c - empty_count + i];
-                struct combining_chars *new_comb_chars
-                    = &new_row->comb_chars[new_col_idx];
-
-                new_comb_chars->count = old_comb_chars->count;
-                for (size_t j = 0; j < ALEN(new_comb_chars->chars); j++)
-                    new_comb_chars->chars[j] = old_comb_chars->chars[j];
-#endif
-
                /* Translate tracking point(s) */
                if (is_tracking_point && i >= empty_count) {
                    tll_foreach(tracking_points, it) {
--- a/meson.build
+++ b/meson.build
@ -57,11 +57,8 @@ wayland_client = dependency('wayland-client')
 wayland_cursor = dependency('wayland-cursor')
 xkb = dependency('xkbcommon')

-add_project_arguments('-DFOOT_UNICODE_MAX_COMBINING_CHARS=@0@'.format(
-  get_option('unicode-max-combining-chars')), language: 'c')
 add_project_arguments('-DFOOT_UNICODE_PRECOMPOSE=@0@'.format(
-  get_option('unicode-max-combining-chars') > 0 and get_option('unicode-precompose')),
-  language: 'c')
+  get_option('unicode-precompose')), language: 'c')

 tllist = dependency('tllist', version: '>=1.0.1', fallback: 'tllist')
 fcft = dependency('fcft', version: ['>=2.0.0', '<2.1.0'], fallback: 'fcft')
@ -95,7 +92,7 @@ foreach prot : [
    command: [wscanner_prog, 'private-code', '@INPUT@', '@OUTPUT@'])
 endforeach

-if get_option('unicode-max-combining-chars') > 0 and get_option('unicode-precompose')
+if get_option('unicode-precompose')
  generate_unicode_precompose_sh = files('scripts/generate-unicode-precompose.sh')
  unicode_data = custom_target(
    'unicode-data',
@ -167,8 +164,7 @@ subdir('doc')

 summary(
  {
-    'Unicode max combining chars': get_option('unicode-max-combining-chars'),
-    'Unicode precompose': get_option('unicode-max-combining-chars') > 0 and get_option('unicode-precompose'),
+    'Unicode precompose': get_option('unicode-precompose'),
  },
  bool_yn: true
 )
--- a/meson_options.txt
+++ b/meson_options.txt
@ -1,5 +1,2 @@
-option('unicode-max-combining-chars', type: 'integer', value: 2,
-       description: 'Maximum number of combining characters to track per cell. A value of 0 completely disables unicode combining (this reduces the runtime memory footprint)')
-
 option('unicode-precompose', type: 'boolean', value: true,
       description: 'Convert decomposed characters to precomposed. Ignored if "unicode-combining" has been disabled')
--- a/render.c
+++ b/render.c
@ -402,9 +402,20 @@ render_cell(struct terminal *term, pixman_image_t *pix,

    struct fcft_font *font = attrs_to_font(term, &cell->attrs);
    const struct fcft_glyph *glyph = NULL;
+    const struct composed *composed = NULL;

-    if (cell->wc != 0)
-        glyph = fcft_glyph_rasterize(font, cell->wc, term->font_subpixel);
+    if (cell->wc != 0) {
+        wchar_t base = cell->wc;
+
+        if (base >= COMB_CHARS_LO &&
+            base < (COMB_CHARS_LO + term->composed_count))
+        {
+            composed = &term->composed[base - COMB_CHARS_LO];
+            base = composed->base;
+        }
+
+        glyph = fcft_glyph_rasterize(font, base, term->font_subpixel);
+    }

    int cell_cols = glyph != NULL ? max(1, glyph->cols) : 1;

@ -442,25 +453,25 @@ render_cell(struct terminal *term, pixman_image_t *pix,
        }
    }

-#if FOOT_UNICODE_MAX_COMBINING_CHARS > 0
    /* Combining characters */
-    const struct combining_chars *comb_chars = &row->comb_chars[col];
-    for (size_t i = 0; i < comb_chars->count; i++) {
-        const struct fcft_glyph *g = fcft_glyph_rasterize(
-            font, comb_chars->chars[i], term->font_subpixel);
+    if (composed != NULL) {
+        for (size_t i = 0; i < composed->count; i++) {
+            const struct fcft_glyph *g = fcft_glyph_rasterize(
+                font, composed->combining[i], term->font_subpixel);

-        if (g == NULL)
-            continue;
+            if (g == NULL)
+                continue;

-        pixman_image_composite32(
-            PIXMAN_OP_OVER, clr_pix, g->pix, pix, 0, 0, 0, 0,
-            /* Some fonts use a negative offset, while others use a
-             * "normal" offset */
-            x + (g->x < 0 ? term->cell_width : 0) + g->x,
-            y + font_baseline(term) - g->y,
-            g->width, g->height);
+            pixman_image_composite32(
+                PIXMAN_OP_OVER, clr_pix, g->pix, pix, 0, 0, 0, 0,
+                /* Some fonts use a negative offset, while others use a
+                 * "normal" offset */
+                x + (g->x < 0 ? term->cell_width : 0) + g->x,
+                y + font_baseline(term) - g->y,
+                g->width, g->height);
+        }
    }
-#endif
+
    pixman_image_unref(clr_pix);

    /* Underline */
--- a/selection.c
+++ b/selection.c
@ -142,12 +142,7 @@ min_bufsize_for_extraction(const struct terminal *term)
 {
    const struct coord *start = &term->selection.start;
    const struct coord *end = &term->selection.end;
-    const size_t chars_per_cell =
-#if FOOT_UNICODE_MAX_COMBINING_CHARS > 0
-        1 + ALEN(term->grid->cur_row->comb_chars[0].chars);
-#else
-        1;
-#endif
+    const size_t chars_per_cell = 1 + ALEN(term->composed[0].combining);

    switch (term->selection.kind) {
    case SELECTION_NONE:
@ -239,16 +234,17 @@ extract_one(struct terminal *term, struct row *row, struct cell *cell,
    ctx->empty_count = 0;

    assert(ctx->idx + 1 <= ctx->size);
-    ctx->buf[ctx->idx++] = cell->wc;

-#if FOOT_UNICODE_MAX_COMBINING_CHARS > 0
-    const struct combining_chars *comb_chars = &row->comb_chars[col];
+    if (cell->wc >= COMB_CHARS_LO && cell->wc < (COMB_CHARS_LO + term->composed_count)) {
+        const struct composed *composed = &term->composed[cell->wc - COMB_CHARS_LO];

-    assert(cell->wc != 0);
-    assert(ctx->idx + comb_chars->count <= ctx->size);
-    for (size_t i = 0; i < comb_chars->count; i++)
-        ctx->buf[ctx->idx++] = comb_chars->chars[i];
-#endif
+        ctx->buf[ctx->idx++] = composed->base;
+
+        assert(ctx->idx + composed->count <= ctx->size);
+        for (size_t i = 0; i < composed->count; i++)
+            ctx->buf[ctx->idx++] = composed->combining[i];
+    } else
+        ctx->buf[ctx->idx++] = cell->wc;

    ctx->last_row = row;
    ctx->last_cell = cell;
--- a/terminal.c
+++ b/terminal.c
@ -822,6 +822,8 @@ term_init(const struct config *conf, struct fdm *fdm, struct wayland *wayl,
        .normal = {.damage = tll_init(), .scroll_damage = tll_init(), .sixel_images = tll_init()},
        .alt = {.damage = tll_init(), .scroll_damage = tll_init(), .sixel_images = tll_init()},
        .grid = &term->normal,
+        .composed_count = 0,
+        .composed = NULL,
        .meta = {
            .esc_prefix = true,
            .eight_bit = true,
@ -1086,6 +1088,8 @@ term_destroy(struct terminal *term)
    tll_free(term->normal.scroll_damage);
    tll_free(term->alt.scroll_damage);

+    free(term->composed);
+
    free(term->window_title);
    tll_free_and_free(term->window_title_stack, free);

@ -2295,10 +2299,6 @@ term_print(struct terminal *term, wchar_t wc, int width)
    cell->wc = term->vt.last_printed = wc;
    cell->attrs = term->vt.attrs;

-#if FOOT_UNICODE_MAX_COMBINING_CHARS > 0
-    row->comb_chars[term->grid->cursor.point.col].count = 0;
-#endif
-
    row->dirty = true;
    cell->attrs.clean = 0;

--- a/terminal.h
+++ b/terminal.h
@ -77,21 +77,16 @@ struct damage {
    int lines;
 };

-#if FOOT_UNICODE_MAX_COMBINING_CHARS > 0
-struct combining_chars {
+struct composed {
+    wchar_t base;
+    wchar_t combining[5];
    uint8_t count;
-    wchar_t chars[FOOT_UNICODE_MAX_COMBINING_CHARS];
-} __attribute__((packed));
-#endif
+};

 struct row {
    struct cell *cells;
    bool dirty;
    bool linebreak;
-
-#if FOOT_UNICODE_MAX_COMBINING_CHARS > 0
-    struct combining_chars *comb_chars;
-#endif
 };

 struct sixel {
@ -221,6 +216,11 @@ struct terminal {
    struct grid alt;
    struct grid *grid;

+    #define COMB_CHARS_LO 0x40000000ul
+    #define COMB_CHARS_HI 0x400ffffful
+    size_t composed_count;
+    struct composed *composed;
+
    struct fcft_font *fonts[4];
    int font_dpi;
    int font_adjustments;
--- a/vt.c
+++ b/vt.c
@ -571,8 +571,6 @@ action_utf8_print(struct terminal *term, uint8_t c)

    int width = wcwidth(wc);

-#if FOOT_UNICODE_MAX_COMBINING_CHARS > 0
-
    /*
     * Is this is combining character? The basic assumption is that if
     * wcwdith() returns 0, then it *is* a combining character.
@ -606,6 +604,15 @@ action_utf8_print(struct terminal *term, uint8_t c)
            base = row->cells[base_col].wc;
        }

+        const struct composed *composed =
+            (base >= COMB_CHARS_LO &&
+             base < (COMB_CHARS_LO + term->composed_count))
+            ? &term->composed[base - COMB_CHARS_LO]
+            : NULL;
+
+        if (composed != NULL)
+            base = composed->base;
+
        int base_width = wcwidth(base);

        if (base != 0 && base_width > 0) {
@ -619,35 +626,76 @@ action_utf8_print(struct terminal *term, uint8_t c)
             * pre-composed character, as that is likely to produce a
             * better looking result.
             */
-
-            struct combining_chars *comb_chars = &row->comb_chars[base_col];
+            term->grid->cursor.point.col = base_col;
+            term->grid->cursor.lcf = false;

 #if FOOT_UNICODE_PRECOMPOSE
-            if (comb_chars->count == 0) {
+            if (composed == NULL) {
                wchar_t precomposed = precompose(base, wc);
                int precomposed_width = wcwidth(precomposed);
                if (precomposed != (wchar_t)-1 && precomposed_width == base_width) {
-                    term->grid->cursor.point.col = base_col;
-                    term->grid->cursor.lcf = false;
                    term_print(term, precomposed, precomposed_width);
                    return;
                }
            }
 #endif

-            if (comb_chars->count < ALEN(comb_chars->chars))
-                comb_chars->chars[comb_chars->count++] = wc;
-            else {
+            size_t wanted_count = composed != NULL ? composed->count + 1 : 1;
+            if (wanted_count > ALEN(composed->combining)) {
+                assert(composed != NULL);
+
                LOG_WARN("combining character overflow:");
-                LOG_WARN("  0x%04x", base);
-                for (size_t i = 0; i < comb_chars->count; i++)
-                    LOG_WARN("  0x%04x", comb_chars->chars[i]);
-                LOG_ERR("  0x%04x", wc);
+                LOG_WARN("  base: 0x%04x", composed->base);
+                for (size_t i = 0; i < composed->count; i++)
+                    LOG_WARN("    cc: 0x%04x", composed->combining[i]);
+                LOG_ERR("   new: 0x%04x", wc);
+
+                /* This is going to break anyway... */
+                wanted_count--;
+            }
+
+            assert(wanted_count <= ALEN(composed->combining));
+
+            /* Look for existing combining chain */
+            for (size_t i = 0; i < term->composed_count; i++) {
+                const struct composed *cc = &term->composed[i];
+                if (cc->base != base)
+                    continue;
+
+                if (cc->count != wanted_count)
+                    continue;
+
+                if (cc->combining[wanted_count - 1] != wc)
+                    continue;
+
+                term_print(term, COMB_CHARS_LO + i, base_width);
+                return;
+            }
+
+            /* Allocate new chain */
+
+            struct composed new_cc;
+            new_cc.base = base;
+            new_cc.count = wanted_count;
+            for (size_t i = 0; i < wanted_count - 1; i++)
+                new_cc.combining[i] = composed->combining[i];
+            new_cc.combining[wanted_count - 1] = wc;
+
+            if (term->composed_count < COMB_CHARS_HI) {
+                term->composed_count++;
+                term->composed = realloc(term->composed, term->composed_count * sizeof(term->composed[0]));
+                term->composed[term->composed_count - 1] = new_cc;
+
+                term_print(term, COMB_CHARS_LO + term->composed_count - 1, base_width);
+                return;
+            } else {
+                /* We reached our maximum number of allowed composed
+                 * character chains. Fall through here and print the
+                 * current zero-width character to the current cell */
+                LOG_WARN("maximum number of composed characters reached");
            }
-            return;
        }
    }
-#endif /* FOOT_UNICODE_MAX_COMBINING_CHARS > 0 */

    term_print(term, wc, width);
 }