Merge branch 'space-optimize-combining-chars'

This commit is contained in:
Daniel Eklöf 2020-05-03 11:36:20 +02:00
commit 1d1eb89925
No known key found for this signature in database
GPG key ID: 5BBD4992C116573F
10 changed files with 137 additions and 115 deletions

View file

@ -22,8 +22,9 @@
* Right mouse button extends the current selection. * Right mouse button extends the current selection.
* `CSI Ps ; Ps ; Ps t` escape sequences for the following parameters: * `CSI Ps ; Ps ; Ps t` escape sequences for the following parameters:
`11t`, `13t`, `13;2t`, `14t`, `14;2t`, `15t`, `19t`. `11t`, `13t`, `13;2t`, `14t`, `14;2t`, `15t`, `19t`.
* Unicode combining characters. This feature is compile time * Unicode combining characters. Parts of this feature are compile time
optional. See [README.md](README.md#user-content-unicode-combining) for details. optional. See [README.md](README.md#user-content-unicode-combining)
for details.
### Changed ### Changed

View file

@ -271,30 +271,24 @@ with the terminal emulator itself. Foot implements the following OSCs:
## Unicode combining ## Unicode combining
In order to handle combining characters (typically diacritics), foot When the client prints Unicode combining characters, e.g. `a\\u0308`
must store additional data for each cell. By default, foot stores at ('a' + `COMBINING DIAERESIS`), foot will by default try to create a
most 2 combining characters per cell. This adds 9 bytes of additional pre-composed character. For example, `\\u0061\\u0308` (`a\\u0308`)
space to each cell, or 75% more space than without combining will be transformed into `\\u00e4` (`ä`).
characters).
You can configure the maximum number of characters to store for each This is to improve the looks of the rendered grapheme. When rendering
cell at **compile time** with a decomposed string, `a\\u0308`, the glyphs for `a` and `\\u0308` are
`-Dunicode-max-combining-chars=<int>`. Setting this to `0` rendered independently, on top of each other. The result is often not
**disables** unicode combining completely - **no** additional data is optimal, with e.g. diacritics looking a bit out of place. If we are
stored. really unlucky, the base character and the combining characters may be
picked from different fonts, making the result look even more awkward.
Furthermore, in order to improve the looks of the rendered combined When rendering a pre-composed character, we are rendering a single
character,, foot will by default try to convert the base and combining glyph only and thus it is guaranteed to look the way the font designer
characters to a pre-composed character. intended it to.
This will typically look better since we can now render a single Still, if you do not want this, you can disable pre-composing at
glyph, the way the font designer intended it to be rendered. When **compile time** with `-Dunicode-precompose=false`.
pre-composing fails, foot will fallback to storing the combining
character(s) separate from the base character, and will render the
final grapheme by rendering the base and combining glyphs separately.
You can disable pre-composing at **compile time** with
`-Dunicode-precompose=false`.
## Requirements ## Requirements

23
grid.c
View file

@ -34,17 +34,10 @@ grid_row_alloc(int cols, bool initialize)
if (initialize) { if (initialize) {
row->cells = calloc(cols, sizeof(row->cells[0])); row->cells = calloc(cols, sizeof(row->cells[0]));
#if FOOT_UNICODE_MAX_COMBINING_CHARS > 0
row->comb_chars = calloc(cols, sizeof(row->comb_chars[0]));
#endif
for (size_t c = 0; c < cols; c++) for (size_t c = 0; c < cols; c++)
row->cells[c].attrs.clean = 1; row->cells[c].attrs.clean = 1;
} else { } else
row->cells = malloc(cols * sizeof(row->cells[0])); row->cells = malloc(cols * sizeof(row->cells[0]));
#if FOOT_UNICODE_MAX_COMBINING_CHARS > 0
row->comb_chars = malloc(cols * sizeof(row->comb_chars[0]));
#endif
}
return row; return row;
} }
@ -55,9 +48,6 @@ grid_row_free(struct row *row)
if (row == NULL) if (row == NULL)
return; return;
#if FOOT_UNICODE_MAX_COMBINING_CHARS > 0
free(row->comb_chars);
#endif
free(row->cells); free(row->cells);
free(row); free(row);
} }
@ -214,17 +204,6 @@ grid_reflow(struct grid *grid, int new_rows, int new_cols,
new_row->cells[new_col_idx] = *old_cell; new_row->cells[new_col_idx] = *old_cell;
new_row->cells[new_col_idx].attrs.clean = 1; new_row->cells[new_col_idx].attrs.clean = 1;
#if FOOT_UNICODE_MAX_COMBINING_CHARS > 0
struct combining_chars *old_comb_chars
= &old_row->comb_chars[c - empty_count + i];
struct combining_chars *new_comb_chars
= &new_row->comb_chars[new_col_idx];
new_comb_chars->count = old_comb_chars->count;
for (size_t j = 0; j < ALEN(new_comb_chars->chars); j++)
new_comb_chars->chars[j] = old_comb_chars->chars[j];
#endif
/* Translate tracking point(s) */ /* Translate tracking point(s) */
if (is_tracking_point && i >= empty_count) { if (is_tracking_point && i >= empty_count) {
tll_foreach(tracking_points, it) { tll_foreach(tracking_points, it) {

View file

@ -57,11 +57,8 @@ wayland_client = dependency('wayland-client')
wayland_cursor = dependency('wayland-cursor') wayland_cursor = dependency('wayland-cursor')
xkb = dependency('xkbcommon') xkb = dependency('xkbcommon')
add_project_arguments('-DFOOT_UNICODE_MAX_COMBINING_CHARS=@0@'.format(
get_option('unicode-max-combining-chars')), language: 'c')
add_project_arguments('-DFOOT_UNICODE_PRECOMPOSE=@0@'.format( add_project_arguments('-DFOOT_UNICODE_PRECOMPOSE=@0@'.format(
get_option('unicode-max-combining-chars') > 0 and get_option('unicode-precompose')), get_option('unicode-precompose')), language: 'c')
language: 'c')
tllist = dependency('tllist', version: '>=1.0.1', fallback: 'tllist') tllist = dependency('tllist', version: '>=1.0.1', fallback: 'tllist')
fcft = dependency('fcft', version: ['>=2.0.0', '<2.1.0'], fallback: 'fcft') fcft = dependency('fcft', version: ['>=2.0.0', '<2.1.0'], fallback: 'fcft')
@ -95,7 +92,7 @@ foreach prot : [
command: [wscanner_prog, 'private-code', '@INPUT@', '@OUTPUT@']) command: [wscanner_prog, 'private-code', '@INPUT@', '@OUTPUT@'])
endforeach endforeach
if get_option('unicode-max-combining-chars') > 0 and get_option('unicode-precompose') if get_option('unicode-precompose')
generate_unicode_precompose_sh = files('scripts/generate-unicode-precompose.sh') generate_unicode_precompose_sh = files('scripts/generate-unicode-precompose.sh')
unicode_data = custom_target( unicode_data = custom_target(
'unicode-data', 'unicode-data',
@ -167,8 +164,7 @@ subdir('doc')
summary( summary(
{ {
'Unicode max combining chars': get_option('unicode-max-combining-chars'), 'Unicode precompose': get_option('unicode-precompose'),
'Unicode precompose': get_option('unicode-max-combining-chars') > 0 and get_option('unicode-precompose'),
}, },
bool_yn: true bool_yn: true
) )

View file

@ -1,5 +1,2 @@
option('unicode-max-combining-chars', type: 'integer', value: 2,
description: 'Maximum number of combining characters to track per cell. A value of 0 completely disables unicode combining (this reduces the runtime memory footprint)')
option('unicode-precompose', type: 'boolean', value: true, option('unicode-precompose', type: 'boolean', value: true,
description: 'Convert decomposed characters to precomposed. Ignored if "unicode-combining" has been disabled') description: 'Convert decomposed characters to precomposed. Ignored if "unicode-combining" has been disabled')

View file

@ -402,9 +402,20 @@ render_cell(struct terminal *term, pixman_image_t *pix,
struct fcft_font *font = attrs_to_font(term, &cell->attrs); struct fcft_font *font = attrs_to_font(term, &cell->attrs);
const struct fcft_glyph *glyph = NULL; const struct fcft_glyph *glyph = NULL;
const struct composed *composed = NULL;
if (cell->wc != 0) if (cell->wc != 0) {
glyph = fcft_glyph_rasterize(font, cell->wc, term->font_subpixel); wchar_t base = cell->wc;
if (base >= COMB_CHARS_LO &&
base < (COMB_CHARS_LO + term->composed_count))
{
composed = &term->composed[base - COMB_CHARS_LO];
base = composed->base;
}
glyph = fcft_glyph_rasterize(font, base, term->font_subpixel);
}
int cell_cols = glyph != NULL ? max(1, glyph->cols) : 1; int cell_cols = glyph != NULL ? max(1, glyph->cols) : 1;
@ -442,25 +453,25 @@ render_cell(struct terminal *term, pixman_image_t *pix,
} }
} }
#if FOOT_UNICODE_MAX_COMBINING_CHARS > 0
/* Combining characters */ /* Combining characters */
const struct combining_chars *comb_chars = &row->comb_chars[col]; if (composed != NULL) {
for (size_t i = 0; i < comb_chars->count; i++) { for (size_t i = 0; i < composed->count; i++) {
const struct fcft_glyph *g = fcft_glyph_rasterize( const struct fcft_glyph *g = fcft_glyph_rasterize(
font, comb_chars->chars[i], term->font_subpixel); font, composed->combining[i], term->font_subpixel);
if (g == NULL) if (g == NULL)
continue; continue;
pixman_image_composite32( pixman_image_composite32(
PIXMAN_OP_OVER, clr_pix, g->pix, pix, 0, 0, 0, 0, PIXMAN_OP_OVER, clr_pix, g->pix, pix, 0, 0, 0, 0,
/* Some fonts use a negative offset, while others use a /* Some fonts use a negative offset, while others use a
* "normal" offset */ * "normal" offset */
x + (g->x < 0 ? term->cell_width : 0) + g->x, x + (g->x < 0 ? term->cell_width : 0) + g->x,
y + font_baseline(term) - g->y, y + font_baseline(term) - g->y,
g->width, g->height); g->width, g->height);
}
} }
#endif
pixman_image_unref(clr_pix); pixman_image_unref(clr_pix);
/* Underline */ /* Underline */

View file

@ -142,12 +142,7 @@ min_bufsize_for_extraction(const struct terminal *term)
{ {
const struct coord *start = &term->selection.start; const struct coord *start = &term->selection.start;
const struct coord *end = &term->selection.end; const struct coord *end = &term->selection.end;
const size_t chars_per_cell = const size_t chars_per_cell = 1 + ALEN(term->composed[0].combining);
#if FOOT_UNICODE_MAX_COMBINING_CHARS > 0
1 + ALEN(term->grid->cur_row->comb_chars[0].chars);
#else
1;
#endif
switch (term->selection.kind) { switch (term->selection.kind) {
case SELECTION_NONE: case SELECTION_NONE:
@ -239,16 +234,17 @@ extract_one(struct terminal *term, struct row *row, struct cell *cell,
ctx->empty_count = 0; ctx->empty_count = 0;
assert(ctx->idx + 1 <= ctx->size); assert(ctx->idx + 1 <= ctx->size);
ctx->buf[ctx->idx++] = cell->wc;
#if FOOT_UNICODE_MAX_COMBINING_CHARS > 0 if (cell->wc >= COMB_CHARS_LO && cell->wc < (COMB_CHARS_LO + term->composed_count)) {
const struct combining_chars *comb_chars = &row->comb_chars[col]; const struct composed *composed = &term->composed[cell->wc - COMB_CHARS_LO];
assert(cell->wc != 0); ctx->buf[ctx->idx++] = composed->base;
assert(ctx->idx + comb_chars->count <= ctx->size);
for (size_t i = 0; i < comb_chars->count; i++) assert(ctx->idx + composed->count <= ctx->size);
ctx->buf[ctx->idx++] = comb_chars->chars[i]; for (size_t i = 0; i < composed->count; i++)
#endif ctx->buf[ctx->idx++] = composed->combining[i];
} else
ctx->buf[ctx->idx++] = cell->wc;
ctx->last_row = row; ctx->last_row = row;
ctx->last_cell = cell; ctx->last_cell = cell;

View file

@ -822,6 +822,8 @@ term_init(const struct config *conf, struct fdm *fdm, struct wayland *wayl,
.normal = {.damage = tll_init(), .scroll_damage = tll_init(), .sixel_images = tll_init()}, .normal = {.damage = tll_init(), .scroll_damage = tll_init(), .sixel_images = tll_init()},
.alt = {.damage = tll_init(), .scroll_damage = tll_init(), .sixel_images = tll_init()}, .alt = {.damage = tll_init(), .scroll_damage = tll_init(), .sixel_images = tll_init()},
.grid = &term->normal, .grid = &term->normal,
.composed_count = 0,
.composed = NULL,
.meta = { .meta = {
.esc_prefix = true, .esc_prefix = true,
.eight_bit = true, .eight_bit = true,
@ -1086,6 +1088,8 @@ term_destroy(struct terminal *term)
tll_free(term->normal.scroll_damage); tll_free(term->normal.scroll_damage);
tll_free(term->alt.scroll_damage); tll_free(term->alt.scroll_damage);
free(term->composed);
free(term->window_title); free(term->window_title);
tll_free_and_free(term->window_title_stack, free); tll_free_and_free(term->window_title_stack, free);
@ -2295,10 +2299,6 @@ term_print(struct terminal *term, wchar_t wc, int width)
cell->wc = term->vt.last_printed = wc; cell->wc = term->vt.last_printed = wc;
cell->attrs = term->vt.attrs; cell->attrs = term->vt.attrs;
#if FOOT_UNICODE_MAX_COMBINING_CHARS > 0
row->comb_chars[term->grid->cursor.point.col].count = 0;
#endif
row->dirty = true; row->dirty = true;
cell->attrs.clean = 0; cell->attrs.clean = 0;

View file

@ -77,21 +77,16 @@ struct damage {
int lines; int lines;
}; };
#if FOOT_UNICODE_MAX_COMBINING_CHARS > 0 struct composed {
struct combining_chars { wchar_t base;
wchar_t combining[5];
uint8_t count; uint8_t count;
wchar_t chars[FOOT_UNICODE_MAX_COMBINING_CHARS]; };
} __attribute__((packed));
#endif
struct row { struct row {
struct cell *cells; struct cell *cells;
bool dirty; bool dirty;
bool linebreak; bool linebreak;
#if FOOT_UNICODE_MAX_COMBINING_CHARS > 0
struct combining_chars *comb_chars;
#endif
}; };
struct sixel { struct sixel {
@ -221,6 +216,11 @@ struct terminal {
struct grid alt; struct grid alt;
struct grid *grid; struct grid *grid;
#define COMB_CHARS_LO 0x40000000ul
#define COMB_CHARS_HI 0x400ffffful
size_t composed_count;
struct composed *composed;
struct fcft_font *fonts[4]; struct fcft_font *fonts[4];
int font_dpi; int font_dpi;
int font_adjustments; int font_adjustments;

80
vt.c
View file

@ -571,8 +571,6 @@ action_utf8_print(struct terminal *term, uint8_t c)
int width = wcwidth(wc); int width = wcwidth(wc);
#if FOOT_UNICODE_MAX_COMBINING_CHARS > 0
/* /*
* Is this a combining character? The basic assumption is that if * Is this a combining character? The basic assumption is that if
* wcwidth() returns 0, then it *is* a combining character. * wcwidth() returns 0, then it *is* a combining character.
@ -606,6 +604,15 @@ action_utf8_print(struct terminal *term, uint8_t c)
base = row->cells[base_col].wc; base = row->cells[base_col].wc;
} }
const struct composed *composed =
(base >= COMB_CHARS_LO &&
base < (COMB_CHARS_LO + term->composed_count))
? &term->composed[base - COMB_CHARS_LO]
: NULL;
if (composed != NULL)
base = composed->base;
int base_width = wcwidth(base); int base_width = wcwidth(base);
if (base != 0 && base_width > 0) { if (base != 0 && base_width > 0) {
@ -619,35 +626,76 @@ action_utf8_print(struct terminal *term, uint8_t c)
* pre-composed character, as that is likely to produce a * pre-composed character, as that is likely to produce a
* better looking result. * better looking result.
*/ */
term->grid->cursor.point.col = base_col;
struct combining_chars *comb_chars = &row->comb_chars[base_col]; term->grid->cursor.lcf = false;
#if FOOT_UNICODE_PRECOMPOSE #if FOOT_UNICODE_PRECOMPOSE
if (comb_chars->count == 0) { if (composed == NULL) {
wchar_t precomposed = precompose(base, wc); wchar_t precomposed = precompose(base, wc);
int precomposed_width = wcwidth(precomposed); int precomposed_width = wcwidth(precomposed);
if (precomposed != (wchar_t)-1 && precomposed_width == base_width) { if (precomposed != (wchar_t)-1 && precomposed_width == base_width) {
term->grid->cursor.point.col = base_col;
term->grid->cursor.lcf = false;
term_print(term, precomposed, precomposed_width); term_print(term, precomposed, precomposed_width);
return; return;
} }
} }
#endif #endif
if (comb_chars->count < ALEN(comb_chars->chars)) size_t wanted_count = composed != NULL ? composed->count + 1 : 1;
comb_chars->chars[comb_chars->count++] = wc; if (wanted_count > ALEN(composed->combining)) {
else { assert(composed != NULL);
LOG_WARN("combining character overflow:"); LOG_WARN("combining character overflow:");
LOG_WARN(" 0x%04x", base); LOG_WARN(" base: 0x%04x", composed->base);
for (size_t i = 0; i < comb_chars->count; i++) for (size_t i = 0; i < composed->count; i++)
LOG_WARN(" 0x%04x", comb_chars->chars[i]); LOG_WARN(" cc: 0x%04x", composed->combining[i]);
LOG_ERR(" 0x%04x", wc); LOG_ERR(" new: 0x%04x", wc);
/* This is going to break anyway... */
wanted_count--;
}
assert(wanted_count <= ALEN(composed->combining));
/* Look for existing combining chain */
for (size_t i = 0; i < term->composed_count; i++) {
const struct composed *cc = &term->composed[i];
if (cc->base != base)
continue;
if (cc->count != wanted_count)
continue;
if (cc->combining[wanted_count - 1] != wc)
continue;
term_print(term, COMB_CHARS_LO + i, base_width);
return;
}
/* Allocate new chain */
struct composed new_cc;
new_cc.base = base;
new_cc.count = wanted_count;
for (size_t i = 0; i < wanted_count - 1; i++)
new_cc.combining[i] = composed->combining[i];
new_cc.combining[wanted_count - 1] = wc;
if (term->composed_count < COMB_CHARS_HI) {
term->composed_count++;
term->composed = realloc(term->composed, term->composed_count * sizeof(term->composed[0]));
term->composed[term->composed_count - 1] = new_cc;
term_print(term, COMB_CHARS_LO + term->composed_count - 1, base_width);
return;
} else {
/* We reached our maximum number of allowed composed
* character chains. Fall through here and print the
* current zero-width character to the current cell */
LOG_WARN("maximum number of composed characters reached");
} }
return;
} }
} }
#endif /* FOOT_UNICODE_MAX_COMBINING_CHARS > 0 */
term_print(term, wc, width); term_print(term, wc, width);
} }