Merge branch 'space-optimize-combining-chars'

This commit is contained in:
Daniel Eklöf 2020-05-03 11:36:20 +02:00
commit 1d1eb89925
No known key found for this signature in database
GPG key ID: 5BBD4992C116573F
10 changed files with 137 additions and 115 deletions

View file

@ -22,8 +22,9 @@
* Right mouse button extends the current selection.
* `CSI Ps ; Ps ; Ps t` escape sequences for the following parameters:
`11t`, `13t`, `13;2t`, `14t`, `14;2t`, `15t`, `19t`.
* Unicode combining characters. This feature is compile time
optional. See [README.md](README.md#user-content-unicode-combining) for details.
* Unicode combining characters. Parts of this feature are compile time
optional. See [README.md](README.md#user-content-unicode-combining)
for details.
### Changed

View file

@ -271,30 +271,24 @@ with the terminal emulator itself. Foot implements the following OSCs:
## Unicode combining
In order to handle combining characters (typically diacritics), foot
must store additional data for each cell. By default, foot stores at
most 2 combining characters per cell. This adds 9 bytes of additional
space to each cell (75% more space than without combining
characters).
When the client prints Unicode combining characters, e.g. `a\\u0308`
('a' + `COMBINING DIAERESIS`), foot will by default try to create a
pre-composed character. For example, `\\u0061\\u0308` (`a\\u0308`)
will be transformed into `\\u00e4` (`ä`).
You can configure the maximum number of characters to store for each
cell at **compile time** with
`-Dunicode-max-combining-chars=<int>`. Setting this to `0`
**disables** unicode combining completely - **no** additional data is
stored.
This is to improve the looks of the rendered grapheme. When rendering
a decomposed string, `a\\u0308`, the glyphs for `a` and `\\u0308` are
rendered independently, on top of each other. The result is often not
optimal, with e.g. diacritics looking a bit out of place. If we are
really unlucky, the base character and the combining characters may be
picked from different fonts, making the result look even more awkward.
Furthermore, in order to improve the looks of the rendered combined
character, foot will by default try to convert the base and combining
characters to a pre-composed character.
When rendering a pre-composed character, we are rendering a single
glyph only and thus it is guaranteed to look the way the font designer
intended it to.
This will typically look better since we can now render a single
glyph, the way the font designer intended it to be rendered. When
pre-composing fails, foot will fall back to storing the combining
character(s) separately from the base character, and will render the
final grapheme by rendering the base and combining glyphs separately.
You can disable pre-composing at **compile time** with
`-Dunicode-precompose=false`.
Still, if you do not want this, you can disable pre-composing at
**compile time** with `-Dunicode-precompose=false`.
## Requirements

23
grid.c
View file

@ -34,17 +34,10 @@ grid_row_alloc(int cols, bool initialize)
if (initialize) {
row->cells = calloc(cols, sizeof(row->cells[0]));
#if FOOT_UNICODE_MAX_COMBINING_CHARS > 0
row->comb_chars = calloc(cols, sizeof(row->comb_chars[0]));
#endif
for (size_t c = 0; c < cols; c++)
row->cells[c].attrs.clean = 1;
} else {
} else
row->cells = malloc(cols * sizeof(row->cells[0]));
#if FOOT_UNICODE_MAX_COMBINING_CHARS > 0
row->comb_chars = malloc(cols * sizeof(row->comb_chars[0]));
#endif
}
return row;
}
@ -55,9 +48,6 @@ grid_row_free(struct row *row)
if (row == NULL)
return;
#if FOOT_UNICODE_MAX_COMBINING_CHARS > 0
free(row->comb_chars);
#endif
free(row->cells);
free(row);
}
@ -214,17 +204,6 @@ grid_reflow(struct grid *grid, int new_rows, int new_cols,
new_row->cells[new_col_idx] = *old_cell;
new_row->cells[new_col_idx].attrs.clean = 1;
#if FOOT_UNICODE_MAX_COMBINING_CHARS > 0
struct combining_chars *old_comb_chars
= &old_row->comb_chars[c - empty_count + i];
struct combining_chars *new_comb_chars
= &new_row->comb_chars[new_col_idx];
new_comb_chars->count = old_comb_chars->count;
for (size_t j = 0; j < ALEN(new_comb_chars->chars); j++)
new_comb_chars->chars[j] = old_comb_chars->chars[j];
#endif
/* Translate tracking point(s) */
if (is_tracking_point && i >= empty_count) {
tll_foreach(tracking_points, it) {

View file

@ -57,11 +57,8 @@ wayland_client = dependency('wayland-client')
wayland_cursor = dependency('wayland-cursor')
xkb = dependency('xkbcommon')
add_project_arguments('-DFOOT_UNICODE_MAX_COMBINING_CHARS=@0@'.format(
get_option('unicode-max-combining-chars')), language: 'c')
add_project_arguments('-DFOOT_UNICODE_PRECOMPOSE=@0@'.format(
get_option('unicode-max-combining-chars') > 0 and get_option('unicode-precompose')),
language: 'c')
get_option('unicode-precompose')), language: 'c')
tllist = dependency('tllist', version: '>=1.0.1', fallback: 'tllist')
fcft = dependency('fcft', version: ['>=2.0.0', '<2.1.0'], fallback: 'fcft')
@ -95,7 +92,7 @@ foreach prot : [
command: [wscanner_prog, 'private-code', '@INPUT@', '@OUTPUT@'])
endforeach
if get_option('unicode-max-combining-chars') > 0 and get_option('unicode-precompose')
if get_option('unicode-precompose')
generate_unicode_precompose_sh = files('scripts/generate-unicode-precompose.sh')
unicode_data = custom_target(
'unicode-data',
@ -167,8 +164,7 @@ subdir('doc')
summary(
{
'Unicode max combining chars': get_option('unicode-max-combining-chars'),
'Unicode precompose': get_option('unicode-max-combining-chars') > 0 and get_option('unicode-precompose'),
'Unicode precompose': get_option('unicode-precompose'),
},
bool_yn: true
)

View file

@ -1,5 +1,2 @@
option('unicode-max-combining-chars', type: 'integer', value: 2,
description: 'Maximum number of combining characters to track per cell. A value of 0 completely disables unicode combining (this reduces the runtime memory footprint)')
option('unicode-precompose', type: 'boolean', value: true,
description: 'Convert decomposed characters to precomposed. Ignored if "unicode-combining" has been disabled')

View file

@ -402,9 +402,20 @@ render_cell(struct terminal *term, pixman_image_t *pix,
struct fcft_font *font = attrs_to_font(term, &cell->attrs);
const struct fcft_glyph *glyph = NULL;
const struct composed *composed = NULL;
if (cell->wc != 0)
glyph = fcft_glyph_rasterize(font, cell->wc, term->font_subpixel);
if (cell->wc != 0) {
wchar_t base = cell->wc;
if (base >= COMB_CHARS_LO &&
base < (COMB_CHARS_LO + term->composed_count))
{
composed = &term->composed[base - COMB_CHARS_LO];
base = composed->base;
}
glyph = fcft_glyph_rasterize(font, base, term->font_subpixel);
}
int cell_cols = glyph != NULL ? max(1, glyph->cols) : 1;
@ -442,25 +453,25 @@ render_cell(struct terminal *term, pixman_image_t *pix,
}
}
#if FOOT_UNICODE_MAX_COMBINING_CHARS > 0
/* Combining characters */
const struct combining_chars *comb_chars = &row->comb_chars[col];
for (size_t i = 0; i < comb_chars->count; i++) {
const struct fcft_glyph *g = fcft_glyph_rasterize(
font, comb_chars->chars[i], term->font_subpixel);
if (composed != NULL) {
for (size_t i = 0; i < composed->count; i++) {
const struct fcft_glyph *g = fcft_glyph_rasterize(
font, composed->combining[i], term->font_subpixel);
if (g == NULL)
continue;
if (g == NULL)
continue;
pixman_image_composite32(
PIXMAN_OP_OVER, clr_pix, g->pix, pix, 0, 0, 0, 0,
/* Some fonts use a negative offset, while others use a
* "normal" offset */
x + (g->x < 0 ? term->cell_width : 0) + g->x,
y + font_baseline(term) - g->y,
g->width, g->height);
pixman_image_composite32(
PIXMAN_OP_OVER, clr_pix, g->pix, pix, 0, 0, 0, 0,
/* Some fonts use a negative offset, while others use a
* "normal" offset */
x + (g->x < 0 ? term->cell_width : 0) + g->x,
y + font_baseline(term) - g->y,
g->width, g->height);
}
}
#endif
pixman_image_unref(clr_pix);
/* Underline */

View file

@ -142,12 +142,7 @@ min_bufsize_for_extraction(const struct terminal *term)
{
const struct coord *start = &term->selection.start;
const struct coord *end = &term->selection.end;
const size_t chars_per_cell =
#if FOOT_UNICODE_MAX_COMBINING_CHARS > 0
1 + ALEN(term->grid->cur_row->comb_chars[0].chars);
#else
1;
#endif
const size_t chars_per_cell = 1 + ALEN(term->composed[0].combining);
switch (term->selection.kind) {
case SELECTION_NONE:
@ -239,16 +234,17 @@ extract_one(struct terminal *term, struct row *row, struct cell *cell,
ctx->empty_count = 0;
assert(ctx->idx + 1 <= ctx->size);
ctx->buf[ctx->idx++] = cell->wc;
#if FOOT_UNICODE_MAX_COMBINING_CHARS > 0
const struct combining_chars *comb_chars = &row->comb_chars[col];
if (cell->wc >= COMB_CHARS_LO && cell->wc < (COMB_CHARS_LO + term->composed_count)) {
const struct composed *composed = &term->composed[cell->wc - COMB_CHARS_LO];
assert(cell->wc != 0);
assert(ctx->idx + comb_chars->count <= ctx->size);
for (size_t i = 0; i < comb_chars->count; i++)
ctx->buf[ctx->idx++] = comb_chars->chars[i];
#endif
ctx->buf[ctx->idx++] = composed->base;
assert(ctx->idx + composed->count <= ctx->size);
for (size_t i = 0; i < composed->count; i++)
ctx->buf[ctx->idx++] = composed->combining[i];
} else
ctx->buf[ctx->idx++] = cell->wc;
ctx->last_row = row;
ctx->last_cell = cell;

View file

@ -822,6 +822,8 @@ term_init(const struct config *conf, struct fdm *fdm, struct wayland *wayl,
.normal = {.damage = tll_init(), .scroll_damage = tll_init(), .sixel_images = tll_init()},
.alt = {.damage = tll_init(), .scroll_damage = tll_init(), .sixel_images = tll_init()},
.grid = &term->normal,
.composed_count = 0,
.composed = NULL,
.meta = {
.esc_prefix = true,
.eight_bit = true,
@ -1086,6 +1088,8 @@ term_destroy(struct terminal *term)
tll_free(term->normal.scroll_damage);
tll_free(term->alt.scroll_damage);
free(term->composed);
free(term->window_title);
tll_free_and_free(term->window_title_stack, free);
@ -2295,10 +2299,6 @@ term_print(struct terminal *term, wchar_t wc, int width)
cell->wc = term->vt.last_printed = wc;
cell->attrs = term->vt.attrs;
#if FOOT_UNICODE_MAX_COMBINING_CHARS > 0
row->comb_chars[term->grid->cursor.point.col].count = 0;
#endif
row->dirty = true;
cell->attrs.clean = 0;

View file

@ -77,21 +77,16 @@ struct damage {
int lines;
};
#if FOOT_UNICODE_MAX_COMBINING_CHARS > 0
struct combining_chars {
struct composed {
wchar_t base;
wchar_t combining[5];
uint8_t count;
wchar_t chars[FOOT_UNICODE_MAX_COMBINING_CHARS];
} __attribute__((packed));
#endif
};
struct row {
struct cell *cells;
bool dirty;
bool linebreak;
#if FOOT_UNICODE_MAX_COMBINING_CHARS > 0
struct combining_chars *comb_chars;
#endif
};
struct sixel {
@ -221,6 +216,11 @@ struct terminal {
struct grid alt;
struct grid *grid;
#define COMB_CHARS_LO 0x40000000ul
#define COMB_CHARS_HI 0x400ffffful
size_t composed_count;
struct composed *composed;
struct fcft_font *fonts[4];
int font_dpi;
int font_adjustments;

80
vt.c
View file

@ -571,8 +571,6 @@ action_utf8_print(struct terminal *term, uint8_t c)
int width = wcwidth(wc);
#if FOOT_UNICODE_MAX_COMBINING_CHARS > 0
/*
* Is this is combining character? The basic assumption is that if
* wcwdith() returns 0, then it *is* a combining character.
@ -606,6 +604,15 @@ action_utf8_print(struct terminal *term, uint8_t c)
base = row->cells[base_col].wc;
}
const struct composed *composed =
(base >= COMB_CHARS_LO &&
base < (COMB_CHARS_LO + term->composed_count))
? &term->composed[base - COMB_CHARS_LO]
: NULL;
if (composed != NULL)
base = composed->base;
int base_width = wcwidth(base);
if (base != 0 && base_width > 0) {
@ -619,35 +626,76 @@ action_utf8_print(struct terminal *term, uint8_t c)
* pre-composed character, as that is likely to produce a
* better looking result.
*/
struct combining_chars *comb_chars = &row->comb_chars[base_col];
term->grid->cursor.point.col = base_col;
term->grid->cursor.lcf = false;
#if FOOT_UNICODE_PRECOMPOSE
if (comb_chars->count == 0) {
if (composed == NULL) {
wchar_t precomposed = precompose(base, wc);
int precomposed_width = wcwidth(precomposed);
if (precomposed != (wchar_t)-1 && precomposed_width == base_width) {
term->grid->cursor.point.col = base_col;
term->grid->cursor.lcf = false;
term_print(term, precomposed, precomposed_width);
return;
}
}
#endif
if (comb_chars->count < ALEN(comb_chars->chars))
comb_chars->chars[comb_chars->count++] = wc;
else {
size_t wanted_count = composed != NULL ? composed->count + 1 : 1;
if (wanted_count > ALEN(composed->combining)) {
assert(composed != NULL);
LOG_WARN("combining character overflow:");
LOG_WARN(" 0x%04x", base);
for (size_t i = 0; i < comb_chars->count; i++)
LOG_WARN(" 0x%04x", comb_chars->chars[i]);
LOG_ERR(" 0x%04x", wc);
LOG_WARN(" base: 0x%04x", composed->base);
for (size_t i = 0; i < composed->count; i++)
LOG_WARN(" cc: 0x%04x", composed->combining[i]);
LOG_ERR(" new: 0x%04x", wc);
/* This are going to break anyway... */
wanted_count--;
}
assert(wanted_count <= ALEN(composed->combining));
/* Look for existing combining chain */
for (size_t i = 0; i < term->composed_count; i++) {
const struct composed *cc = &term->composed[i];
if (cc->base != base)
continue;
if (cc->count != wanted_count)
continue;
if (cc->combining[wanted_count - 1] != wc)
continue;
term_print(term, COMB_CHARS_LO + i, base_width);
return;
}
/* Allocate new chain */
struct composed new_cc;
new_cc.base = base;
new_cc.count = wanted_count;
for (size_t i = 0; i < wanted_count - 1; i++)
new_cc.combining[i] = composed->combining[i];
new_cc.combining[wanted_count - 1] = wc;
if (term->composed_count < COMB_CHARS_HI) {
term->composed_count++;
term->composed = realloc(term->composed, term->composed_count * sizeof(term->composed[0]));
term->composed[term->composed_count - 1] = new_cc;
term_print(term, COMB_CHARS_LO + term->composed_count - 1, base_width);
return;
} else {
/* We reached our maximum number of allowed composed
* character chains. Fall through here and print the
* current zero-width character to the current cell */
LOG_WARN("maximum number of composed characters reached");
}
return;
}
}
#endif /* FOOT_UNICODE_MAX_COMBINING_CHARS > 0 */
term_print(term, wc, width);
}