mirror of
https://codeberg.org/dnkl/foot.git
synced 2026-02-14 04:27:57 -05:00
unicode-combine: remove utf8proc dependency
We only used utf8proc to try to pre-compose a glyph from a base and combining character. We can do this ourselves by using a pre-compiled table of valid pre-compositions. This table isn't _that_ big, and binary searching it is fast. That is, for a very small amount of code, and not too much extra RO data, we can get rid of the utf8proc dependency.
This commit is contained in:
parent
8389c76549
commit
d945b68b73
13 changed files with 34934 additions and 40 deletions
|
|
@ -22,12 +22,8 @@
|
|||
* Right mouse button extends the current selection.
|
||||
* `CSI Ps ; Ps ; Ps t` escape sequences for the following parameters:
|
||||
`11t`, `13t`, `13;2t`, `14t`, `14;2t`, `15t`, `19t`.
|
||||
* Unicode combining characters. This feature is optional. By default,
|
||||
it is enabled if
|
||||
[utf8proc](https://github.com/JuliaStrings/utf8proc) is available,
|
||||
but can be explicitly disabled or enabled at compile time with
|
||||
`meson -Dunicode-combining=disabled|enabled`.
|
||||
|
||||
* Unicode combining characters. This feature is compile time
|
||||
optional. See [README.md](README.md#unicode-combining) for details.
|
||||
|
||||
### Changed
|
||||
|
||||
|
|
|
|||
28
README.md
28
README.md
|
|
@ -16,6 +16,7 @@ The fast, lightweight and minimalistic Wayland terminal emulator.
|
|||
1. [Backspace](#backspace)
|
||||
1. [DPI and font size](#dpi-and-font-size)
|
||||
1. [Supported OSCs](#supported-oscs)
|
||||
1. [Unicode combining](#unicode-combining)
|
||||
1. [Requirements](#requirements)
|
||||
1. [Running](#running)
|
||||
1. [Building](#building)
|
||||
|
|
@ -268,6 +269,33 @@ with the terminal emulator itself. Foot implements the following OSCs:
|
|||
* `OSC 555` - flash screen (**foot specific**)
|
||||
|
||||
|
||||
## Unicode combining
|
||||
|
||||
In order to handle combining characters, foot must store additional
|
||||
data for each cell. By default, foot stores at most 2 combining
|
||||
characters per cell. This adds 9 bytes of additional space to each
|
||||
cell (that's 75% more space than without combining characters).
|
||||
|
||||
You can configure the maximum number of characters to store for each
|
||||
cell at **compile time** with
|
||||
`-Dunicode-max-combining-chars=<int>`. Setting this to `0`
|
||||
**disables** unicode combining completely - **no** additional data is
|
||||
stored.
|
||||
|
||||
Furthermore, in order to improve rendering of combining characters,
|
||||
foot will by default try to convert base + combining characters to a
|
||||
pre-composed character.
|
||||
|
||||
This will typically look better, since we can now render a single
|
||||
glyph, the way the font designer intended it to be rendered. When
|
||||
pre-composing fails, foot will fall back to storing the combining
|
||||
character(s) separate from the base character, and will render the
|
||||
final grapheme by rendering the base and combining glyphs separately.
|
||||
|
||||
You can disable pre-composing at **compile time** with
|
||||
`-Dunicode-precompose=false`.
|
||||
|
||||
|
||||
## Requirements
|
||||
|
||||
### Running
|
||||
|
|
|
|||
33797
UnicodeData.txt
Normal file
33797
UnicodeData.txt
Normal file
File diff suppressed because it is too large
Load diff
8
grid.c
8
grid.c
|
|
@ -34,14 +34,14 @@ grid_row_alloc(int cols, bool initialize)
|
|||
|
||||
if (initialize) {
|
||||
row->cells = calloc(cols, sizeof(row->cells[0]));
|
||||
#if FOOT_UNICODE_COMBINING
|
||||
#if FOOT_UNICODE_MAX_COMBINING_CHARS > 0
|
||||
row->comb_chars = calloc(cols, sizeof(row->comb_chars[0]));
|
||||
#endif
|
||||
for (size_t c = 0; c < cols; c++)
|
||||
row->cells[c].attrs.clean = 1;
|
||||
} else {
|
||||
row->cells = malloc(cols * sizeof(row->cells[0]));
|
||||
#if FOOT_UNICODE_COMBINING
|
||||
#if FOOT_UNICODE_MAX_COMBINING_CHARS > 0
|
||||
row->comb_chars = malloc(cols * sizeof(row->comb_chars[0]));
|
||||
#endif
|
||||
}
|
||||
|
|
@ -55,7 +55,7 @@ grid_row_free(struct row *row)
|
|||
if (row == NULL)
|
||||
return;
|
||||
|
||||
#if FOOT_UNICODE_COMBINING
|
||||
#if FOOT_UNICODE_MAX_COMBINING_CHARS > 0
|
||||
free(row->comb_chars);
|
||||
#endif
|
||||
free(row->cells);
|
||||
|
|
@ -214,7 +214,7 @@ grid_reflow(struct grid *grid, int new_rows, int new_cols,
|
|||
new_row->cells[new_col_idx] = *old_cell;
|
||||
new_row->cells[new_col_idx].attrs.clean = 1;
|
||||
|
||||
#if FOOT_UNICODE_COMBINING
|
||||
#if FOOT_UNICODE_MAX_COMBINING_CHARS > 0
|
||||
struct combining_chars *old_comb_chars
|
||||
= &old_row->comb_chars[c - empty_count + i];
|
||||
struct combining_chars *new_comb_chars
|
||||
|
|
|
|||
12
meson.build
12
meson.build
|
|
@ -57,8 +57,10 @@ wayland_client = dependency('wayland-client')
|
|||
wayland_cursor = dependency('wayland-cursor')
|
||||
xkb = dependency('xkbcommon')
|
||||
|
||||
utf8proc = dependency('libutf8proc', required: get_option('unicode-combining'))
|
||||
add_project_arguments('-DFOOT_UNICODE_COMBINING=@0@'.format(utf8proc.found()), language: 'c')
|
||||
add_project_arguments('-DFOOT_UNICODE_MAX_COMBINING_CHARS=@0@'.format(
|
||||
get_option('unicode-max-combining-chars')), language: 'c')
|
||||
add_project_arguments('-DFOOT_UNICODE_PRECOMPOSE=@0@'.format(
|
||||
get_option('unicode-precompose')), language: 'c')
|
||||
|
||||
tllist = dependency('tllist', version: '>=1.0.1', fallback: 'tllist')
|
||||
fcft = dependency('fcft', version: ['>=2.0.0', '<2.1.0'], fallback: 'fcft')
|
||||
|
|
@ -128,8 +130,7 @@ executable(
|
|||
'vt.c', 'vt.h',
|
||||
'wayland.c', 'wayland.h',
|
||||
wl_proto_src + wl_proto_headers, version,
|
||||
dependencies: [math, threads, pixman, wayland_client, wayland_cursor, xkb, utf8proc,
|
||||
tllist, fcft],
|
||||
dependencies: [math, threads, pixman, wayland_client, wayland_cursor, xkb, tllist, fcft],
|
||||
install: true)
|
||||
|
||||
executable(
|
||||
|
|
@ -154,7 +155,8 @@ subdir('doc')
|
|||
|
||||
summary(
|
||||
{
|
||||
'Unicode combining': utf8proc.found(),
|
||||
'Unicode max combining chars': get_option('unicode-max-combining-chars'),
|
||||
'Unicode precompose': get_option('unicode-precompose'),
|
||||
},
|
||||
bool_yn: true
|
||||
)
|
||||
|
|
|
|||
|
|
@ -1 +1,5 @@
|
|||
option('unicode-combining', type: 'feature', value: 'auto', description: 'Perform unicode combining')
|
||||
option('unicode-max-combining-chars', type: 'integer', value: 2,
|
||||
description: 'Maximum number of combining characters to track per cell. A value of 0 completely disables unicode combining (this reduces the runtime memory footprint)')
|
||||
|
||||
# Controls whether base + combining character pairs are replaced with a
# pre-composed character. Only meaningful when combining characters are
# tracked at all, i.e. when 'unicode-max-combining-chars' is greater than 0.
option('unicode-precompose', type: 'boolean', value: true,
       description: 'Convert decomposed characters to precomposed. Ignored if "unicode-max-combining-chars" is 0 (unicode combining disabled)')
|
||||
|
|
|
|||
2
render.c
2
render.c
|
|
@ -442,7 +442,7 @@ render_cell(struct terminal *term, pixman_image_t *pix,
|
|||
}
|
||||
}
|
||||
|
||||
#if FOOT_UNICODE_COMBINING
|
||||
#if FOOT_UNICODE_MAX_COMBINING_CHARS > 0
|
||||
/* Combining characters */
|
||||
const struct combining_chars *comb_chars = &row->comb_chars[col];
|
||||
for (size_t i = 0; i < comb_chars->count; i++) {
|
||||
|
|
|
|||
8
scripts/generate-unicode-precompose.sh
Executable file
8
scripts/generate-unicode-precompose.sh
Executable file
|
|
@ -0,0 +1,8 @@
|
|||
#!/bin/sh

# Filter: reads ';'-separated records on stdin and writes C initializer
# lines of the form "{ 0xAAAA, 0xBBBB, 0xCCCC}," on stdout.
# NOTE(review): the input is presumably UnicodeData.txt (field 1 = code
# point, field 6 = decomposition mapping) - confirm against the caller.

# Keep only fields 1 and 6 of each record.
cut -d ";" -f 1,6 |
# Keep records whose second field starts with a hex digit (drops empty and
# "<tag>"-prefixed fields) and contains a space (more than one value).
grep ";[0-9A-F]" | grep " " |
# "AAAA;BBBB CCCC" -> "{ 0xAAAA, 0xBBBB, 0xCCCC},"
sed -e "s/ /, 0x/;s/^/{ 0x/;s/;/, 0x/;s/$/},/" |
# Temporarily pad 4-digit values with a leading 0, so that the textual sort
# below orders 4- and 5-digit values consistently.
sed -e "s,0x\(....\)\([^0-9A-Fa-f]\),0x0\1\2,g" |
# Sort on the third whitespace-separated field onwards; "sort +2" is the
# obsolete spelling, tried only if "sort -k 3" fails.
(sort -k 3 || sort +2) |
# Strip the temporary padding again.
sed -e "s,0x0\(...[0-9A-Fa-f]\),0x\1,g"
|
||||
|
|
@ -143,7 +143,7 @@ min_bufsize_for_extraction(const struct terminal *term)
|
|||
const struct coord *start = &term->selection.start;
|
||||
const struct coord *end = &term->selection.end;
|
||||
const size_t chars_per_cell =
|
||||
#if FOOT_UNICODE_COMBINING
|
||||
#if FOOT_UNICODE_MAX_COMBINING_CHARS > 0
|
||||
1 + ALEN(term->grid->cur_row->comb_chars[0].chars);
|
||||
#else
|
||||
1;
|
||||
|
|
@ -241,7 +241,7 @@ extract_one(struct terminal *term, struct row *row, struct cell *cell,
|
|||
assert(ctx->idx + 1 <= ctx->size);
|
||||
ctx->buf[ctx->idx++] = cell->wc;
|
||||
|
||||
#if FOOT_UNICODE_COMBINING
|
||||
#if FOOT_UNICODE_MAX_COMBINING_CHARS > 0
|
||||
const struct combining_chars *comb_chars = &row->comb_chars[col];
|
||||
|
||||
assert(cell->wc != 0);
|
||||
|
|
|
|||
|
|
@ -2295,7 +2295,7 @@ term_print(struct terminal *term, wchar_t wc, int width)
|
|||
cell->wc = term->vt.last_printed = wc;
|
||||
cell->attrs = term->vt.attrs;
|
||||
|
||||
#if FOOT_UNICODE_COMBINING
|
||||
#if FOOT_UNICODE_MAX_COMBINING_CHARS > 0
|
||||
row->comb_chars[term->grid->cursor.point.col].count = 0;
|
||||
#endif
|
||||
|
||||
|
|
|
|||
|
|
@ -77,10 +77,10 @@ struct damage {
|
|||
int lines;
|
||||
};
|
||||
|
||||
#if FOOT_UNICODE_COMBINING
|
||||
#if FOOT_UNICODE_MAX_COMBINING_CHARS > 0
|
||||
struct combining_chars {
|
||||
uint8_t count;
|
||||
wchar_t chars[2]; /* This is XTerms default, but there _are_ cases where more are needed */
|
||||
wchar_t chars[FOOT_UNICODE_MAX_COMBINING_CHARS];
|
||||
} __attribute__((packed));
|
||||
#endif
|
||||
|
||||
|
|
@ -89,7 +89,7 @@ struct row {
|
|||
bool dirty;
|
||||
bool linebreak;
|
||||
|
||||
#if FOOT_UNICODE_COMBINING
|
||||
#if FOOT_UNICODE_MAX_COMBINING_CHARS > 0
|
||||
struct combining_chars *comb_chars;
|
||||
#endif
|
||||
};
|
||||
|
|
|
|||
1036
unicode-compose-table.h
Normal file
1036
unicode-compose-table.h
Normal file
File diff suppressed because it is too large
Load diff
57
vt.c
57
vt.c
|
|
@ -5,10 +5,6 @@
|
|||
#include <unistd.h>
|
||||
#include <assert.h>
|
||||
|
||||
#if FOOT_UNICODE_COMBINING
|
||||
#include <utf8proc.h>
|
||||
#endif
|
||||
|
||||
#define LOG_MODULE "vt"
|
||||
#define LOG_ENABLE_DBG 0
|
||||
#include "log.h"
|
||||
|
|
@ -18,6 +14,10 @@
|
|||
#include "osc.h"
|
||||
#include "util.h"
|
||||
|
||||
#if FOOT_UNICODE_PRECOMPOSE
|
||||
#include "unicode-compose-table.h"
|
||||
#endif
|
||||
|
||||
#define UNHANDLED() LOG_DBG("unhandled: %s", esc_as_string(term, final))
|
||||
|
||||
/* https://vt100.net/emu/dec_ansi_parser */
|
||||
|
|
@ -527,6 +527,34 @@ action_utf8_4_entry(struct terminal *term, uint8_t c)
|
|||
term->vt.utf8.data[term->vt.utf8.idx++] = c;
|
||||
}
|
||||
|
||||
static wchar_t
|
||||
precompose(wchar_t base, wchar_t comb)
|
||||
{
|
||||
static_assert(2 * sizeof(wchar_t) <= sizeof(uint64_t),
|
||||
"two wchars does not fit in an uint64_t");
|
||||
|
||||
const uint64_t match = (uint64_t)base << 32 | comb;
|
||||
|
||||
size_t start = 0;
|
||||
size_t end = ALEN(precompose_table) - 1;
|
||||
|
||||
while (start <= end) {
|
||||
size_t middle = (start + end) / 2;
|
||||
|
||||
const uint64_t maybe =
|
||||
(uint64_t)precompose_table[middle].base << 32 | precompose_table[middle].comb;
|
||||
|
||||
if (maybe < match)
|
||||
start = middle + 1;
|
||||
else if (maybe > match)
|
||||
end = middle - 1;
|
||||
else
|
||||
return precompose_table[middle].replacement;
|
||||
}
|
||||
|
||||
return (wchar_t)-1;
|
||||
}
|
||||
|
||||
static void
|
||||
action_utf8_print(struct terminal *term, uint8_t c)
|
||||
{
|
||||
|
|
@ -541,7 +569,7 @@ action_utf8_print(struct terminal *term, uint8_t c)
|
|||
|
||||
int width = wcwidth(wc);
|
||||
|
||||
#if FOOT_UNICODE_COMBINING
|
||||
#if FOOT_UNICODE_MAX_COMBINING_CHARS > 0
|
||||
|
||||
/*
|
||||
* Is this a combining character? The basic assumption is that if
|
||||
|
|
@ -588,27 +616,22 @@ action_utf8_print(struct terminal *term, uint8_t c)
|
|||
* If there is, replace the base character with the
|
||||
* pre-composed character, as that is likely to produce a
|
||||
* better looking result.
|
||||
*
|
||||
* TODO: we could perhaps remove this is we improve our
|
||||
* positioning of the combining characters when rendering
|
||||
* the glyph.
|
||||
*/
|
||||
|
||||
struct combining_chars *comb_chars = &row->comb_chars[base_col];
|
||||
|
||||
#if FOOT_UNICODE_PRECOMPOSE
|
||||
if (comb_chars->count == 0) {
|
||||
wchar_t composed[] = {base, wc};
|
||||
ssize_t composed_length = utf8proc_normalize_utf32(
|
||||
composed, ALEN(composed), UTF8PROC_COMPOSE | UTF8PROC_STABLE);
|
||||
int composed_width = wcwidth(composed[0]);
|
||||
|
||||
if (composed_length == 1 && composed_width == base_width) {
|
||||
wchar_t precomposed = precompose(base, wc);
|
||||
int precomposed_width = wcwidth(precomposed);
|
||||
if (precomposed != (wchar_t)-1 && precomposed_width == base_width) {
|
||||
term->grid->cursor.point.col = base_col;
|
||||
term->grid->cursor.lcf = false;
|
||||
term_print(term, composed[0], composed_width);
|
||||
term_print(term, precomposed, precomposed_width);
|
||||
return;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
if (comb_chars->count < ALEN(comb_chars->chars))
|
||||
comb_chars->chars[comb_chars->count++] = wc;
|
||||
|
|
@ -622,7 +645,7 @@ action_utf8_print(struct terminal *term, uint8_t c)
|
|||
return;
|
||||
}
|
||||
}
|
||||
#endif /* FOOT_UNICODE_COMBINING */
|
||||
#endif /* FOOT_UNICODE_MAX_COMBINING_CHARS > 0 */
|
||||
|
||||
term_print(term, wc, width);
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue