Merge branch 'unicode-combining'

This commit is contained in:
Daniel Eklöf 2020-04-27 15:53:34 +02:00
commit c4e3b9f69d
No known key found for this signature in database
GPG key ID: 5BBD4992C116573F
6 changed files with 68 additions and 22 deletions

View file

@ -22,6 +22,14 @@
* Right mouse button extends the current selection.
* `CSI Ps ; Ps ; Ps t` escape sequences for the following parameters:
`11t`, `13t`, `13;2t`, `14t`, `14;2t`, `15t`, `19t`.
* (Optional) support for unicode combining characters. For example,
`a\u0301` will be combined to `á` (`\u00e1`). Note that copying the
printed character to the clipboard/primary selection will copy the
single code point `\u00e1` and **not** the sequence `\u0061\u0301`. It requires
[utf8proc](https://github.com/JuliaStrings/utf8proc). By default,
the feature is enabled if utf8proc is found. However, it can also be
explicitly disabled (or enabled) with `meson
-Dunicode-combining=enabled|disabled`.
### Changed

View file

@ -277,6 +277,8 @@ with the terminal emulator itself. Foot implements the following OSCs:
* pixman
* wayland (_client_ and _cursor_ libraries)
* xkbcommon
* [utf8proc](https://github.com/JuliaStrings/utf8proc) (_optional_ -
enables unicode combining)
* [tllist](https://codeberg.org/dnkl/tllist) [^1]
* [fcft](https://codeberg.org/dnkl/fcft) [^1]

View file

@ -57,6 +57,9 @@ wayland_client = dependency('wayland-client')
wayland_cursor = dependency('wayland-cursor')
xkb = dependency('xkbcommon')
utf8proc = dependency('libutf8proc', required: get_option('unicode-combining'))
add_project_arguments('-DFOOT_UNICODE_COMBINING=@0@'.format(utf8proc.found()), language: 'c')
tllist = dependency('tllist', version: '>=1.0.1', fallback: 'tllist')
fcft = dependency('fcft', version: ['>=2.0.0', '<2.1.0'], fallback: 'fcft')
@ -125,7 +128,7 @@ executable(
'vt.c', 'vt.h',
'wayland.c', 'wayland.h',
wl_proto_src + wl_proto_headers, version,
dependencies: [math, threads, pixman, wayland_client, wayland_cursor, xkb,
dependencies: [math, threads, pixman, wayland_client, wayland_cursor, xkb, utf8proc,
tllist, fcft],
install: true)

1
meson_options.txt Normal file
View file

@ -0,0 +1 @@
option('unicode-combining', type: 'feature', value: 'auto', description: 'Perform unicode combining')

View file

@ -135,7 +135,6 @@ struct vt {
struct {
uint8_t data[4];
uint8_t idx;
uint8_t left;
} utf8;
struct {
uint8_t *data;

73
vt.c
View file

@ -5,6 +5,10 @@
#include <unistd.h>
#include <assert.h>
#if FOOT_UNICODE_COMBINING
#include <utf8proc.h>
#endif
#define LOG_MODULE "vt"
#define LOG_ENABLE_DBG 0
#include "log.h"
@ -105,7 +109,6 @@ action_clear(struct terminal *term)
term->vt.params.idx = 0;
term->vt.private[0] = 0;
term->vt.private[1] = 0;
term->vt.utf8.idx = 0;
}
static void
@ -509,27 +512,21 @@ static void
action_utf8_2_entry(struct terminal *term, uint8_t c)
{
term->vt.utf8.idx = 0;
term->vt.utf8.left = 2;
term->vt.utf8.data[term->vt.utf8.idx++] = c;
term->vt.utf8.left--;
}
static void
action_utf8_3_entry(struct terminal *term, uint8_t c)
{
term->vt.utf8.idx = 0;
term->vt.utf8.left = 3;
term->vt.utf8.data[term->vt.utf8.idx++] = c;
term->vt.utf8.left--;
}
static void
action_utf8_4_entry(struct terminal *term, uint8_t c)
{
term->vt.utf8.idx = 0;
term->vt.utf8.left = 4;
term->vt.utf8.data[term->vt.utf8.idx++] = c;
term->vt.utf8.left--;
}
static void
@ -544,11 +541,56 @@ action_utf8_print(struct terminal *term, uint8_t c)
if ((ssize_t)count < 0)
wc = 0;
/* Reset VT utf8 state */
term->vt.utf8.idx = 0;
#if FOOT_UNICODE_COMBINING
/*
* Try to combine with the previous character.
*
* We _could_ try regardless of what 'wc' is. However, for
* performance reasons, we only do it when 'wc' is in a known
* 'combining' range.
*
* TODO:
* - doesn't work when base character is multi-column (we'll only
* see an empty "null" character)
*/
int width = wcwidth(wc);
term_print(term, wc, width);
if (((wc >= 0x0300 && wc <= 0x036F) || /* diacritical marks */
(wc >= 0x1AB0 && wc <= 0x1AFF) || /* diacritical marks, extended */
(wc >= 0x1DC0 && wc <= 0x1DFF) || /* diacritical marks, supplement */
(wc >= 0x20D0 && wc <= 0x20FF) || /* diacritical marks, for symbols */
(wc >= 0xFE20 && wc <= 0xFE2F)) /* half marks */
&& term->grid->cursor.point.col > 0)
{
int base_col = term->grid->cursor.point.col;
if (!term->grid->cursor.lcf)
base_col--;
assert(base_col >= 0 && base_col < term->cols);
wchar_t base = term->grid->cur_row->cells[base_col].wc;
int base_width = wcwidth(base);
if (base_width > 0) {
wchar_t composed[] = {base, wc};
ssize_t composed_length = utf8proc_normalize_utf32(
composed, sizeof(composed) / sizeof(composed[0]),
UTF8PROC_COMPOSE | UTF8PROC_STABLE);
LOG_DBG("composed = 0x%04x, 0x%04x (length = %zd)",
composed[0], composed[1], composed_length);
if (composed_length == 1) {
/* Compose success - overwrite last cell with
* combined character */
term->grid->cursor.point.col = base_col;
term->grid->cursor.lcf = false;
term_print(term, composed[0], wcwidth(composed[0]));
return;
}
}
}
#endif /* FOOT_UNICODE_COMBINING */
term_print(term, wc, wcwidth(wc));
}
static enum state
@ -1016,9 +1058,6 @@ static enum state
state_utf8_collect_1_switch(struct terminal *term, uint8_t data)
{
term->vt.utf8.data[term->vt.utf8.idx++] = data;
term->vt.utf8.left--;
assert(term->vt.utf8.left == 0);
action_utf8_print(term, data);
return STATE_GROUND;
}
@ -1027,9 +1066,6 @@ static enum state
state_utf8_collect_2_switch(struct terminal *term, uint8_t data)
{
term->vt.utf8.data[term->vt.utf8.idx++] = data;
term->vt.utf8.left--;
assert(term->vt.utf8.left == 1);
return STATE_UTF8_COLLECT_1;
}
@ -1037,9 +1073,6 @@ static enum state
state_utf8_collect_3_switch(struct terminal *term, uint8_t data)
{
term->vt.utf8.data[term->vt.utf8.idx++] = data;
term->vt.utf8.left--;
assert(term->vt.utf8.left == 2);
return STATE_UTF8_COLLECT_2;
}