From b9ef703eb14ba2357b847b62a715b4e97f690209 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Ekl=C3=B6f?= Date: Thu, 20 Aug 2020 19:25:35 +0200 Subject: [PATCH] wip: grapheme shaping --- .builds/alpine-x64.yml | 5 +- .builds/alpine-x86.yml.disabled | 5 +- .builds/freebsd-x64.yml | 5 +- .gitlab-ci.yml | 24 +++- config.c | 30 ++++ config.h | 2 + doc/foot.ini.5.scd | 22 +++ extract.c | 5 +- meson.build | 8 +- meson_options.txt | 3 + render.c | 128 +++++++++++------ search.c | 6 +- selection.c | 8 +- terminal.h | 17 ++- util.h | 8 ++ vt.c | 242 +++++++++++++++++--------------- 16 files changed, 340 insertions(+), 178 deletions(-) diff --git a/.builds/alpine-x64.yml b/.builds/alpine-x64.yml index 3884e66b..6823c834 100644 --- a/.builds/alpine-x64.yml +++ b/.builds/alpine-x64.yml @@ -13,6 +13,7 @@ packages: - freetype-dev - fontconfig-dev - harfbuzz-dev + - utf8proc-dev - pixman-dev - libxkbcommon-dev - ncurses @@ -33,12 +34,12 @@ sources: tasks: - debug: | mkdir -p bld/debug - meson --buildtype=debug -Dfcft:text-shaping=enabled -Dfcft:test-text-shaping=true foot bld/debug + meson --buildtype=debug -Dgrapheme-clustering=enabled -Dfcft:text-shaping=enabled -Dfcft:test-text-shaping=true foot bld/debug ninja -C bld/debug -k0 meson test -C bld/debug --print-errorlogs - release: | mkdir -p bld/release - meson --buildtype=minsize -Dfcft:text-shaping=enabled -Dfcft:test-text-shaping=true foot bld/release + meson --buildtype=minsize -Dgrapheme-clustering=enabled -Dfcft:text-shaping=enabled -Dfcft:test-text-shaping=true foot bld/release ninja -C bld/release -k0 meson test -C bld/release --print-errorlogs - codespell: | diff --git a/.builds/alpine-x86.yml.disabled b/.builds/alpine-x86.yml.disabled index 52bb6e26..1866e15a 100644 --- a/.builds/alpine-x86.yml.disabled +++ b/.builds/alpine-x86.yml.disabled @@ -14,6 +14,7 @@ packages: - freetype-dev - fontconfig-dev - harfbuzz-dev + - utf8proc-dev - pixman-dev - libxkbcommon-dev - ncurses @@ -32,11 +33,11 @@ sources: tasks: - 
debug: | mkdir -p bld/debug - meson --buildtype=debug -Dfcft:text-shaping=enabled -Dfcft:test-text-shaping=true foot bld/debug + meson --buildtype=debug -Dgrapheme-clustering=enabled -Dfcft:text-shaping=enabled -Dfcft:test-text-shaping=true foot bld/debug ninja -C bld/debug -k0 meson test -C bld/debug --print-errorlogs - release: | mkdir -p bld/release - meson --buildtype=minsize -Dfcft:text-shaping=enabled -Dfcft:test-text-shaping=true foot bld/release + meson --buildtype=minsize -Dgrapheme-clustering=enabled -Dfcft:text-shaping=enabled -Dfcft:test-text-shaping=true foot bld/release ninja -C bld/release -k0 meson test -C bld/release --print-errorlogs diff --git a/.builds/freebsd-x64.yml b/.builds/freebsd-x64.yml index d14ae645..7fc65233 100644 --- a/.builds/freebsd-x64.yml +++ b/.builds/freebsd-x64.yml @@ -11,6 +11,7 @@ packages: - freetype2 - fontconfig - harfbuzz + - utf8proc - pixman - libxkbcommon - check @@ -28,11 +29,11 @@ sources: tasks: - debug: | mkdir -p bld/debug - meson --buildtype=debug -Dterminfo=disabled -Dfcft:text-shaping=enabled -Dfcft:test-text-shaping=true foot bld/debug + meson --buildtype=debug -Dterminfo=disabled -Dgrapheme-clustering=enabled -Dfcft:text-shaping=enabled -Dfcft:test-text-shaping=true foot bld/debug ninja -C bld/debug -k0 meson test -C bld/debug --print-errorlogs - release: | mkdir -p bld/release - meson --buildtype=minsize -Dterminfo=disabled -Dfcft:text-shaping=enabled -Dfcft:test-text-shaping=true foot bld/release + meson --buildtype=minsize -Dterminfo=disabled -Dgrapheme-clustering=enabled -Dfcft:text-shaping=enabled -Dfcft:test-text-shaping=true foot bld/release ninja -C bld/release -k0 meson test -C bld/release --print-errorlogs diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index bf61ada4..05992cf8 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -7,7 +7,7 @@ variables: before_script: - apk update - apk add musl-dev linux-headers meson ninja gcc scdoc ncurses - - apk add libxkbcommon-dev pixman-dev freetype-dev 
fontconfig-dev harfbuzz-dev + - apk add libxkbcommon-dev pixman-dev freetype-dev fontconfig-dev harfbuzz-dev utf8proc-dev - apk add wayland-dev wayland-protocols - apk add git - apk add check-dev @@ -19,7 +19,21 @@ debug-x64: script: - mkdir -p bld/debug - cd bld/debug - - meson --buildtype=debug -Dfcft:text-shaping=enabled -Dfcft:test-text-shaping=true ../../ + - meson --buildtype=debug -Dgrapheme-clustering=enabled -Dfcft:text-shaping=enabled -Dfcft:test-text-shaping=true ../../ + - ninja -v -k0 + - ninja -v test + artifacts: + reports: + junit: bld/debug/meson-logs/testlog.junit.xml + +debug-x64-no-grapheme-clustering: + image: alpine:edge + stage: build + script: + - apk del harfbuzz harfbuzz-dev utf8proc utf8proc-dev + - mkdir -p bld/debug + - cd bld/debug + - meson --buildtype=debug -Dgrapheme-clustering=disabled -Dfcft:text-shaping=disabled -Dfcft:test-text-shaping=false ../../ - ninja -v -k0 - ninja -v test artifacts: @@ -32,7 +46,7 @@ release-x64: script: - mkdir -p bld/release - cd bld/release - - meson --buildtype=release -Dfcft:text-shaping=enabled -Dfcft:test-text-shaping=true ../../ + - meson --buildtype=release -Dgrapheme-clustering=enabled -Dfcft:text-shaping=enabled -Dfcft:test-text-shaping=true ../../ - ninja -v -k0 - ninja -v test artifacts: @@ -45,7 +59,7 @@ debug-x86: script: - mkdir -p bld/debug - cd bld/debug - - meson --buildtype=debug -Dfcft:text-shaping=enabled -Dfcft:test-text-shaping=true ../../ + - meson --buildtype=debug -Dgrapheme-clustering=enabled -Dfcft:text-shaping=enabled -Dfcft:test-text-shaping=true ../../ - ninja -v -k0 - ninja -v test artifacts: @@ -58,7 +72,7 @@ release-x86: script: - mkdir -p bld/release - cd bld/release - - meson --buildtype=release -Dfcft:text-shaping=enabled -Dfcft:test-text-shaping=true ../../ + - meson --buildtype=release -Dgrapheme-clustering=enabled -Dfcft:text-shaping=enabled -Dfcft:test-text-shaping=true ../../ - ninja -v -k0 - ninja -v test artifacts: diff --git a/config.c b/config.c index 
81392043..a95f8485 100644 --- a/config.c +++ b/config.c @@ -2129,6 +2129,33 @@ parse_section_tweak( LOG_WARN("tweak: damage whole window"); } + else if (strcmp(key, "grapheme-shaping") == 0) { + conf->tweak.grapheme_shaping = str_to_bool(value); + +#if !defined(FOOT_GRAPHEME_CLUSTERING) + if (conf->tweak.grapheme_shaping) { + LOG_AND_NOTIFY_WARN( + "%s:%d: [tweak]: " + "grapheme-shaping enabled but foot was not compiled with " + "support for it", path, lineno); + conf->tweak.grapheme_shaping = false; + } +#endif + + if (conf->tweak.grapheme_shaping && !conf->can_shape_grapheme) { + LOG_WARN( + "%s:%d [tweak]: " + "grapheme-shaping enabled but fcft was not compiled with " + "support for it", path, lineno); + + /* Keep it enabled though - this will cause us to do + * grapheme-clustering at least */ + } + + if (conf->tweak.grapheme_shaping) + LOG_WARN("tweak: grapheme shaping"); + } + else if (strcmp(key, "render-timer") == 0) { if (strcmp(value, "none") == 0) { conf->tweak.render_timer_osd = false; @@ -2580,6 +2607,7 @@ config_load(struct config *conf, const char *conf_path, config_override_t *overrides, bool errors_are_fatal) { bool ret = false; + enum fcft_capabilities fcft_caps = fcft_capabilities(); *conf = (struct config) { .term = xstrdup(DEFAULT_TERM), @@ -2620,6 +2648,7 @@ config_load(struct config *conf, const char *conf_path, .label_letters = xwcsdup(L"sadfjklewcmpgh"), .osc8_underline = OSC8_UNDERLINE_URL_MODE, }, + .can_shape_grapheme = fcft_caps & FCFT_CAPABILITY_GRAPHEME_SHAPING, .scrollback = { .lines = 1000, .indicator = { @@ -2694,6 +2723,7 @@ config_load(struct config *conf, const char *conf_path, .tweak = { .fcft_filter = FCFT_SCALING_FILTER_LANCZOS3, .allow_overflowing_double_width_glyphs = true, + .grapheme_shaping = false, .delayed_render_lower_ns = 500000, /* 0.5ms */ .delayed_render_upper_ns = 16666666 / 2, /* half a frame period (60Hz) */ .max_shm_pool_size = 512 * 1024 * 1024, diff --git a/config.h b/config.h index c3c36b6a..ee427267 100644 
--- a/config.h +++ b/config.h @@ -111,6 +111,7 @@ struct config { struct pt_or_px underline_offset; bool box_drawings_uses_font_glyphs; + bool can_shape_grapheme; struct { bool urgent; @@ -244,6 +245,7 @@ struct config { struct { enum fcft_scaling_filter fcft_filter; bool allow_overflowing_double_width_glyphs; + bool grapheme_shaping; bool render_timer_osd; bool render_timer_log; bool damage_whole_window; diff --git a/doc/foot.ini.5.scd b/doc/foot.ini.5.scd index 58134060..94ad42f6 100644 --- a/doc/foot.ini.5.scd +++ b/doc/foot.ini.5.scd @@ -975,6 +975,28 @@ any of these options. Default: _no_. +*grapheme-shaping* + Boolean. When enabled, foot will use _utf8proc_ to do grapheme + cluster segmentation while parsing "printed" text. Then, when + rendering, it will use _fcft_ (if compiled with _HarfBuzz_ + support) to shape the grapheme clusters. + + This is required to render e.g. flag (emoji) sequences, keycap + sequences, modifier sequences, zero-width-joiner (ZWJ) sequences + and emoji tag sequences. + + This is an experimental feature with the following requirements and limitations: + + - foot must have been compiled with utf8proc support + - fcft must have been compiled with HarfBuzz support + - This option must be set to true + - Foot will use *wcswidth*(3) to calculate a cluster's display + width. This will typically _not_ match the shaped glyph's width, + but is necessary to not break cursor synchronization with the + application running in foot. 
+ + Default: _no_ + *max-shm-pool-size-mb* This option controls the amount of virtual address space used by the pixmap memory to which the terminal screen content is diff --git a/extract.c b/extract.c index 07144597..8aa1acf3 100644 --- a/extract.c +++ b/extract.c @@ -229,12 +229,11 @@ extract_one(const struct terminal *term, const struct row *row, const struct composed *composed = &term->composed[cell->wc - CELL_COMB_CHARS_LO]; - if (!ensure_size(ctx, 1 + composed->count)) + if (!ensure_size(ctx, composed->count)) goto err; - ctx->buf[ctx->idx++] = composed->base; for (size_t i = 0; i < composed->count; i++) - ctx->buf[ctx->idx++] = composed->combining[i]; + ctx->buf[ctx->idx++] = composed->chars[i]; } else { diff --git a/meson.build b/meson.build index 14d23975..107fd905 100644 --- a/meson.build +++ b/meson.build @@ -71,6 +71,11 @@ wayland_client = dependency('wayland-client') wayland_cursor = dependency('wayland-cursor') xkb = dependency('xkbcommon', version: '>=1.0.0') fontconfig = dependency('fontconfig') +utf8proc = dependency('libutf8proc', required: get_option('grapheme-clustering')) + +if utf8proc.found() + add_project_arguments('-DFOOT_GRAPHEME_CLUSTERING=1', language: 'c') +endif tllist = dependency('tllist', version: '>=1.0.4', fallback: 'tllist') fcft = dependency('fcft', version: ['>=2.4.0', '<3.0.0'], fallback: 'fcft') @@ -149,7 +154,7 @@ vtlib = static_library( 'vt.c', 'vt.h', wl_proto_src + wl_proto_headers, version, - dependencies: [libepoll, pixman, fcft, tllist, wayland_client, xkb], + dependencies: [libepoll, pixman, fcft, tllist, wayland_client, xkb, utf8proc], link_with: [common, misc], ) @@ -247,6 +252,7 @@ subdir('icons') summary( { 'IME': get_option('ime'), + 'Grapheme clustering': utf8proc.found(), 'Terminfo': tic.found(), 'Terminfo install location': terminfo_install_location, }, diff --git a/meson_options.txt b/meson_options.txt index 52625b8d..57b5721a 100644 --- a/meson_options.txt +++ b/meson_options.txt @@ -1,5 +1,8 @@ option('ime', 
type: 'boolean', value: true, description: 'IME (Input Method Editor) support') +option('grapheme-clustering', type: 'feature', + description: 'Enables grapheme clustering using libutf8proc. Requires fcft with harfbuzz support to be useful.') + option('terminfo', type: 'feature', description: 'Build terminfo. When disabled, foot\'s terminfo will not be built, and foot will default to \'xterm-256color\' instead of \'foot\'.') option('terminfo-install-location', type: 'string', description: 'Where to install the foot terminfo files, relative to the installation prefix. If set to \'disabled\', the terminfo files are not installed at all (useful when packaging the terminfo files in a separate package). Defaults to $datadir/terminfo.') diff --git a/render.c b/render.c index f96ddb9e..54269bf3 100644 --- a/render.c +++ b/render.c @@ -500,19 +500,15 @@ render_cell(struct terminal *term, pixman_image_t *pix, } struct fcft_font *font = attrs_to_font(term, &cell->attrs); - const struct fcft_glyph *glyph = NULL; const struct composed *composed = NULL; + const struct fcft_grapheme *grapheme = NULL; + const struct fcft_glyph *single = NULL; + const struct fcft_glyph **glyphs = NULL; + unsigned glyph_count = 0; wchar_t base = cell->wc; if (base != 0) { - if (base >= CELL_COMB_CHARS_LO && - base < (CELL_COMB_CHARS_LO + term->composed_count)) - { - composed = &term->composed[base - CELL_COMB_CHARS_LO]; - base = composed->base; - } - if (unlikely( /* Classic box drawings */ (base >= 0x2500 && base <= 0x259f) || @@ -528,7 +524,7 @@ render_cell(struct terminal *term, pixman_image_t *pix, (base >= 0x1fb00 && base <= 0x1fb3b) || /* Unicode 13 partial blocks */ - /* TODO: there's more here! */ + /* TODO: there's more here! 
*/ (base >= 0x1fb70 && base <= 0x1fb8b)) && likely(!term->conf->box_drawings_uses_font_glyphs)) @@ -542,7 +538,7 @@ render_cell(struct terminal *term, pixman_image_t *pix, xassert(idx < ALEN(term->box_drawing)); if (likely(term->box_drawing[idx] != NULL)) - glyph = term->box_drawing[idx]; + single = term->box_drawing[idx]; else { mtx_lock(&term->render.workers.lock); @@ -551,15 +547,45 @@ render_cell(struct terminal *term, pixman_image_t *pix, term->box_drawing[idx] = box_drawing(term, base); mtx_unlock(&term->render.workers.lock); - glyph = term->box_drawing[idx]; - xassert(glyph != NULL); + single = term->box_drawing[idx]; + xassert(single != NULL); } - } else - glyph = fcft_glyph_rasterize(font, base, term->font_subpixel); + + glyph_count = 1; + glyphs = &single; + } + + else if (base >= CELL_COMB_CHARS_LO && + base < (CELL_COMB_CHARS_LO + term->composed_count)) + { + composed = &term->composed[base - CELL_COMB_CHARS_LO]; + base = composed->chars[0]; + + if (term->conf->can_shape_grapheme && term->conf->tweak.grapheme_shaping) { + grapheme = fcft_grapheme_rasterize( + font, composed->count, composed->chars, + 0, NULL, term->font_subpixel); + } + + if (grapheme != NULL) { + composed = NULL; + glyphs = grapheme->glyphs; + glyph_count = grapheme->count; + } + } + + + if (single == NULL && grapheme == NULL) { + xassert(base != 0); + single = fcft_glyph_rasterize(font, base, term->font_subpixel); + glyph_count = single != NULL ? 1 : 0; /* rasterization may fail; never expose a NULL glyphs[0] */ + glyphs = &single; + } } + assert(glyph_count == 0 || glyphs != NULL); const int cols_left = term->cols - col; - int cell_cols = glyph != NULL ? max(1, min(glyph->cols, cols_left)) : 1; + int cell_cols = glyph_count > 0 ? max(1, min(glyphs[0]->cols, cols_left)) : 1; /* * Hack! 
@@ -580,15 +606,15 @@ render_cell(struct terminal *term, pixman_image_t *pix, * - *this* cells is followed by an empty cell, or a space */ if (term->conf->tweak.allow_overflowing_double_width_glyphs && - ((glyph != NULL && - glyph->cols == 1 && - glyph->width >= term->cell_width * 15 / 10 && - glyph->width < 3 * term->cell_width && - col < term->cols - 1) || - (term->conf->tweak.pua_double_width && - ((base >= 0x00e000 && base <= 0x00f8ff) || - (base >= 0x0f0000 && base <= 0x0ffffd) || - (base >= 0x100000 && base <= 0x10fffd)))) && + ((glyph_count > 0 && + glyphs[0]->cols == 1 && + glyphs[0]->width >= term->cell_width * 15 / 10 && + glyphs[0]->width < 3 * term->cell_width && + col < term->cols - 1) || + (term->conf->tweak.pua_double_width && + ((base >= 0x00e000 && base <= 0x00f8ff) || + (base >= 0x0f0000 && base <= 0x0ffffd) || + (base >= 0x100000 && base <= 0x10fffd)))) && (row->cells[col + 1].wc == 0 || row->cells[col + 1].wc == L' ')) { cell_cols = 2; @@ -632,33 +658,43 @@ render_cell(struct terminal *term, pixman_image_t *pix, pixman_image_t *clr_pix = pixman_image_create_solid_fill(&fg); - if (glyph != NULL) { - const int letter_x_ofs = term->font_x_ofs; + for (unsigned i = 0; i < glyph_count; i++) { + const int letter_x_ofs = i == 0 ? term->font_x_ofs : 0; + + const struct fcft_glyph *glyph = glyphs[i]; + if (glyph == NULL) + continue; + + int g_x = glyph->x; + int g_y = glyph->y; + + if (i > 0 && glyph->x >= 0) + g_x -= term->cell_width; if (unlikely(pixman_image_get_format(glyph->pix) == PIXMAN_a8r8g8b8)) { /* Glyph surface is a pre-rendered image (typically a color emoji...) 
*/ if (!(cell->attrs.blink && term->blink.state == BLINK_OFF)) { pixman_image_composite32( PIXMAN_OP_OVER, glyph->pix, NULL, pix, 0, 0, 0, 0, - x + letter_x_ofs + glyph->x, y + font_baseline(term) - glyph->y, + x + letter_x_ofs + g_x, y + font_baseline(term) - g_y, glyph->width, glyph->height); } } else { pixman_image_composite32( PIXMAN_OP_OVER, clr_pix, glyph->pix, pix, 0, 0, 0, 0, - x + letter_x_ofs + glyph->x, y + font_baseline(term) - glyph->y, + x + letter_x_ofs + g_x, y + font_baseline(term) - g_y, glyph->width, glyph->height); - } + /* Combining characters */ + if (composed != NULL) { + assert(glyph_count == 1); - /* Combining characters */ - if (composed != NULL) { - for (size_t i = 0; i < composed->count; i++) { - const struct fcft_glyph *g = fcft_glyph_rasterize( - font, composed->combining[i], term->font_subpixel); + for (size_t i = 1; i < composed->count; i++) { + const struct fcft_glyph *g = fcft_glyph_rasterize( + font, composed->chars[i], term->font_subpixel); - if (g == NULL) - continue; + if (g == NULL) + continue; /* * Fonts _should_ assume the pen position is now @@ -677,16 +713,22 @@ render_cell(struct terminal *term, pixman_image_t *pix, * somewhat deal with double-width glyphs we use * an offset of *one* cell. */ - int x_ofs = g->x < 0 - ? cell_cols * term->cell_width - : (cell_cols - 1) * term->cell_width; + int x_ofs = g->x < 0 + ? 
cell_cols * term->cell_width + : (cell_cols - 1) * term->cell_width; - pixman_image_composite32( - PIXMAN_OP_OVER, clr_pix, g->pix, pix, 0, 0, 0, 0, - x + letter_x_ofs + x_ofs + g->x, y + font_baseline(term) - g->y, - g->width, g->height); + pixman_image_composite32( + PIXMAN_OP_OVER, clr_pix, g->pix, pix, 0, 0, 0, 0, + /* Some fonts use a negative offset, while others use a + * "normal" offset */ + x + x_ofs + g->x, + y + font_baseline(term) - g->y, + g->width, g->height); + } } } + + x += glyph->advance.x; } pixman_image_unref(clr_pix); diff --git a/search.c b/search.c index 9fc1ab2a..859608f5 100644 --- a/search.c +++ b/search.c @@ -249,7 +249,7 @@ matches_cell(const struct terminal *term, const struct cell *cell, size_t search base < (CELL_COMB_CHARS_LO + term->composed_count)) { composed = &term->composed[base - CELL_COMB_CHARS_LO]; - base = composed->base; + base = composed->chars[0]; } if (composed == NULL && base == 0 && term->search.buf[search_ofs] == L' ') @@ -262,8 +262,8 @@ matches_cell(const struct terminal *term, const struct cell *cell, size_t search if (search_ofs + 1 + composed->count > term->search.len) return -1; - for (size_t j = 0; j < composed->count; j++) { - if (composed->combining[j] != term->search.buf[search_ofs + 1 + j]) + for (size_t j = 1; j < composed->count; j++) { /* chars[0] is the base; chars[j] pairs with buf[search_ofs + j] */ + if (composed->chars[j] != term->search.buf[search_ofs + j]) return -1; } } diff --git a/selection.c b/selection.c index 555b32f2..cafb161e 100644 --- a/selection.c +++ b/selection.c @@ -249,7 +249,7 @@ selection_find_word_boundary_left(struct terminal *term, struct coord *pos, if (c >= CELL_COMB_CHARS_LO && c < (CELL_COMB_CHARS_LO + term->composed_count)) { - c = term->composed[c - CELL_COMB_CHARS_LO].base; + c = term->composed[c - CELL_COMB_CHARS_LO].chars[0]; } bool initial_is_space = c == 0 || iswspace(c); @@ -289,7 +289,7 @@ selection_find_word_boundary_left(struct terminal *term, struct coord *pos, if (c >= CELL_COMB_CHARS_LO && c < (CELL_COMB_CHARS_LO + 
term->composed_count)) { - c = term->composed[c - CELL_COMB_CHARS_LO].base; + c = term->composed[c - CELL_COMB_CHARS_LO].chars[0]; } bool is_space = c == 0 || iswspace(c); @@ -328,7 +328,7 @@ selection_find_word_boundary_right(struct terminal *term, struct coord *pos, if (c >= CELL_COMB_CHARS_LO && c < (CELL_COMB_CHARS_LO + term->composed_count)) { - c = term->composed[c - CELL_COMB_CHARS_LO].base; + c = term->composed[c - CELL_COMB_CHARS_LO].chars[0]; } bool initial_is_space = c == 0 || iswspace(c); @@ -370,7 +370,7 @@ selection_find_word_boundary_right(struct terminal *term, struct coord *pos, if (c >= CELL_COMB_CHARS_LO && c < (CELL_COMB_CHARS_LO + term->composed_count)) { - c = term->composed[c - CELL_COMB_CHARS_LO].base; + c = term->composed[c - CELL_COMB_CHARS_LO].chars[0]; } bool is_space = c == 0 || iswspace(c); diff --git a/terminal.h b/terminal.h index e6bcfa40..45c6c98b 100644 --- a/terminal.h +++ b/terminal.h @@ -8,6 +8,10 @@ #include #include +#if defined(FOOT_GRAPHEME_CLUSTERING) + #include +#endif + #include #include @@ -81,8 +85,7 @@ struct damage { }; struct composed { - wchar_t base; - wchar_t combining[5]; + wchar_t chars[20]; uint8_t count; }; @@ -152,6 +155,9 @@ struct vt_param { struct vt { int state; /* enum state */ wchar_t last_printed; +#if defined(FOOT_GRAPHEME_CLUSTERING) + utf8proc_int32_t grapheme_state; +#endif wchar_t utf8; struct { struct vt_param v[16]; @@ -720,3 +726,10 @@ void term_collect_urls(struct terminal *term); void term_osc8_open(struct terminal *term, uint64_t id, const char *uri); void term_osc8_close(struct terminal *term); + +static inline void term_reset_grapheme_state(struct terminal *term) +{ +#if defined(FOOT_GRAPHEME_CLUSTERING) + term->vt.grapheme_state = 0; +#endif +} diff --git a/util.h b/util.h index aa9fc8ba..af215111 100644 --- a/util.h +++ b/util.h @@ -35,3 +35,11 @@ sdbm_hash(const char *s) return hash; } + +#include +static inline int +my_wcswidth(const wchar_t *s, size_t n) +{ + int ret = wcswidth(s, 
n); + return max(0, ret); +} diff --git a/vt.c b/vt.c index 723f0450..496333d7 100644 --- a/vt.c +++ b/vt.c @@ -4,9 +4,14 @@ #include #include +#if defined(FOOT_GRAPHEME_CLUSTERING) + #include +#endif + #define LOG_MODULE "vt" #define LOG_ENABLE_DBG 0 #include "log.h" +#include "config.h" #include "csi.h" #include "dcs.h" #include "debug.h" @@ -283,6 +288,7 @@ action_execute(struct terminal *term, uint8_t c) static void action_print(struct terminal *term, uint8_t c) { + term_reset_grapheme_state(term); term->ascii_printer(term, c); } @@ -583,152 +589,166 @@ static void action_utf8_print(struct terminal *term, wchar_t wc) { int width = wcwidth(wc); + const bool grapheme_clustering = term->conf->tweak.grapheme_shaping; - /* - * Is this is combining character? The basic assumption is that if - * wcwdith() returns 0, then it *is* a combining character. - * - * We hen optimize this by ignoring all characters before 0x0300, - * since there aren't any zero-width characters there. This means - * all "normal" western characters will quickly be categorized as - * *not* being combining characters. - * - * TODO: xterm does more or less the same, but also filters a - * small subset of BIDI control characters. Should we too? I think - * what we have here is good enough - a control character - * shouldn't have a glyph associated with it, so rendering - * shouldn't be affected. - * - * TODO: handle line-wrap when locating the base character. 
- */ - if (width == 0 && wc >= 0x0300 && term->grid->cursor.point.col > 0) { - const struct row *row = term->grid->cur_row; +#if !defined(FOOT_GRAPHEME_CLUSTERING) + xassert(!grapheme_clustering); +#endif - int base_col = term->grid->cursor.point.col; + if (term->grid->cursor.point.col > 0 && + (grapheme_clustering || + (!grapheme_clustering && width == 0 && wc >= 0x300))) + { + int col = term->grid->cursor.point.col; if (!term->grid->cursor.lcf) - base_col--; + col--; - while (row->cells[base_col].wc >= CELL_SPACER && base_col > 0) - base_col--; + /* Skip past spacers */ + struct row *row = term->grid->cur_row; + while (row->cells[col].wc >= CELL_SPACER && col > 0) + col--; - xassert(base_col >= 0 && base_col < term->cols); - wchar_t base = row->cells[base_col].wc; + xassert(col >= 0 && col < term->cols); + wchar_t base = row->cells[col].wc; + wchar_t UNUSED last = base; + /* Is base cell already a cluster? */ const struct composed *composed = (base >= CELL_COMB_CHARS_LO && base < (CELL_COMB_CHARS_LO + term->composed_count)) ? 
&term->composed[base - CELL_COMB_CHARS_LO] : NULL; - if (composed != NULL) - base = composed->base; + if (composed != NULL) { + base = composed->chars[0]; + last = composed->chars[composed->count - 1]; + } + +#if defined(FOOT_GRAPHEME_CLUSTERING) + if (grapheme_clustering) { + /* Check if we're on a grapheme cluster break */ + /* Note: utf8proc fails to ZWJ */ + if (utf8proc_grapheme_break_stateful(last, wc, &term->vt.grapheme_state) && + last != 0x200d /* ZWJ */) + { + term_reset_grapheme_state(term); + if (width > 0) + term_print(term, wc, width); + return; + } + } +#endif int base_width = wcwidth(base); + term->grid->cursor.point.col = col; + term->grid->cursor.lcf = false; - if (base != 0 && base_width > 0) { + if (composed == NULL) { + bool base_from_primary; + bool comb_from_primary; + bool pre_from_primary; + + wchar_t precomposed = fcft_precompose( + term->fonts[0], base, wc, &base_from_primary, + &comb_from_primary, &pre_from_primary); + + int precomposed_width = wcwidth(precomposed); /* - * If this is the *first* combining characger, see if - * there's a pre-composed character of this combo, with - * the same column width as the base character. + * Only use the pre-composed character if: * - * If there is, replace the base character with the - * pre-composed character, as that is likely to produce a - * better looking result. + * 1. we *have* a pre-composed character + * 2. the width matches the base characters width + * 3. it's in the primary font, OR one of the base or + * combining characters are *not* from the primary + * font */ - term->grid->cursor.point.col = base_col; - term->grid->cursor.lcf = false; - if (composed == NULL) { - bool base_from_primary; - bool comb_from_primary; - bool pre_from_primary; - - wchar_t precomposed = fcft_precompose( - term->fonts[0], base, wc, &base_from_primary, - &comb_from_primary, &pre_from_primary); - - int precomposed_width = wcwidth(precomposed); - - /* - * Only use the pre-composed character if: - * - * 1. 
we *have* a pre-composed character - * 2. the width matches the base characters width - * 3. it's in the primary font, OR one of the base or - * combining characters are *not* from the primary - * font - */ - - if (precomposed != (wchar_t)-1 && - precomposed_width == base_width && - (pre_from_primary || - !base_from_primary || - !comb_from_primary)) - { - term_print(term, precomposed, precomposed_width); - return; - } + if (precomposed != (wchar_t)-1 && + precomposed_width == base_width && + (pre_from_primary || + !base_from_primary || + !comb_from_primary)) + { + term_reset_grapheme_state(term); + term_print(term, precomposed, precomposed_width); + return; } + } - size_t wanted_count = composed != NULL ? composed->count + 1 : 1; - if (wanted_count > ALEN(composed->combining)) { - xassert(composed != NULL); + size_t wanted_count = composed != NULL ? composed->count + 1 : 2; + if (wanted_count > ALEN(composed->chars)) { + xassert(composed != NULL); #if defined(LOG_ENABLE_DBG) && LOG_ENABLE_DBG - LOG_WARN("combining character overflow:"); - LOG_WARN(" base: 0x%04x", composed->base); - for (size_t i = 0; i < composed->count; i++) - LOG_WARN(" cc: 0x%04x", composed->combining[i]); - LOG_ERR(" new: 0x%04x", wc); + LOG_WARN("combining character overflow:"); + LOG_WARN(" base: 0x%04x", composed->chars[0]); + for (size_t i = 1; i < composed->count; i++) + LOG_WARN(" cc: 0x%04x", composed->chars[i]); + LOG_ERR(" new: 0x%04x", wc); #endif - /* This are going to break anyway... */ - wanted_count--; + /* This is going to break anyway... 
*/ + wanted_count--; + } + + xassert(wanted_count <= ALEN(composed->chars)); + + /* Look for existing combining chain */ + for (size_t i = 0; i < term->composed_count; i++) { + const struct composed *cc = &term->composed[i]; + + if (cc->count != wanted_count) + continue; + + if (cc->chars[0] != base) + continue; + + bool match = true; + for (size_t j = 1; j < wanted_count - 1; j++) { + if (cc->chars[j] != composed->chars[j]) { + match = false; + break; + } } + if (!match) + continue; - xassert(wanted_count <= ALEN(composed->combining)); + if (cc->chars[wanted_count - 1] != wc) + continue; - /* Look for existing combining chain */ - for (size_t i = 0; i < term->composed_count; i++) { - const struct composed *cc = &term->composed[i]; - if (cc->base != base) - continue; + int grapheme_width = my_wcswidth(cc->chars, cc->count); + if (grapheme_width > 0) + term_print(term, CELL_COMB_CHARS_LO + i, grapheme_width); + return; + } - if (cc->count != wanted_count) - continue; + /* Allocate new chain */ - if (cc->combining[wanted_count - 1] != wc) - continue; + struct composed new_cc; + new_cc.count = wanted_count; + new_cc.chars[0] = base; + for (size_t i = 1; i < wanted_count - 1; i++) + new_cc.chars[i] = composed->chars[i]; + new_cc.chars[wanted_count - 1] = wc; - term_print(term, CELL_COMB_CHARS_LO + i, base_width); - return; - } + if (term->composed_count < CELL_COMB_CHARS_HI) { + term->composed_count++; + term->composed = xrealloc(term->composed, term->composed_count * sizeof(term->composed[0])); + term->composed[term->composed_count - 1] = new_cc; - /* Allocate new chain */ - - struct composed new_cc; - new_cc.base = base; - new_cc.count = wanted_count; - for (size_t i = 0; i < wanted_count - 1; i++) - new_cc.combining[i] = composed->combining[i]; - new_cc.combining[wanted_count - 1] = wc; - - if (term->composed_count < CELL_COMB_CHARS_HI) { - term->composed_count++; - term->composed = xrealloc(term->composed, term->composed_count * sizeof(term->composed[0])); - 
term->composed[term->composed_count - 1] = new_cc; - - term_print(term, CELL_COMB_CHARS_LO + term->composed_count - 1, base_width); - return; - } else { - /* We reached our maximum number of allowed composed - * character chains. Fall through here and print the - * current zero-width character to the current cell */ - LOG_WARN("maximum number of composed characters reached"); - } + int grapheme_width = my_wcswidth(new_cc.chars, new_cc.count); + if (grapheme_width > 0) + term_print(term, CELL_COMB_CHARS_LO + term->composed_count - 1, grapheme_width); + return; + } else { + /* We reached our maximum number of allowed composed + * character chains. Fall through here and print the + * current zero-width character to the current cell */ + LOG_WARN("maximum number of composed characters reached"); } } + term_reset_grapheme_state(term); if (width > 0) term_print(term, wc, width); }