wip: grapheme shaping

This commit is contained in:
Daniel Eklöf 2020-08-20 19:25:35 +02:00
parent c1cde66f70
commit b9ef703eb1
No known key found for this signature in database
GPG key ID: 5BBD4992C116573F
16 changed files with 340 additions and 178 deletions

View file

@ -13,6 +13,7 @@ packages:
- freetype-dev
- fontconfig-dev
- harfbuzz-dev
- utf8proc-dev
- pixman-dev
- libxkbcommon-dev
- ncurses
@ -33,12 +34,12 @@ sources:
tasks:
- debug: |
mkdir -p bld/debug
meson --buildtype=debug -Dfcft:text-shaping=enabled -Dfcft:test-text-shaping=true foot bld/debug
meson --buildtype=debug -Dgrapheme-clustering=enabled -Dfcft:text-shaping=enabled -Dfcft:test-text-shaping=true foot bld/debug
ninja -C bld/debug -k0
meson test -C bld/debug --print-errorlogs
- release: |
mkdir -p bld/release
meson --buildtype=minsize -Dfcft:text-shaping=enabled -Dfcft:test-text-shaping=true foot bld/release
meson --buildtype=minsize -Dgrapheme-clustering=enabled -Dfcft:text-shaping=enabled -Dfcft:test-text-shaping=true foot bld/release
ninja -C bld/release -k0
meson test -C bld/release --print-errorlogs
- codespell: |

View file

@ -14,6 +14,7 @@ packages:
- freetype-dev
- fontconfig-dev
- harfbuzz-dev
- utf8proc-dev
- pixman-dev
- libxkbcommon-dev
- ncurses
@ -32,11 +33,11 @@ sources:
tasks:
- debug: |
mkdir -p bld/debug
meson --buildtype=debug -Dfcft:text-shaping=enabled -Dfcft:test-text-shaping=true foot bld/debug
meson --buildtype=debug -Dgrapheme-clustering=enabled -Dfcft:text-shaping=enabled -Dfcft:test-text-shaping=true foot bld/debug
ninja -C bld/debug -k0
meson test -C bld/debug --print-errorlogs
- release: |
mkdir -p bld/release
meson --buildtype=minsize -Dfcft:text-shaping=enabled -Dfcft:test-text-shaping=true foot bld/release
meson --buildtype=minsize -Dgrapheme-clustering=enabled -Dfcft:text-shaping=enabled -Dfcft:test-text-shaping=true foot bld/release
ninja -C bld/release -k0
meson test -C bld/release --print-errorlogs

View file

@ -11,6 +11,7 @@ packages:
- freetype2
- fontconfig
- harfbuzz
- utf8proc
- pixman
- libxkbcommon
- check
@ -28,11 +29,11 @@ sources:
tasks:
- debug: |
mkdir -p bld/debug
meson --buildtype=debug -Dterminfo=disabled -Dfcft:text-shaping=enabled -Dfcft:test-text-shaping=true foot bld/debug
meson --buildtype=debug -Dterminfo=disabled -Dgrapheme-clustering=enabled -Dfcft:text-shaping=enabled -Dfcft:test-text-shaping=true foot bld/debug
ninja -C bld/debug -k0
meson test -C bld/debug --print-errorlogs
- release: |
mkdir -p bld/release
meson --buildtype=minsize -Dterminfo=disabled -Dfcft:text-shaping=enabled -Dfcft:test-text-shaping=true foot bld/release
meson --buildtype=minsize -Dterminfo=disabled -Dgrapheme-clustering=enabled -Dfcft:text-shaping=enabled -Dfcft:test-text-shaping=true foot bld/release
ninja -C bld/release -k0
meson test -C bld/release --print-errorlogs

View file

@ -7,7 +7,7 @@ variables:
before_script:
- apk update
- apk add musl-dev linux-headers meson ninja gcc scdoc ncurses
- apk add libxkbcommon-dev pixman-dev freetype-dev fontconfig-dev harfbuzz-dev
- apk add libxkbcommon-dev pixman-dev freetype-dev fontconfig-dev harfbuzz-dev utf8proc-dev
- apk add wayland-dev wayland-protocols
- apk add git
- apk add check-dev
@ -19,7 +19,21 @@ debug-x64:
script:
- mkdir -p bld/debug
- cd bld/debug
- meson --buildtype=debug -Dfcft:text-shaping=enabled -Dfcft:test-text-shaping=true ../../
- meson --buildtype=debug -Dgrapheme-clustering=enabled -Dfcft:text-shaping=enabled -Dfcft:test-text-shaping=true ../../
- ninja -v -k0
- ninja -v test
artifacts:
reports:
junit: bld/debug/meson-logs/testlog.junit.xml
debug-x64-no-grapheme-clustering:
image: alpine:edge
stage: build
script:
- apk del harfbuzz harfbuzz-dev utf8proc utf8proc-dev
- mkdir -p bld/debug
- cd bld/debug
- meson --buildtype=debug -Dgrapheme-clustering=disabled -Dfcft:text-shaping=disabled -Dfcft:test-text-shaping=false ../../
- ninja -v -k0
- ninja -v test
artifacts:
@ -32,7 +46,7 @@ release-x64:
script:
- mkdir -p bld/release
- cd bld/release
- meson --buildtype=release -Dfcft:text-shaping=enabled -Dfcft:test-text-shaping=true ../../
- meson --buildtype=release -Dgrapheme-clustering=enabled -Dfcft:text-shaping=enabled -Dfcft:test-text-shaping=true ../../
- ninja -v -k0
- ninja -v test
artifacts:
@ -45,7 +59,7 @@ debug-x86:
script:
- mkdir -p bld/debug
- cd bld/debug
- meson --buildtype=debug -Dfcft:text-shaping=enabled -Dfcft:test-text-shaping=true ../../
- meson --buildtype=debug -Dgrapheme-clustering=enabled -Dfcft:text-shaping=enabled -Dfcft:test-text-shaping=true ../../
- ninja -v -k0
- ninja -v test
artifacts:
@ -58,7 +72,7 @@ release-x86:
script:
- mkdir -p bld/release
- cd bld/release
- meson --buildtype=release -Dfcft:text-shaping=enabled -Dfcft:test-text-shaping=true ../../
- meson --buildtype=release -Dgrapheme-clustering=enabled -Dfcft:text-shaping=enabled -Dfcft:test-text-shaping=true ../../
- ninja -v -k0
- ninja -v test
artifacts:

View file

@ -2129,6 +2129,33 @@ parse_section_tweak(
LOG_WARN("tweak: damage whole window");
}
else if (strcmp(key, "grapheme-shaping") == 0) {
conf->tweak.grapheme_shaping = str_to_bool(value);
#if !defined(FOOT_GRAPHEME_CLUSTERING)
if (conf->tweak.grapheme_shaping) {
LOG_AND_NOTIFY_WARN(
"%s:%d: [tweak]: "
"grapheme-shaping enabled but foot was not compiled with "
"support for it", path, lineno);
conf->tweak.grapheme_shaping = false;
}
#endif
if (conf->tweak.grapheme_shaping && !conf->can_shape_grapheme) {
LOG_WARN(
"%s:%d [tweak]: "
"grapheme-shaping enabled but fcft was not compiled with "
"support for it", path, lineno);
/* Keep it enabled though - this will cause us to do
* grapheme-clustering at least */
}
if (conf->tweak.grapheme_shaping)
LOG_WARN("tweak: grapheme shaping");
}
else if (strcmp(key, "render-timer") == 0) {
if (strcmp(value, "none") == 0) {
conf->tweak.render_timer_osd = false;
@ -2580,6 +2607,7 @@ config_load(struct config *conf, const char *conf_path,
config_override_t *overrides, bool errors_are_fatal)
{
bool ret = false;
enum fcft_capabilities fcft_caps = fcft_capabilities();
*conf = (struct config) {
.term = xstrdup(DEFAULT_TERM),
@ -2620,6 +2648,7 @@ config_load(struct config *conf, const char *conf_path,
.label_letters = xwcsdup(L"sadfjklewcmpgh"),
.osc8_underline = OSC8_UNDERLINE_URL_MODE,
},
.can_shape_grapheme = fcft_caps & FCFT_CAPABILITY_GRAPHEME_SHAPING,
.scrollback = {
.lines = 1000,
.indicator = {
@ -2694,6 +2723,7 @@ config_load(struct config *conf, const char *conf_path,
.tweak = {
.fcft_filter = FCFT_SCALING_FILTER_LANCZOS3,
.allow_overflowing_double_width_glyphs = true,
.grapheme_shaping = false,
.delayed_render_lower_ns = 500000, /* 0.5ms */
.delayed_render_upper_ns = 16666666 / 2, /* half a frame period (60Hz) */
.max_shm_pool_size = 512 * 1024 * 1024,

View file

@ -111,6 +111,7 @@ struct config {
struct pt_or_px underline_offset;
bool box_drawings_uses_font_glyphs;
bool can_shape_grapheme;
struct {
bool urgent;
@ -244,6 +245,7 @@ struct config {
struct {
enum fcft_scaling_filter fcft_filter;
bool allow_overflowing_double_width_glyphs;
bool grapheme_shaping;
bool render_timer_osd;
bool render_timer_log;
bool damage_whole_window;

View file

@ -975,6 +975,28 @@ any of these options.
Default: _no_.
*grapheme-shaping*
Boolean. When enabled, foot will use _utf8proc_ to do grapheme
cluster segmentation while parsing "printed" text. Then, when
rendering, it will use _fcft_ (if compiled with _HarfBuzz_
support) to shape the grapheme clusters.
This is required to render e.g. flag (emoji) sequences, keycap
sequences, modifier sequences, zero-width-joiner (ZWJ) sequences
and emoji tag sequences.
This is an experimental feature with the following requirements and limitations:
- foot must have been compiled with utf8proc support
- fcft must have been compiled with HarfBuzz support
- This option must be set to true
- Foot will use *wcswidth*(3) to calculate a cluster's display
width. This will typically _not_ match the shaped glyph's width,
but is necessary to not break cursor synchronization with the
application running in foot.
Default: _no_.
*max-shm-pool-size-mb*
This option controls the amount of virtual address space used by
the pixmap memory to which the terminal screen content is

View file

@ -229,12 +229,11 @@ extract_one(const struct terminal *term, const struct row *row,
const struct composed *composed
= &term->composed[cell->wc - CELL_COMB_CHARS_LO];
if (!ensure_size(ctx, 1 + composed->count))
if (!ensure_size(ctx, composed->count))
goto err;
ctx->buf[ctx->idx++] = composed->base;
for (size_t i = 0; i < composed->count; i++)
ctx->buf[ctx->idx++] = composed->combining[i];
ctx->buf[ctx->idx++] = composed->chars[i];
}
else {

View file

@ -71,6 +71,11 @@ wayland_client = dependency('wayland-client')
wayland_cursor = dependency('wayland-cursor')
xkb = dependency('xkbcommon', version: '>=1.0.0')
fontconfig = dependency('fontconfig')
utf8proc = dependency('libutf8proc', required: get_option('grapheme-clustering'))
if utf8proc.found()
add_project_arguments('-DFOOT_GRAPHEME_CLUSTERING=1', language: 'c')
endif
tllist = dependency('tllist', version: '>=1.0.4', fallback: 'tllist')
fcft = dependency('fcft', version: ['>=2.4.0', '<3.0.0'], fallback: 'fcft')
@ -149,7 +154,7 @@ vtlib = static_library(
'vt.c', 'vt.h',
wl_proto_src + wl_proto_headers,
version,
dependencies: [libepoll, pixman, fcft, tllist, wayland_client, xkb],
dependencies: [libepoll, pixman, fcft, tllist, wayland_client, xkb, utf8proc],
link_with: [common, misc],
)
@ -247,6 +252,7 @@ subdir('icons')
summary(
{
'IME': get_option('ime'),
'Grapheme clustering': utf8proc.found(),
'Terminfo': tic.found(),
'Terminfo install location': terminfo_install_location,
},

View file

@ -1,5 +1,8 @@
option('ime', type: 'boolean', value: true, description: 'IME (Input Method Editor) support')
option('grapheme-clustering', type: 'feature',
description: 'Enables grapheme clustering using libutf8proc. Requires fcft with harfbuzz support to be useful.')
option('terminfo', type: 'feature', description: 'Build terminfo. When disabled, foot\'s terminfo will not be built, and foot will default to \'xterm-256color\' instead of \'foot\'.')
option('terminfo-install-location', type: 'string', description: 'Where to install the foot terminfo files, relative to the installation prefix. If set to \'disabled\', the terminfo files are not installed at all (useful when packaging the terminfo files in a separate package). Defaults to $datadir/terminfo.')

128
render.c
View file

@ -500,19 +500,15 @@ render_cell(struct terminal *term, pixman_image_t *pix,
}
struct fcft_font *font = attrs_to_font(term, &cell->attrs);
const struct fcft_glyph *glyph = NULL;
const struct composed *composed = NULL;
const struct fcft_grapheme *grapheme = NULL;
const struct fcft_glyph *single = NULL;
const struct fcft_glyph **glyphs = NULL;
unsigned glyph_count = 0;
wchar_t base = cell->wc;
if (base != 0) {
if (base >= CELL_COMB_CHARS_LO &&
base < (CELL_COMB_CHARS_LO + term->composed_count))
{
composed = &term->composed[base - CELL_COMB_CHARS_LO];
base = composed->base;
}
if (unlikely(
/* Classic box drawings */
(base >= 0x2500 && base <= 0x259f) ||
@ -528,7 +524,7 @@ render_cell(struct terminal *term, pixman_image_t *pix,
(base >= 0x1fb00 && base <= 0x1fb3b) ||
/* Unicode 13 partial blocks */
/* TODO: there's more here! */
/* TODO: there's more here! */
(base >= 0x1fb70 && base <= 0x1fb8b)) &&
likely(!term->conf->box_drawings_uses_font_glyphs))
@ -542,7 +538,7 @@ render_cell(struct terminal *term, pixman_image_t *pix,
xassert(idx < ALEN(term->box_drawing));
if (likely(term->box_drawing[idx] != NULL))
glyph = term->box_drawing[idx];
single = term->box_drawing[idx];
else {
mtx_lock(&term->render.workers.lock);
@ -551,15 +547,45 @@ render_cell(struct terminal *term, pixman_image_t *pix,
term->box_drawing[idx] = box_drawing(term, base);
mtx_unlock(&term->render.workers.lock);
glyph = term->box_drawing[idx];
xassert(glyph != NULL);
single = term->box_drawing[idx];
xassert(single != NULL);
}
} else
glyph = fcft_glyph_rasterize(font, base, term->font_subpixel);
glyph_count = 1;
glyphs = &single;
}
else if (base >= CELL_COMB_CHARS_LO &&
base < (CELL_COMB_CHARS_LO + term->composed_count))
{
composed = &term->composed[base - CELL_COMB_CHARS_LO];
base = composed->chars[0];
if (term->conf->can_shape_grapheme && term->conf->tweak.grapheme_shaping) {
grapheme = fcft_grapheme_rasterize(
font, composed->count, composed->chars,
0, NULL, term->font_subpixel);
}
if (grapheme != NULL) {
composed = NULL;
glyphs = grapheme->glyphs;
glyph_count = grapheme->count;
}
}
if (single == NULL && grapheme == NULL) {
xassert(base != 0);
single = fcft_glyph_rasterize(font, base, term->font_subpixel);
glyph_count = 1;
glyphs = &single;
}
}
assert(glyph_count == 0 || glyphs != NULL);
const int cols_left = term->cols - col;
int cell_cols = glyph != NULL ? max(1, min(glyph->cols, cols_left)) : 1;
int cell_cols = glyph_count > 0 ? max(1, min(glyphs[0]->cols, cols_left)) : 1;
/*
* Hack!
@ -580,15 +606,15 @@ render_cell(struct terminal *term, pixman_image_t *pix,
* - *this* cell is followed by an empty cell, or a space
*/
if (term->conf->tweak.allow_overflowing_double_width_glyphs &&
((glyph != NULL &&
glyph->cols == 1 &&
glyph->width >= term->cell_width * 15 / 10 &&
glyph->width < 3 * term->cell_width &&
col < term->cols - 1) ||
(term->conf->tweak.pua_double_width &&
((base >= 0x00e000 && base <= 0x00f8ff) ||
(base >= 0x0f0000 && base <= 0x0ffffd) ||
(base >= 0x100000 && base <= 0x10fffd)))) &&
((glyph_count > 0 &&
glyphs[0]->cols == 1 &&
glyphs[0]->width >= term->cell_width * 15 / 10 &&
glyphs[0]->width < 3 * term->cell_width &&
col < term->cols - 1 ||
(term->conf->tweak.pua_double_width &&
((base >= 0x00e000 && base <= 0x00f8ff) ||
(base >= 0x0f0000 && base <= 0x0ffffd) ||
(base >= 0x100000 && base <= 0x10fffd))))) &&
(row->cells[col + 1].wc == 0 || row->cells[col + 1].wc == L' '))
{
cell_cols = 2;
@ -632,33 +658,43 @@ render_cell(struct terminal *term, pixman_image_t *pix,
pixman_image_t *clr_pix = pixman_image_create_solid_fill(&fg);
if (glyph != NULL) {
const int letter_x_ofs = term->font_x_ofs;
for (unsigned i = 0; i < glyph_count; i++) {
const int letter_x_ofs = i == 0 ? term->font_x_ofs : 0;
const struct fcft_glyph *glyph = glyphs[i];
if (glyph == NULL)
continue;
int g_x = glyph->x;
int g_y = glyph->y;
if (i > 0 && glyph->x >= 0)
g_x -= term->cell_width;
if (unlikely(pixman_image_get_format(glyph->pix) == PIXMAN_a8r8g8b8)) {
/* Glyph surface is a pre-rendered image (typically a color emoji...) */
if (!(cell->attrs.blink && term->blink.state == BLINK_OFF)) {
pixman_image_composite32(
PIXMAN_OP_OVER, glyph->pix, NULL, pix, 0, 0, 0, 0,
x + letter_x_ofs + glyph->x, y + font_baseline(term) - glyph->y,
x + letter_x_ofs + g_x, y + font_baseline(term) - g_y,
glyph->width, glyph->height);
}
} else {
pixman_image_composite32(
PIXMAN_OP_OVER, clr_pix, glyph->pix, pix, 0, 0, 0, 0,
x + letter_x_ofs + glyph->x, y + font_baseline(term) - glyph->y,
x + letter_x_ofs + g_x, y + font_baseline(term) - g_y,
glyph->width, glyph->height);
}
/* Combining characters */
if (composed != NULL) {
assert(glyph_count == 1);
/* Combining characters */
if (composed != NULL) {
for (size_t i = 0; i < composed->count; i++) {
const struct fcft_glyph *g = fcft_glyph_rasterize(
font, composed->combining[i], term->font_subpixel);
for (size_t i = 1; i < composed->count; i++) {
const struct fcft_glyph *g = fcft_glyph_rasterize(
font, composed->chars[i], term->font_subpixel);
if (g == NULL)
continue;
if (g == NULL)
continue;
/*
* Fonts _should_ assume the pen position is now
@ -677,16 +713,22 @@ render_cell(struct terminal *term, pixman_image_t *pix,
* somewhat deal with double-width glyphs we use
* an offset of *one* cell.
*/
int x_ofs = g->x < 0
? cell_cols * term->cell_width
: (cell_cols - 1) * term->cell_width;
int x_ofs = g->x < 0
? cell_cols * term->cell_width
: (cell_cols - 1) * term->cell_width;
pixman_image_composite32(
PIXMAN_OP_OVER, clr_pix, g->pix, pix, 0, 0, 0, 0,
x + letter_x_ofs + x_ofs + g->x, y + font_baseline(term) - g->y,
g->width, g->height);
pixman_image_composite32(
PIXMAN_OP_OVER, clr_pix, g->pix, pix, 0, 0, 0, 0,
/* Some fonts use a negative offset, while others use a
* "normal" offset */
x + x_ofs + g->x,
y + font_baseline(term) - g->y,
g->width, g->height);
}
}
}
x += glyph->advance.x;
}
pixman_image_unref(clr_pix);

View file

@ -249,7 +249,7 @@ matches_cell(const struct terminal *term, const struct cell *cell, size_t search
base < (CELL_COMB_CHARS_LO + term->composed_count))
{
composed = &term->composed[base - CELL_COMB_CHARS_LO];
base = composed->base;
base = composed->chars[0];
}
if (composed == NULL && base == 0 && term->search.buf[search_ofs] == L' ')
@ -262,8 +262,8 @@ matches_cell(const struct terminal *term, const struct cell *cell, size_t search
if (search_ofs + 1 + composed->count > term->search.len)
return -1;
for (size_t j = 0; j < composed->count; j++) {
if (composed->combining[j] != term->search.buf[search_ofs + 1 + j])
for (size_t j = 1; j < composed->count; j++) {
if (composed->chars[j] != term->search.buf[search_ofs + 1 + j])
return -1;
}
}

View file

@ -249,7 +249,7 @@ selection_find_word_boundary_left(struct terminal *term, struct coord *pos,
if (c >= CELL_COMB_CHARS_LO &&
c < (CELL_COMB_CHARS_LO + term->composed_count))
{
c = term->composed[c - CELL_COMB_CHARS_LO].base;
c = term->composed[c - CELL_COMB_CHARS_LO].chars[0];
}
bool initial_is_space = c == 0 || iswspace(c);
@ -289,7 +289,7 @@ selection_find_word_boundary_left(struct terminal *term, struct coord *pos,
if (c >= CELL_COMB_CHARS_LO &&
c < (CELL_COMB_CHARS_LO + term->composed_count))
{
c = term->composed[c - CELL_COMB_CHARS_LO].base;
c = term->composed[c - CELL_COMB_CHARS_LO].chars[0];
}
bool is_space = c == 0 || iswspace(c);
@ -328,7 +328,7 @@ selection_find_word_boundary_right(struct terminal *term, struct coord *pos,
if (c >= CELL_COMB_CHARS_LO &&
c < (CELL_COMB_CHARS_LO + term->composed_count))
{
c = term->composed[c - CELL_COMB_CHARS_LO].base;
c = term->composed[c - CELL_COMB_CHARS_LO].chars[0];
}
bool initial_is_space = c == 0 || iswspace(c);
@ -370,7 +370,7 @@ selection_find_word_boundary_right(struct terminal *term, struct coord *pos,
if (c >= CELL_COMB_CHARS_LO &&
c < (CELL_COMB_CHARS_LO + term->composed_count))
{
c = term->composed[c - CELL_COMB_CHARS_LO].base;
c = term->composed[c - CELL_COMB_CHARS_LO].chars[0];
}
bool is_space = c == 0 || iswspace(c);

View file

@ -8,6 +8,10 @@
#include <threads.h>
#include <semaphore.h>
#if defined(FOOT_GRAPHEME_CLUSTERING)
#include <utf8proc.h>
#endif
#include <tllist.h>
#include <fcft/fcft.h>
@ -81,8 +85,7 @@ struct damage {
};
struct composed {
wchar_t base;
wchar_t combining[5];
wchar_t chars[20];
uint8_t count;
};
@ -152,6 +155,9 @@ struct vt_param {
struct vt {
int state; /* enum state */
wchar_t last_printed;
#if defined(FOOT_GRAPHEME_CLUSTERING)
utf8proc_int32_t grapheme_state;
#endif
wchar_t utf8;
struct {
struct vt_param v[16];
@ -720,3 +726,10 @@ void term_collect_urls(struct terminal *term);
void term_osc8_open(struct terminal *term, uint64_t id, const char *uri);
void term_osc8_close(struct terminal *term);
/*
 * Clears the VT parser's grapheme state (term->vt.grapheme_state) by
 * setting it to 0.
 *
 * The field only exists when foot is built with
 * FOOT_GRAPHEME_CLUSTERING; in builds without it this function does
 * nothing.
 */
static inline void term_reset_grapheme_state(struct terminal *term)
{
#ifdef FOOT_GRAPHEME_CLUSTERING
    term->vt.grapheme_state = 0;
#else
    /* No grapheme state to reset in this configuration */
    (void)term;
#endif
}

8
util.h
View file

@ -35,3 +35,11 @@ sdbm_hash(const char *s)
return hash;
}
#include <wchar.h>
/*
 * Wrapper around wcswidth(3) that never returns a negative value.
 *
 * s: wide character string to measure
 * n: maximum number of wide characters of s to consider
 *
 * Returns the value reported by wcswidth(), or 0 if wcswidth()
 * reported a negative value.
 */
static inline int
my_wcswidth(const wchar_t *s, size_t n)
{
    const int width = wcswidth(s, n);
    return width < 0 ? 0 : width;
}

242
vt.c
View file

@ -4,9 +4,14 @@
#include <string.h>
#include <unistd.h>
#if defined(FOOT_GRAPHEME_CLUSTERING)
#include <utf8proc.h>
#endif
#define LOG_MODULE "vt"
#define LOG_ENABLE_DBG 0
#include "log.h"
#include "config.h"
#include "csi.h"
#include "dcs.h"
#include "debug.h"
@ -283,6 +288,7 @@ action_execute(struct terminal *term, uint8_t c)
/*
 * Print action for a single byte.
 *
 * Resets the terminal's grapheme state and then passes the byte to
 * the terminal's ascii_printer callback.
 */
static void
action_print(struct terminal *term, uint8_t c)
{
/* Reset first - presumably so that a later multi-byte character is
 * not clustered using state left over from before this byte */
term_reset_grapheme_state(term);
term->ascii_printer(term, c);
}
@ -583,152 +589,166 @@ static void
action_utf8_print(struct terminal *term, wchar_t wc)
{
int width = wcwidth(wc);
const bool grapheme_clustering = term->conf->tweak.grapheme_shaping;
/*
* Is this a combining character? The basic assumption is that if
* wcwidth() returns 0, then it *is* a combining character.
*
* We then optimize this by ignoring all characters before 0x0300,
* since there aren't any zero-width characters there. This means
* all "normal" western characters will quickly be categorized as
* *not* being combining characters.
*
* TODO: xterm does more or less the same, but also filters a
* small subset of BIDI control characters. Should we too? I think
* what we have here is good enough - a control character
* shouldn't have a glyph associated with it, so rendering
* shouldn't be affected.
*
* TODO: handle line-wrap when locating the base character.
*/
if (width == 0 && wc >= 0x0300 && term->grid->cursor.point.col > 0) {
const struct row *row = term->grid->cur_row;
#if !defined(FOOT_GRAPHEME_CLUSTERING)
xassert(!grapheme_clustering);
#endif
int base_col = term->grid->cursor.point.col;
if (term->grid->cursor.point.col > 0 &&
(grapheme_clustering ||
(!grapheme_clustering && width == 0 && wc >= 0x300)))
{
int col = term->grid->cursor.point.col;
if (!term->grid->cursor.lcf)
base_col--;
col--;
while (row->cells[base_col].wc >= CELL_SPACER && base_col > 0)
base_col--;
/* Skip past spacers */
struct row *row = term->grid->cur_row;
while (row->cells[col].wc >= CELL_SPACER && col > 0)
col--;
xassert(base_col >= 0 && base_col < term->cols);
wchar_t base = row->cells[base_col].wc;
xassert(col >= 0 && col < term->cols);
wchar_t base = row->cells[col].wc;
wchar_t UNUSED last = base;
/* Is base cell already a cluster? */
const struct composed *composed =
(base >= CELL_COMB_CHARS_LO &&
base < (CELL_COMB_CHARS_LO + term->composed_count))
? &term->composed[base - CELL_COMB_CHARS_LO]
: NULL;
if (composed != NULL)
base = composed->base;
if (composed != NULL) {
base = composed->chars[0];
last = composed->chars[composed->count - 1];
}
#if defined(FOOT_GRAPHEME_CLUSTERING)
if (grapheme_clustering) {
/* Check if we're on a grapheme cluster break */
/* Note: utf8proc apparently does not handle ZWJ (0x200d) correctly; never treat the position right after a ZWJ as a break */
if (utf8proc_grapheme_break_stateful(last, wc, &term->vt.grapheme_state) &&
last != 0x200d /* ZWJ */)
{
term_reset_grapheme_state(term);
if (width > 0)
term_print(term, wc, width);
return;
}
}
#endif
int base_width = wcwidth(base);
term->grid->cursor.point.col = col;
term->grid->cursor.lcf = false;
if (base != 0 && base_width > 0) {
if (composed == NULL) {
bool base_from_primary;
bool comb_from_primary;
bool pre_from_primary;
wchar_t precomposed = fcft_precompose(
term->fonts[0], base, wc, &base_from_primary,
&comb_from_primary, &pre_from_primary);
int precomposed_width = wcwidth(precomposed);
/*
* If this is the *first* combining character, see if
* there's a pre-composed character of this combo, with
* the same column width as the base character.
* Only use the pre-composed character if:
*
* If there is, replace the base character with the
* pre-composed character, as that is likely to produce a
* better looking result.
* 1. we *have* a pre-composed character
* 2. the width matches the base characters width
* 3. it's in the primary font, OR one of the base or
* combining characters are *not* from the primary
* font
*/
term->grid->cursor.point.col = base_col;
term->grid->cursor.lcf = false;
if (composed == NULL) {
bool base_from_primary;
bool comb_from_primary;
bool pre_from_primary;
wchar_t precomposed = fcft_precompose(
term->fonts[0], base, wc, &base_from_primary,
&comb_from_primary, &pre_from_primary);
int precomposed_width = wcwidth(precomposed);
/*
* Only use the pre-composed character if:
*
* 1. we *have* a pre-composed character
* 2. the width matches the base characters width
* 3. it's in the primary font, OR one of the base or
* combining characters are *not* from the primary
* font
*/
if (precomposed != (wchar_t)-1 &&
precomposed_width == base_width &&
(pre_from_primary ||
!base_from_primary ||
!comb_from_primary))
{
term_print(term, precomposed, precomposed_width);
return;
}
if (precomposed != (wchar_t)-1 &&
precomposed_width == base_width &&
(pre_from_primary ||
!base_from_primary ||
!comb_from_primary))
{
term_reset_grapheme_state(term);
term_print(term, precomposed, precomposed_width);
return;
}
}
size_t wanted_count = composed != NULL ? composed->count + 1 : 1;
if (wanted_count > ALEN(composed->combining)) {
xassert(composed != NULL);
size_t wanted_count = composed != NULL ? composed->count + 1 : 2;
if (wanted_count > ALEN(composed->chars)) {
xassert(composed != NULL);
#if defined(LOG_ENABLE_DBG) && LOG_ENABLE_DBG
LOG_WARN("combining character overflow:");
LOG_WARN(" base: 0x%04x", composed->base);
for (size_t i = 0; i < composed->count; i++)
LOG_WARN(" cc: 0x%04x", composed->combining[i]);
LOG_ERR(" new: 0x%04x", wc);
LOG_WARN("combining character overflow:");
LOG_WARN(" base: 0x%04x", composed->chars[0]);
for (size_t i = 1; i < composed->count; i++)
LOG_WARN(" cc: 0x%04x", composed->chars[i]);
LOG_ERR(" new: 0x%04x", wc);
#endif
/* This are going to break anyway... */
wanted_count--;
/* This is going to break anyway... */
wanted_count--;
}
xassert(wanted_count <= ALEN(composed->chars));
/* Look for existing combining chain */
for (size_t i = 0; i < term->composed_count; i++) {
const struct composed *cc = &term->composed[i];
if (cc->count != wanted_count)
continue;
if (cc->chars[0] != base)
continue;
bool match = true;
for (size_t j = 1; j < wanted_count - 1; j++) {
if (cc->chars[j] != composed->chars[j]) {
match = false;
break;
}
}
if (!match)
continue;
xassert(wanted_count <= ALEN(composed->combining));
if (cc->chars[wanted_count - 1] != wc)
continue;
/* Look for existing combining chain */
for (size_t i = 0; i < term->composed_count; i++) {
const struct composed *cc = &term->composed[i];
if (cc->base != base)
continue;
int grapheme_width = my_wcswidth(cc->chars, cc->count);
if (grapheme_width > 0)
term_print(term, CELL_COMB_CHARS_LO + i, grapheme_width);
return;
}
if (cc->count != wanted_count)
continue;
/* Allocate new chain */
if (cc->combining[wanted_count - 1] != wc)
continue;
struct composed new_cc;
new_cc.count = wanted_count;
new_cc.chars[0] = base;
for (size_t i = 1; i < wanted_count - 1; i++)
new_cc.chars[i] = composed->chars[i];
new_cc.chars[wanted_count - 1] = wc;
term_print(term, CELL_COMB_CHARS_LO + i, base_width);
return;
}
if (term->composed_count < CELL_COMB_CHARS_HI) {
term->composed_count++;
term->composed = xrealloc(term->composed, term->composed_count * sizeof(term->composed[0]));
term->composed[term->composed_count - 1] = new_cc;
/* Allocate new chain */
struct composed new_cc;
new_cc.base = base;
new_cc.count = wanted_count;
for (size_t i = 0; i < wanted_count - 1; i++)
new_cc.combining[i] = composed->combining[i];
new_cc.combining[wanted_count - 1] = wc;
if (term->composed_count < CELL_COMB_CHARS_HI) {
term->composed_count++;
term->composed = xrealloc(term->composed, term->composed_count * sizeof(term->composed[0]));
term->composed[term->composed_count - 1] = new_cc;
term_print(term, CELL_COMB_CHARS_LO + term->composed_count - 1, base_width);
return;
} else {
/* We reached our maximum number of allowed composed
* character chains. Fall through here and print the
* current zero-width character to the current cell */
LOG_WARN("maximum number of composed characters reached");
}
int grapheme_width = my_wcswidth(new_cc.chars, new_cc.count);
if (grapheme_width > 0)
term_print(term, CELL_COMB_CHARS_LO + term->composed_count - 1, grapheme_width);
return;
} else {
/* We reached our maximum number of allowed composed
* character chains. Fall through here and print the
* current zero-width character to the current cell */
LOG_WARN("maximum number of composed characters reached");
}
}
term_reset_grapheme_state(term);
if (width > 0)
term_print(term, wc, width);
}