diff --git a/config.c b/config.c index 68641647..ac0f9688 100644 --- a/config.c +++ b/config.c @@ -2371,7 +2371,7 @@ parse_section_tweak(struct context *ctx) return value_to_enum( ctx, - (const char *[]){"wcswidth", "double-width", "max", NULL}, + (const char *[]){"default", "wcswidth", "double-width", "max", NULL}, (int *)&conf->tweak.grapheme_width_method); } @@ -2977,7 +2977,7 @@ config_load(struct config *conf, const char *conf_path, #if defined(FOOT_GRAPHEME_CLUSTERING) && FOOT_GRAPHEME_CLUSTERING .grapheme_shaping = fcft_caps & FCFT_CAPABILITY_GRAPHEME_SHAPING, #endif - .grapheme_width_method = GRAPHEME_WIDTH_WCSWIDTH, + .grapheme_width_method = GRAPHEME_WIDTH_DEFAULT, .delayed_render_lower_ns = 500000, /* 0.5ms */ .delayed_render_upper_ns = 16666666 / 2, /* half a frame period (60Hz) */ .max_shm_pool_size = 512 * 1024 * 1024, diff --git a/config.h b/config.h index d35abbb2..b2a2d050 100644 --- a/config.h +++ b/config.h @@ -326,6 +326,7 @@ struct config { bool overflowing_glyphs; bool grapheme_shaping; enum { + GRAPHEME_WIDTH_DEFAULT, GRAPHEME_WIDTH_WCSWIDTH, GRAPHEME_WIDTH_DOUBLE, GRAPHEME_WIDTH_MAX, diff --git a/doc/foot.ini.5.scd b/doc/foot.ini.5.scd index e18a6208..b52b3bd2 100644 --- a/doc/foot.ini.5.scd +++ b/doc/foot.ini.5.scd @@ -1229,21 +1229,63 @@ any of these options. *grapheme-width-method* Selects which method to use when calculating the width - (i.e. number of columns) of a grapheme cluster. One of - *wcswidth*, *double-width* and *max*. + (i.e. number of columns) of a grapheme cluster. One of *default*, + *wcswidth*, *double-width* and *max*. See details below. - *wcswidth* simply adds together the individual width of all - codepoints making up the cluster. + Background: + + Glyphs rendered in the terminal may be 1 or 2 columns wide. The + terminal emulator and the application running in the terminal must + agree what that width is, or you will see cursor + de-synchronization. 
This typically manifests itself as rendering + glitches and the cursor "jumping" when moving it over problematic + characters. + + Most glyphs consist of a single Unicode codepoint. In this case, + there are usually no issues. + + Some glyphs, however, consist of multiple codepoints. These are + called grapheme clusters, and can often be problematic. + + While there is a standard function to calculate the width of a + single codepoint (*wcwidth*(3p)), there is no such function for + calculating the width of a grapheme cluster. + + The closest thing we have is *wcswidth*(3p), which calculates the + width of a string. It is usually implemented by adding together + the widths of all codepoints that make up the string. This is not + correct for grapheme clusters. Take 👩🏾‍🚀 for example (WOMAN + ASTRONAUT: MEDIUM-DARK SKIN TONE). It consists of 4 codepoints: + WOMAN + MEDIUM-DARK SKIN TONE + ZWJ + ROCKET. The sum of the + widths is 6. + + Some applications are better at calculating correct grapheme + cluster widths than others. The *grapheme-width-method* option lets + you choose the method that works best for your use case. + + *default* attempts to do "the right thing". When used with + applications that do not handle Unicode grapheme clusters + correctly, you will see cursor de-synchronization. + + *wcswidth* simply adds together the individual widths of all + codepoints making up the cluster. This is typically the most + compatible mode, at least for "older" applications. + + It will allocate too few cells for legacy emojis with an explicit + variation selector. + + It will allocate too many cells for complex emojis consisting of + multiple codepoints. *double-width* does the same, but limits the maximum number of - columns to 2. This is more correct, but may break some - applications since applications typically use *wcswidth*(3) - internally to calculate the width. This results in cursor - de-synchronization issues. + columns to 2. 
This is usually more correct, but like *default*, + will result in cursor de-synchronization when used with + applications that are not Unicode grapheme cluster + aware. *max* uses the width of the largest codepoint in the cluster. - Default: _wcswidth_ + Default: _default_ *font-monospace-warn* Boolean. When enabled, foot will use heuristics to try to verify diff --git a/vt.c b/vt.c index 91f00e6f..0ac2c087 100644 --- a/vt.c +++ b/vt.c @@ -831,6 +831,32 @@ action_utf8_print(struct terminal *term, char32_t wc) composed != NULL ? composed->width : base_width; switch (term->conf->tweak.grapheme_width_method) { + case GRAPHEME_WIDTH_DEFAULT: + if (last == 0x200d) { + /* Last character is ZWJ - ignore this characters width */ + width = grapheme_width; + } else if ((wc >= 0x1f3fb && wc <= 0x1f3ff /* skin-tone */) || + (wc >= 0x1f9b0 && wc <= 0x1f9b3 /* hair-style */)) + { + width = max(2, grapheme_width); + } else if (last >= 0x1f1e6 && last <= 0x1f1ff && + wc >= 0x1f1e6 && wc <= 0x1f1ff) + { + /* Last character and this character are country + * letters, meaning the grapheme is a flag */ + /* TODO: only do this for valid flag combinations */ + width = 2; + } else if (unlikely(wc == 0xfe0f)) { + /* Variation selector 16 - graphical presentation */ + width = 2; + } else { + /* Emulate wcswidth() */ + width += grapheme_width; + } + + new_cc->width = width; + break; + case GRAPHEME_WIDTH_MAX: new_cc->width = max(grapheme_width, width); break;