From fbcb30bf9870aa2b4d75376b3a02703576a715e4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Daniel=20Ekl=C3=B6f?=
Date: Mon, 13 Jun 2022 13:11:56 +0200
Subject: [PATCH] vt: improve key calculation for compose sequences
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* Don’t assume 32 bits when rotating the old key. Use the number of
  actual bits available, as determined by CELL_COMB_CHARS_{HI,LO}
* Multiply with magic hash constant

This greatly reduces the number of collisions seen. For example, the
Emoji test file (from the Unicode specification), now has zero
collisions.
---
 vt.c | 23 +++++++++++++++++++----
 1 file changed, 19 insertions(+), 4 deletions(-)

diff --git a/vt.c b/vt.c
index 9746364b..54fd643c 100644
--- a/vt.c
+++ b/vt.c
@@ -622,8 +622,21 @@ action_put(struct terminal *term, uint8_t c)
 static inline uint32_t
 chain_key(uint32_t old_key, uint32_t new_wc)
 {
-    /* Rotate left 8 bits, xor with new char */
-    return ((old_key << 8) | (old_key >> (32 - 8))) ^ new_wc;
+    unsigned bits = 32 - __builtin_clz(CELL_COMB_CHARS_HI - CELL_COMB_CHARS_LO);
+
+    /* Rotate old key left by 8, within a 'bits'-wide key */
+    uint32_t new_key = (old_key << 8) | (old_key >> (bits - 8));
+
+    /* xor with new char */
+    new_key ^= new_wc;
+
+    /* Multiply with magic hash constant ('u': keep the math unsigned) */
+    new_key *= 2654435761u;
+
+    /* And mask, to ensure the new value is within range */
+    new_key &= CELL_COMB_CHARS_HI - CELL_COMB_CHARS_LO;
+
+    return new_key;
 }
 
 static void
@@ -668,8 +681,6 @@ action_utf8_print(struct terminal *term, char32_t wc)
         } else
             key = chain_key(base, wc);
 
-        key &= CELL_COMB_CHARS_HI - CELL_COMB_CHARS_LO;
-
 #if defined(FOOT_GRAPHEME_CLUSTERING)
         if (grapheme_clustering) {
             /* Check if we're on a grapheme cluster break */
@@ -767,6 +778,10 @@ action_utf8_print(struct terminal *term, char32_t wc)
                     cc->count != wanted_count ||
                     cc->chars[wanted_count - 1] != wc)
                 {
+#if 0
+                    LOG_WARN("COLLISION: base: %04x/%04x, count: %d/%zu, last: %04x/%04x",
+                             cc->chars[0], base, cc->count, wanted_count, cc->chars[wanted_count - 1], wc);
+#endif
                     key++;
                     collision_count++;
                     continue;