composed: refactor: break out lookup with collision detection

This commit is contained in:
Daniel Eklöf 2025-01-24 14:15:01 +01:00
parent 1181f74d19
commit e248e73753
No known key found for this signature in database
GPG key ID: 5BBD4992C116573F
3 changed files with 54 additions and 53 deletions

View file

@ -51,7 +51,7 @@ UNITTEST
xassert(k3 == k4); xassert(k3 == k4);
} }
struct composed * const struct composed *
composed_lookup(struct composed *root, uint32_t key) composed_lookup(struct composed *root, uint32_t key)
{ {
struct composed *node = root; struct composed *node = root;
@ -66,6 +66,41 @@ composed_lookup(struct composed *root, uint32_t key)
return NULL; return NULL;
} }
/*
 * Looks up the composed chain made up of the 'prefix_len' characters in
 * 'prefix_text' followed by 'wc', and having the given 'forced_width'.
 *
 * '*key' is the key to start probing at. Different chains may share a
 * key, so every entry found is compared against the wanted chain. When
 * it is a different chain, '*key' is bumped (wrapping around using the
 * CELL_COMB_CHARS_HI - CELL_COMB_CHARS_LO mask) and the lookup retried.
 *
 * Returns the matching entry, with '*key' being its key, or NULL when
 * there is no match. In the NULL case '*key' is either a key for which
 * composed_lookup() found nothing, or - when every probed key was taken
 * by another chain - whatever the key wrapped around to (the initial
 * key, assuming it was within the masked range to begin with).
 */
const struct composed *
composed_lookup_without_collision(struct composed *root, uint32_t *key,
                                  const char32_t *prefix_text, size_t prefix_len,
                                  char32_t wc, int forced_width)
{
    /* NOTE(review): assumed to be of the form 2^n - 1, since it is used
     * as a bit mask when wrapping the key - confirm against its definition */
    const uint32_t key_mask = CELL_COMB_CHARS_HI - CELL_COMB_CHARS_LO;

    /*
     * Probe each key in the range at most once. Without this bound the
     * loop never terminates when the table is full, i.e. when every key
     * is occupied by a chain other than the one we are looking for.
     */
    for (uint32_t probes = 0; probes <= key_mask; probes++) {
        const struct composed *cc = composed_lookup(root, *key);
        if (cc == NULL)
            return NULL;

        /* Cheap checks first; the count check also guarantees that
         * chars[prefix_len] is within the chain */
        bool match = cc->count == prefix_len + 1 &&
                     cc->forced_width == forced_width &&
                     cc->chars[prefix_len] == wc;

        for (size_t i = 0; match && i < prefix_len; i++)
            match = cc->chars[i] == prefix_text[i];

        if (match)
            return cc;

        /* Key collision: try the next key */
        *key = (*key + 1) & key_mask;
    }

    /* All keys probed, and all of them belong to other chains */
    return NULL;
}
void void
composed_insert(struct composed **root, struct composed *node) composed_insert(struct composed **root, struct composed *node)
{ {

View file

@ -10,12 +10,16 @@ struct composed {
uint32_t key; uint32_t key;
uint8_t count; uint8_t count;
uint8_t width; uint8_t width;
uint8_t forced_width;
}; };
uint32_t composed_key_from_chars(const uint32_t chars[], size_t count); uint32_t composed_key_from_chars(const uint32_t chars[], size_t count);
uint32_t composed_key_from_key(uint32_t prev_key, uint32_t next_char); uint32_t composed_key_from_key(uint32_t prev_key, uint32_t next_char);
struct composed *composed_lookup(struct composed *root, uint32_t key); const struct composed *composed_lookup(struct composed *root, uint32_t key);
/*
 * Looks up the composed entry whose characters are the 'prefix_len'
 * characters in 'prefix' followed by 'wc', and whose forced width is
 * 'forced_width'.
 *
 * 'key' is in/out: probing starts at '*key', and each time the entry
 * found there turns out to be a different chain (a key collision),
 * '*key' is advanced to the next key and the lookup is retried.
 *
 * Returns the matching entry, or NULL as soon as composed_lookup()
 * finds nothing for the current key.
 */
const struct composed *composed_lookup_without_collision(
struct composed *root, uint32_t *key,
const char32_t *prefix, size_t prefix_len, char32_t wc, int forced_width);
void composed_insert(struct composed **root, struct composed *node); void composed_insert(struct composed **root, struct composed *node);
void composed_free(struct composed *root); void composed_free(struct composed *root);

64
vt.c
View file

@ -793,60 +793,21 @@ action_utf8_print(struct terminal *term, char32_t wc)
xassert(wanted_count <= 255); xassert(wanted_count <= 255);
size_t collision_count = 0; /* Check if we already have a match for the entire compose chain */
const struct composed *cc =
/* Look for existing combining chain */ composed_lookup_without_collision(
while (true) { term->composed, &key,
if (unlikely(collision_count > 128)) { composed != NULL ? composed->chars : &(char32_t){base},
static bool have_warned = false; composed != NULL ? composed->count : 1,
if (!have_warned) { wc, 0);
have_warned = true;
LOG_WARN("ignoring composed character: "
"too many collisions in hash table");
}
return;
}
const struct composed *cc = composed_lookup(term->composed, key);
if (cc == NULL)
break;
/*
* We may have a key collision, so need to check that
* it's a true match. If not, bump the key and try
* again.
*/
xassert(key == cc->key);
if (cc->chars[0] != base ||
cc->count != wanted_count ||
cc->chars[wanted_count - 1] != wc)
{
#if 0
LOG_WARN("COLLISION: base: %04x/%04x, count: %d/%zu, last: %04x/%04x",
cc->chars[0], base, cc->count, wanted_count, cc->chars[wanted_count - 1], wc);
#endif
key++;
key &= CELL_COMB_CHARS_HI - CELL_COMB_CHARS_LO;
collision_count++;
continue;
}
bool match = composed != NULL
? memcmp(&cc->chars[1], &composed->chars[1],
(wanted_count - 2) * sizeof(cc->chars[0])) == 0
: true;
if (!match) {
key++;
key &= CELL_COMB_CHARS_HI - CELL_COMB_CHARS_LO;
collision_count++;
continue;
}
if (cc != NULL) {
/* We *do* have a match! */
wc = CELL_COMB_CHARS_LO + cc->key; wc = CELL_COMB_CHARS_LO + cc->key;
width = cc->width; width = cc->width;
goto out; goto out;
} else {
/* No match - allocate a new chain below */
} }
if (unlikely(term->composed_count >= if (unlikely(term->composed_count >=
@ -867,6 +828,7 @@ action_utf8_print(struct terminal *term, char32_t wc)
new_cc->count = wanted_count; new_cc->count = wanted_count;
new_cc->chars[0] = base; new_cc->chars[0] = base;
new_cc->chars[wanted_count - 1] = wc; new_cc->chars[wanted_count - 1] = wc;
new_cc->forced_width = 0;
if (composed != NULL) { if (composed != NULL) {
memcpy(&new_cc->chars[1], &composed->chars[1], memcpy(&new_cc->chars[1], &composed->chars[1],
@ -923,7 +885,7 @@ action_utf8_print(struct terminal *term, char32_t wc)
term->composed_count++; term->composed_count++;
composed_insert(&term->composed, new_cc); composed_insert(&term->composed, new_cc);
wc = CELL_COMB_CHARS_LO + key; wc = CELL_COMB_CHARS_LO + new_cc->key;
width = new_cc->width; width = new_cc->width;
xassert(wc >= CELL_COMB_CHARS_LO); xassert(wc >= CELL_COMB_CHARS_LO);