mirror of
https://codeberg.org/dnkl/foot.git
synced 2026-02-26 01:40:12 -05:00
vt: detect and ignore invalid UTF-8 sequences
This patch detects invalid codepoints in the UTF-8 EDxxxx range (UTF-16 surrogate halves, U+D800–U+DFFF), and in the F4xxxxxx range (codepoints above U+10FFFF). Note that we still allow the E0xxxx and F0xxxxxx ranges. These contain overlong encodings. We allow them, because they still decode into correct UTF-32. Closes #1423
This commit is contained in:
parent
fc973a3bb9
commit
b59fd7c388
1 changed file with 21 additions and 0 deletions
21
vt.c
21
vt.c
|
|
@ -913,6 +913,16 @@ action_utf8_33(struct terminal *term, uint8_t c)
|
||||||
{
|
{
|
||||||
// wc = ((utf8[0] & 0xf) << 12) | ((utf8[1] & 0x3f) << 6) | (utf8[2] & 0x3f)
|
// wc = ((utf8[0] & 0xf) << 12) | ((utf8[1] & 0x3f) << 6) | (utf8[2] & 0x3f)
|
||||||
term->vt.utf8 |= c & 0x3f;
|
term->vt.utf8 |= c & 0x3f;
|
||||||
|
|
||||||
|
const char32_t utf32 = term->vt.utf8;
|
||||||
|
if (unlikely(utf32 >= 0xd800 && utf32 <= 0xdfff)) {
|
||||||
|
/* Invalid sequence - invalid UTF-16 surrogate halves */
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Note: the E0 range contains overlong encodings. We don’t try to
|
||||||
|
detect, as they’ll still decode to valid UTF-32. */
|
||||||
|
|
||||||
action_utf8_print(term, term->vt.utf8);
|
action_utf8_print(term, term->vt.utf8);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -942,6 +952,17 @@ action_utf8_44(struct terminal *term, uint8_t c)
|
||||||
{
|
{
|
||||||
// wc = ((utf8[0] & 7) << 18) | ((utf8[1] & 0x3f) << 12) | ((utf8[2] & 0x3f) << 6) | (utf8[3] & 0x3f);
|
// wc = ((utf8[0] & 7) << 18) | ((utf8[1] & 0x3f) << 12) | ((utf8[2] & 0x3f) << 6) | (utf8[3] & 0x3f);
|
||||||
term->vt.utf8 |= c & 0x3f;
|
term->vt.utf8 |= c & 0x3f;
|
||||||
|
|
||||||
|
const char32_t utf32 = term->vt.utf8;
|
||||||
|
|
||||||
|
if (unlikely(utf32 > 0x10FFFF)) {
|
||||||
|
/* Invalid UTF-8 */
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Note: the F0 range contains overlong encodings. We don’t try to
|
||||||
|
detect, as they’ll still decode to valid UTF-32. */
|
||||||
|
|
||||||
action_utf8_print(term, term->vt.utf8);
|
action_utf8_print(term, term->vt.utf8);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue