From fc973a3bb934de7fe6001be7b6c39714e7b5470d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Ekl=C3=B6f?= Date: Sat, 22 Jul 2023 11:21:12 +0200 Subject: [PATCH 1/3] selection: send_clipboard_or_primary(): handle selection text being NULL --- selection.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/selection.c b/selection.c index f6349d7f..d1a7ea28 100644 --- a/selection.c +++ b/selection.c @@ -1662,7 +1662,7 @@ send_clipboard_or_primary(struct seat *seat, int fd, const char *selection, return; } - size_t len = strlen(selection); + size_t len = selection != NULL ? strlen(selection) : 0; size_t async_idx = 0; switch (async_write(fd, selection, len, &async_idx)) { @@ -1701,7 +1701,6 @@ send(void *data, struct wl_data_source *wl_data_source, const char *mime_type, struct seat *seat = data; const struct wl_clipboard *clipboard = &seat->clipboard; - xassert(clipboard->text != NULL); send_clipboard_or_primary(seat, fd, clipboard->text, "clipboard"); } @@ -1756,7 +1755,6 @@ primary_send(void *data, struct seat *seat = data; const struct wl_primary *primary = &seat->primary; - xassert(primary->text != NULL); send_clipboard_or_primary(seat, fd, primary->text, "primary"); } From b59fd7c388c8d59a08e7e30f07e4639d2fd5451f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Ekl=C3=B6f?= Date: Sat, 22 Jul 2023 11:21:41 +0200 Subject: [PATCH 2/3] vt: detect and ignore invalid UTF-8 sequences This patch detects invalid codepoints in the UTF-8 EDxxxx range, and the F4xxxxxx range. Note that we still allow the E0xxxx and F0xxxxxx ranges. These contain overlong encodings. We allow them, because they still decode into correct UTF-32. 
Closes #1423 --- vt.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/vt.c b/vt.c index 51b69c7e..772bd41f 100644 --- a/vt.c +++ b/vt.c @@ -913,6 +913,16 @@ action_utf8_33(struct terminal *term, uint8_t c) { // wc = ((utf8[0] & 0xf) << 12) | ((utf8[1] & 0x3f) << 6) | (utf8[2] & 0x3f) term->vt.utf8 |= c & 0x3f; + + const char32_t utf32 = term->vt.utf8; + if (unlikely(utf32 >= 0xd800 && utf32 <= 0xdfff)) { + /* Invalid sequence - invalid UTF-16 surrogate halves */ + return; + } + + /* Note: the E0 range contains overlong encodings. We don’t try to + detect, as they’ll still decode to valid UTF-32. */ + action_utf8_print(term, term->vt.utf8); } @@ -942,6 +952,17 @@ action_utf8_44(struct terminal *term, uint8_t c) { // wc = ((utf8[0] & 7) << 18) | ((utf8[1] & 0x3f) << 12) | ((utf8[2] & 0x3f) << 6) | (utf8[3] & 0x3f); term->vt.utf8 |= c & 0x3f; + + const char32_t utf32 = term->vt.utf8; + + if (unlikely(utf32 > 0x10FFFF)) { + /* Invalid UTF-8 */ + return; + } + + /* Note: the F0 range contains overlong encodings. We don’t try to + detect, as they’ll still decode to valid UTF-32. */ + action_utf8_print(term, term->vt.utf8); } From 8223b4b76cb6ab0d4320859b2497dc222c9762bb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Ekl=C3=B6f?= Date: Sat, 22 Jul 2023 11:23:22 +0200 Subject: [PATCH 3/3] changelog: ignore invalid UTF-8 in input --- CHANGELOG.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 84a8aef4..ed0990eb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -50,6 +50,12 @@ ### Deprecated ### Removed ### Fixed + +* Crash when copying text that contains invalid UTF-8 ([#1423][1423]). + +[1423]: https://codeberg.org/dnkl/foot/issues/1423 + + ### Security ### Contributors