mirror of
https://gitlab.freedesktop.org/wayland/wayland.git
synced 2025-10-29 05:40:16 -04:00
Merge branch 'utf8-check' into 'main'
connection: check that strings are valid UTF-8 See merge request wayland/wayland!422
This commit is contained in:
commit
4f7b9e3e5a
1 changed files with 132 additions and 8 deletions
140
src/connection.c
140
src/connection.c
|
|
@ -887,6 +887,123 @@ wl_closure_vmarshal(struct wl_object *sender, uint32_t opcode, va_list ap,
|
|||
return wl_closure_marshal(sender, opcode, args, message);
|
||||
}
|
||||
|
||||
/*
|
||||
* Check that the provided NUL-terminated string is valid UTF-8. Returns a
|
||||
* pointer to the byte _after_ the terminating NUL, or NULL if invalid UTF-8
|
||||
* is found first.
|
||||
*/
|
||||
static const unsigned char *
|
||||
wayland_string_validate(const unsigned char *const s,
|
||||
const struct wl_message *const message)
|
||||
{
|
||||
const unsigned char *p = s;
|
||||
unsigned char first, b;
|
||||
|
||||
for (;;) {
|
||||
bool okay;
|
||||
|
||||
/*
|
||||
* Many strings are ASCII, so handle them in a fast path.
|
||||
* This loop skips all non-NUL ASCII characters.
|
||||
*/
|
||||
do {
|
||||
first = *p++;
|
||||
} while (first > 0 && first < 0x80);
|
||||
|
||||
if (first == 0x0)
|
||||
return p;
|
||||
|
||||
/*
|
||||
* Validate that the string is well-formed UTF-8.
|
||||
* ASCII bytes have already been checked for. Start by rejecting the
|
||||
* following values, which are not valid as the start of a multibyte UTF-8
|
||||
* sequence.
|
||||
*
|
||||
* 0x80 ... 0xBF: 0b10xxxxxx, continuation bytes.
|
||||
* 0xC0 ... 0xC1: 0b1100000x 0b10xxxxxx, overlong encoding of 0x0 ... 0x7F.
|
||||
* 0xF5 ... 0xF7: 0b111101xx 0b10xxxxxx 0b10xxxxxx 0b10xxxxxx where at least
|
||||
* one of the first two "x" bits is set. This corresponds
|
||||
* to 0b1 xxxx xxxx xxxx xxxx xxxx with at least one of the
|
||||
* first two "x" bits set, or 0x140000 ... 0x1FFFFF. These
|
||||
* all exceed 0x10FFFF, the largest Unicode code point.
|
||||
* 0xF8 ... 0xFF: These are of the form 0b11111xxx. They
|
||||
* correspond to 5-, 6-, 7-, or 8-byte encodings,
|
||||
* which are all invalid: every code point can be expressed
|
||||
* in at most 4 bytes, so longer encodings are either
|
||||
* overlong or exceed the 0x10FFFF limit.
|
||||
*/
|
||||
if (first < 0xC2 || first > 0xF4) {
|
||||
wl_log("string has invalid UTF-8 start byte 0x%hhx "
|
||||
"at offset %td, message %s(%s)\n", first, p - s,
|
||||
message->name, message->signature);
|
||||
return NULL;
|
||||
}
|
||||
b = *p++;
|
||||
switch (first) {
|
||||
case 0xe0:
|
||||
/*
|
||||
* 3-byte encoding of form 0b11100000 0b10xxxxxx 0b10xxxxxx.
|
||||
* The greatest 2-byte encoding is 0b11011111 0b10111111 or
|
||||
* 0b011111111111, which is 0x7FF. Therefore, for the encoding
|
||||
* to not be overlong, the first "x" bit must be set.
|
||||
*/
|
||||
okay = (b >= 0xa0 && b <= 0xbf);
|
||||
break;
|
||||
case 0xed:
|
||||
/*
|
||||
* 3-byte encoding of form 0b11101101 0b10xxxxxx 0b10xxxxxx,
|
||||
* or 0xDxxx. Values in the range 0xD800 ... 0xDFFF are surrogates,
|
||||
* which are not valid Unicode code points. Therefore, only values
|
||||
* in the range 0xD000 ... 0xD7FF are permitted, meaning that the
|
||||
* first "x" bit must be clear.
|
||||
*/
|
||||
okay = (b >= 0x80 && b <= 0x9f);
|
||||
break;
|
||||
case 0xf0:
|
||||
/*
|
||||
* 4-byte encoding of form 0b11110000 0b10xxxxxx 0b10xxxxxx 0b10xxxxxx.
|
||||
* The greatest 3-byte encoding is 0b11101111 0b10111111 0b10111111 or
|
||||
* 0b001111 111111 111111, which is 0xFFFF. Therefore, for the encoding
|
||||
* to not be overlong, the first continuation byte must be at least
|
||||
* 0b10010000, or 0x90.
|
||||
*/
|
||||
okay = (b >= 0x90 && b <= 0xbf);
|
||||
break;
|
||||
case 0xf4:
|
||||
/*
|
||||
* 4-byte encoding of form 0b11110100 0b10xxxxxx 0b10xxxxxx 0b10xxxxxx.
|
||||
* The largest Unicode code point is 0x10FFFF, encoded as
|
||||
* 0b11110100 0b10001111 0b10111111 0b10111111. Therefore,
|
||||
* the first continuation byte must not exceed 0b10001111,
|
||||
* or 0x8F.
|
||||
*/
|
||||
okay = (b >= 0x80 && b <= 0x8F);
|
||||
break;
|
||||
default:
|
||||
/* Default range is 0x80 to 0xBF */
|
||||
okay = (b >= 0x80 && b <= 0xBF);
|
||||
break;
|
||||
}
|
||||
if (!okay)
|
||||
break; /* invalid */
|
||||
if (first < 0xE0)
|
||||
continue; /* 2 byte */
|
||||
b = *p++;
|
||||
if (b < 0x80 || b > 0xBF)
|
||||
break; /* invalid */
|
||||
if (first < 0xF0)
|
||||
continue; /* 3 bytes */
|
||||
b = *p++;
|
||||
if (b < 0x80 || b > 0xBF)
|
||||
break; /* invalid */
|
||||
/* valid 4 byte */
|
||||
}
|
||||
wl_log("string has invalid UTF-8 continuation byte 0x%hhx "
|
||||
"at offset %td, message %s(%s)\n", b, p - s,
|
||||
message->name, message->signature);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
struct wl_closure *
|
||||
wl_connection_demarshal(struct wl_connection *connection,
|
||||
uint32_t size,
|
||||
|
|
@ -895,7 +1012,7 @@ wl_connection_demarshal(struct wl_connection *connection,
|
|||
{
|
||||
uint32_t *p, *next, *end, length, length_in_u32, id;
|
||||
int fd;
|
||||
char *s;
|
||||
const unsigned char *s, *str_end;
|
||||
int i, count, num_arrays;
|
||||
const char *signature;
|
||||
struct argument_details arg;
|
||||
|
|
@ -975,7 +1092,7 @@ wl_connection_demarshal(struct wl_connection *connection,
|
|||
}
|
||||
next = p + length_in_u32;
|
||||
|
||||
s = (char *) p;
|
||||
s = (const unsigned char *)p;
|
||||
|
||||
if (s[length - 1] != '\0') {
|
||||
wl_log("string not nul-terminated, "
|
||||
|
|
@ -985,15 +1102,22 @@ wl_connection_demarshal(struct wl_connection *connection,
|
|||
goto err;
|
||||
}
|
||||
|
||||
if (strlen(s) != length - 1) {
|
||||
wl_log("string has embedded nul at offset %zu, "
|
||||
"message %s(%s)\n", strlen(s),
|
||||
message->name, message->signature);
|
||||
errno = EINVAL;
|
||||
str_end = wayland_string_validate(s, message);
|
||||
if (str_end != (const unsigned char *)p + length) {
|
||||
if (str_end != NULL) {
|
||||
/* NUL byte before end of string */
|
||||
wl_log("string has embedded nul at offset %td, "
|
||||
"message %s(%s)\n", str_end - s,
|
||||
message->name, message->signature);
|
||||
errno = EINVAL;
|
||||
} else {
|
||||
/* Invalid UTF-8 */
|
||||
errno = EILSEQ;
|
||||
}
|
||||
goto err;
|
||||
}
|
||||
|
||||
closure->args[i].s = s;
|
||||
closure->args[i].s = (const char *)s;
|
||||
p = next;
|
||||
break;
|
||||
case WL_ARG_OBJECT:
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue