From b47c07b9cd35a096dcef2efb37a4d660845c03a5 Mon Sep 17 00:00:00 2001 From: Nils Tonnaett Date: Tue, 12 May 2026 23:34:44 -0700 Subject: [PATCH 1/8] module-avb: add UTF-8 validation function --- .../aecp-aem-cmds-resps/cmd-get-set-name.c | 1 + src/modules/module-avb/strings.c | 78 +++++++++++++++++++ src/modules/module-avb/strings.h | 12 +++ 3 files changed, 91 insertions(+) create mode 100644 src/modules/module-avb/strings.c create mode 100644 src/modules/module-avb/strings.h diff --git a/src/modules/module-avb/aecp-aem-cmds-resps/cmd-get-set-name.c b/src/modules/module-avb/aecp-aem-cmds-resps/cmd-get-set-name.c index f1482996c..b7a5560af 100644 --- a/src/modules/module-avb/aecp-aem-cmds-resps/cmd-get-set-name.c +++ b/src/modules/module-avb/aecp-aem-cmds-resps/cmd-get-set-name.c @@ -12,6 +12,7 @@ #include "../aecp-aem.h" #include "../aecp-aem-state.h" #include "../aecp-aem-descriptors.h" +#include "../strings.h" #include "cmd-get-set-name.h" diff --git a/src/modules/module-avb/strings.c b/src/modules/module-avb/strings.c new file mode 100644 index 000000000..2f680715d --- /dev/null +++ b/src/modules/module-avb/strings.c @@ -0,0 +1,78 @@ +/* SPDX-FileCopyrightText: Copyright © 2026 Nils Tonnaett + * Nils Tonnaett */ +/* SPDX-License-Identifier: MIT */ + +#include "strings.h" + +typedef enum { + ST_START, + ST_A, + ST_B, + ST_C, + ST_D, + ST_E, + ST_F, + ST_G, + ST_ERROR, +} UTF8_STATE; + +int validate_utf8(uint8_t *str, size_t len) { + UTF8_STATE state = ST_START; + + for (int i = 0; i < len; ++i) { + switch (state) { + case ST_START: + if (str[i] <= 0x7F) { + continue; + } else if (str[i] >= 0xC2 && str[i] <= 0xDF) { + state = ST_A; + } else if (str[i] >= 0xE1 && str[i] <= 0xEC) { + state = ST_B; + } else if (str[i] >= 0xEE && str[i] <= 0xEF) { + state = ST_B; + } else if (str[i] == 0xE0) { + state = ST_C; + } else if (str[i] == 0xED) { + state = ST_D; + } else if (str[i] >= 0xF1 && str[i] <= 0xF3) { + state = ST_E; + } else if (str[i] == 0xF0) { + state = ST_F; 
+ } else if (str[i] >= 0xF4) { + state = ST_G; + } else { + state = ST_ERROR; + } + break; + case ST_A: + state = (str[i] >= 0x80 && str[i] <= 0xBF) ? ST_START : ST_ERROR; + break; + case ST_B: + state = (str[i] >= 0x80 && str[i] <= 0xBF) ? ST_A : ST_ERROR; + break; + case ST_C: + state = (str[i] >= 0xA0 && str[i] <= 0xBF) ? ST_A : ST_ERROR; + break; + case ST_D: + state = (str[i] >= 0x80 && str[i] <= 0x9F) ? ST_A : ST_ERROR; + break; + case ST_E: + state = (str[i] >= 0x80 && str[i] <= 0xBF) ? ST_B : ST_ERROR; + break; + case ST_F: + state = (str[i] >= 0x90 && str[i] <= 0xBF) ? ST_B : ST_ERROR; + break; + case ST_G: + state = (str[i] >= 0x80 && str[i] <= 0x8F) ? ST_B : ST_ERROR; + break; + } + if (state == ST_ERROR) { + return -1; + } + } + if (state != ST_START) { + return -1; + } else { + return 0; + } +} diff --git a/src/modules/module-avb/strings.h b/src/modules/module-avb/strings.h new file mode 100644 index 000000000..3cdafc303 --- /dev/null +++ b/src/modules/module-avb/strings.h @@ -0,0 +1,12 @@ +/* SPDX-FileCopyrightText: Copyright © 2026 Nils Tonnaett*/ +/* SPDX-License-Identifier: MIT */ + +#ifndef AVB_STRINGS_H +#define AVB_STRINGS_H + +#include +#include + +int validate_utf8(uint8_t *str, size_t len); + +#endif /* AVB_STRINGS_H */ From 14b1c4d3dd9ff4e45c305e9ca52a5c19b0fbe475 Mon Sep 17 00:00:00 2001 From: Nils Tonnaett Date: Wed, 13 May 2026 11:53:43 -0700 Subject: [PATCH 2/8] module-avb: add zero padding check function --- src/modules/module-avb/strings.c | 20 ++++++++++++++++++++ src/modules/module-avb/strings.h | 2 ++ 2 files changed, 22 insertions(+) diff --git a/src/modules/module-avb/strings.c b/src/modules/module-avb/strings.c index 2f680715d..169c745c1 100644 --- a/src/modules/module-avb/strings.c +++ b/src/modules/module-avb/strings.c @@ -76,3 +76,23 @@ int validate_utf8(uint8_t *str, size_t len) { return 0; } } + +int check_zero_padding(uint8_t const *str, size_t len) +{ + size_t str_len = strnlen((char *)str, len); + /* String doesn't need 
to be null-terminated. Return success if there is no null in str */ + if (str_len == len) + { + return 0; + } + + for (int i = str_len; i < len; ++i) + { + if (str[i] != 0x00) + { + return -1; + } + } + + return 0; +} diff --git a/src/modules/module-avb/strings.h b/src/modules/module-avb/strings.h index 3cdafc303..7c607df09 100644 --- a/src/modules/module-avb/strings.h +++ b/src/modules/module-avb/strings.h @@ -6,7 +6,9 @@ #include #include +#include int validate_utf8(uint8_t *str, size_t len); +int check_zero_padding(uint8_t *str, size_t len); #endif /* AVB_STRINGS_H */ From ef77d995cd640a364e2cb6aa129571977a21f851 Mon Sep 17 00:00:00 2001 From: Nils Tonnaett Date: Wed, 13 May 2026 11:59:45 -0700 Subject: [PATCH 3/8] module-avb: SET_NAME: check that string is valid utf8 and zero padded --- .../aecp-aem-cmds-resps/cmd-get-set-name.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/src/modules/module-avb/aecp-aem-cmds-resps/cmd-get-set-name.c b/src/modules/module-avb/aecp-aem-cmds-resps/cmd-get-set-name.c index b7a5560af..d5cc552df 100644 --- a/src/modules/module-avb/aecp-aem-cmds-resps/cmd-get-set-name.c +++ b/src/modules/module-avb/aecp-aem-cmds-resps/cmd-get-set-name.c @@ -146,7 +146,6 @@ int handle_cmd_get_name_common(struct aecp *aecp, int64_t now, /** * IEEE 1722.1-2021 7.4.17 SET_NAME - * For now this is not handling UTF characters, only ASCII */ int handle_cmd_set_name_common(struct aecp *aecp, int64_t now, const void *m, int len) @@ -182,14 +181,17 @@ int handle_cmd_set_name_common(struct aecp *aecp, int64_t now, /** * IEEE 1722.1-2021: 7.4.17.1: The name does not contain a trailing NULL * but if the name is less than 64 bytes in length then it is zero - * padded + * padded. 
*/ - memcpy(name_ptr, cmd->name, 64); + if (check_zero_padding(cmd->name, 64) == -1) + return reply_status(aecp, + AVB_AECP_AEM_STATUS_BAD_ARGUMENTS, m, len); - /** TODO: According to the specification, the string should alwasy be 0 - * terminated, the goal would be to check whether a string is UTF-8 and - * that it is correctly zero terminitaed if less than 64 char, if not - * then a simple memcpy is enough */ + if (validate_utf8(cmd->name, 64) == -1) + return reply_status(aecp, + AVB_AECP_AEM_STATUS_BAD_ARGUMENTS, m, len); + + memcpy(name_ptr, cmd->name, 64); rc = reply_success(aecp, m, len); if (rc < 0) From c9ba3ced910da7086c80042fb8768787bcff9bfc Mon Sep 17 00:00:00 2001 From: Nils Tonnaett Date: Wed, 13 May 2026 21:22:54 -0700 Subject: [PATCH 4/8] module-avb: format strings.c --- src/modules/module-avb/strings.c | 167 ++++++++++++++++++------------- 1 file changed, 97 insertions(+), 70 deletions(-) diff --git a/src/modules/module-avb/strings.c b/src/modules/module-avb/strings.c index 169c745c1..c63bf1ba0 100644 --- a/src/modules/module-avb/strings.c +++ b/src/modules/module-avb/strings.c @@ -5,82 +5,109 @@ #include "strings.h" typedef enum { - ST_START, - ST_A, - ST_B, - ST_C, - ST_D, - ST_E, - ST_F, - ST_G, - ST_ERROR, + ST_START, + ST_A, + ST_B, + ST_C, + ST_D, + ST_E, + ST_F, + ST_G, + ST_ERROR, } UTF8_STATE; -int validate_utf8(uint8_t *str, size_t len) { - UTF8_STATE state = ST_START; +int validate_utf8 (uint8_t *str, size_t len) +{ + UTF8_STATE state = ST_START; - for (int i = 0; i < len; ++i) { - switch (state) { - case ST_START: - if (str[i] <= 0x7F) { - continue; - } else if (str[i] >= 0xC2 && str[i] <= 0xDF) { - state = ST_A; - } else if (str[i] >= 0xE1 && str[i] <= 0xEC) { - state = ST_B; - } else if (str[i] >= 0xEE && str[i] <= 0xEF) { - state = ST_B; - } else if (str[i] == 0xE0) { - state = ST_C; - } else if (str[i] == 0xED) { - state = ST_D; - } else if (str[i] >= 0xF1 && str[i] <= 0xF3) { - state = ST_E; - } else if (str[i] == 0xF0) { - state = 
ST_F; - } else if (str[i] >= 0xF4) { - state = ST_G; - } else { - state = ST_ERROR; - } - break; - case ST_A: - state = (str[i] >= 0x80 && str[i] <= 0xBF) ? ST_START : ST_ERROR; - break; - case ST_B: - state = (str[i] >= 0x80 && str[i] <= 0xBF) ? ST_A : ST_ERROR; - break; - case ST_C: - state = (str[i] >= 0xA0 && str[i] <= 0xBF) ? ST_A : ST_ERROR; - break; - case ST_D: - state = (str[i] >= 0x80 && str[i] <= 0x9F) ? ST_A : ST_ERROR; - break; - case ST_E: - state = (str[i] >= 0x80 && str[i] <= 0xBF) ? ST_B : ST_ERROR; - break; - case ST_F: - state = (str[i] >= 0x90 && str[i] <= 0xBF) ? ST_B : ST_ERROR; - break; - case ST_G: - state = (str[i] >= 0x80 && str[i] <= 0x8F) ? ST_B : ST_ERROR; - break; - } - if (state == ST_ERROR) { - return -1; - } - } - if (state != ST_START) { - return -1; - } else { - return 0; - } + for (int i = 0; i < len; ++i) + { + switch (state) + { + case ST_START: + if (str[i] <= 0x7F) + { + continue; + } + else if (str[i] >= 0xC2 && str[i] <= 0xDF) + { + state = ST_A; + } + else if (str[i] >= 0xE1 && str[i] <= 0xEC) + { + state = ST_B; + } + else if (str[i] >= 0xEE && str[i] <= 0xEF) + { + state = ST_B; + } + else if (str[i] == 0xE0) + { + state = ST_C; + } + else if (str[i] == 0xED) + { + state = ST_D; + } + else if (str[i] >= 0xF1 && str[i] <= 0xF3) + { + state = ST_E; + } + else if (str[i] == 0xF0) + { + state = ST_F; + } + else if (str[i] >= 0xF4) + { + state = ST_G; + } + else + { + state = ST_ERROR; + } + break; + case ST_A: + state = (str[i] >= 0x80 && str[i] <= 0xBF) ? ST_START : ST_ERROR; + break; + case ST_B: + state = (str[i] >= 0x80 && str[i] <= 0xBF) ? ST_A : ST_ERROR; + break; + case ST_C: + state = (str[i] >= 0xA0 && str[i] <= 0xBF) ? ST_A : ST_ERROR; + break; + case ST_D: + state = (str[i] >= 0x80 && str[i] <= 0x9F) ? ST_A : ST_ERROR; + break; + case ST_E: + state = (str[i] >= 0x80 && str[i] <= 0xBF) ? ST_B : ST_ERROR; + break; + case ST_F: + state = (str[i] >= 0x90 && str[i] <= 0xBF) ? 
ST_B : ST_ERROR; + break; + case ST_G: + state = (str[i] >= 0x80 && str[i] <= 0x8F) ? ST_B : ST_ERROR; + break; + } + if (state == ST_ERROR) + { + return -1; + } + } + if (state != ST_START) + { + return -1; + } + else + { + return 0; + } } -int check_zero_padding(uint8_t const *str, size_t len) +int check_zero_padding (uint8_t const *str, size_t len) { - size_t str_len = strnlen((char *)str, len); - /* String doesn't need to be null-terminated. Return success if there is no null in str */ + size_t str_len = strnlen ((char *)str, len); + /* String doesn't need to be null-terminated. Return success if there is no + * null in str */ if (str_len == len) { return 0; From 3bde62bc1dff8a765c99cb0458a096ba53a79e10 Mon Sep 17 00:00:00 2001 From: Nils Tonnaett Date: Mon, 18 May 2026 12:46:34 -0700 Subject: [PATCH 5/8] module-avb: build strings.c --- src/modules/meson.build | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/modules/meson.build b/src/modules/meson.build index 439a37693..6bd108e95 100644 --- a/src/modules/meson.build +++ b/src/modules/meson.build @@ -828,7 +828,8 @@ if build_module_avb 'module-avb/msrp.c', 'module-avb/mvrp.c', 'module-avb/srp.c', - 'module-avb/stream.c' + 'module-avb/stream.c', + 'module-avb/strings.c' ], include_directories : [configinc], install : true, From 2dd60fdbc655412ba5cfcf635f4a656e8847b750 Mon Sep 17 00:00:00 2001 From: Nils Tonnaett Date: Mon, 18 May 2026 12:47:02 -0700 Subject: [PATCH 6/8] module-avb: fix types --- .../module-avb/aecp-aem-cmds-resps/cmd-get-set-name.c | 4 ++-- src/modules/module-avb/aecp-aem.h | 2 +- src/modules/module-avb/strings.c | 8 ++++---- src/modules/module-avb/strings.h | 4 ++-- 4 files changed, 9 insertions(+), 9 deletions(-) diff --git a/src/modules/module-avb/aecp-aem-cmds-resps/cmd-get-set-name.c b/src/modules/module-avb/aecp-aem-cmds-resps/cmd-get-set-name.c index d5cc552df..fc51dbf0d 100644 --- a/src/modules/module-avb/aecp-aem-cmds-resps/cmd-get-set-name.c +++ 
b/src/modules/module-avb/aecp-aem-cmds-resps/cmd-get-set-name.c @@ -156,7 +156,7 @@ int handle_cmd_set_name_common(struct aecp *aecp, int64_t now, const struct avb_packet_aecp_aem_setget_name *cmd; struct descriptor *desc; uint16_t desc_type, desc_id, name_index; - char *name_ptr; + unsigned char *name_ptr; int rc; cmd = (const struct avb_packet_aecp_aem_setget_name *)p->payload; @@ -169,7 +169,7 @@ int handle_cmd_set_name_common(struct aecp *aecp, int64_t now, return reply_status(aecp, AVB_AECP_AEM_STATUS_NO_SUCH_DESCRIPTOR, m, len); - name_ptr = get_name_ptr(desc_type, descriptor_body(desc), name_index); + name_ptr = (unsigned char *)get_name_ptr(desc_type, descriptor_body(desc), name_index); if (name_ptr == NULL) return reply_status(aecp, AVB_AECP_AEM_STATUS_BAD_ARGUMENTS, m, len); diff --git a/src/modules/module-avb/aecp-aem.h b/src/modules/module-avb/aecp-aem.h index c94f416dc..997747ed8 100644 --- a/src/modules/module-avb/aecp-aem.h +++ b/src/modules/module-avb/aecp-aem.h @@ -250,7 +250,7 @@ struct avb_packet_aecp_aem_setget_name { uint16_t descriptor_index; uint16_t name_index; uint16_t configuration_index; - char name[64]; + unsigned char name[64]; } __attribute__ ((__packed__)); struct avb_packet_aecp_aem_setget_association_id { diff --git a/src/modules/module-avb/strings.c b/src/modules/module-avb/strings.c index c63bf1ba0..2a4490a04 100644 --- a/src/modules/module-avb/strings.c +++ b/src/modules/module-avb/strings.c @@ -16,11 +16,11 @@ typedef enum { ST_ERROR, } UTF8_STATE; -int validate_utf8 (uint8_t *str, size_t len) +int validate_utf8 (const unsigned char *str, size_t len) { UTF8_STATE state = ST_START; - for (int i = 0; i < len; ++i) + for (unsigned int i = 0; i < len; ++i) { switch (state) { @@ -103,7 +103,7 @@ int validate_utf8 (uint8_t *str, size_t len) } } -int check_zero_padding (uint8_t const *str, size_t len) +int check_zero_padding (const unsigned char *str, size_t len) { size_t str_len = strnlen ((char *)str, len); /* String doesn't need to 
be null-terminated. Return success if there is no @@ -113,7 +113,7 @@ int check_zero_padding (uint8_t const *str, size_t len) return 0; } - for (int i = str_len; i < len; ++i) + for (unsigned int i = str_len; i < len; ++i) { if (str[i] != 0x00) { diff --git a/src/modules/module-avb/strings.h b/src/modules/module-avb/strings.h index 7c607df09..731b9e0eb 100644 --- a/src/modules/module-avb/strings.h +++ b/src/modules/module-avb/strings.h @@ -8,7 +8,7 @@ #include #include -int validate_utf8(uint8_t *str, size_t len); -int check_zero_padding(uint8_t *str, size_t len); +int check_zero_padding(const unsigned char *str, size_t len); +int validate_utf8(const unsigned char *str, size_t len); #endif /* AVB_STRINGS_H */ From 4831ba60af44042b60108d6af688359c14f68991 Mon Sep 17 00:00:00 2001 From: Nils Tonnaett Date: Mon, 18 May 2026 12:53:53 -0700 Subject: [PATCH 7/8] module-avb: don't encode error as state --- src/modules/module-avb/strings.c | 43 ++++++++++++++++++++++++-------- 1 file changed, 33 insertions(+), 10 deletions(-) diff --git a/src/modules/module-avb/strings.c b/src/modules/module-avb/strings.c index 2a4490a04..f45d04bc0 100644 --- a/src/modules/module-avb/strings.c +++ b/src/modules/module-avb/strings.c @@ -4,6 +4,8 @@ #include "strings.h" +#include + typedef enum { ST_START, ST_A, @@ -13,12 +15,12 @@ typedef enum { ST_E, ST_F, ST_G, - ST_ERROR, } UTF8_STATE; int validate_utf8 (const unsigned char *str, size_t len) { UTF8_STATE state = ST_START; + bool err = false; for (unsigned int i = 0; i < len; ++i) { @@ -63,32 +65,53 @@ int validate_utf8 (const unsigned char *str, size_t len) } else { - state = ST_ERROR; + err = true; } break; case ST_A: - state = (str[i] >= 0x80 && str[i] <= 0xBF) ? ST_START : ST_ERROR; + if (str[i] >= 0x80 && str[i] <= 0xBF) + state = ST_START; + else + err = true; break; case ST_B: - state = (str[i] >= 0x80 && str[i] <= 0xBF) ? 
ST_A : ST_ERROR; + if (str[i] >= 0x80 && str[i] <= 0xBF) + state = ST_A; + else + err = true; break; case ST_C: - state = (str[i] >= 0xA0 && str[i] <= 0xBF) ? ST_A : ST_ERROR; + if (str[i] >= 0xA0 && str[i] <= 0xBF) + state = ST_A; + else + err = true; break; case ST_D: - state = (str[i] >= 0x80 && str[i] <= 0x9F) ? ST_A : ST_ERROR; + if (str[i] >= 0x80 && str[i] <= 0x9F) + state = ST_A; + else + err = true; break; case ST_E: - state = (str[i] >= 0x80 && str[i] <= 0xBF) ? ST_B : ST_ERROR; + if (str[i] >= 0x80 && str[i] <= 0xBF) + state = ST_B; + else + err = true; break; case ST_F: - state = (str[i] >= 0x90 && str[i] <= 0xBF) ? ST_B : ST_ERROR; + if (str[i] >= 0x90 && str[i] <= 0xBF) + state = ST_B; + else + err = true; break; case ST_G: - state = (str[i] >= 0x80 && str[i] <= 0x8F) ? ST_B : ST_ERROR; + if (str[i] >= 0x80 && str[i] <= 0x8F) + state = ST_B; + else + err = true; break; } - if (state == ST_ERROR) + if (err == true) { return -1; } From c732df412c71a2be4632f7ca90234fac83c1ebea Mon Sep 17 00:00:00 2001 From: Nils Tonnaett Date: Fri, 22 May 2026 17:12:32 -0700 Subject: [PATCH 8/8] module-avb: add documentation to strings.c --- src/modules/module-avb/strings.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/src/modules/module-avb/strings.c b/src/modules/module-avb/strings.c index f45d04bc0..9877c1f8f 100644 --- a/src/modules/module-avb/strings.c +++ b/src/modules/module-avb/strings.c @@ -17,6 +17,14 @@ typedef enum { ST_G, } UTF8_STATE; +/* + * IEEE 1722.1 Section 7.4.17.1 + * + * We need to check if the buffer str of length len is valid UTF-8. 
+ * The algorithm implemented here is based on the state machine by Frank Yung-Fong Tang, + * described at + * https://unicode.org/mail-arch/unicode-ml/y2003-m02/att-0467/01-The_Algorithm_to_Valide_an_UTF-8_String + */ int validate_utf8 (const unsigned char *str, size_t len) { UTF8_STATE state = ST_START; @@ -126,6 +134,12 @@ int validate_utf8 (const unsigned char *str, size_t len) } } +/* + * For SET_NAME, strings need to be zero-padded if shorter than 64 bytes. A + * string of 64 bytes would NOT be nul-terminated. + * + * IEEE 1722.1 Section 7.4.17.1 + */ int check_zero_padding (const unsigned char *str, size_t len) { size_t str_len = strnlen ((char *)str, len);