From 14a55de4e721283ab4e9a04ed58db6cecd5fcaa1 Mon Sep 17 00:00:00 2001 From: Craig Barnes Date: Tue, 25 May 2021 19:14:06 +0100 Subject: [PATCH 1/2] vt: remove partial support for 8-bit C1 control chars These are part of the "anywhere" state in Paul Flo Williams' VT parser state diagram[1]. That means that they should be accepted *anywhere* in a byte sequence, including in the middle of other sequences or even in the middle of a multi-byte UTF-8 sequence. Adhering to this requirement makes them incompatible with the use of UTF-8 as a universal encoding. Not adhering to the aforementioned requirement by making a special case for UTF-8 sequences may seem tempting, but it's much more at odds with the relevant standards[2] than it appears on the surface. UTF-8 is not an "8-bit code", at least not according to the parlance of ECMA-43, nor does it map the C1 control range in a compatible way. [1]: https://vt100.net/emu/dec_ansi_parser [2]: ECMA-35, ECMA-43, ECMA-48 --- vt.c | 26 ++++++-------------------- 1 file changed, 6 insertions(+), 20 deletions(-) diff --git a/vt.c b/vt.c index 669ba8c1..daa634d1 100644 --- a/vt.c +++ b/vt.c @@ -770,16 +770,9 @@ anywhere(struct terminal *term, uint8_t data) case 0x18: action_execute(term, data); return STATE_GROUND; case 0x1a: action_execute(term, data); return STATE_GROUND; case 0x1b: action_clear(term); return STATE_ESCAPE; - case 0x80 ... 0x8f: action_execute(term, data); return STATE_GROUND; - case 0x90: action_clear(term); return STATE_DCS_ENTRY; - case 0x91 ... 0x97: action_execute(term, data); return STATE_GROUND; - case 0x98: return STATE_SOS_PM_APC_STRING; - case 0x99: action_execute(term, data); return STATE_GROUND; - case 0x9a: action_execute(term, data); return STATE_GROUND; - case 0x9b: action_clear(term); return STATE_CSI_ENTRY; - case 0x9c: return STATE_GROUND; - case 0x9d: action_osc_start(term, data); return STATE_OSC_STRING; - case 0x9e ... 0x9f: return STATE_SOS_PM_APC_STRING; + + /* 8-bit C1 control characters (not supported) */ + case 0x80 ... 0x9f: return STATE_GROUND; } return term->vt.state; @@ -1039,16 +1032,9 @@ state_dcs_passthrough_switch(struct terminal *term, uint8_t data) case 0x18: action_unhook(term, data); action_execute(term, data); return STATE_GROUND; case 0x1a: action_unhook(term, data); action_execute(term, data); return STATE_GROUND; case 0x1b: action_unhook(term, data); action_clear(term); return STATE_ESCAPE; - case 0x80 ... 0x8f: action_unhook(term, data); action_execute(term, data); return STATE_GROUND; - case 0x90: action_unhook(term, data); action_clear(term); return STATE_DCS_ENTRY; - case 0x91 ... 0x97: action_unhook(term, data); action_execute(term, data); return STATE_GROUND; - case 0x98: action_unhook(term, data); return STATE_SOS_PM_APC_STRING; - case 0x99: action_unhook(term, data); action_execute(term, data); return STATE_GROUND; - case 0x9a: action_unhook(term, data); action_execute(term, data); return STATE_GROUND; - case 0x9b: action_unhook(term, data); action_clear(term); return STATE_CSI_ENTRY; - case 0x9c: action_unhook(term, data); return STATE_GROUND; - case 0x9d: action_unhook(term, data); action_osc_start(term, data); return STATE_OSC_STRING; - case 0x9e ... 0x9f: action_unhook(term, data); return STATE_SOS_PM_APC_STRING; + + /* 8-bit C1 control characters (not supported) */ + case 0x80 ... 0x9f: action_unhook(term, data); return STATE_GROUND; default: return STATE_DCS_PASSTHROUGH; } From f14b294dccca8c77b9f0aedb0fafe9663c67e71b Mon Sep 17 00:00:00 2001 From: Craig Barnes Date: Tue, 25 May 2021 21:45:55 +0100 Subject: [PATCH 2/2] vt: remove action_utf8_print(term, 0) calls from UTF-8 state handlers These calls appear to be left over from a previous refactoring of the code. Calling this function with `wc == 0` is a no-op. --- vt.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/vt.c b/vt.c index daa634d1..67ec547a 100644 --- a/vt.c +++ b/vt.c @@ -1059,7 +1059,7 @@ state_utf8_21_switch(struct terminal *term, uint8_t data) switch (data) { /* exit current enter new state */ case 0x80 ... 0xbf: action_utf8_22(term, data); return STATE_GROUND; - default: action_utf8_print(term, 0); return STATE_GROUND; + default: return STATE_GROUND; } } @@ -1069,7 +1069,7 @@ state_utf8_31_switch(struct terminal *term, uint8_t data) switch (data) { /* exit current enter new state */ case 0x80 ... 0xbf: action_utf8_32(term, data); return STATE_UTF8_32; - default: action_utf8_print(term, 0); return STATE_GROUND; + default: return STATE_GROUND; } } @@ -1079,7 +1079,7 @@ state_utf8_32_switch(struct terminal *term, uint8_t data) switch (data) { /* exit current enter new state */ case 0x80 ... 0xbf: action_utf8_33(term, data); return STATE_GROUND; - default: action_utf8_print(term, 0); return STATE_GROUND; + default: return STATE_GROUND; } } @@ -1089,7 +1089,7 @@ state_utf8_41_switch(struct terminal *term, uint8_t data) switch (data) { /* exit current enter new state */ case 0x80 ... 0xbf: action_utf8_42(term, data); return STATE_UTF8_42; - default: action_utf8_print(term, 0); return STATE_GROUND; + default: return STATE_GROUND; } } @@ -1099,7 +1099,7 @@ state_utf8_42_switch(struct terminal *term, uint8_t data) switch (data) { /* exit current enter new state */ case 0x80 ... 0xbf: action_utf8_43(term, data); return STATE_UTF8_43; - default: action_utf8_print(term, 0); return STATE_GROUND; + default: return STATE_GROUND; } } @@ -1109,7 +1109,7 @@ state_utf8_43_switch(struct terminal *term, uint8_t data) switch (data) { /* exit current enter new state */ case 0x80 ... 0xbf: action_utf8_44(term, data); return STATE_GROUND; - default: action_utf8_print(term, 0); return STATE_GROUND; + default: return STATE_GROUND; } }