vt: remove partial support for 8-bit C1 control chars

The 8-bit C1 control characters (0x80-0x9F) are part of the "anywhere"
state in Paul Flo Williams' VT parser state diagram[1]. That means that
they should be accepted *anywhere* in a byte sequence, including in the
middle of other sequences or even in the middle of a multi-byte UTF-8
sequence. Adhering to this requirement makes them incompatible with the
use of UTF-8 as a universal encoding, because the same byte values also
occur as continuation bytes within multi-byte UTF-8 sequences.

Not adhering to the aforementioned requirement by making a special case
for UTF-8 sequences may seem tempting, but it's much more at odds with
the relevant standards[2] than it appears on the surface. UTF-8 is not
an "8-bit code", at least not according to the parlance of ECMA-43, nor
does it map the C1 control range in a compatible way.

[1]: https://vt100.net/emu/dec_ansi_parser
[2]: ECMA-35, ECMA-43, ECMA-48
This commit is contained in:
Craig Barnes 2021-05-25 19:14:06 +01:00
parent 6761d50ba5
commit 14a55de4e7

26
vt.c
View file

@ -770,16 +770,9 @@ anywhere(struct terminal *term, uint8_t data)
case 0x18: action_execute(term, data); return STATE_GROUND;
case 0x1a: action_execute(term, data); return STATE_GROUND;
case 0x1b: action_clear(term); return STATE_ESCAPE;
case 0x80 ... 0x8f: action_execute(term, data); return STATE_GROUND;
case 0x90: action_clear(term); return STATE_DCS_ENTRY;
case 0x91 ... 0x97: action_execute(term, data); return STATE_GROUND;
case 0x98: return STATE_SOS_PM_APC_STRING;
case 0x99: action_execute(term, data); return STATE_GROUND;
case 0x9a: action_execute(term, data); return STATE_GROUND;
case 0x9b: action_clear(term); return STATE_CSI_ENTRY;
case 0x9c: return STATE_GROUND;
case 0x9d: action_osc_start(term, data); return STATE_OSC_STRING;
case 0x9e ... 0x9f: return STATE_SOS_PM_APC_STRING;
/* 8-bit C1 control characters (not supported) */
case 0x80 ... 0x9f: return STATE_GROUND;
}
return term->vt.state;
@ -1039,16 +1032,9 @@ state_dcs_passthrough_switch(struct terminal *term, uint8_t data)
case 0x18: action_unhook(term, data); action_execute(term, data); return STATE_GROUND;
case 0x1a: action_unhook(term, data); action_execute(term, data); return STATE_GROUND;
case 0x1b: action_unhook(term, data); action_clear(term); return STATE_ESCAPE;
case 0x80 ... 0x8f: action_unhook(term, data); action_execute(term, data); return STATE_GROUND;
case 0x90: action_unhook(term, data); action_clear(term); return STATE_DCS_ENTRY;
case 0x91 ... 0x97: action_unhook(term, data); action_execute(term, data); return STATE_GROUND;
case 0x98: action_unhook(term, data); return STATE_SOS_PM_APC_STRING;
case 0x99: action_unhook(term, data); action_execute(term, data); return STATE_GROUND;
case 0x9a: action_unhook(term, data); action_execute(term, data); return STATE_GROUND;
case 0x9b: action_unhook(term, data); action_clear(term); return STATE_CSI_ENTRY;
case 0x9c: action_unhook(term, data); return STATE_GROUND;
case 0x9d: action_unhook(term, data); action_osc_start(term, data); return STATE_OSC_STRING;
case 0x9e ... 0x9f: action_unhook(term, data); return STATE_SOS_PM_APC_STRING;
/* 8-bit C1 control characters (not supported) */
case 0x80 ... 0x9f: action_unhook(term, data); return STATE_GROUND;
default: return STATE_DCS_PASSTHROUGH;
}