From 9452aff0208f62716304022f4496bafbebdf50e9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Ekl=C3=B6f?= Date: Sun, 7 Jun 2020 16:16:50 +0200 Subject: [PATCH 1/3] vt: initial version of UTF-8 decoding built-in into the VT parser --- terminal.h | 5 +- vt.c | 166 ++++++++++++++++++++++++++++++++++++++++++----------- 2 files changed, 134 insertions(+), 37 deletions(-) diff --git a/terminal.h b/terminal.h index eecebbf6..798307cb 100644 --- a/terminal.h +++ b/terminal.h @@ -130,6 +130,7 @@ struct vt_param { struct vt { int state; /* enum state */ wchar_t last_printed; + wchar_t utf8; /* code point being assembled by the UTF-8 decoder; the action_utf8_NM handlers in vt.c assign or OR bits into it */ struct { struct vt_param v[16]; uint8_t idx; @@ -140,10 +141,6 @@ struct vt { size_t size; size_t idx; } osc; - struct { - uint8_t data[4]; - uint8_t idx; - } utf8; struct { uint8_t *data; size_t size; diff --git a/vt.c b/vt.c index 0db90069..ed3a8eae 100644 --- a/vt.c +++ b/vt.c @@ -38,9 +38,12 @@ enum state { STATE_SOS_PM_APC_STRING, - STATE_UTF8_COLLECT_1, - STATE_UTF8_COLLECT_2, - STATE_UTF8_COLLECT_3, + STATE_UTF8_21, + STATE_UTF8_31, + STATE_UTF8_32, + STATE_UTF8_41, + STATE_UTF8_42, + STATE_UTF8_43, /* STATE_UTF8_NM: inside an N-byte UTF-8 sequence with M bytes consumed so far */ }; #if defined(_DEBUG) && defined(LOG_ENABLE_DBG) && LOG_ENABLE_DBG && 0 @@ -65,9 +68,9 @@ static const char *const state_names[] = { [STATE_SOS_PM_APC_STRING] = "sos/pm/apc string", - [STATE_UTF8_COLLECT_1] = "UTF8 collect (1 left)", - [STATE_UTF8_COLLECT_2] = "UTF8 collect (2 left)", - [STATE_UTF8_COLLECT_3] = "UTF8 collect (3 left)", + [STATE_UTF8_21] = "UTF8 2-byte 1/2", + [STATE_UTF8_31] = "UTF8 3-byte 1/3", + [STATE_UTF8_32] = "UTF8 3-byte 2/3", /* NOTE(review): STATE_UTF8_41, STATE_UTF8_42 and STATE_UTF8_43 are added to enum state by this patch but receive no state_names entry here - confirm whether that is intentional */ }; #endif @@ -503,18 +506,8 @@ action_put(struct terminal *term, uint8_t c) } static void -action_utf8_entry(struct terminal *term, uint8_t c) +action_utf8_print(struct terminal *term, wchar_t wc) { - term->vt.utf8.data[0] = c; - term->vt.utf8.idx = 1; -} - -static void -action_utf8_print(struct terminal *term, uint8_t c) -{ - wchar_t wc = 0; - mbtowc(&wc, (const char *)term->vt.utf8.data, term->vt.utf8.idx); - int width = wcwidth(wc); 
/* @@ -667,6 +660,72 @@ action_utf8_print(struct terminal *term, uint8_t c) term_print(term, wc, width); } +static void +action_utf8_21(struct terminal *term, uint8_t c) +{ + // wc = ((utf8[0] & 0x1f) << 6) | (utf8[1] & 0x3f) + term->vt.utf8 = (c & 0x1f) << 6; +} + +static void +action_utf8_22(struct terminal *term, uint8_t c) +{ + // wc = ((utf8[0] & 0x1f) << 6) | (utf8[1] & 0x3f) + term->vt.utf8 |= c & 0x3f; + action_utf8_print(term, term->vt.utf8); +} + +static void +action_utf8_31(struct terminal *term, uint8_t c) +{ + // wc = ((utf8[0] & 0xf) << 12) | ((utf8[1] & 0x3f) << 6) | (utf8[2] & 0x3f) + term->vt.utf8 = (c & 0x0f) << 12; +} + +static void +action_utf8_32(struct terminal *term, uint8_t c) +{ + // wc = ((utf8[0] & 0xf) << 12) | ((utf8[1] & 0x3f) << 6) | (utf8[2] & 0x3f) + term->vt.utf8 |= (c & 0x3f) << 6; +} + +static void +action_utf8_33(struct terminal *term, uint8_t c) +{ + // wc = ((utf8[0] & 0xf) << 12) | ((utf8[1] & 0x3f) << 6) | (utf8[2] & 0x3f) + term->vt.utf8 |= c & 0x3f; + action_utf8_print(term, term->vt.utf8); +} + +static void +action_utf8_41(struct terminal *term, uint8_t c) +{ + // wc = ((utf8[0] & 7) << 18) | ((utf8[1] & 0x3f) << 12) | ((utf8[2] & 0x3f) << 6) | (utf8[3] & 0x3f); + term->vt.utf8 = (c & 0x07) << 18; +} + +static void +action_utf8_42(struct terminal *term, uint8_t c) +{ + // wc = ((utf8[0] & 7) << 18) | ((utf8[1] & 0x3f) << 12) | ((utf8[2] & 0x3f) << 6) | (utf8[3] & 0x3f); + term->vt.utf8 |= (c & 0x3f) << 12; +} + +static void +action_utf8_43(struct terminal *term, uint8_t c) +{ + // wc = ((utf8[0] & 7) << 18) | ((utf8[1] & 0x3f) << 12) | ((utf8[2] & 0x3f) << 6) | (utf8[3] & 0x3f); + term->vt.utf8 |= (c & 0x3f) << 6; +} + +static void +action_utf8_44(struct terminal *term, uint8_t c) +{ + // wc = ((utf8[0] & 7) << 18) | ((utf8[1] & 0x3f) << 12) | ((utf8[2] & 0x3f) << 6) | (utf8[3] & 0x3f); + term->vt.utf8 |= c & 0x3f; + action_utf8_print(term, term->vt.utf8); +} + static enum state state_ground_switch(struct terminal *term, 
uint8_t data) { @@ -678,9 +737,9 @@ state_ground_switch(struct terminal *term, uint8_t data) case 0x20 ... 0x7f: action_print(term, data); return STATE_GROUND; - case 0xc0 ... 0xdf: action_utf8_entry(term, data); return STATE_UTF8_COLLECT_1; - case 0xe0 ... 0xef: action_utf8_entry(term, data); return STATE_UTF8_COLLECT_2; - case 0xf0 ... 0xf7: action_utf8_entry(term, data); return STATE_UTF8_COLLECT_3; + case 0xc2 ... 0xdf: action_utf8_21(term, data); return STATE_UTF8_21; + case 0xe0 ... 0xef: action_utf8_31(term, data); return STATE_UTF8_31; + case 0xf0 ... 0xf4: action_utf8_41(term, data); return STATE_UTF8_41; /* Anywhere */ case 0x18: action_execute(term, data); return STATE_GROUND; @@ -1129,25 +1188,63 @@ state_sos_pm_apc_string_switch(struct terminal *term, uint8_t data) } static enum state -state_utf8_collect_1_switch(struct terminal *term, uint8_t data) +state_utf8_21_switch(struct terminal *term, uint8_t data) { - term->vt.utf8.data[term->vt.utf8.idx++] = data; - action_utf8_print(term, data); - return STATE_GROUND; + switch (data) { + /* exit current enter new state */ + case 0x80 ... 0xbf: action_utf8_22(term, data); return STATE_GROUND; + default: action_utf8_print(term, 0); return STATE_GROUND; /* any byte outside 0x80..0xbf aborts the sequence: code point 0 is passed to action_utf8_print and the offending byte is not handed to any other handler in this function. NOTE(review): this also covers control bytes such as ESC (0x1b) arriving mid-sequence - confirm that dropping them is intended; the same default branch is repeated in every state_utf8_NM_switch function */ + } } static enum state -state_utf8_collect_2_switch(struct terminal *term, uint8_t data) +state_utf8_31_switch(struct terminal *term, uint8_t data) { - term->vt.utf8.data[term->vt.utf8.idx++] = data; - return STATE_UTF8_COLLECT_1; + switch (data) { + /* exit current enter new state */ + case 0x80 ... 0xbf: action_utf8_32(term, data); return STATE_UTF8_32; /* NOTE(review): the full 0x80..0xbf range is accepted whatever the lead byte was, so overlong forms (0xe0 followed by 0x80..0x9f) and surrogates (0xed followed by 0xa0..0xbf) are decoded rather than rejected - confirm whether stricter validation is wanted */ + default: action_utf8_print(term, 0); return STATE_GROUND; + } } static enum state -state_utf8_collect_3_switch(struct terminal *term, uint8_t data) +state_utf8_32_switch(struct terminal *term, uint8_t data) { - term->vt.utf8.data[term->vt.utf8.idx++] = data; - return STATE_UTF8_COLLECT_2; + switch (data) { + /* exit current enter new state */ + case 0x80 ... 
0xbf: action_utf8_33(term, data); return STATE_GROUND; + default: action_utf8_print(term, 0); return STATE_GROUND; + } +} + +static enum state +state_utf8_41_switch(struct terminal *term, uint8_t data) +{ + switch (data) { + /* exit current enter new state */ + case 0x80 ... 0xbf: action_utf8_42(term, data); return STATE_UTF8_42; /* NOTE(review): with the full 0x80..0xbf range accepted here, 0xf0 followed by 0x80..0x8f is an overlong form and 0xf4 followed by 0x90..0xbf yields a value above 0x10ffff - confirm whether these should be rejected */ + default: action_utf8_print(term, 0); return STATE_GROUND; + } +} + +static enum state +state_utf8_42_switch(struct terminal *term, uint8_t data) +{ + switch (data) { + /* exit current enter new state */ + case 0x80 ... 0xbf: action_utf8_43(term, data); return STATE_UTF8_43; + default: action_utf8_print(term, 0); return STATE_GROUND; + } +} + +static enum state +state_utf8_43_switch(struct terminal *term, uint8_t data) +{ + switch (data) { + /* exit current enter new state */ + case 0x80 ... 0xbf: action_utf8_44(term, data); return STATE_GROUND; + default: action_utf8_print(term, 0); return STATE_GROUND; + } } void @@ -1173,9 +1270,12 @@ vt_from_slave(struct terminal *term, const uint8_t *data, size_t len) case STATE_DCS_PASSTHROUGH: current_state = state_dcs_passthrough_switch(term, *p); break; case STATE_SOS_PM_APC_STRING: current_state = state_sos_pm_apc_string_switch(term, *p); break; - case STATE_UTF8_COLLECT_1: current_state = state_utf8_collect_1_switch(term, *p); break; - case STATE_UTF8_COLLECT_2: current_state = state_utf8_collect_2_switch(term, *p); break; - case STATE_UTF8_COLLECT_3: current_state = state_utf8_collect_3_switch(term, *p); break; + case STATE_UTF8_21: current_state = state_utf8_21_switch(term, *p); break; + case STATE_UTF8_31: current_state = state_utf8_31_switch(term, *p); break; + case STATE_UTF8_32: current_state = state_utf8_32_switch(term, *p); break; + case STATE_UTF8_41: current_state = state_utf8_41_switch(term, *p); break; + case STATE_UTF8_42: current_state = state_utf8_42_switch(term, *p); break; + case STATE_UTF8_43: current_state = state_utf8_43_switch(term, *p); break; } } From 
97221dd09b013c4ed67b11d6a7102232471c2114 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Ekl=C3=B6f?= Date: Tue, 9 Jun 2020 17:30:49 +0200 Subject: [PATCH 2/3] vt: utf8-print: check width == 0 first, when deciding whether to do combining --- vt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vt.c b/vt.c index ed3a8eae..454ba11d 100644 --- a/vt.c +++ b/vt.c @@ -527,7 +527,7 @@ action_utf8_print(struct terminal *term, wchar_t wc) * * TODO: handle line-wrap when locating the base character. */ - if (wc >= 0x0300 && width == 0 && term->grid->cursor.point.col > 0) { + if (width == 0 && wc >= 0x0300 && term->grid->cursor.point.col > 0) { /* same three conditions as the removed line, reordered so that width == 0 is evaluated first */ const struct row *row = term->grid->cur_row; int base_col = term->grid->cursor.point.col; From 9df7e8fa07a234b6a7b33b52310e7ddaa884850f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Ekl=C3=B6f?= Date: Tue, 9 Jun 2020 17:31:28 +0200 Subject: [PATCH 3/3] term: print_insert: early return --- terminal.c | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/terminal.c b/terminal.c index 8181a610..f43c7320 100644 --- a/terminal.c +++ b/terminal.c @@ -2353,19 +2353,20 @@ print_insert(struct terminal *term, int width) { assert(width > 0); - if (unlikely(term->insert_mode)) { - struct row *row = term->grid->cur_row; - const size_t move_count = max(0, term->cols - term->grid->cursor.point.col - width); + if (likely(!term->insert_mode)) + return; /* outside insert mode no cells are shifted; the code below is the former body of the insert_mode branch */ - memmove( - &row->cells[term->grid->cursor.point.col + width], - &row->cells[term->grid->cursor.point.col], - move_count * sizeof(struct cell)); + struct row *row = term->grid->cur_row; + const size_t move_count = max(0, term->cols - term->grid->cursor.point.col - width); - /* Mark moved cells as dirty */ - for (size_t i = term->grid->cursor.point.col + width; i < term->cols; i++) - row->cells[i].attrs.clean = 0; - } + memmove( + &row->cells[term->grid->cursor.point.col + width], + &row->cells[term->grid->cursor.point.col], + move_count * 
sizeof(struct cell)); + + /* Mark moved cells as dirty */ + for (size_t i = term->grid->cursor.point.col + width; i < term->cols; i++) + row->cells[i].attrs.clean = 0; } void