From 0c03e9a7668505c77bfcb31d999a46079574e7ae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Ekl=C3=B6f?= Date: Mon, 9 Aug 2021 18:25:36 +0200 Subject: [PATCH 1/2] config: add url.uri-characters This option specifies the characters allowed in the auto-detected URLs. Any character not in this set constitutes an URL delimiter, and will never be included in auto-detected URLs. This option does not affect OSC-8 URLs. Closes #654 --- config.c | 35 +++++++++++++ config.h | 1 + doc/foot.ini.5.scd | 7 +++ foot.ini | 3 +- url-mode.c | 119 ++++++++++++++++++++++++++++----------------- 5 files changed, 119 insertions(+), 46 deletions(-) diff --git a/config.c b/config.c index f57129a7..57345ebf 100644 --- a/config.c +++ b/config.c @@ -364,6 +364,14 @@ done: goto out; } +static int +wccmp(const void *_a, const void *_b) +{ + const wchar_t *a = _a; + const wchar_t *b = _b; + return *a - *b; +} + static bool str_has_prefix(const char *str, const char *prefix) { @@ -1213,6 +1221,24 @@ parse_section_url(const char *key, const char *value, struct config *conf, free(copy); } + else if (strcmp(key, "uri-characters") == 0) { + wchar_t *uri_characters; + if (!str_to_wchars(value, &uri_characters, conf, path, lineno, + "url", "uri-characters")) + { + return false; + } + + free(conf->url.uri_characters); + + qsort( + uri_characters, + wcslen(uri_characters), + sizeof(uri_characters[0]), + &wccmp); + conf->url.uri_characters = uri_characters; + } + else { LOG_AND_NOTIFY_ERR("%s:%d: [url]: %s: invalid key", path, lineno, key); return false; @@ -2842,6 +2868,7 @@ config_load(struct config *conf, const char *conf_path, }, .url = { .label_letters = xwcsdup(L"sadfjklewcmpgh"), + .uri_characters = xwcsdup(L"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_.,~:;/?#@!$&%*+=\"'()[]"), .osc8_underline = OSC8_UNDERLINE_URL_MODE, }, .can_shape_grapheme = fcft_caps & FCFT_CAPABILITY_GRAPHEME_SHAPING, @@ -2985,6 +3012,12 @@ config_load(struct config *conf, const char *conf_path, 
conf->url.protocols[i] = xwcsdup(url_protocols[i]); } + qsort( + conf->url.uri_characters, + wcslen(conf->url.uri_characters), + sizeof(conf->url.uri_characters[0]), + &wccmp); + tll_foreach(*initial_user_notifications, it) { tll_push_back(conf->notifications, it->item); tll_remove(*initial_user_notifications, it); @@ -3207,6 +3240,7 @@ config_clone(const struct config *old) config_font_list_clone(&conf->csd.font, &old->csd.font); conf->url.label_letters = xwcsdup(old->url.label_letters); + conf->url.uri_characters = xwcsdup(old->url.uri_characters); spawn_template_clone(&conf->url.launch, &old->url.launch); conf->url.protocols = xmalloc( old->url.prot_count * sizeof(conf->url.protocols[0])); @@ -3274,6 +3308,7 @@ config_free(struct config conf) for (size_t i = 0; i < conf.url.prot_count; i++) free(conf.url.protocols[i]); free(conf.url.protocols); + free(conf.url.uri_characters); key_binding_list_free(&conf.bindings.key); key_binding_list_free(&conf.bindings.search); diff --git a/config.h b/config.h index ae7ff79a..ca75a177 100644 --- a/config.h +++ b/config.h @@ -151,6 +151,7 @@ struct config { } osc8_underline; wchar_t **protocols; + wchar_t *uri_characters; size_t prot_count; size_t max_prot_len; } url; diff --git a/doc/foot.ini.5.scd b/doc/foot.ini.5.scd index 5e854420..1e9152aa 100644 --- a/doc/foot.ini.5.scd +++ b/doc/foot.ini.5.scd @@ -367,6 +367,13 @@ in this order: of protocol. Default: _http, https, ftp, ftps, file, gemini, gopher_. +*uri-characters* + Set of characters allowed in auto-detected URLs. Any character not + included in this set constitutes a URL delimiter. + + Default: + _abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-\_.,~:;/?#@!$&%\*+="'()[]_. 
+ # SECTION: cursor diff --git a/foot.ini b/foot.ini index 38f15b2e..94bd2f1b 100644 --- a/foot.ini +++ b/foot.ini @@ -50,7 +50,8 @@ # launch=xdg-open ${url} # label-letters=sadfjklewcmpgh # osc8-underline=url-mode -# protocols = http, https, ftp, ftps, file, gemini, gopher +# protocols=http, https, ftp, ftps, file, gemini, gopher +# uri-characters=abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_.,~:;/?#@!$&%*+="'()[] [cursor] # style=block diff --git a/url-mode.c b/url-mode.c index a802cc6c..8fb8a8a5 100644 --- a/url-mode.c +++ b/url-mode.c @@ -206,7 +206,13 @@ urls_input(struct seat *seat, struct terminal *term, uint32_t key, } } -IGNORE_WARNING("-Wpedantic") +static int +wccmp(const void *_a, const void *_b) +{ + const wchar_t *a = _a; + const wchar_t *b = _b; + return *a - *b; +} static void auto_detected(const struct terminal *term, enum url_action action, @@ -214,6 +220,14 @@ auto_detected(const struct terminal *term, enum url_action action, { const struct config *conf = term->conf; + const wchar_t *uri_characters = conf->url.uri_characters; + if (uri_characters == NULL) + return; + + const size_t uri_characters_count = wcslen(uri_characters); + if (uri_characters_count == 0) + return; + size_t max_prot_len = conf->url.max_prot_len; wchar_t proto_chars[max_prot_len]; struct coord proto_start[max_prot_len]; @@ -230,6 +244,7 @@ auto_detected(const struct terminal *term, enum url_action action, ssize_t parenthesis = 0; ssize_t brackets = 0; + ssize_t ltgts = 0; for (int r = 0; r < term->rows; r++) { const struct row *row = grid_row_in_view(term->grid, r); @@ -267,57 +282,73 @@ auto_detected(const struct terminal *term, enum url_action action, wcsncpy(url, proto, prot_len); len = prot_len; - parenthesis = brackets = 0; + parenthesis = brackets = ltgts = 0; break; } } break; case STATE_URL: { - // static const wchar_t allowed[] = - // L"abcdefghijklmnopqrstuvwxyz0123456789-._~:/?#[]@!$&'()*+,;="; - // static const wchar_t unwise[] = L"{}|\\^[]`"; - // 
static const wchar_t reserved[] = L";/?:@&=+$,"; + const wchar_t *match = bsearch( + &wc, + uri_characters, + uri_characters_count, + sizeof(uri_characters[0]), + &wccmp); bool emit_url = false; - switch (wc) { - case L'a'...L'z': - case L'A'...L'Z': - case L'0'...L'9': - case L'-': case L'.': case L'_': case L'~': case L':': - case L'/': case L'?': case L'#': case L'@': case L'!': - case L'$': case L'&': case L'\'': case L'*': case L'+': - case L',': case L';': case L'=': case L'"': case L'%': - url[len++] = wc; - break; - case L'(': - parenthesis++; - url[len++] = wc; - break; - - case L'[': - brackets++; - url[len++] = wc; - break; - - case L')': - if (--parenthesis < 0) - emit_url = true; - else - url[len++] = wc; - break; - - case L']': - if (--brackets < 0) - emit_url = true; - else - url[len++] = wc; - break; - - default: + if (match == NULL) { + /* + * Character is not a valid URI character. Emit + * the URL we’ve collected so far, *without* + * including _this_ character. + */ emit_url = true; - break; + } else { + xassert(*match == wc); + + switch (wc) { + default: + url[len++] = wc; + break; + + case L'(': + parenthesis++; + url[len++] = wc; + break; + + case L'[': + brackets++; + url[len++] = wc; + break; + + case L'<': + ltgts++; + url[len++] = wc; + break; + + case L')': + if (--parenthesis < 0) + emit_url = true; + else + url[len++] = wc; + break; + + case L']': + if (--brackets < 0) + emit_url = true; + else + url[len++] = wc; + break; + + case L'>': + if (--ltgts < 0) + emit_url = true; + else + url[len++] = wc; + break; + } } if (c >= term->cols - 1 && row->linebreak) { @@ -382,7 +413,7 @@ auto_detected(const struct terminal *term, enum url_action action, state = STATE_PROTOCOL; len = 0; - parenthesis = brackets = 0; + parenthesis = brackets = ltgts = 0; } break; } @@ -391,8 +422,6 @@ auto_detected(const struct terminal *term, enum url_action action, } } -UNIGNORE_WARNINGS - static void osc8_uris(const struct terminal *term, enum url_action 
action, url_list_t *urls) { From 5ce48a65743cf0677415582a8a6cf70a58f2b08f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Ekl=C3=B6f?= Date: Mon, 9 Aug 2021 19:58:40 +0200 Subject: [PATCH 2/2] changelog: url.uri-characters --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1e724a75..a22e77e2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -42,6 +42,8 @@ font glyphs) (https://codeberg.org/dnkl/foot/issues/474). * `XM`+`xm` to terminfo. * Mouse buttons 6/7 (mouse wheel left/right). +* `url.uri-characters` option to `foot.ini` + (https://codeberg.org/dnkl/foot/issues/654). ### Changed