diff --git a/CHANGELOG.md b/CHANGELOG.md index a14b0fab..05bda33c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -65,6 +65,11 @@ * Support for the new Wayland protocol `xdg-system-bell-v1` protocol (added in wayland-protocols 1.38), via the new config option `bell.system=no|yes` (defaults to `yes`). +* Added support for custom regex matching ([#1386][1386], + [#1872][1872]) + +[1386]: https://codeberg.org/dnkl/foot/issues/1386 +[1872]: https://codeberg.org/dnkl/foot/issues/1872 ### Changed @@ -72,12 +77,19 @@ * Do not try to set a zero width, or height, if the compositor sends a _configure_ event with only one dimension being zero ([#1925][1925]). +* Auto-detection of URLs (i.e. not OSC-8 based URLs) are now regex + based. [1925]: https://codeberg.org/dnkl/foot/issues/1925 ### Deprecated ### Removed + +* `url.uri-characters` and `url.protocols`. Both options have been + replaced by `url.regex`. + + ### Fixed * Kitty keyboard protocol: alternate key reporting failing to report diff --git a/config.c b/config.c index a9db87d5..604c0a76 100644 --- a/config.c +++ b/config.c @@ -140,6 +140,8 @@ static const char *const binding_action_map[] = { [BIND_ACTION_PROMPT_NEXT] = "prompt-next", [BIND_ACTION_UNICODE_INPUT] = "unicode-input", [BIND_ACTION_QUIT] = "quit", + [BIND_ACTION_REGEX_LAUNCH] = "regex-launch", + [BIND_ACTION_REGEX_COPY] = "regex-copy", /* Mouse-specific actions */ [BIND_ACTION_SCROLLBACK_UP_MOUSE] = "scrollback-up-mouse", @@ -207,6 +209,7 @@ static_assert(ALEN(url_binding_action_map) == BIND_ACTION_URL_COUNT, struct context { struct config *conf; const char *section; + const char *section_suffix; const char *key; const char *value; @@ -257,8 +260,9 @@ log_contextual(struct context *ctx, enum log_class log_class, char *formatted_msg = xvasprintf(fmt, va); va_end(va); - bool print_dot = ctx->key != NULL; - bool print_colon = ctx->value != NULL; + const bool print_dot = ctx->key != NULL; + const bool print_colon = ctx->value != NULL; + const bool 
print_section_suffix = ctx->section_suffix != NULL; if (!print_dot) ctx->key = ""; @@ -266,10 +270,15 @@ log_contextual(struct context *ctx, enum log_class log_class, if (!print_colon) ctx->value = ""; + if (!print_section_suffix) + ctx->section_suffix = ""; + log_and_notify( - ctx->conf, log_class, file, lineno, "%s:%d: [%s]%s%s%s%s: %s", - ctx->path, ctx->lineno, ctx->section, print_dot ? "." : "", - ctx->key, print_colon ? ": " : "", ctx->value, formatted_msg); + ctx->conf, log_class, file, lineno, "%s:%d: [%s%s%s]%s%s%s%s: %s", + ctx->path, ctx->lineno, ctx->section, + print_section_suffix ? ":" : "", ctx->section_suffix, + print_dot ? "." : "", ctx->key, print_colon ? ": " : "", + ctx->value, formatted_msg); free(formatted_msg); } @@ -420,14 +429,6 @@ done: return ret; } -static int -c32cmp_single(const void *_a, const void *_b) -{ - const char32_t *a = _a; - const char32_t *b = _b; - return *a - *b; -} - static bool str_has_prefix(const char *str, const char *prefix) { @@ -1225,7 +1226,6 @@ parse_section_url(struct context *ctx) { struct config *conf = ctx->conf; const char *key = ctx->key; - const char *value = ctx->value; if (streq(key, "launch")) return value_to_spawn_template(ctx, &conf->url.launch); @@ -1243,67 +1243,102 @@ parse_section_url(struct context *ctx) (int *)&conf->url.osc8_underline); } - else if (streq(key, "protocols")) { - for (size_t i = 0; i < conf->url.prot_count; i++) - free(conf->url.protocols[i]); - free(conf->url.protocols); + else if (streq(key, "regex")) { + const char *regex = ctx->value; + regex_t preg; - conf->url.max_prot_len = 0; - conf->url.prot_count = 0; - conf->url.protocols = NULL; + int r = regcomp(&preg, regex, REG_EXTENDED); - char *copy = xstrdup(value); - - for (char *prot = strtok(copy, ","); - prot != NULL; - prot = strtok(NULL, ",")) - { - - /* Strip leading whitespace */ - while (isspace(prot[0])) - prot++; - - /* Strip trailing whitespace */ - size_t len = strlen(prot); - while (isspace(prot[len - 1])) - len--; 
- prot[len] = '\0'; - - size_t chars = mbsntoc32(NULL, prot, len, 0); - if (chars == (size_t)-1) { - ctx->value = prot; - LOG_CONTEXTUAL_ERRNO("invalid protocol"); - return false; - } - - conf->url.prot_count++; - conf->url.protocols = xrealloc( - conf->url.protocols, - conf->url.prot_count * sizeof(conf->url.protocols[0])); - - size_t idx = conf->url.prot_count - 1; - conf->url.protocols[idx] = xmalloc((chars + 1 + 3) * sizeof(char32_t)); - mbsntoc32(conf->url.protocols[idx], prot, len, chars + 1); - c32cpy(&conf->url.protocols[idx][chars], U"://"); - - chars += 3; /* Include the "://" */ - if (chars > conf->url.max_prot_len) - conf->url.max_prot_len = chars; + if (r != 0) { + char err_buf[128]; + regerror(r, &preg, err_buf, sizeof(err_buf)); + LOG_CONTEXTUAL_ERR("invalid regex: %s", err_buf); + return false; } - free(copy); + if (preg.re_nsub == 0) { + LOG_CONTEXTUAL_ERR("invalid regex: no marked subexpression(s)"); + regfree(&preg); + return false; + } + + regfree(&conf->url.preg); + free(conf->url.regex); + + conf->url.regex = xstrdup(regex); + conf->url.preg = preg; return true; } - else if (streq(key, "uri-characters")) { - if (!value_to_wchars(ctx, &conf->url.uri_characters)) + else { + LOG_CONTEXTUAL_ERR("not a valid option: %s", key); + return false; + } +} + +static bool +parse_section_regex(struct context *ctx) +{ + struct config *conf = ctx->conf; + const char *key = ctx->key; + + const char *regex_name = + ctx->section_suffix != NULL ? 
ctx->section_suffix : ""; + + struct custom_regex *regex = NULL; + tll_foreach(conf->custom_regexes, it) { + if (streq(it->item.name, regex_name)) { + regex = &it->item; + break; + } + } + + if (streq(key, "regex")) { + const char *regex_string = ctx->value; + regex_t preg; + + int r = regcomp(&preg, regex_string, REG_EXTENDED); + + if (r != 0) { + char err_buf[128]; + regerror(r, &preg, err_buf, sizeof(err_buf)); + LOG_CONTEXTUAL_ERR("invalid regex: %s", err_buf); + return false; + } + + if (preg.re_nsub == 0) { + LOG_CONTEXTUAL_ERR("invalid regex: no marked subexpression(s)"); + regfree(&preg); + return false; + } + + if (regex == NULL) { + tll_push_back(conf->custom_regexes, + ((struct custom_regex){.name = xstrdup(regex_name)})); + regex = &tll_back(conf->custom_regexes); + } + + regfree(®ex->preg); + free(regex->regex); + + regex->regex = xstrdup(regex_string); + regex->preg = preg; + return true; + } + + else if (streq(key, "launch")) { + struct config_spawn_template launch; + if (!value_to_spawn_template(ctx, &launch)) return false; - qsort( - conf->url.uri_characters, - c32len(conf->url.uri_characters), - sizeof(conf->url.uri_characters[0]), - &c32cmp_single); + if (regex == NULL) { + tll_push_back(conf->custom_regexes, + ((struct custom_regex){.name = xstrdup(regex_name)})); + regex = &tll_back(conf->custom_regexes); + } + + spawn_template_free(®ex->launch); + regex->launch = launch; return true; } @@ -1654,6 +1689,7 @@ free_binding_aux(struct binding_aux *aux) case BINDING_AUX_NONE: break; case BINDING_AUX_PIPE: free_argv(&aux->pipe); break; case BINDING_AUX_TEXT: free(aux->text.data); break; + case BINDING_AUX_REGEX: free(aux->regex_name); break; } } @@ -1743,7 +1779,10 @@ binding_aux_equal(const struct binding_aux *a, case BINDING_AUX_TEXT: return a->text.len == b->text.len && - memcmp(a->text.data, b->text.data, a->text.len) == 0; + memcmp(a->text.data, b->text.data, a->text.len) == 0; + + case BINDING_AUX_REGEX: + return streq(a->regex_name, 
b->regex_name); } BUG("invalid AUX type: %d", a->type); @@ -2017,19 +2056,23 @@ modifiers_disjoint(const config_modifier_list_t *mods1, } static char * NOINLINE -modifiers_to_str(const config_modifier_list_t *mods) +modifiers_to_str(const config_modifier_list_t *mods, bool strip_last_plus) { - size_t len = tll_length(*mods); /* '+' , and NULL terminator */ + size_t len = tll_length(*mods); /* '+' separator */ tll_foreach(*mods, it) len += strlen(it->item); - char *ret = xmalloc(len); + char *ret = xmalloc(len + 1); size_t idx = 0; tll_foreach(*mods, it) { idx += snprintf(&ret[idx], len - idx, "%s", it->item); ret[idx++] = '+'; } - ret[--idx] = '\0'; + + if (strip_last_plus) + idx--; + + ret[idx] = '\0'; return ret; } @@ -2088,21 +2131,40 @@ pipe_argv_from_value(struct context *ctx, struct argv *argv) return remove_len; } +static ssize_t NOINLINE +regex_name_from_value(struct context *ctx, char **regex_name) +{ + *regex_name = NULL; + + if (ctx->value[0] != '[') + return 0; + + const char *regex_end = strrchr(ctx->value, ']'); + if (regex_end == NULL) { + LOG_CONTEXTUAL_ERR("unclosed '['"); + return -1; + } + + size_t regex_len = regex_end - ctx->value - 1; + *regex_name = xstrndup(&ctx->value[1], regex_len); + + ssize_t remove_len = regex_end + 1 - ctx->value; + ctx->value = regex_end + 1; + while (isspace(*ctx->value)) { + ctx->value++; + remove_len++; + } + + return remove_len; +} + + static bool NOINLINE parse_key_binding_section(struct context *ctx, int action_count, const char *const action_map[static action_count], struct config_key_binding_list *bindings) { - struct binding_aux aux; - - ssize_t pipe_remove_len = pipe_argv_from_value(ctx, &aux.pipe); - if (pipe_remove_len < 0) - return false; - - aux.type = pipe_remove_len == 0 ? 
BINDING_AUX_NONE : BINDING_AUX_PIPE; - aux.master_copy = true; - for (int action = 0; action < action_count; action++) { if (action_map[action] == NULL) continue; @@ -2110,6 +2172,33 @@ parse_key_binding_section(struct context *ctx, if (!streq(ctx->key, action_map[action])) continue; + struct binding_aux aux = {.type = BINDING_AUX_NONE, .master_copy = true}; + + /* TODO: this is ugly... */ + if (action_map == binding_action_map && + action >= BIND_ACTION_PIPE_SCROLLBACK && + action <= BIND_ACTION_PIPE_COMMAND_OUTPUT) + { + ssize_t pipe_remove_len = pipe_argv_from_value(ctx, &aux.pipe); + if (pipe_remove_len <= 0) + return false; + + aux.type = BINDING_AUX_PIPE; + aux.master_copy = true; + } else if (action_map == binding_action_map && + action >= BIND_ACTION_REGEX_LAUNCH && + action <= BIND_ACTION_REGEX_COPY) + { + char *regex_name = NULL; + ssize_t regex_remove_len = regex_name_from_value(ctx, ®ex_name); + if (regex_remove_len <= 0) + return false; + + aux.type = BINDING_AUX_REGEX; + aux.master_copy = true; + aux.regex_name = regex_name; + } + if (!value_to_key_combos(ctx, action, &aux, bindings, KEY_BINDING)) { free_binding_aux(&aux); return false; @@ -2119,7 +2208,6 @@ parse_key_binding_section(struct context *ctx, } LOG_CONTEXTUAL_ERR("not a valid action: %s", ctx->key); - free_binding_aux(&aux); return false; } @@ -2317,7 +2405,7 @@ resolve_key_binding_collisions(struct config *conf, const char *section_name, } if (collision_type != COLLISION_NONE) { - char *modifier_names = modifiers_to_str(mods1); + char *modifier_names = modifiers_to_str(mods1, false); char sym_name[64]; switch (type){ @@ -2359,7 +2447,7 @@ resolve_key_binding_collisions(struct config *conf, const char *section_name, case COLLISION_OVERRIDE: { char *override_names = modifiers_to_str( - &conf->mouse.selection_override_modifiers); + &conf->mouse.selection_override_modifiers, true); if (override_names[0] != '\0') override_names[strlen(override_names) - 1] = '\0'; @@ -2698,7 +2786,7 @@ 
parse_section_touch(struct context *ctx) { } static bool -parse_key_value(char *kv, const char **section, const char **key, const char **value) +parse_key_value(char *kv, char **section, const char **key, const char **value) { bool section_is_needed = section != NULL; @@ -2767,6 +2855,7 @@ enum section { SECTION_DESKTOP_NOTIFICATIONS, SECTION_SCROLLBACK, SECTION_URL, + SECTION_REGEX, SECTION_COLORS, SECTION_CURSOR, SECTION_MOUSE, @@ -2788,6 +2877,7 @@ typedef bool (*parser_fun_t)(struct context *ctx); static const struct { parser_fun_t fun; const char *name; + bool allow_colon_suffix; } section_info[] = { [SECTION_MAIN] = {&parse_section_main, "main"}, [SECTION_SECURITY] = {&parse_section_security, "security"}, @@ -2795,6 +2885,7 @@ static const struct { [SECTION_DESKTOP_NOTIFICATIONS] = {&parse_section_desktop_notifications, "desktop-notifications"}, [SECTION_SCROLLBACK] = {&parse_section_scrollback, "scrollback"}, [SECTION_URL] = {&parse_section_url, "url"}, + [SECTION_REGEX] = {&parse_section_regex, "regex", true}, [SECTION_COLORS] = {&parse_section_colors, "colors"}, [SECTION_CURSOR] = {&parse_section_cursor, "cursor"}, [SECTION_MOUSE] = {&parse_section_mouse, "mouse"}, @@ -2812,11 +2903,29 @@ static const struct { static_assert(ALEN(section_info) == SECTION_COUNT, "section info array size mismatch"); static enum section -str_to_section(const char *str) +str_to_section(char *str, char **suffix) { + *suffix = NULL; + for (enum section section = SECTION_MAIN; section < SECTION_COUNT; ++section) { - if (streq(str, section_info[section].name)) + const char *name = section_info[section].name; + + if (streq(str, name)) return section; + + else if (section_info[section].allow_colon_suffix) { + const size_t str_len = strlen(str); + const size_t name_len = strlen(name); + + /* At least "section:" chars? 
*/ + if (str_len > name_len + 1) { + if (strncmp(str, name, name_len) == 0 && str[name_len] == ':') { + str[name_len] = '\0'; + *suffix = &str[name_len + 1]; + return section; + } + } + } } return SECTION_COUNT; } @@ -2840,10 +2949,12 @@ parse_config_file(FILE *f, struct config *conf, const char *path, bool errors_ar } char *section_name = xstrdup("main"); + char *section_suffix = NULL; struct context context = { .conf = conf, .section = section_name, + .section_suffix = section_suffix, .path = path, .lineno = 0, .errors_are_fatal = errors_are_fatal, @@ -2924,7 +3035,8 @@ parse_config_file(FILE *f, struct config *conf, const char *path, bool errors_ar error_or_continue(); } - section = str_to_section(key_value); + char *maybe_section_suffix; + section = str_to_section(key_value, &maybe_section_suffix); if (section == SECTION_COUNT) { context.section = key_value; LOG_CONTEXTUAL_ERR("invalid section name: %s", key_value); @@ -2933,8 +3045,11 @@ parse_config_file(FILE *f, struct config *conf, const char *path, bool errors_ar } free(section_name); + free(section_suffix); section_name = xstrdup(key_value); + section_suffix = maybe_section_suffix != NULL ? 
xstrdup(maybe_section_suffix) : NULL; context.section = section_name; + context.section_suffix = section_suffix; /* Process next line */ continue; @@ -2974,6 +3089,7 @@ parse_config_file(FILE *f, struct config *conf, const char *path, bool errors_ar done: free(section_name); + free(section_suffix); free(_line); return ret; } @@ -3068,7 +3184,6 @@ add_default_search_bindings(struct config *conf) {BIND_ACTION_SEARCH_DELETE_NEXT_WORD, m(XKB_MOD_NAME_CTRL), {{XKB_KEY_Delete}}}, {BIND_ACTION_SEARCH_DELETE_NEXT_WORD, m(XKB_MOD_NAME_ALT), {{XKB_KEY_d}}}, {BIND_ACTION_SEARCH_EXTEND_CHAR, m(XKB_MOD_NAME_SHIFT), {{XKB_KEY_Right}}}, - {BIND_ACTION_SEARCH_EXTEND_WORD, m(XKB_MOD_NAME_CTRL), {{XKB_KEY_w}}}, {BIND_ACTION_SEARCH_EXTEND_WORD, m(XKB_MOD_NAME_CTRL "+" XKB_MOD_NAME_SHIFT), {{XKB_KEY_Right}}}, {BIND_ACTION_SEARCH_EXTEND_WORD, m(XKB_MOD_NAME_CTRL), {{XKB_KEY_w}}}, {BIND_ACTION_SEARCH_EXTEND_WORD_WS, m(XKB_MOD_NAME_CTRL "+" XKB_MOD_NAME_SHIFT), {{XKB_KEY_w}}}, @@ -3196,9 +3311,9 @@ config_load(struct config *conf, const char *conf_path, }, .url = { .label_letters = xc32dup(U"sadfjklewcmpgh"), - .uri_characters = xc32dup(U"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_.,~:;/?#@!$&%*+=\"'()[]"), .osc8_underline = OSC8_UNDERLINE_URL_MODE, }, + .custom_regexes = tll_init(), .can_shape_grapheme = fcft_caps & FCFT_CAPABILITY_GRAPHEME_SHAPING, .scrollback = { .lines = 1000, @@ -3315,35 +3430,47 @@ config_load(struct config *conf, const char *conf_path, tokenize_cmdline("--action ${action-name}=${action-label}", &conf->desktop_notifications.command_action_arg.argv.args); tokenize_cmdline("xdg-open ${url}", &conf->url.launch.argv.args); - static const char32_t *url_protocols[] = { - U"http://", - U"https://", - U"ftp://", - U"ftps://", - U"file://", - U"gemini://", - U"gopher://", - U"irc://", - U"ircs://", - }; - conf->url.protocols = xmalloc( - ALEN(url_protocols) * sizeof(conf->url.protocols[0])); - conf->url.prot_count = ALEN(url_protocols); - 
conf->url.max_prot_len = 0; + { + /* + * Based on https://gist.github.com/gruber/249502, but modified: + * - Do not allow {} at all + * - Do allow matched [] + */ + const char *url_regex_string = + "(" + "(" + "[a-z][[:alnum:]-]+:" // protocol + "(" + "/{1,3}|[a-z0-9%]" // slashes (what's the OR part for?) + ")" + "|" + "www[:digit:]{0,3}[.]" + //"|" + //"[a-z0-9.\\-]+[.][a-z]{2,4}/" /* "looks like domain name followed by a slash" - remove? */ + ")" + "(" + "[^[:space:](){}<>]+" + "|" + "\\(([^[:space:](){}<>]+|(\\([^[:space:](){}<>]+\\)))*\\)" + "|" + "\\[([^]\\[[:space:](){}<>]+|(\\[[^]\\[[:space:](){}<>]+\\]))*\\]" + ")+" + "(" + "\\(([^[:space:](){}<>]+|(\\([^[:space:](){}<>]+\\)))*\\)" + "|" + "\\[([^]\\[[:space:](){}<>]+|(\\[[^]\\[[:space:](){}<>]+\\]))*\\]" + "|" + "[^]\\[[:space:]`!(){};:'\".,<>?«»“”‘’]" + ")" + ")" + ; - for (size_t i = 0; i < ALEN(url_protocols); i++) { - size_t len = c32len(url_protocols[i]); - if (len > conf->url.max_prot_len) - conf->url.max_prot_len = len; - conf->url.protocols[i] = xc32dup(url_protocols[i]); + int r = regcomp(&conf->url.preg, url_regex_string, REG_EXTENDED); + xassert(r == 0); + conf->url.regex = xstrdup(url_regex_string); + xassert(conf->url.preg.re_nsub >= 1); } - qsort( - conf->url.uri_characters, - c32len(conf->url.uri_characters), - sizeof(conf->url.uri_characters[0]), - &c32cmp_single); - tll_foreach(*initial_user_notifications, it) { tll_push_back(conf->notifications, it->item); tll_remove(*initial_user_notifications, it); @@ -3430,6 +3557,8 @@ bool config_override_apply(struct config *conf, config_override_t *overrides, bool errors_are_fatal) { + char *section_name = NULL; + struct context context = { .conf = conf, .path = "override", @@ -3441,8 +3570,7 @@ config_override_apply(struct config *conf, config_override_t *overrides, tll_foreach(*overrides, it) { context.lineno++; - if (!parse_key_value( - it->item, &context.section, &context.key, &context.value)) + if (!parse_key_value(it->item, §ion_name, 
&context.key, &context.value)) { LOG_CONTEXTUAL_ERR("syntax error: key/value pair has no %s", context.key == NULL ? "key" : "value"); @@ -3451,20 +3579,26 @@ config_override_apply(struct config *conf, config_override_t *overrides, continue; } - if (context.section[0] == '\0') { + if (section_name[0] == '\0') { LOG_CONTEXTUAL_ERR("empty section name"); if (errors_are_fatal) return false; continue; } - enum section section = str_to_section(context.section); + char *maybe_section_suffix = NULL; + enum section section = str_to_section(section_name, &maybe_section_suffix); + + context.section = section_name; + context.section_suffix = maybe_section_suffix; + if (section == SECTION_COUNT) { - LOG_CONTEXTUAL_ERR("invalid section name: %s", context.section); + LOG_CONTEXTUAL_ERR("invalid section name: %s", section_name); if (errors_are_fatal) return false; continue; } + parser_fun_t section_parser = section_info[section].fun; xassert(section_parser != NULL); @@ -3500,6 +3634,7 @@ key_binding_list_clone(struct config_key_binding_list *dst, struct argv *last_master_argv = NULL; uint8_t *last_master_text_data = NULL; size_t last_master_text_len = 0; + char *last_master_regex_name = NULL; dst->count = src->count; dst->arr = xmalloc(src->count * sizeof(dst->arr[0])); @@ -3547,6 +3682,16 @@ key_binding_list_clone(struct config_key_binding_list *dst, } last_master_argv = NULL; break; + + case BINDING_AUX_REGEX: + if (old->aux.master_copy) { + new->aux.regex_name = xstrdup(old->aux.regex_name); + last_master_regex_name = new->aux.regex_name; + } else { + xassert(last_master_regex_name != NULL); + new->aux.regex_name = last_master_regex_name; + } + break; } } } @@ -3577,12 +3722,23 @@ config_clone(const struct config *old) config_font_list_clone(&conf->csd.font, &old->csd.font); conf->url.label_letters = xc32dup(old->url.label_letters); - conf->url.uri_characters = xc32dup(old->url.uri_characters); spawn_template_clone(&conf->url.launch, &old->url.launch); - conf->url.protocols = 
xmalloc( - old->url.prot_count * sizeof(conf->url.protocols[0])); - for (size_t i = 0; i < old->url.prot_count; i++) - conf->url.protocols[i] = xc32dup(old->url.protocols[i]); + conf->url.regex = xstrdup(old->url.regex); + regcomp(&conf->url.preg, conf->url.regex, REG_EXTENDED); + + memset(&conf->custom_regexes, 0, sizeof(conf->custom_regexes)); + tll_foreach(old->custom_regexes, it) { + const struct custom_regex *old_regex = &it->item; + + tll_push_back(conf->custom_regexes, + ((struct custom_regex){.name = xstrdup(old_regex->name), + .regex = xstrdup(old_regex->regex)})); + + + struct custom_regex *new_regex = &tll_back(conf->custom_regexes); + regcomp(&new_regex->preg, new_regex->regex, REG_EXTENDED); + spawn_template_clone(&new_regex->launch, &old_regex->launch); + } key_binding_list_clone(&conf->bindings.key, &old->bindings.key); key_binding_list_clone(&conf->bindings.search, &old->bindings.search); @@ -3663,10 +3819,17 @@ config_free(struct config *conf) free(conf->url.label_letters); spawn_template_free(&conf->url.launch); - for (size_t i = 0; i < conf->url.prot_count; i++) - free(conf->url.protocols[i]); - free(conf->url.protocols); - free(conf->url.uri_characters); + regfree(&conf->url.preg); + free(conf->url.regex); + + tll_foreach(conf->custom_regexes, it) { + struct custom_regex *regex = &it->item; + free(regex->name); + free(regex->regex); + regfree(®ex->preg); + spawn_template_free(®ex->launch); + tll_remove(conf->custom_regexes, it); + } free_key_binding_list(&conf->bindings.key); free_key_binding_list(&conf->bindings.search); diff --git a/config.h b/config.h index 7d9f88c3..3535064e 100644 --- a/config.h +++ b/config.h @@ -1,7 +1,8 @@ #pragma once -#include +#include #include +#include #include #include @@ -60,6 +61,7 @@ enum binding_aux_type { BINDING_AUX_NONE, BINDING_AUX_PIPE, BINDING_AUX_TEXT, + BINDING_AUX_REGEX, }; struct binding_aux { @@ -73,6 +75,8 @@ struct binding_aux { uint8_t *data; size_t len; } text; + + char *regex_name; }; }; @@ 
-120,6 +124,13 @@ struct env_var { }; typedef tll(struct env_var) env_var_list_t; +struct custom_regex { + char *name; + char *regex; + regex_t preg; + struct config_spawn_template launch; +}; + struct config { char *term; char *shell; @@ -220,12 +231,12 @@ struct config { OSC8_UNDERLINE_ALWAYS, } osc8_underline; - char32_t **protocols; - char32_t *uri_characters; - size_t prot_count; - size_t max_prot_len; + char *regex; + regex_t preg; } url; + tll(struct custom_regex) custom_regexes; + struct { uint32_t fg; uint32_t bg; diff --git a/doc/foot.ini.5.scd b/doc/foot.ini.5.scd index 903d3375..61216c75 100644 --- a/doc/foot.ini.5.scd +++ b/doc/foot.ini.5.scd @@ -755,6 +755,9 @@ xdgtoken=95ebdfe56e4f47ddb5bba9d7dc3a2c35 # SECTION: url +Note that you can also add custom regular expressions, see the 'regex' +section. + *launch* Command to execute when opening URLs. _${url}_ will be replaced with the actual URL. Default: _xdg-open ${url}_. @@ -782,19 +785,48 @@ xdgtoken=95ebdfe56e4f47ddb5bba9d7dc3a2c35 Default: _sadfjklewcmpgh_. -*protocols* - Comma separated list of protocols (schemes) that should be - recognized in URL mode. Note that only auto-detected URLs are - affected by this option. OSC-8 URLs are always enabled, regardless - of protocol. Default: _http, https, ftp, ftps, file, gemini, - gopher, irc, ircs_. - -*uri-characters* - Set of characters allowed in auto-detected URLs. Any character not - included in this set constitutes a URL delimiter. +*regex* + Regular expression to use when auto-detecting URLs. The format is + "POSIX-Extended Regular Expressions". Note that the first marked + subexpression is used as the URL. In other words, if you want the + whole regex match to be used as an URL, surround all of it with + parenthesis: *(regex-pattern)*. 
- Default: - _abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-\_.,~:;/?#@!$&%\*+="'()[]_ + Default: _(([a-z][[:alnum:]-]+:(/{1,3}|[a-z0-9%])|www[:digit:]{0,3}[.])([^[:space:](){}<>]+|\(([^[:space:](){}<>]+|(\([^[:space:](){}<>]+\)))\*\)|\[([^]\[[:space:](){}<>]+|(\[[^]\[[:space:](){}<>]+\]))\*\])+(\(([^[:space:](){}<>]+|(\([^[:space:](){}<>]+\)))\*\)|\[([^]\[[:space:](){}<>]+|(\[[^]\[[:space:](){}<>]+\]))\*\]|[^]\[[:space:]`!(){};:'".,<>?«»“”‘’]))_ + +# SECTION: regex + +Similar to the 'url' mode, but with custom defined regular expressions +(and launchers). + +To use a custom defined regular expression, you also need to add a key +binding for it. This is done in the *key-bindings* section, see below +for details. For example, a regex to detect hash digests (e.g. git +commit hashes) could look like: + +``` +[regex:hashes] +regex=([a-fA-F0-9]{7,128}) +launch=path-to-script-or-application ${match} + +[key-bindings] +regex-launch=[hashes] Control+Shift+q +regex-copy=[hashes] Control+Mod1+Shift+q +``` + +*launch* + Command to execute when "launching" a regex match. _${match}_ will + be replaced with the actual match. Default: _not set_. + +*regex* + Regular expression to use when matching text. The format is + "POSIX-Extended Regular Expressions". Note that the first marked + subexpression is used as the match. In other words, if you want + the whole regex match to be used, surround all of it with + parentheses: *(regex-pattern)*. + + Default: _not set_. + # SECTION: cursor @@ -1238,6 +1270,30 @@ e.g. *search-start=none*. jump label with a key sequence that will place the URL in the clipboard. Default: _none_. +*regex-launch* + Enter regex mode. This works exactly the same as URL mode; all + regex matches are tagged with a jump label with a key sequence + that will "launch" the match (and exit regex mode). 
+ + The name of the regex section must be specified in the key + binding: + + ``` + [regex:hashes] + regex=([a-fA-F0-9]{7,128}) + launch=path-to-script-or-application ${match} + + [key-bindings] + regex-launch=[hashes] Control+Shift+q + regex-copy=[hashes] Control+Mod1+Shift+q + ``` + + Default: _none_. + +*regex-copy* + Same as *regex-launch*, but the match is placed in the clipboard, + instead of "launched", upon activation. Default: _none_. + *prompt-prev* Jump to the previous, currently not visible, prompt (requires shell integration, see *foot*(1)). Default: _Control+Shift+z_. diff --git a/foot.ini b/foot.ini index 17fabd3d..a1aa118c 100644 --- a/foot.ini +++ b/foot.ini @@ -69,8 +69,19 @@ # launch=xdg-open ${url} # label-letters=sadfjklewcmpgh # osc8-underline=url-mode -# protocols=http, https, ftp, ftps, file, gemini, gopher -# uri-characters=abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_.,~:;/?#@!$&%*+="'()[] +# regex=(([a-z][[:alnum:]-]+:(/{1,3}|[a-z0-9%])|www[:digit:]{0,3}[.])([^[:space:](){}<>]+|\(([^[:space:](){}<>]+|(\([^[:space:](){}<>]+\)))*\)|\[([^]\[[:space:](){}<>]+|(\[[^]\[[:space:](){}<>]+\]))*\])+(\(([^[:space:](){}<>]+|(\([^[:space:](){}<>]+\)))*\)|\[([^]\[[:space:](){}<>]+|(\[[^]\[[:space:](){}<>]+\]))*\]|[^]\[[:space:]`!(){};:'".,<>?«»“”‘’])) + +# You can define your own regexes, by adding a section called +# 'regex:<name>' with a 'regex' and 'launch' key. These can then be tied +# to a key-binding. 
See foot.ini(5) for details + +# [regex:your-fancy-name] +# regex= +# launch= ${match} +# +# [key-bindings] +# regex-launch=[your-fancy-name] Control+Shift+q +# regex-copy=[your-fancy-name] Control+Alt+Shift+q [cursor] # style=block diff --git a/grid.c b/grid.c index 3f5c617d..b7c0447c 100644 --- a/grid.c +++ b/grid.c @@ -36,7 +36,8 @@ grid_row_abs_to_sb(const struct grid *grid, int screen_rows, int abs_row) return rebased_row; } -int grid_row_sb_to_abs(const struct grid *grid, int screen_rows, int sb_rel_row) +int +grid_row_sb_to_abs(const struct grid *grid, int screen_rows, int sb_rel_row) { const int scrollback_start = grid->offset + screen_rows; int abs_row = sb_rel_row + scrollback_start; diff --git a/input.c b/input.c index c3ddbf13..916f30e4 100644 --- a/input.c +++ b/input.c @@ -349,9 +349,9 @@ execute_binding(struct seat *seat, struct terminal *term, action == BIND_ACTION_SHOW_URLS_LAUNCH ? URL_ACTION_LAUNCH : URL_ACTION_PERSISTENT; - urls_collect(term, url_action, &term->urls); + urls_collect(term, url_action, &term->conf->url.preg, true, &term->urls); urls_assign_key_combos(term->conf, &term->urls); - urls_render(term); + urls_render(term, &term->conf->url.launch); return true; } @@ -448,6 +448,42 @@ execute_binding(struct seat *seat, struct terminal *term, term_shutdown(term); return true; + case BIND_ACTION_REGEX_LAUNCH: + case BIND_ACTION_REGEX_COPY: + if (binding->aux->type != BINDING_AUX_REGEX) + return true; + + tll_foreach(term->conf->custom_regexes, it) { + const struct custom_regex *regex = &it->item; + + if (streq(regex->name, binding->aux->regex_name)) { + xassert(!urls_mode_is_active(term)); + + enum url_action url_action = action == BIND_ACTION_REGEX_LAUNCH + ? 
URL_ACTION_LAUNCH : URL_ACTION_COPY; + + if (regex->regex == NULL) { + LOG_ERR("regex:%s has no regex defined", regex->name); + return true; + } + if (url_action == URL_ACTION_LAUNCH && regex->launch.argv.args == NULL) { + LOG_ERR("regex:%s has no launch command defined", regex->name); + return true; + } + + urls_collect(term, url_action, ®ex->preg, false, &term->urls); + urls_assign_key_combos(term->conf, &term->urls); + urls_render(term, ®ex->launch); + return true; + } + } + + LOG_ERR( + "no regex section named '%s' defined in the configuration", + binding->aux->regex_name); + + return true; + case BIND_ACTION_SELECT_BEGIN: selection_start( term, seat->mouse.col, seat->mouse.row, SELECTION_CHAR_WISE, false); diff --git a/key-binding.h b/key-binding.h index f42dbc48..5f5bb9d7 100644 --- a/key-binding.h +++ b/key-binding.h @@ -41,6 +41,8 @@ enum bind_action_normal { BIND_ACTION_PROMPT_NEXT, BIND_ACTION_UNICODE_INPUT, BIND_ACTION_QUIT, + BIND_ACTION_REGEX_LAUNCH, + BIND_ACTION_REGEX_COPY, /* Mouse specific actions - i.e. 
they require a mouse coordinate */ BIND_ACTION_SCROLLBACK_UP_MOUSE, @@ -54,7 +56,7 @@ enum bind_action_normal { BIND_ACTION_SELECT_QUOTE, BIND_ACTION_SELECT_ROW, - BIND_ACTION_KEY_COUNT = BIND_ACTION_QUIT + 1, + BIND_ACTION_KEY_COUNT = BIND_ACTION_REGEX_COPY + 1, BIND_ACTION_COUNT = BIND_ACTION_SELECT_ROW + 1, }; diff --git a/terminal.h b/terminal.h index 813510fe..4242ed1d 100644 --- a/terminal.h +++ b/terminal.h @@ -789,6 +789,7 @@ struct terminal { bool urls_show_uri_on_jump_label; struct grid *url_grid_snapshot; bool ime_reenable_after_url_mode; + const struct config_spawn_template *url_launch; #if defined(FOOT_IME_ENABLED) && FOOT_IME_ENABLED bool ime_enabled; diff --git a/tests/test-config.c b/tests/test-config.c index 303ddd6f..c9f6586c 100644 --- a/tests/test-config.c +++ b/tests/test-config.c @@ -106,50 +106,6 @@ test_c32string(struct context *ctx, bool (*parse_fun)(struct context *ctx), } } -static void -test_protocols(struct context *ctx, bool (*parse_fun)(struct context *ctx), - const char *key, char32_t **const *ptr) -{ - ctx->key = key; - - static const struct { - const char *option_string; - int count; - const char32_t *value[2]; - bool invalid; - } input[] = { - {""}, - {"http", 1, {U"http://"}}, - {" http", 1, {U"http://"}}, - {"http, https", 2, {U"http://", U"https://"}}, - {"longprotocolislong", 1, {U"longprotocolislong://"}}, - }; - - for (size_t i = 0; i < ALEN(input); i++) { - ctx->value = input[i].option_string; - - if (input[i].invalid) { - if (parse_fun(ctx)) { - BUG("[%s].%s=%s: did not fail to parse as expected", - ctx->section, ctx->key, &ctx->value[0]); - } - } else { - if (!parse_fun(ctx)) { - BUG("[%s].%s=%s: failed to parse", - ctx->section, ctx->key, &ctx->value[0]); - } - for (int c = 0; c < input[i].count; c++) { - if (c32cmp((*ptr)[c], input[i].value[c]) != 0) { - BUG("[%s].%s=%s: set value[%d] (%ls) not the expected one (%ls)", - ctx->section, ctx->key, &ctx->value[c], c, - (const wchar_t *)(*ptr)[c], - (const wchar_t 
*)input[i].value[c]); - } - } - } - } -} - static void test_boolean(struct context *ctx, bool (*parse_fun)(struct context *ctx), const char *key, const bool *ptr) @@ -647,9 +603,6 @@ test_section_url(void) (int []){OSC8_UNDERLINE_URL_MODE, OSC8_UNDERLINE_ALWAYS}, (int *)&conf.url.osc8_underline); test_c32string(&ctx, &parse_section_url, "label-letters", &conf.url.label_letters); - test_protocols(&ctx, &parse_section_url, "protocols", &conf.url.protocols); - - /* TODO: uri-characters (wchar string, but sorted) */ config_free(&conf); } @@ -843,7 +796,7 @@ static void test_key_binding(struct context *ctx, bool (*parse_fun)(struct context *ctx), int action, int max_action, const char *const *map, struct config_key_binding_list *bindings, - enum key_binding_type type) + enum key_binding_type type, bool need_argv, bool need_section_id) { xassert(map[action] != NULL); xassert(bindings->count == 0); @@ -855,7 +808,10 @@ test_key_binding(struct context *ctx, bool (*parse_fun)(struct context *ctx), const bool alt = action % 3; const bool shift = action % 4; const bool super = action % 5; - const bool argv = action % 6; + const bool argv = need_argv; + const bool section_id = need_section_id; + + xassert(!(argv && section_id)); static const char *const args[] = { "command", "arg1", "arg2", "arg3 has spaces"}; @@ -894,7 +850,7 @@ test_key_binding(struct context *ctx, bool (*parse_fun)(struct context *ctx), xkb_keysym_get_name(sym, sym_name, sizeof(sym_name)); snprintf(value, sizeof(value), "%s%s%s", - argv ? "[command arg1 arg2 \"arg3 has spaces\"] " : "", + argv ? "[command arg1 arg2 \"arg3 has spaces\"] " : section_id ? "[foobar]" : "", modifier_string, sym_name); break; } @@ -903,7 +859,7 @@ test_key_binding(struct context *ctx, bool (*parse_fun)(struct context *ctx), const char *const button_name = button_map[button_idx].name; int chars = snprintf( value, sizeof(value), "%s%s%s", - argv ? "[command arg1 arg2 \"arg3 has spaces\"] " : "", + argv ? 
"[command arg1 arg2 \"arg3 has spaces\"] " : section_id ? "[foobar]" : "", modifier_string, button_name); xassert(click_count > 0); @@ -944,6 +900,18 @@ test_key_binding(struct context *ctx, bool (*parse_fun)(struct context *ctx), ctx->section, ctx->key, ctx->value, ALEN(args), binding->aux.pipe.args[ALEN(args)]); } + } else if (section_id) { + if (binding->aux.regex_name == NULL) { + BUG("[%s].%s=%s: regex name is NULL", + ctx->section, ctx->key, ctx->value); + } + + if (!streq(binding->aux.regex_name, "foobar")) { + BUG("[%s].%s=%s: regex name not the expected one: " + "expected=\"%s\", got=\"%s\"", + ctx->section, ctx->key, ctx->value, + "foobar", binding->aux.regex_name); + } } else { if (binding->aux.pipe.args != NULL) { BUG("[%s].%s=%s: pipe argv not NULL", @@ -1139,7 +1107,9 @@ test_section_key_bindings(void) test_key_binding( &ctx, &parse_section_key_bindings, action, BIND_ACTION_KEY_COUNT - 1, - binding_action_map, &conf.bindings.key, KEY_BINDING); + binding_action_map, &conf.bindings.key, KEY_BINDING, + action >= BIND_ACTION_PIPE_SCROLLBACK && action <= BIND_ACTION_PIPE_COMMAND_OUTPUT, + action >= BIND_ACTION_REGEX_LAUNCH && action <= BIND_ACTION_REGEX_COPY); } config_free(&conf); @@ -1174,7 +1144,8 @@ test_section_search_bindings(void) test_key_binding( &ctx, &parse_section_search_bindings, action, BIND_ACTION_SEARCH_COUNT - 1, - search_binding_action_map, &conf.bindings.search, KEY_BINDING); + search_binding_action_map, &conf.bindings.search, KEY_BINDING, + false, false); } config_free(&conf); @@ -1210,7 +1181,8 @@ test_section_url_bindings(void) test_key_binding( &ctx, &parse_section_url_bindings, action, BIND_ACTION_URL_COUNT - 1, - url_binding_action_map, &conf.bindings.url, KEY_BINDING); + url_binding_action_map, &conf.bindings.url, KEY_BINDING, + false, false); } config_free(&conf); @@ -1246,7 +1218,8 @@ test_section_mouse_bindings(void) test_key_binding( &ctx, &parse_section_mouse_bindings, action, BIND_ACTION_COUNT - 1, - binding_action_map, 
&conf.bindings.mouse, MOUSE_BINDING); + binding_action_map, &conf.bindings.mouse, MOUSE_BINDING, + false, false); } config_free(&conf); diff --git a/url-mode.c b/url-mode.c index 986860af..0101de19 100644 --- a/url-mode.c +++ b/url-mode.c @@ -4,6 +4,7 @@ #include #include #include +#include #include #include @@ -66,12 +67,13 @@ spawn_url_launcher_with_token(struct terminal *term, return false; } + xassert(term->url_launch != NULL); bool ret = false; if (spawn_expand_template( - &term->conf->url.launch, 1, - (const char *[]){"url"}, - (const char *[]){url}, + term->url_launch, 2, + (const char *[]){"url", "match"}, + (const char *[]){url, url}, &argc, &argv)) { ret = spawn( @@ -83,6 +85,8 @@ spawn_url_launcher_with_token(struct terminal *term, free(argv); } + term->url_launch = NULL; + close(dev_null); return ret; } @@ -106,6 +110,8 @@ static bool spawn_url_launcher(struct seat *seat, struct terminal *term, const char *url, uint32_t serial) { + xassert(term->url_launch != NULL); + struct spawn_activation_context *ctx = xmalloc(sizeof(*ctx)); *ctx = (struct spawn_activation_context){ .term = term, @@ -291,243 +297,135 @@ urls_input(struct seat *seat, struct terminal *term, } } -static int -c32cmp_single(const void *_a, const void *_b) -{ - const char32_t *a = _a; - const char32_t *b = _b; - return *a - *b; -} +struct vline { + char *utf8; + size_t len; /* Length of utf8[] */ + size_t sz; /* utf8[] allocated size */ + struct coord *map; /* Maps utf8[ofs] to grid coordinates */ +}; static void -auto_detected(const struct terminal *term, enum url_action action, - url_list_t *urls) +regex_detected(const struct terminal *term, enum url_action action, + const regex_t *preg, url_list_t *urls) { - const struct config *conf = term->conf; + /* + * Use regcomp()+regexec() to find patterns. + * + * Since we can't feed regexec() one character at a time, and + * since it doesn't accept wide characters, we need to build utf8 + * strings. 
+ * + * Each string represents a logical line (i.e. handle line-wrap). + * To be able to map regex matches back to the grid, we store the + * grid coordinates of *each* character, in the line struct as + * well. This is offset based; utf8[ofs] has its grid coordinates + * in map[ofs]. + */ - const char32_t *uri_characters = conf->url.uri_characters; - if (uri_characters == NULL) - return; + /* There are *at most* term->rows logical lines */ + struct vline vlines[term->rows]; + size_t vline_idx = 0; - const size_t uri_characters_count = c32len(uri_characters); - if (uri_characters_count == 0) - return; + memset(vlines, 0, sizeof(vlines)); + struct vline *vline = &vlines[vline_idx]; - size_t max_prot_len = conf->url.max_prot_len; - char32_t proto_chars[max_prot_len]; - struct coord proto_start[max_prot_len]; - size_t proto_char_count = 0; - - enum { - STATE_PROTOCOL, - STATE_URL, - } state = STATE_PROTOCOL; - - struct coord start = {-1, -1}; - char32_t url[term->cols * term->rows + 1]; - size_t len = 0; - - ssize_t parenthesis = 0; - ssize_t brackets = 0; - ssize_t ltgts = 0; + mbstate_t ps = {0}; for (int r = 0; r < term->rows; r++) { const struct row *row = grid_row_in_view(term->grid, r); for (int c = 0; c < term->cols; c++) { const struct cell *cell = &row->cells[c]; + const char32_t *wc = &cell->wc; + size_t wc_count = 1; - if (cell->wc >= CELL_SPACER) - continue; + /* Expand combining characters */ + if (wc[0] >= CELL_COMB_CHARS_LO && wc[0] <= CELL_COMB_CHARS_HI) { + const struct composed *composed = + composed_lookup(term->composed, wc[0] - CELL_COMB_CHARS_LO); + xassert(composed != NULL); - const char32_t *wcs = NULL; - size_t wc_count = 0; - - if (cell->wc >= CELL_COMB_CHARS_LO && cell->wc <= CELL_COMB_CHARS_HI) { - struct composed *composed = - composed_lookup(term->composed, cell->wc - CELL_COMB_CHARS_LO); - wcs = composed->chars; + wc = composed->chars; wc_count = composed->count; - } else { - wcs = &cell->wc; - wc_count = 1; } - for (size_t w_idx = 0;
w_idx < wc_count; w_idx++) { - char32_t wc = wcs[w_idx]; + /* Convert wide character to utf8 */ + for (size_t i = 0; i < wc_count; i++) { + char buf[16]; + size_t char_len = c32rtomb(buf, wc[i], &ps); - switch (state) { - case STATE_PROTOCOL: - for (size_t i = 0; i < max_prot_len - 1; i++) { - proto_chars[i] = proto_chars[i + 1]; - proto_start[i] = proto_start[i + 1]; - } + if (char_len == (size_t)-1) + continue; - if (proto_char_count >= max_prot_len) - proto_char_count = max_prot_len - 1; + for (size_t j = 0; j < char_len; j++) { + const size_t requires_size = vline->len + char_len; - proto_chars[max_prot_len - 1] = wc; - proto_start[max_prot_len - 1] = (struct coord){c, r}; - proto_char_count++; - - for (size_t i = 0; i < conf->url.prot_count; i++) { - size_t prot_len = c32len(conf->url.protocols[i]); - - if (proto_char_count < prot_len) - continue; - - const char32_t *proto = - &proto_chars[max_prot_len - prot_len]; - - if (c32ncasecmp(conf->url.protocols[i], proto, prot_len) == - 0) { - state = STATE_URL; - start = proto_start[max_prot_len - prot_len]; - - c32ncpy(url, proto, prot_len); - len = prot_len; - - parenthesis = brackets = ltgts = 0; - break; - } - } - break; - - case STATE_URL: { - const char32_t *match = - bsearch(&wc, uri_characters, uri_characters_count, - sizeof(uri_characters[0]), &c32cmp_single); - - bool emit_url = false; - - if (match == NULL) { - /* - * Character is not a valid URI character. Emit - * the URL we've collected so far, *without* - * including _this_ character. 
- */ - emit_url = true; - } else { - xassert(*match == wc); - - switch (wc) { - default: - url[len++] = wc; - break; - - case U'(': - parenthesis++; - url[len++] = wc; - break; - - case U'[': - brackets++; - url[len++] = wc; - break; - - case U'<': - ltgts++; - url[len++] = wc; - break; - - case U')': - if (--parenthesis < 0) - emit_url = true; - else - url[len++] = wc; - break; - - case U']': - if (--brackets < 0) - emit_url = true; - else - url[len++] = wc; - break; - - case U'>': - if (--ltgts < 0) - emit_url = true; - else - url[len++] = wc; - break; - } - } - - if (c >= term->cols - 1 && row->linebreak) { - /* - * Endpoint is inclusive, and we'll be subtracting - * 1 from the column when emitting the URL. - */ - c++; - emit_url = true; - } - - if (emit_url) { - struct coord end = {c, r}; - - if (--end.col < 0) { - end.row--; - end.col = term->cols - 1; + /* Need to grow? Remember to save at least one byte for terminator */ + if (vline->sz == 0 || requires_size > vline->sz - 1) { + const size_t new_size = requires_size * 2; + vline->utf8 = xreallocarray(vline->utf8, new_size, 1); + vline->map = xreallocarray(vline->map, new_size, sizeof(vline->map[0])); + vline->sz = new_size; } - /* Heuristic to remove trailing characters that - * are valid URL characters, but typically not at - * the end of the URL */ - bool done = false; - do { - switch (url[len - 1]) { - case U'.': - case U',': - case U':': - case U';': - case U'?': - case U'!': - case U'"': - case U'\'': - case U'%': - len--; - end.col--; - if (end.col < 0) { - end.row--; - end.col = term->cols - 1; - } - break; - - default: - done = true; - break; - } - } while (!done); - - url[len] = U'\0'; - - start.row += term->grid->view; - end.row += term->grid->view; - - char *url_utf8 = ac32tombs(url); - if (url_utf8 != NULL) { - tll_push_back( - *urls, - ((struct url){.id = (uint64_t)rand() << 32 | rand(), - .url = url_utf8, - .range = - { - .start = start, - .end = end, - }, - .action = action, - .osc8 = 
false})); - } - - state = STATE_PROTOCOL; - len = 0; - parenthesis = brackets = ltgts = 0; - } - break; - } + vline->utf8[vline->len + j] = buf[j]; + vline->map[vline->len + j] = (struct coord){c, term->grid->view + r}; } + + vline->len += char_len; } } + + if (row->linebreak) { + if (vline->len > 0) { + vline->utf8[vline->len++] = '\0'; + ps = (mbstate_t){0}; + + vline_idx++; + vline = &vlines[vline_idx]; + } + } + } + + for (size_t i = 0; i < ALEN(vlines); i++) { + const struct vline *v = &vlines[i]; + if (v->utf8 == NULL) + continue;; + + const char *search_string = v->utf8; + while (true) { + regmatch_t matches[preg->re_nsub + 1]; + int r = regexec(preg, search_string, preg->re_nsub + 1, matches, 0); + + if (r == REG_NOMATCH) + break; + + const size_t mlen = matches[1].rm_eo - matches[1].rm_so; + const size_t start = &search_string[matches[1].rm_so] - v->utf8; + const size_t end = start + mlen; + + LOG_DBG( + "regex match at row %d: %.*srow/col = %dx%d", + matches[1].rm_so, (int)mlen, &search_string[matches[1].rm_so], + v->map[start].row, v->map[start].col); + + tll_push_back( + *urls, + ((struct url){ + .id = (uint64_t)rand() << 32 | rand(), + .url = xstrndup(&v->utf8[start], mlen), + .range = { + .start = v->map[start], + .end = v->map[end - 1], /* Inclusive */ + }, + .action = action, + .osc8 = false})); + + search_string += matches[0].rm_eo; + } + + free(v->utf8); + free(v->map); } } @@ -629,11 +527,13 @@ remove_overlapping(url_list_t *urls, int cols) } void -urls_collect(const struct terminal *term, enum url_action action, url_list_t *urls) +urls_collect(const struct terminal *term, enum url_action action, + const regex_t *preg, bool osc8, url_list_t *urls) { xassert(tll_length(term->urls) == 0); - osc8_uris(term, action, urls); - auto_detected(term, action, urls); + if (osc8) + osc8_uris(term, action, urls); + regex_detected(term, action, preg, urls); remove_overlapping(urls, term->grid->num_cols); } @@ -816,7 +716,7 @@ tag_cells_for_url(struct terminal 
*term, const struct url *url, bool value) } void -urls_render(struct terminal *term) +urls_render(struct terminal *term, const struct config_spawn_template *launch) { struct wl_window *win = term->window; @@ -851,6 +751,9 @@ urls_render(struct terminal *term) /* Snapshot the current grid */ term->url_grid_snapshot = grid_snapshot(term->grid); + /* Remember which launcher to use */ + term->url_launch = launch; + xassert(tll_length(win->urls) == 0); tll_foreach(win->term->urls, it) { struct wl_url url = {.url = &it->item}; diff --git a/url-mode.h b/url-mode.h index eefe07c0..758cd92f 100644 --- a/url-mode.h +++ b/url-mode.h @@ -14,10 +14,11 @@ static inline bool urls_mode_is_active(const struct terminal *term) } void urls_collect( - const struct terminal *term, enum url_action action, url_list_t *urls); + const struct terminal *term, enum url_action action, const regex_t *preg, + bool osc8, url_list_t *urls); void urls_assign_key_combos(const struct config *conf, url_list_t *urls); -void urls_render(struct terminal *term); +void urls_render(struct terminal *term, const struct config_spawn_template *launch); void urls_reset(struct terminal *term); void urls_input(struct seat *seat, struct terminal *term, diff --git a/xmalloc.c b/xmalloc.c index ded7f4e3..ccfb5c48 100644 --- a/xmalloc.c +++ b/xmalloc.c @@ -32,8 +32,17 @@ xcalloc(size_t nmemb, size_t size) void * xrealloc(void *ptr, size_t size) { + xassert(size != 0); void *alloc = realloc(ptr, size); - return unlikely(size == 0) ? 
alloc : check_alloc(alloc); + return check_alloc(alloc); +} + +void * +xreallocarray(void *ptr, size_t n, size_t size) +{ + xassert(n != 0 && size != 0); + void *alloc = reallocarray(ptr, n, size); + return check_alloc(alloc); } char * diff --git a/xmalloc.h b/xmalloc.h index 8a2c208f..03e6eb0d 100644 --- a/xmalloc.h +++ b/xmalloc.h @@ -12,6 +12,7 @@ void *xmalloc(size_t size) XMALLOC; void *xcalloc(size_t nmemb, size_t size) XMALLOC; void *xrealloc(void *ptr, size_t size); +void *xreallocarray(void *ptr, size_t n, size_t size); char *xstrdup(const char *str) XSTRDUP; char *xstrndup(const char *str, size_t n) XSTRDUP; char *xasprintf(const char *format, ...) PRINTF(1) XMALLOC;