Merge branch 'regex-mode'

Closes #1386 Closes #1872
2026-02-06 04:06:06 -05:00 · 2025-02-05 13:47:06 +01:00 · 2025-02-05 13:47:06 +01:00 · dd01783f88
commit dd01783f88
parent 9443ac7e29 9d8021de47
14 changed files with 609 additions and 429 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@ -65,6 +65,11 @@
 * Support for the new Wayland protocol `xdg-system-bell-v1` protocol
  (added in wayland-protocols 1.38), via the new config option
  `bell.system=no|yes` (defaults to `yes`).
+* Added support for custom regex matching ([#1386][1386],
+  [#1872][1872])
+
+[1386]: https://codeberg.org/dnkl/foot/issues/1386
+[1872]: https://codeberg.org/dnkl/foot/issues/1872


 ### Changed
@ -72,12 +77,19 @@
 * Do not try to set a zero width, or height, if the compositor sends a
  _configure_ event with only one dimension being zero
  ([#1925][1925]).
+* Auto-detection of URLs (i.e. not OSC-8 based URLs) is now regex
+  based.

 [1925]: https://codeberg.org/dnkl/foot/issues/1925


 ### Deprecated
 ### Removed
+
+* `url.uri-characters` and `url.protocols`. Both options have been
+  replaced by `url.regex`.
+
+
 ### Fixed

 * Kitty keyboard protocol: alternate key reporting failing to report
--- a/config.c
+++ b/config.c
@ -140,6 +140,8 @@ static const char *const binding_action_map[] = {
    [BIND_ACTION_PROMPT_NEXT] = "prompt-next",
    [BIND_ACTION_UNICODE_INPUT] = "unicode-input",
    [BIND_ACTION_QUIT] = "quit",
+    [BIND_ACTION_REGEX_LAUNCH] = "regex-launch",
+    [BIND_ACTION_REGEX_COPY] = "regex-copy",

    /* Mouse-specific actions */
    [BIND_ACTION_SCROLLBACK_UP_MOUSE] = "scrollback-up-mouse",
@ -207,6 +209,7 @@ static_assert(ALEN(url_binding_action_map) == BIND_ACTION_URL_COUNT,
 struct context {
    struct config *conf;
    const char *section;
+    const char *section_suffix;
    const char *key;
    const char *value;

@ -257,8 +260,9 @@ log_contextual(struct context *ctx, enum log_class log_class,
    char *formatted_msg = xvasprintf(fmt, va);
    va_end(va);

-    bool print_dot = ctx->key != NULL;
-    bool print_colon = ctx->value != NULL;
+    const bool print_dot = ctx->key != NULL;
+    const bool print_colon = ctx->value != NULL;
+    const bool print_section_suffix = ctx->section_suffix != NULL;

    if (!print_dot)
        ctx->key = "";
@ -266,10 +270,15 @@ log_contextual(struct context *ctx, enum log_class log_class,
    if (!print_colon)
        ctx->value = "";

+    if (!print_section_suffix)
+        ctx->section_suffix = "";
+
    log_and_notify(
-        ctx->conf, log_class, file, lineno, "%s:%d: [%s]%s%s%s%s: %s",
-        ctx->path, ctx->lineno, ctx->section, print_dot ? "." : "",
-        ctx->key, print_colon ? ": " : "", ctx->value, formatted_msg);
+        ctx->conf, log_class, file, lineno, "%s:%d: [%s%s%s]%s%s%s%s: %s",
+        ctx->path, ctx->lineno, ctx->section,
+        print_section_suffix ? ":" : "", ctx->section_suffix,
+        print_dot ? "." : "", ctx->key, print_colon ? ": " : "",
+        ctx->value, formatted_msg);
    free(formatted_msg);
 }

@ -420,14 +429,6 @@ done:
    return ret;
 }

-static int
-c32cmp_single(const void *_a, const void *_b)
-{
-    const char32_t *a = _a;
-    const char32_t *b = _b;
-    return *a - *b;
-}
-
 static bool
 str_has_prefix(const char *str, const char *prefix)
 {
@ -1225,7 +1226,6 @@ parse_section_url(struct context *ctx)
 {
    struct config *conf = ctx->conf;
    const char *key = ctx->key;
-    const char *value = ctx->value;

    if (streq(key, "launch"))
        return value_to_spawn_template(ctx, &conf->url.launch);
@ -1243,67 +1243,102 @@ parse_section_url(struct context *ctx)
            (int *)&conf->url.osc8_underline);
    }

-    else if (streq(key, "protocols")) {
-        for (size_t i = 0; i < conf->url.prot_count; i++)
-            free(conf->url.protocols[i]);
-        free(conf->url.protocols);
+    else if (streq(key, "regex")) {
+        const char *regex = ctx->value;
+        regex_t preg;

-        conf->url.max_prot_len = 0;
-        conf->url.prot_count = 0;
-        conf->url.protocols = NULL;
+        int r = regcomp(&preg, regex, REG_EXTENDED);

-        char *copy = xstrdup(value);
-
-        for (char *prot = strtok(copy, ",");
-             prot != NULL;
-             prot = strtok(NULL, ","))
-        {
-
-            /* Strip leading whitespace */
-            while (isspace(prot[0]))
-                prot++;
-
-            /* Strip trailing whitespace */
-            size_t len = strlen(prot);
-            while (isspace(prot[len - 1]))
-                len--;
-            prot[len] = '\0';
-
-            size_t chars = mbsntoc32(NULL, prot, len, 0);
-            if (chars == (size_t)-1) {
-                ctx->value = prot;
-                LOG_CONTEXTUAL_ERRNO("invalid protocol");
-                return false;
-            }
-
-            conf->url.prot_count++;
-            conf->url.protocols = xrealloc(
-                conf->url.protocols,
-                conf->url.prot_count * sizeof(conf->url.protocols[0]));
-
-            size_t idx = conf->url.prot_count - 1;
-            conf->url.protocols[idx] = xmalloc((chars + 1 + 3) * sizeof(char32_t));
-            mbsntoc32(conf->url.protocols[idx], prot, len, chars + 1);
-            c32cpy(&conf->url.protocols[idx][chars], U"://");
-
-            chars += 3;  /* Include the "://" */
-            if (chars > conf->url.max_prot_len)
-                conf->url.max_prot_len = chars;
+        if (r != 0) {
+            char err_buf[128];
+            regerror(r, &preg, err_buf, sizeof(err_buf));
+            LOG_CONTEXTUAL_ERR("invalid regex: %s", err_buf);
+            return false;
        }

-        free(copy);
+        if (preg.re_nsub == 0) {
+            LOG_CONTEXTUAL_ERR("invalid regex: no marked subexpression(s)");
+            regfree(&preg);
+            return false;
+        }
+
+        regfree(&conf->url.preg);
+        free(conf->url.regex);
+
+        conf->url.regex = xstrdup(regex);
+        conf->url.preg = preg;
        return true;
    }

-    else if (streq(key, "uri-characters")) {
-        if (!value_to_wchars(ctx, &conf->url.uri_characters))
+    else {
+        LOG_CONTEXTUAL_ERR("not a valid option: %s", key);
+        return false;
+    }
+}
+
+static bool
+parse_section_regex(struct context *ctx)
+{
+    struct config *conf = ctx->conf;
+    const char *key = ctx->key;
+
+    const char *regex_name =
+        ctx->section_suffix != NULL ? ctx->section_suffix : "";
+
+    struct custom_regex *regex = NULL;
+    tll_foreach(conf->custom_regexes, it) {
+        if (streq(it->item.name, regex_name)) {
+            regex = &it->item;
+            break;
+        }
+    }
+
+    if (streq(key, "regex")) {
+        const char *regex_string = ctx->value;
+        regex_t preg;
+
+        int r = regcomp(&preg, regex_string, REG_EXTENDED);
+
+        if (r != 0) {
+            char err_buf[128];
+            regerror(r, &preg, err_buf, sizeof(err_buf));
+            LOG_CONTEXTUAL_ERR("invalid regex: %s", err_buf);
+            return false;
+        }
+
+        if (preg.re_nsub == 0) {
+            LOG_CONTEXTUAL_ERR("invalid regex: no marked subexpression(s)");
+            regfree(&preg);
+            return false;
+        }
+
+        if (regex == NULL) {
+            tll_push_back(conf->custom_regexes,
+                          ((struct custom_regex){.name = xstrdup(regex_name)}));
+            regex = &tll_back(conf->custom_regexes);
+        }
+
+        regfree(&regex->preg);
+        free(regex->regex);
+
+        regex->regex = xstrdup(regex_string);
+        regex->preg = preg;
+        return true;
+    }
+
+    else if (streq(key, "launch")) {
+        struct config_spawn_template launch;
+        if (!value_to_spawn_template(ctx, &launch))
            return false;

-        qsort(
-            conf->url.uri_characters,
-            c32len(conf->url.uri_characters),
-            sizeof(conf->url.uri_characters[0]),
-            &c32cmp_single);
+        if (regex == NULL) {
+            tll_push_back(conf->custom_regexes,
+                          ((struct custom_regex){.name = xstrdup(regex_name)}));
+            regex = &tll_back(conf->custom_regexes);
+        }
+
+        spawn_template_free(&regex->launch);
+        regex->launch = launch;
        return true;
    }

@ -1654,6 +1689,7 @@ free_binding_aux(struct binding_aux *aux)
    case BINDING_AUX_NONE: break;
    case BINDING_AUX_PIPE: free_argv(&aux->pipe); break;
    case BINDING_AUX_TEXT: free(aux->text.data); break;
+    case BINDING_AUX_REGEX: free(aux->regex_name); break;
    }
 }

@ -1743,7 +1779,10 @@ binding_aux_equal(const struct binding_aux *a,

    case BINDING_AUX_TEXT:
        return a->text.len == b->text.len &&
-            memcmp(a->text.data, b->text.data, a->text.len) == 0;
+               memcmp(a->text.data, b->text.data, a->text.len) == 0;
+
+    case BINDING_AUX_REGEX:
+        return streq(a->regex_name, b->regex_name);
    }

    BUG("invalid AUX type: %d", a->type);
@ -2017,19 +2056,23 @@ modifiers_disjoint(const config_modifier_list_t *mods1,
 }

 static char * NOINLINE
-modifiers_to_str(const config_modifier_list_t *mods)
+modifiers_to_str(const config_modifier_list_t *mods, bool strip_last_plus)
 {
+    /*
+     * Joins the modifier names in 'mods' into a single string obtained
+     * from xmalloc(), writing a '+' after every name. When
+     * 'strip_last_plus' is set the trailing '+' is dropped. An empty
+     * list always yields an empty string.
+     */
-    size_t len = tll_length(*mods);  /* '+' , and NULL terminator */
+    size_t len = tll_length(*mods);  /* One '+' per modifier */
    tll_foreach(*mods, it)
        len += strlen(it->item);

-    char *ret = xmalloc(len);
+    char *ret = xmalloc(len + 1);    /* +1 for the NUL terminator */
    size_t idx = 0;
    tll_foreach(*mods, it) {
        idx += snprintf(&ret[idx], len - idx, "%s", it->item);
        ret[idx++] = '+';
    }
-    ret[--idx] = '\0';
+
+    /* idx is still 0 for an empty list; do not let it wrap around */
+    if (strip_last_plus && idx > 0)
+        idx--;
+
+    ret[idx] = '\0';
    return ret;
 }

@ -2088,21 +2131,40 @@ pipe_argv_from_value(struct context *ctx, struct argv *argv)
    return remove_len;
 }

+static ssize_t NOINLINE
+regex_name_from_value(struct context *ctx, char **regex_name)
+{
+    /*
+     * Extracts a leading "[name]" from ctx->value.
+     *
+     * Returns 0, with *regex_name set to NULL, when the value does not
+     * begin with '['. Returns -1 (after logging) when no ']' is found.
+     * Otherwise *regex_name receives an xstrndup()'d copy of the text
+     * between '[' and the LAST ']' in the value, ctx->value is advanced
+     * past the bracketed name and any whitespace following it, and the
+     * number of characters skipped is returned.
+     */
+    *regex_name = NULL;
+
+    if (ctx->value[0] != '[')
+        return 0;
+
+    const char *regex_end = strrchr(ctx->value, ']');
+    if (regex_end == NULL) {
+        LOG_CONTEXTUAL_ERR("unclosed '['");
+        return -1;
+    }
+
+    size_t regex_len = regex_end - ctx->value - 1;
+    *regex_name = xstrndup(&ctx->value[1], regex_len);
+
+    ssize_t remove_len = regex_end + 1 - ctx->value;
+    ctx->value = regex_end + 1;
+
+    /* <ctype.h> functions require a value representable as unsigned char */
+    while (isspace((unsigned char)*ctx->value)) {
+        ctx->value++;
+        remove_len++;
+    }
+
+    return remove_len;
+}
+
+
 static bool NOINLINE
 parse_key_binding_section(struct context *ctx,
                          int action_count,
                          const char *const action_map[static action_count],
                          struct config_key_binding_list *bindings)
 {
-    struct binding_aux aux;
-
-    ssize_t pipe_remove_len = pipe_argv_from_value(ctx, &aux.pipe);
-    if (pipe_remove_len < 0)
-        return false;
-
-    aux.type = pipe_remove_len == 0 ? BINDING_AUX_NONE : BINDING_AUX_PIPE;
-    aux.master_copy = true;
-
    for (int action = 0; action < action_count; action++) {
        if (action_map[action] == NULL)
            continue;
@ -2110,6 +2172,33 @@ parse_key_binding_section(struct context *ctx,
        if (!streq(ctx->key, action_map[action]))
            continue;

+        struct binding_aux aux = {.type = BINDING_AUX_NONE, .master_copy = true};
+
+        /* TODO: this is ugly... */
+        if (action_map == binding_action_map &&
+            action >= BIND_ACTION_PIPE_SCROLLBACK &&
+            action <= BIND_ACTION_PIPE_COMMAND_OUTPUT)
+        {
+            ssize_t pipe_remove_len = pipe_argv_from_value(ctx, &aux.pipe);
+            if (pipe_remove_len <= 0)
+                return false;
+
+            aux.type = BINDING_AUX_PIPE;
+            aux.master_copy = true;
+        } else if (action_map == binding_action_map &&
+                   action >= BIND_ACTION_REGEX_LAUNCH &&
+                   action <= BIND_ACTION_REGEX_COPY)
+        {
+            char *regex_name = NULL;
+            ssize_t regex_remove_len = regex_name_from_value(ctx, &regex_name);
+            if (regex_remove_len <= 0)
+                return false;
+
+            aux.type = BINDING_AUX_REGEX;
+            aux.master_copy = true;
+            aux.regex_name = regex_name;
+        }
+
        if (!value_to_key_combos(ctx, action, &aux, bindings, KEY_BINDING)) {
            free_binding_aux(&aux);
            return false;
@ -2119,7 +2208,6 @@ parse_key_binding_section(struct context *ctx,
    }

    LOG_CONTEXTUAL_ERR("not a valid action: %s", ctx->key);
-    free_binding_aux(&aux);
    return false;
 }

@ -2317,7 +2405,7 @@ resolve_key_binding_collisions(struct config *conf, const char *section_name,
        }

        if (collision_type != COLLISION_NONE) {
-            char *modifier_names = modifiers_to_str(mods1);
+            char *modifier_names = modifiers_to_str(mods1, false);
            char sym_name[64];

            switch (type){
@ -2359,7 +2447,7 @@ resolve_key_binding_collisions(struct config *conf, const char *section_name,

            case COLLISION_OVERRIDE: {
                char *override_names = modifiers_to_str(
-                    &conf->mouse.selection_override_modifiers);
+                    &conf->mouse.selection_override_modifiers, true);

                if (override_names[0] != '\0')
                    override_names[strlen(override_names) - 1] = '\0';
@ -2698,7 +2786,7 @@ parse_section_touch(struct context *ctx) {
 }

 static bool
-parse_key_value(char *kv, const char **section, const char **key, const char **value)
+parse_key_value(char *kv, char **section, const char **key, const char **value)
 {
    bool section_is_needed = section != NULL;

@ -2767,6 +2855,7 @@ enum section {
    SECTION_DESKTOP_NOTIFICATIONS,
    SECTION_SCROLLBACK,
    SECTION_URL,
+    SECTION_REGEX,
    SECTION_COLORS,
    SECTION_CURSOR,
    SECTION_MOUSE,
@ -2788,6 +2877,7 @@ typedef bool (*parser_fun_t)(struct context *ctx);
 static const struct {
    parser_fun_t fun;
    const char *name;
+    bool allow_colon_suffix;
 } section_info[] = {
    [SECTION_MAIN] =            {&parse_section_main, "main"},
    [SECTION_SECURITY] =        {&parse_section_security, "security"},
@ -2795,6 +2885,7 @@ static const struct {
    [SECTION_DESKTOP_NOTIFICATIONS] = {&parse_section_desktop_notifications, "desktop-notifications"},
    [SECTION_SCROLLBACK] =      {&parse_section_scrollback, "scrollback"},
    [SECTION_URL] =             {&parse_section_url, "url"},
+    [SECTION_REGEX] =           {&parse_section_regex, "regex", true},
    [SECTION_COLORS] =          {&parse_section_colors, "colors"},
    [SECTION_CURSOR] =          {&parse_section_cursor, "cursor"},
    [SECTION_MOUSE] =           {&parse_section_mouse, "mouse"},
@ -2812,11 +2903,29 @@ static const struct {
 static_assert(ALEN(section_info) == SECTION_COUNT, "section info array size mismatch");

 static enum section
-str_to_section(const char *str)
+str_to_section(char *str, char **suffix)
 {
+    /*
+     * Maps a section header string to its enum value, or SECTION_COUNT
+     * if nothing matches. For sections flagged 'allow_colon_suffix', a
+     * header of the form "name:suffix" is split in place: the ':' in
+     * 'str' is overwritten with NUL and *suffix points at the text
+     * following it. In all other cases *suffix is NULL.
+     */
+    const size_t input_len = strlen(str);
+    *suffix = NULL;
+
    for (enum section section = SECTION_MAIN; section < SECTION_COUNT; ++section) {
-        if (streq(str, section_info[section].name))
+        const char *candidate = section_info[section].name;
+
+        if (streq(str, candidate))
            return section;
+
+        if (!section_info[section].allow_colon_suffix)
+            continue;
+
+        /* Need "name:" plus at least one suffix character */
+        const size_t candidate_len = strlen(candidate);
+        if (input_len <= candidate_len + 1)
+            continue;
+
+        if (strncmp(str, candidate, candidate_len) != 0 ||
+            str[candidate_len] != ':')
+        {
+            continue;
+        }
+
+        /* Terminate the name; the suffix begins right after the ':' */
+        str[candidate_len] = '\0';
+        *suffix = &str[candidate_len + 1];
+        return section;
    }
    return SECTION_COUNT;
 }
@ -2840,10 +2949,12 @@ parse_config_file(FILE *f, struct config *conf, const char *path, bool errors_ar
    }

    char *section_name = xstrdup("main");
+    char *section_suffix = NULL;

    struct context context = {
        .conf = conf,
        .section = section_name,
+        .section_suffix = section_suffix,
        .path = path,
        .lineno = 0,
        .errors_are_fatal = errors_are_fatal,
@ -2924,7 +3035,8 @@ parse_config_file(FILE *f, struct config *conf, const char *path, bool errors_ar
                error_or_continue();
            }

-            section = str_to_section(key_value);
+            char *maybe_section_suffix;
+            section = str_to_section(key_value, &maybe_section_suffix);
            if (section == SECTION_COUNT) {
                context.section = key_value;
                LOG_CONTEXTUAL_ERR("invalid section name: %s", key_value);
@ -2933,8 +3045,11 @@ parse_config_file(FILE *f, struct config *conf, const char *path, bool errors_ar
            }

            free(section_name);
+            free(section_suffix);
            section_name = xstrdup(key_value);
+            section_suffix = maybe_section_suffix != NULL ? xstrdup(maybe_section_suffix) : NULL;
            context.section = section_name;
+            context.section_suffix = section_suffix;

            /* Process next line */
            continue;
@ -2974,6 +3089,7 @@ parse_config_file(FILE *f, struct config *conf, const char *path, bool errors_ar

 done:
    free(section_name);
+    free(section_suffix);
    free(_line);
    return ret;
 }
@ -3068,7 +3184,6 @@ add_default_search_bindings(struct config *conf)
        {BIND_ACTION_SEARCH_DELETE_NEXT_WORD, m(XKB_MOD_NAME_CTRL), {{XKB_KEY_Delete}}},
        {BIND_ACTION_SEARCH_DELETE_NEXT_WORD, m(XKB_MOD_NAME_ALT), {{XKB_KEY_d}}},
        {BIND_ACTION_SEARCH_EXTEND_CHAR, m(XKB_MOD_NAME_SHIFT), {{XKB_KEY_Right}}},
-        {BIND_ACTION_SEARCH_EXTEND_WORD, m(XKB_MOD_NAME_CTRL), {{XKB_KEY_w}}},
        {BIND_ACTION_SEARCH_EXTEND_WORD, m(XKB_MOD_NAME_CTRL "+" XKB_MOD_NAME_SHIFT), {{XKB_KEY_Right}}},
        {BIND_ACTION_SEARCH_EXTEND_WORD, m(XKB_MOD_NAME_CTRL), {{XKB_KEY_w}}},
        {BIND_ACTION_SEARCH_EXTEND_WORD_WS, m(XKB_MOD_NAME_CTRL "+" XKB_MOD_NAME_SHIFT), {{XKB_KEY_w}}},
@ -3196,9 +3311,9 @@ config_load(struct config *conf, const char *conf_path,
        },
        .url = {
            .label_letters = xc32dup(U"sadfjklewcmpgh"),
-            .uri_characters = xc32dup(U"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_.,~:;/?#@!$&%*+=\"'()[]"),
            .osc8_underline = OSC8_UNDERLINE_URL_MODE,
        },
+        .custom_regexes = tll_init(),
        .can_shape_grapheme = fcft_caps & FCFT_CAPABILITY_GRAPHEME_SHAPING,
        .scrollback = {
            .lines = 1000,
@ -3315,35 +3430,47 @@ config_load(struct config *conf, const char *conf_path,
    tokenize_cmdline("--action ${action-name}=${action-label}", &conf->desktop_notifications.command_action_arg.argv.args);
    tokenize_cmdline("xdg-open ${url}", &conf->url.launch.argv.args);

-    static const char32_t *url_protocols[] = {
-        U"http://",
-        U"https://",
-        U"ftp://",
-        U"ftps://",
-        U"file://",
-        U"gemini://",
-        U"gopher://",
-        U"irc://",
-        U"ircs://",
-    };
-    conf->url.protocols = xmalloc(
-        ALEN(url_protocols) * sizeof(conf->url.protocols[0]));
-    conf->url.prot_count = ALEN(url_protocols);
-    conf->url.max_prot_len = 0;
+    {
+        /*
+         * Based on https://gist.github.com/gruber/249502, but modified:
+         *  - Do not allow {} at all
+         *  - Do allow matched []
+         */
+        const char *url_regex_string =
+            "("
+                "("
+                    "[a-z][[:alnum:]-]+:"       // protocol
+                    "("
+                        "/{1,3}|[a-z0-9%]"     // slashes (what's the OR part for?)
+                    ")"
+                    "|"
+                    /*
+                     * Character classes are only valid inside a
+                     * bracket expression; a bare [:digit:] would
+                     * instead match one of ':', 'd', 'i', 'g', 't'.
+                     */
+                    "www[[:digit:]]{0,3}[.]"
+                    //"|"
+                    //"[a-z0-9.\\-]+[.][a-z]{2,4}/"  /* "looks like domain name followed by a slash" - remove? */
+                ")"
+                "("
+                    "[^[:space:](){}<>]+"
+                    "|"
+                    "\\(([^[:space:](){}<>]+|(\\([^[:space:](){}<>]+\\)))*\\)"
+                    "|"
+                    "\\[([^]\\[[:space:](){}<>]+|(\\[[^]\\[[:space:](){}<>]+\\]))*\\]"
+                ")+"
+                "("
+                    "\\(([^[:space:](){}<>]+|(\\([^[:space:](){}<>]+\\)))*\\)"
+                    "|"
+                    "\\[([^]\\[[:space:](){}<>]+|(\\[[^]\\[[:space:](){}<>]+\\]))*\\]"
+                    "|"
+                    "[^]\\[[:space:]`!(){};:'\".,<>?«»“”‘’]"
+                ")"
+            ")"
+        ;

-    for (size_t i = 0; i < ALEN(url_protocols); i++) {
-        size_t len = c32len(url_protocols[i]);
-        if (len > conf->url.max_prot_len)
-            conf->url.max_prot_len = len;
-        conf->url.protocols[i] = xc32dup(url_protocols[i]);
+        int r = regcomp(&conf->url.preg, url_regex_string, REG_EXTENDED);
+        xassert(r == 0);
+        conf->url.regex = xstrdup(url_regex_string);
+        xassert(conf->url.preg.re_nsub >= 1);
    }

-    qsort(
-        conf->url.uri_characters,
-        c32len(conf->url.uri_characters),
-        sizeof(conf->url.uri_characters[0]),
-        &c32cmp_single);
-
    tll_foreach(*initial_user_notifications, it) {
        tll_push_back(conf->notifications, it->item);
        tll_remove(*initial_user_notifications, it);
@ -3430,6 +3557,8 @@ bool
 config_override_apply(struct config *conf, config_override_t *overrides,
                      bool errors_are_fatal)
 {
+    char *section_name = NULL;
+
    struct context context = {
        .conf = conf,
        .path = "override",
@ -3441,8 +3570,7 @@ config_override_apply(struct config *conf, config_override_t *overrides,
    tll_foreach(*overrides, it) {
        context.lineno++;

-        if (!parse_key_value(
-                it->item, &context.section, &context.key, &context.value))
+        if (!parse_key_value(it->item, &section_name, &context.key, &context.value))
        {
            LOG_CONTEXTUAL_ERR("syntax error: key/value pair has no %s",
                               context.key == NULL ? "key" : "value");
@ -3451,20 +3579,26 @@ config_override_apply(struct config *conf, config_override_t *overrides,
            continue;
        }

-        if (context.section[0] == '\0') {
+        if (section_name[0] == '\0') {
            LOG_CONTEXTUAL_ERR("empty section name");
            if (errors_are_fatal)
                return false;
            continue;
        }

-        enum section section = str_to_section(context.section);
+        char *maybe_section_suffix = NULL;
+        enum section section = str_to_section(section_name, &maybe_section_suffix);
+
+        context.section = section_name;
+        context.section_suffix = maybe_section_suffix;
+
        if (section == SECTION_COUNT) {
-            LOG_CONTEXTUAL_ERR("invalid section name: %s", context.section);
+            LOG_CONTEXTUAL_ERR("invalid section name: %s", section_name);
            if (errors_are_fatal)
                return false;
            continue;
        }
+
        parser_fun_t section_parser = section_info[section].fun;
        xassert(section_parser != NULL);

@ -3500,6 +3634,7 @@ key_binding_list_clone(struct config_key_binding_list *dst,
    struct argv *last_master_argv = NULL;
    uint8_t *last_master_text_data = NULL;
    size_t last_master_text_len = 0;
+    char *last_master_regex_name = NULL;

    dst->count = src->count;
    dst->arr = xmalloc(src->count * sizeof(dst->arr[0]));
@ -3547,6 +3682,16 @@ key_binding_list_clone(struct config_key_binding_list *dst,
            }
            last_master_argv = NULL;
            break;
+
+        case BINDING_AUX_REGEX:
+            if (old->aux.master_copy) {
+                new->aux.regex_name = xstrdup(old->aux.regex_name);
+                last_master_regex_name = new->aux.regex_name;
+            } else {
+                xassert(last_master_regex_name != NULL);
+                new->aux.regex_name = last_master_regex_name;
+            }
+            break;
        }
    }
 }
@ -3577,12 +3722,23 @@ config_clone(const struct config *old)
    config_font_list_clone(&conf->csd.font, &old->csd.font);

    conf->url.label_letters = xc32dup(old->url.label_letters);
-    conf->url.uri_characters = xc32dup(old->url.uri_characters);
    spawn_template_clone(&conf->url.launch, &old->url.launch);
-    conf->url.protocols = xmalloc(
-        old->url.prot_count * sizeof(conf->url.protocols[0]));
-    for (size_t i = 0; i < old->url.prot_count; i++)
-        conf->url.protocols[i] = xc32dup(old->url.protocols[i]);
+    conf->url.regex = xstrdup(old->url.regex);
+    regcomp(&conf->url.preg, conf->url.regex, REG_EXTENDED);
+
+    memset(&conf->custom_regexes, 0, sizeof(conf->custom_regexes));
+    tll_foreach(old->custom_regexes, it) {
+        const struct custom_regex *old_regex = &it->item;
+
+        tll_push_back(conf->custom_regexes,
+                      ((struct custom_regex){.name = xstrdup(old_regex->name),
+                                             .regex = xstrdup(old_regex->regex)}));
+
+
+        struct custom_regex *new_regex = &tll_back(conf->custom_regexes);
+        regcomp(&new_regex->preg, new_regex->regex, REG_EXTENDED);
+        spawn_template_clone(&new_regex->launch, &old_regex->launch);
+    }

    key_binding_list_clone(&conf->bindings.key, &old->bindings.key);
    key_binding_list_clone(&conf->bindings.search, &old->bindings.search);
@ -3663,10 +3819,17 @@ config_free(struct config *conf)

    free(conf->url.label_letters);
    spawn_template_free(&conf->url.launch);
-    for (size_t i = 0; i < conf->url.prot_count; i++)
-        free(conf->url.protocols[i]);
-    free(conf->url.protocols);
-    free(conf->url.uri_characters);
+    regfree(&conf->url.preg);
+    free(conf->url.regex);
+
+    tll_foreach(conf->custom_regexes, it) {
+        struct custom_regex *regex = &it->item;
+        free(regex->name);
+        free(regex->regex);
+        regfree(&regex->preg);
+        spawn_template_free(&regex->launch);
+        tll_remove(conf->custom_regexes, it);
+    }

    free_key_binding_list(&conf->bindings.key);
    free_key_binding_list(&conf->bindings.search);
--- a/config.h
+++ b/config.h
@ -1,7 +1,8 @@
 #pragma once

-#include <stdint.h>
+#include <regex.h>
 #include <stdbool.h>
+#include <stdint.h>
 #include <uchar.h>

 #include <xkbcommon/xkbcommon.h>
@ -60,6 +61,7 @@ enum binding_aux_type {
    BINDING_AUX_NONE,
    BINDING_AUX_PIPE,
    BINDING_AUX_TEXT,
+    BINDING_AUX_REGEX,
 };

 struct binding_aux {
@ -73,6 +75,8 @@ struct binding_aux {
            uint8_t *data;
            size_t len;
        } text;
+
+        char *regex_name;
    };
 };

@ -120,6 +124,13 @@ struct env_var {
 };
 typedef tll(struct env_var) env_var_list_t;

+struct custom_regex {
+    char *name;
+    char *regex;
+    regex_t preg;
+    struct config_spawn_template launch;
+};
+
 struct config {
    char *term;
    char *shell;
@ -220,12 +231,12 @@ struct config {
            OSC8_UNDERLINE_ALWAYS,
        } osc8_underline;

-        char32_t **protocols;
-        char32_t *uri_characters;
-        size_t prot_count;
-        size_t max_prot_len;
+        char *regex;
+        regex_t preg;
    } url;

+    tll(struct custom_regex) custom_regexes;
+
    struct {
        uint32_t fg;
        uint32_t bg;
--- a/doc/foot.ini.5.scd
+++ b/doc/foot.ini.5.scd
@ -755,6 +755,9 @@ xdgtoken=95ebdfe56e4f47ddb5bba9d7dc3a2c35

 # SECTION: url

+Note that you can also add custom regular expressions, see the 'regex'
+section.
+
 *launch*
 	Command to execute when opening URLs. _${url}_ will be replaced
 	with the actual URL. Default: _xdg-open ${url}_.
@ -782,19 +785,48 @@ xdgtoken=95ebdfe56e4f47ddb5bba9d7dc3a2c35
 	
 	Default: _sadfjklewcmpgh_.

-*protocols*
-	Comma separated list of protocols (schemes) that should be
-	recognized in URL mode. Note that only auto-detected URLs are
-	affected by this option. OSC-8 URLs are always enabled, regardless
-	of protocol. Default: _http, https, ftp, ftps, file, gemini,
-	gopher, irc, ircs_.
-
-*uri-characters*
-	Set of characters allowed in auto-detected URLs. Any character not
-	included in this set constitutes a URL delimiter.
+*regex*
+	Regular expression to use when auto-detecting URLs. The format is
+	"POSIX-Extended Regular Expressions". Note that the first marked
+	subexpression is used as the URL. In other words, if you want the
+	whole regex match to be used as a URL, surround all of it with
+	parentheses: *(regex-pattern)*.
 	
-	Default:
-	_abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-\_.,~:;/?#@!$&%\*+="'()[]_
+	Default: _(([a-z][[:alnum:]-]+:(/{1,3}|[a-z0-9%])|www[:digit:]{0,3}[.])([^[:space:](){}<>]+|\(([^[:space:](){}<>]+|(\([^[:space:](){}<>]+\)))\*\)|\[([^]\[[:space:](){}<>]+|(\[[^]\[[:space:](){}<>]+\]))\*\])+(\(([^[:space:](){}<>]+|(\([^[:space:](){}<>]+\)))\*\)|\[([^]\[[:space:](){}<>]+|(\[[^]\[[:space:](){}<>]+\]))\*\]|[^]\[[:space:]`!(){};:'".,<>?«»“”‘’]))_
+
+# SECTION: regex
+
+Similar to the 'url' mode, but with custom defined regular expressions
+(and launchers).
+
+To use a custom defined regular expression, you also need to add a key
+binding for it. This is done in the *key-binding* section, see below
+for details. For example, a regex to detect hash digests (e.g. git
+commit hashes) could look like:
+
+```
+[regex:hashes]
+regex=([a-fA-F0-9]{7,128})
+launch=path-to-script-or-application ${match}
+
+[key-bindings]
+regex-launch=[hashes] Control+Shift+q
+regex-copy=[hashes] Control+Mod1+Shift+q
+```
+
+*launch*
+	Command to execute when "launching" a regex match. _${match}_ will
+	be replaced with the actual match. Default: _not set_.
+
+*regex*
+	Regular expression to use when matching text. The format is
+	"POSIX-Extended Regular Expressions". Note that the first marked
+	subexpression is used as the match. In other words, if you want
+	the whole regex match to be used, surround all of it with
+	parentheses: *(regex-pattern)*.
+	
+	Default: _not set_.
+

 # SECTION: cursor

@ -1238,6 +1270,30 @@ e.g. *search-start=none*.
 	jump label with a key sequence that will place the URL in the
 	clipboard. Default: _none_.

+*regex-launch*
+	Enter regex mode. This works exactly the same as URL mode; all
+	regex matches are tagged with a jump label with a key sequence
+	that will "launch" the match (and exit regex mode).
+	
+	The name of the regex section must be specified in the key
+	binding:
+	
+	```
+	[regex:hashes]
+	regex=([a-fA-F0-9]{7,128})
+	launch=path-to-script-or-application ${match}
+	
+	[key-bindings]
+	regex-launch=[hashes] Control+Shift+q
+	regex-copy=[hashes] Control+Mod1+Shift+q
+	```
+	
+	Default: _none_.
+
+*regex-copy*
+	Same as *regex-launch*, but the match is placed in the clipboard,
+	instead of "launched", upon activation. Default: _none_.
+
 *prompt-prev*
 	Jump to the previous, currently not visible, prompt (requires
 	shell integration, see *foot*(1)). Default: _Control+Shift+z_.
--- a/foot.ini
+++ b/foot.ini
@ -69,8 +69,19 @@
 # launch=xdg-open ${url}
 # label-letters=sadfjklewcmpgh
 # osc8-underline=url-mode
-# protocols=http, https, ftp, ftps, file, gemini, gopher
-# uri-characters=abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_.,~:;/?#@!$&%*+="'()[]
+# regex=(([a-z][[:alnum:]-]+:(/{1,3}|[a-z0-9%])|www[:digit:]{0,3}[.])([^[:space:](){}<>]+|\(([^[:space:](){}<>]+|(\([^[:space:](){}<>]+\)))*\)|\[([^]\[[:space:](){}<>]+|(\[[^]\[[:space:](){}<>]+\]))*\])+(\(([^[:space:](){}<>]+|(\([^[:space:](){}<>]+\)))*\)|\[([^]\[[:space:](){}<>]+|(\[[^]\[[:space:](){}<>]+\]))*\]|[^]\[[:space:]`!(){};:'".,<>?«»“”‘’]))
+
+# You can define your own regexes by adding a section called
+# 'regex:<ID>' with a 'regex' and 'launch' key. These can then be tied
+# to a key-binding. See foot.ini(5) for details
+
+# [regex:your-fancy-name]
+# regex=<a POSIX-Extended Regular Expression>
+# launch=<path to script or application> ${match}
+#
+# [key-bindings]
+# regex-launch=[your-fancy-name] Control+Shift+q
+# regex-copy=[your-fancy-name] Control+Alt+Shift+q

 [cursor]
 # style=block
--- a/grid.c
+++ b/grid.c
@ -36,7 +36,8 @@ grid_row_abs_to_sb(const struct grid *grid, int screen_rows, int abs_row)
    return rebased_row;
 }

-int grid_row_sb_to_abs(const struct grid *grid, int screen_rows, int sb_rel_row)
+int
+grid_row_sb_to_abs(const struct grid *grid, int screen_rows, int sb_rel_row)
 {
    const int scrollback_start = grid->offset + screen_rows;
    int abs_row = sb_rel_row + scrollback_start;
--- a/input.c
+++ b/input.c
@ -349,9 +349,9 @@ execute_binding(struct seat *seat, struct terminal *term,
            action == BIND_ACTION_SHOW_URLS_LAUNCH ? URL_ACTION_LAUNCH :
            URL_ACTION_PERSISTENT;

-        urls_collect(term, url_action, &term->urls);
+        urls_collect(term, url_action, &term->conf->url.preg, true, &term->urls);
        urls_assign_key_combos(term->conf, &term->urls);
-        urls_render(term);
+        urls_render(term, &term->conf->url.launch);
        return true;
    }

@ -448,6 +448,42 @@ execute_binding(struct seat *seat, struct terminal *term,
        term_shutdown(term);
        return true;

+    case BIND_ACTION_REGEX_LAUNCH:
+    case BIND_ACTION_REGEX_COPY:
+        if (binding->aux->type != BINDING_AUX_REGEX)
+            return true;
+
+        tll_foreach(term->conf->custom_regexes, it) {
+            const struct custom_regex *regex = &it->item;
+
+            if (streq(regex->name, binding->aux->regex_name)) {
+                xassert(!urls_mode_is_active(term));
+
+                enum url_action url_action = action == BIND_ACTION_REGEX_LAUNCH
+                    ? URL_ACTION_LAUNCH : URL_ACTION_COPY;
+
+                if (regex->regex == NULL) {
+                    LOG_ERR("regex:%s has no regex defined", regex->name);
+                    return true;
+                }
+                if (url_action == URL_ACTION_LAUNCH && regex->launch.argv.args == NULL) {
+                    LOG_ERR("regex:%s has no launch command defined", regex->name);
+                    return true;
+                }
+
+                urls_collect(term, url_action, &regex->preg, false, &term->urls);
+                urls_assign_key_combos(term->conf, &term->urls);
+                urls_render(term, &regex->launch);
+                return true;
+            }
+        }
+
+        LOG_ERR(
+            "no regex section named '%s' defined in the configuration",
+            binding->aux->regex_name);
+
+        return true;
+
    case BIND_ACTION_SELECT_BEGIN:
        selection_start(
            term, seat->mouse.col, seat->mouse.row, SELECTION_CHAR_WISE, false);
--- a/key-binding.h
+++ b/key-binding.h
@ -41,6 +41,8 @@ enum bind_action_normal {
    BIND_ACTION_PROMPT_NEXT,
    BIND_ACTION_UNICODE_INPUT,
    BIND_ACTION_QUIT,
+    BIND_ACTION_REGEX_LAUNCH,
+    BIND_ACTION_REGEX_COPY,

    /* Mouse specific actions - i.e. they require a mouse coordinate */
    BIND_ACTION_SCROLLBACK_UP_MOUSE,
@ -54,7 +56,7 @@ enum bind_action_normal {
    BIND_ACTION_SELECT_QUOTE,
    BIND_ACTION_SELECT_ROW,

-    BIND_ACTION_KEY_COUNT = BIND_ACTION_QUIT + 1,
+    BIND_ACTION_KEY_COUNT = BIND_ACTION_REGEX_COPY + 1,
    BIND_ACTION_COUNT = BIND_ACTION_SELECT_ROW + 1,
 };

--- a/terminal.h
+++ b/terminal.h
@ -789,6 +789,7 @@ struct terminal {
    bool urls_show_uri_on_jump_label;
    struct grid *url_grid_snapshot;
    bool ime_reenable_after_url_mode;
+    const struct config_spawn_template *url_launch;

 #if defined(FOOT_IME_ENABLED) && FOOT_IME_ENABLED
    bool ime_enabled;
--- a/tests/test-config.c
+++ b/tests/test-config.c
@ -106,50 +106,6 @@ test_c32string(struct context *ctx, bool (*parse_fun)(struct context *ctx),
    }
 }

-static void
-test_protocols(struct context *ctx, bool (*parse_fun)(struct context *ctx),
-               const char *key, char32_t **const *ptr)
-{
-    ctx->key = key;
-
-    static const struct {
-        const char *option_string;
-        int count;
-        const char32_t *value[2];
-        bool invalid;
-    } input[] = {
-        {""},
-        {"http", 1, {U"http://"}},
-        {" http", 1, {U"http://"}},
-        {"http, https", 2, {U"http://", U"https://"}},
-        {"longprotocolislong", 1, {U"longprotocolislong://"}},
-    };
-
-    for (size_t i = 0; i < ALEN(input); i++) {
-        ctx->value = input[i].option_string;
-
-        if (input[i].invalid) {
-            if (parse_fun(ctx)) {
-                BUG("[%s].%s=%s: did not fail to parse as expected",
-                    ctx->section, ctx->key, &ctx->value[0]);
-            }
-        } else {
-            if (!parse_fun(ctx)) {
-                BUG("[%s].%s=%s: failed to parse",
-                    ctx->section, ctx->key, &ctx->value[0]);
-            }
-            for (int c = 0; c < input[i].count; c++) {
-                if (c32cmp((*ptr)[c], input[i].value[c]) != 0) {
-                    BUG("[%s].%s=%s: set value[%d] (%ls) not the expected one (%ls)",
-                        ctx->section, ctx->key, &ctx->value[c], c,
-                        (const wchar_t *)(*ptr)[c],
-                        (const wchar_t *)input[i].value[c]);
-                }
-            }
-        }
-    }
-}
-
 static void
 test_boolean(struct context *ctx, bool (*parse_fun)(struct context *ctx),
             const char *key, const bool *ptr)
@ -647,9 +603,6 @@ test_section_url(void)
              (int []){OSC8_UNDERLINE_URL_MODE, OSC8_UNDERLINE_ALWAYS},
              (int *)&conf.url.osc8_underline);
    test_c32string(&ctx, &parse_section_url, "label-letters", &conf.url.label_letters);
-    test_protocols(&ctx, &parse_section_url, "protocols", &conf.url.protocols);
-
-    /* TODO: uri-characters (wchar string, but sorted) */

    config_free(&conf);
 }
@ -843,7 +796,7 @@ static void
 test_key_binding(struct context *ctx, bool (*parse_fun)(struct context *ctx),
                 int action, int max_action, const char *const *map,
                 struct config_key_binding_list *bindings,
-                 enum key_binding_type type)
+                 enum key_binding_type type, bool need_argv, bool need_section_id)
 {
    xassert(map[action] != NULL);
    xassert(bindings->count == 0);
@ -855,7 +808,10 @@ test_key_binding(struct context *ctx, bool (*parse_fun)(struct context *ctx),
    const bool alt = action % 3;
    const bool shift = action % 4;
    const bool super = action % 5;
-    const bool argv = action % 6;
+    const bool argv = need_argv;
+    const bool section_id = need_section_id;
+
+    xassert(!(argv && section_id));

    static const char *const args[] = {
        "command", "arg1", "arg2", "arg3 has spaces"};
@ -894,7 +850,7 @@ test_key_binding(struct context *ctx, bool (*parse_fun)(struct context *ctx),
        xkb_keysym_get_name(sym, sym_name, sizeof(sym_name));

        snprintf(value, sizeof(value), "%s%s%s",
-                 argv ? "[command arg1 arg2 \"arg3 has spaces\"] " : "",
+                 argv ? "[command arg1 arg2 \"arg3 has spaces\"] " : section_id ? "[foobar]" : "",
                 modifier_string, sym_name);
        break;
    }
@ -903,7 +859,7 @@ test_key_binding(struct context *ctx, bool (*parse_fun)(struct context *ctx),
        const char *const button_name = button_map[button_idx].name;
        int chars = snprintf(
            value, sizeof(value), "%s%s%s",
-            argv ? "[command arg1 arg2 \"arg3 has spaces\"] " : "",
+            argv ? "[command arg1 arg2 \"arg3 has spaces\"] " : section_id ? "[foobar]" : "",
            modifier_string, button_name);

        xassert(click_count > 0);
@ -944,6 +900,18 @@ test_key_binding(struct context *ctx, bool (*parse_fun)(struct context *ctx),
                ctx->section, ctx->key, ctx->value,
                ALEN(args), binding->aux.pipe.args[ALEN(args)]);
        }
+    } else if (section_id) {
+        if (binding->aux.regex_name == NULL) {
+            BUG("[%s].%s=%s: regex name is NULL",
+                ctx->section, ctx->key, ctx->value);
+        }
+
+        if (!streq(binding->aux.regex_name, "foobar")) {
+            BUG("[%s].%s=%s: regex name not the expected one: "
+                "expected=\"%s\", got=\"%s\"",
+                ctx->section, ctx->key, ctx->value,
+                "foobar", binding->aux.regex_name);
+        }
    } else {
        if (binding->aux.pipe.args != NULL) {
            BUG("[%s].%s=%s: pipe argv not NULL",
@ -1139,7 +1107,9 @@ test_section_key_bindings(void)
        test_key_binding(
            &ctx, &parse_section_key_bindings,
            action, BIND_ACTION_KEY_COUNT - 1,
-            binding_action_map, &conf.bindings.key, KEY_BINDING);
+            binding_action_map, &conf.bindings.key, KEY_BINDING,
+            action >= BIND_ACTION_PIPE_SCROLLBACK && action <= BIND_ACTION_PIPE_COMMAND_OUTPUT,
+            action >= BIND_ACTION_REGEX_LAUNCH && action <= BIND_ACTION_REGEX_COPY);
    }

    config_free(&conf);
@ -1174,7 +1144,8 @@ test_section_search_bindings(void)
        test_key_binding(
            &ctx, &parse_section_search_bindings,
            action, BIND_ACTION_SEARCH_COUNT - 1,
-            search_binding_action_map, &conf.bindings.search, KEY_BINDING);
+            search_binding_action_map, &conf.bindings.search, KEY_BINDING,
+            false, false);
    }

    config_free(&conf);
@ -1210,7 +1181,8 @@ test_section_url_bindings(void)
        test_key_binding(
            &ctx, &parse_section_url_bindings,
            action, BIND_ACTION_URL_COUNT - 1,
-            url_binding_action_map, &conf.bindings.url, KEY_BINDING);
+            url_binding_action_map, &conf.bindings.url, KEY_BINDING,
+            false, false);
    }

    config_free(&conf);
@ -1246,7 +1218,8 @@ test_section_mouse_bindings(void)
        test_key_binding(
            &ctx, &parse_section_mouse_bindings,
            action, BIND_ACTION_COUNT - 1,
-            binding_action_map, &conf.bindings.mouse, MOUSE_BINDING);
+            binding_action_map, &conf.bindings.mouse, MOUSE_BINDING,
+            false, false);
    }

    config_free(&conf);
--- a/url-mode.c
+++ b/url-mode.c
@ -4,6 +4,7 @@
 #include <string.h>
 #include <wctype.h>
 #include <unistd.h>
+#include <regex.h>

 #include <sys/stat.h>
 #include <fcntl.h>
@ -66,12 +67,13 @@ spawn_url_launcher_with_token(struct terminal *term,
        return false;
    }

+    xassert(term->url_launch != NULL);
    bool ret = false;

    if (spawn_expand_template(
-            &term->conf->url.launch, 1,
-            (const char *[]){"url"},
-            (const char *[]){url},
+            term->url_launch, 2,
+            (const char *[]){"url", "match"},
+            (const char *[]){url, url},
            &argc, &argv))
    {
        ret = spawn(
@ -83,6 +85,8 @@ spawn_url_launcher_with_token(struct terminal *term,
        free(argv);
    }

+    term->url_launch = NULL;
+
    close(dev_null);
    return ret;
 }
@ -106,6 +110,8 @@ static bool
 spawn_url_launcher(struct seat *seat, struct terminal *term, const char *url,
                   uint32_t serial)
 {
+    xassert(term->url_launch != NULL);
+
    struct spawn_activation_context *ctx = xmalloc(sizeof(*ctx));
    *ctx = (struct spawn_activation_context){
        .term = term,
@ -291,243 +297,135 @@ urls_input(struct seat *seat, struct terminal *term,
    }
 }

-static int
-c32cmp_single(const void *_a, const void *_b)
-{
-    const char32_t *a = _a;
-    const char32_t *b = _b;
-    return *a - *b;
-}
+struct vline {           /* One logical (line-wrap joined) line of text */
+    char *utf8;          /* Multibyte (utf8) text of the line */
+    size_t len;          /* Number of bytes used in utf8[] */
+    size_t sz;           /* Allocated size of utf8[]; map[] has the same element count */
+    struct coord *map;   /* map[ofs] is the grid coordinate of the cell that produced utf8[ofs] */
+};

 static void
-auto_detected(const struct terminal *term, enum url_action action,
-              url_list_t *urls)
+regex_detected(const struct terminal *term, enum url_action action,
+               const regex_t *preg, url_list_t *urls)
 {
-    const struct config *conf = term->conf;
+    /*
+     * Use regcomp()+regexec() to find patterns.
+     *
+     * Since we can't feed regexec() one character at a time, and
+     * since it doesn't accept wide characters, we need to build utf8
+     * strings.
+     *
+     * Each string represents a logical line (i.e. handle line-wrap).
+     * To be able to map regex matches back to the grid, we store the
+     * grid coordinates of *each* character, in the line struct as
+     * well. This is offset based; utf8[ofs] has its grid coordinates
+     * in map[ofs.
+     */

-    const char32_t *uri_characters = conf->url.uri_characters;
-    if (uri_characters == NULL)
-        return;
+    /* There are *at most* term->rows logical lines */
+    struct vline vlines[term->rows];
+    size_t vline_idx = 0;

-    const size_t uri_characters_count = c32len(uri_characters);
-    if (uri_characters_count == 0)
-        return;
+    memset(vlines, 0, sizeof(vlines));
+    struct vline *vline = &vlines[vline_idx];

-    size_t max_prot_len = conf->url.max_prot_len;
-    char32_t proto_chars[max_prot_len];
-    struct coord proto_start[max_prot_len];
-    size_t proto_char_count = 0;
-
-    enum {
-        STATE_PROTOCOL,
-        STATE_URL,
-    } state = STATE_PROTOCOL;
-
-    struct coord start = {-1, -1};
-    char32_t url[term->cols * term->rows + 1];
-    size_t len = 0;
-
-    ssize_t parenthesis = 0;
-    ssize_t brackets = 0;
-    ssize_t ltgts = 0;
+    mbstate_t ps = {0};

    for (int r = 0; r < term->rows; r++) {
        const struct row *row = grid_row_in_view(term->grid, r);

        for (int c = 0; c < term->cols; c++) {
            const struct cell *cell = &row->cells[c];
+            const char32_t *wc = &cell->wc;
+            size_t wc_count = 1;

-            if (cell->wc >= CELL_SPACER)
-                continue;
+            /* Expand combining characters */
+            if (wc[0] >= CELL_COMB_CHARS_LO && wc[0] <= CELL_COMB_CHARS_HI) {
+                const struct composed *composed =
+                    composed_lookup(term->composed, wc[0] - CELL_COMB_CHARS_LO);
+                xassert(composed != NULL);

-            const char32_t *wcs = NULL;
-            size_t wc_count = 0;
-
-            if (cell->wc >= CELL_COMB_CHARS_LO && cell->wc <= CELL_COMB_CHARS_HI) {
-                struct composed *composed =
-                    composed_lookup(term->composed, cell->wc - CELL_COMB_CHARS_LO);
-                wcs = composed->chars;
+                wc = composed->chars;
                wc_count = composed->count;
-            } else {
-                wcs = &cell->wc;
-                wc_count = 1;
            }

-            for (size_t w_idx = 0; w_idx < wc_count; w_idx++) {
-                char32_t wc = wcs[w_idx];
+            /* Convert wide character to utf8 */
+            for (size_t i = 0; i < wc_count; i++) {
+                char buf[16];
+                size_t char_len = c32rtomb(buf, wc[i], &ps);

-                switch (state) {
-                case STATE_PROTOCOL:
-                  for (size_t i = 0; i < max_prot_len - 1; i++) {
-                    proto_chars[i] = proto_chars[i + 1];
-                    proto_start[i] = proto_start[i + 1];
-                  }
+                if (char_len == (size_t)-1)
+                    continue;

-                  if (proto_char_count >= max_prot_len)
-                    proto_char_count = max_prot_len - 1;
+                for (size_t j = 0; j < char_len; j++) {
+                    const size_t requires_size = vline->len + char_len;

-                  proto_chars[max_prot_len - 1] = wc;
-                  proto_start[max_prot_len - 1] = (struct coord){c, r};
-                  proto_char_count++;
-
-                  for (size_t i = 0; i < conf->url.prot_count; i++) {
-                    size_t prot_len = c32len(conf->url.protocols[i]);
-
-                    if (proto_char_count < prot_len)
-                      continue;
-
-                    const char32_t *proto =
-                        &proto_chars[max_prot_len - prot_len];
-
-                    if (c32ncasecmp(conf->url.protocols[i], proto, prot_len) ==
-                        0) {
-                      state = STATE_URL;
-                      start = proto_start[max_prot_len - prot_len];
-
-                      c32ncpy(url, proto, prot_len);
-                      len = prot_len;
-
-                      parenthesis = brackets = ltgts = 0;
-                      break;
-                    }
-                  }
-                  break;
-
-                case STATE_URL: {
-                  const char32_t *match =
-                      bsearch(&wc, uri_characters, uri_characters_count,
-                              sizeof(uri_characters[0]), &c32cmp_single);
-
-                  bool emit_url = false;
-
-                  if (match == NULL) {
-                    /*
-                     * Character is not a valid URI character. Emit
-                     * the URL we've collected so far, *without*
-                     * including _this_ character.
-                     */
-                    emit_url = true;
-                  } else {
-                    xassert(*match == wc);
-
-                    switch (wc) {
-                    default:
-                      url[len++] = wc;
-                      break;
-
-                    case U'(':
-                      parenthesis++;
-                      url[len++] = wc;
-                      break;
-
-                    case U'[':
-                      brackets++;
-                      url[len++] = wc;
-                      break;
-
-                    case U'<':
-                      ltgts++;
-                      url[len++] = wc;
-                      break;
-
-                    case U')':
-                      if (--parenthesis < 0)
-                        emit_url = true;
-                      else
-                        url[len++] = wc;
-                      break;
-
-                    case U']':
-                      if (--brackets < 0)
-                        emit_url = true;
-                      else
-                        url[len++] = wc;
-                      break;
-
-                    case U'>':
-                      if (--ltgts < 0)
-                        emit_url = true;
-                      else
-                        url[len++] = wc;
-                      break;
-                    }
-                  }
-
-                  if (c >= term->cols - 1 && row->linebreak) {
-                    /*
-                     * Endpoint is inclusive, and we'll be subtracting
-                     * 1 from the column when emitting the URL.
-                     */
-                    c++;
-                    emit_url = true;
-                  }
-
-                  if (emit_url) {
-                    struct coord end = {c, r};
-
-                    if (--end.col < 0) {
-                      end.row--;
-                      end.col = term->cols - 1;
+                    /* Need to grow? Remember to save at least one byte for terminator */
+                    if (vline->sz == 0 || requires_size > vline->sz - 1) {
+                        const size_t new_size = requires_size * 2;
+                        vline->utf8 = xreallocarray(vline->utf8, new_size, 1);
+                        vline->map = xreallocarray(vline->map, new_size, sizeof(vline->map[0]));
+                        vline->sz = new_size;
                    }

-                    /* Heuristic to remove trailing characters that
-                     * are valid URL characters, but typically not at
-                     * the end of the URL */
-                    bool done = false;
-                    do {
-                      switch (url[len - 1]) {
-                      case U'.':
-                      case U',':
-                      case U':':
-                      case U';':
-                      case U'?':
-                      case U'!':
-                      case U'"':
-                      case U'\'':
-                      case U'%':
-                        len--;
-                        end.col--;
-                        if (end.col < 0) {
-                          end.row--;
-                          end.col = term->cols - 1;
-                        }
-                        break;
-
-                      default:
-                        done = true;
-                        break;
-                      }
-                    } while (!done);
-
-                    url[len] = U'\0';
-
-                    start.row += term->grid->view;
-                    end.row += term->grid->view;
-
-                    char *url_utf8 = ac32tombs(url);
-                    if (url_utf8 != NULL) {
-                      tll_push_back(
-                          *urls,
-                          ((struct url){.id = (uint64_t)rand() << 32 | rand(),
-                                        .url = url_utf8,
-                                        .range =
-                                            {
-                                                .start = start,
-                                                .end = end,
-                                            },
-                                        .action = action,
-                                        .osc8 = false}));
-                    }
-
-                    state = STATE_PROTOCOL;
-                    len = 0;
-                    parenthesis = brackets = ltgts = 0;
-                  }
-                  break;
-                }
+                    vline->utf8[vline->len + j] = buf[j];
+                    vline->map[vline->len + j] = (struct coord){c, term->grid->view + r};
                }
+
+                vline->len += char_len;
            }
        }
+
+        /*
+         * Close the current logical line at a hard line break. The
+         * last visible row closes it too, even when that row is
+         * soft-wrapped; otherwise utf8[] would be handed to regexec()
+         * without a terminating NUL.
+         *
+         * There is always room for the terminator: the grow logic
+         * above reserves one spare byte.
+         */
+        if ((row->linebreak || r == term->rows - 1) && vline->len > 0) {
+            vline->utf8[vline->len++] = '\0';
+            ps = (mbstate_t){0};  /* Next line starts in the initial conversion state */
+
+            vline_idx++;
+            vline = &vlines[vline_idx];
+        }
+    }
+
+    /*
+     * Run the regex over each logical line, and emit one URL entry
+     * per match. The reported text is the first marked
+     * sub-expression; fall back to the whole match if the pattern
+     * has none, so that matches[] is never indexed out of bounds.
+     */
+    const size_t group = preg->re_nsub > 0 ? 1 : 0;
+
+    for (size_t i = 0; i < ALEN(vlines); i++) {
+        const struct vline *v = &vlines[i];
+        if (v->utf8 == NULL)
+            continue;
+
+        const char *search_string = v->utf8;
+        int eflags = 0;
+
+        while (true) {
+            regmatch_t matches[preg->re_nsub + 1];
+            int r = regexec(
+                preg, search_string, preg->re_nsub + 1, matches, eflags);
+
+            /* Stop on REG_NOMATCH, but also on errors (e.g.
+             * REG_ESPACE); matches[] is only valid on success */
+            if (r != 0)
+                break;
+
+            const regmatch_t *m = &matches[group];
+
+            /* rm_so is -1 when the sub-expression did not take part
+             * in the match, and an empty match has no cells to tag */
+            if (m->rm_so >= 0 && m->rm_eo > m->rm_so) {
+                const size_t mlen = m->rm_eo - m->rm_so;
+                const size_t start = &search_string[m->rm_so] - v->utf8;
+                const size_t end = start + mlen;
+
+                LOG_DBG(
+                    "regex match at offset %zu: %.*s, row/col = %dx%d",
+                    start, (int)mlen, &v->utf8[start],
+                    v->map[start].row, v->map[start].col);
+
+                tll_push_back(
+                    *urls,
+                    ((struct url){
+                        .id = (uint64_t)rand() << 32 | rand(),
+                        .url = xstrndup(&v->utf8[start], mlen),
+                        .range = {
+                            .start = v->map[start],
+                            .end = v->map[end - 1], /* Inclusive */
+                        },
+                        .action = action,
+                        .osc8 = false}));
+            }
+
+            if (matches[0].rm_eo > 0) {
+                search_string += matches[0].rm_eo;
+            } else {
+                /* Empty overall match: step one byte, or we would
+                 * match the same position forever */
+                if (*search_string == '\0')
+                    break;
+                search_string++;
+            }
+
+            /* The remainder no longer starts at the beginning of the
+             * line; do not let '^' match there */
+            eflags = REG_NOTBOL;
+        }
+
+        free(v->utf8);
+        free(v->map);
+    }
 }

@ -629,11 +527,13 @@ remove_overlapping(url_list_t *urls, int cols)
 }

 void
-urls_collect(const struct terminal *term, enum url_action action, url_list_t *urls)
+urls_collect(const struct terminal *term, enum url_action action,
+             const regex_t *preg, bool osc8, url_list_t *urls)  /* osc8: also collect OSC-8 URIs */
 {
    xassert(tll_length(term->urls) == 0);
-    osc8_uris(term, action, urls);
-    auto_detected(term, action, urls);
+    if (osc8)  /* OSC-8 URIs are only collected when the caller asks for them */
+        osc8_uris(term, action, urls);
+    regex_detected(term, action, preg, urls);  /* Matches of 'preg' are always collected */
    remove_overlapping(urls, term->grid->num_cols);
 }

@ -816,7 +716,7 @@ tag_cells_for_url(struct terminal *term, const struct url *url, bool value)
 }

 void
-urls_render(struct terminal *term)
+urls_render(struct terminal *term, const struct config_spawn_template *launch)
 {
    struct wl_window *win = term->window;

@ -851,6 +751,9 @@ urls_render(struct terminal *term)
    /* Snapshot the current grid */
    term->url_grid_snapshot = grid_snapshot(term->grid);

+    /* Remember which launcher to use */
+    term->url_launch = launch;
+
    xassert(tll_length(win->urls) == 0);
    tll_foreach(win->term->urls, it) {
        struct wl_url url = {.url = &it->item};
--- a/url-mode.h
+++ b/url-mode.h
@ -14,10 +14,11 @@ static inline bool urls_mode_is_active(const struct terminal *term)
 }

 void urls_collect(
-    const struct terminal *term, enum url_action action, url_list_t *urls);
+    const struct terminal *term, enum url_action action, const regex_t *preg,
+    bool osc8, url_list_t *urls);
 void urls_assign_key_combos(const struct config *conf, url_list_t *urls);

-void urls_render(struct terminal *term);
+void urls_render(struct terminal *term, const struct config_spawn_template *launch);
 void urls_reset(struct terminal *term);

 void urls_input(struct seat *seat, struct terminal *term,
--- a/xmalloc.c
+++ b/xmalloc.c
@ -32,8 +32,17 @@ xcalloc(size_t nmemb, size_t size)
 void *
 xrealloc(void *ptr, size_t size)
 {
+    xassert(size != 0);  /* Zero-sized requests are treated as a caller bug */
    void *alloc = realloc(ptr, size);
-    return unlikely(size == 0) ? alloc : check_alloc(alloc);
+    return check_alloc(alloc);  /* NOTE(review): presumably aborts on NULL - confirm in check_alloc() */
+}
+
+void *
+xreallocarray(void *ptr, size_t n, size_t size)
+{
+    xassert(n != 0 && size != 0);  /* Same contract as xrealloc(): no zero-sized requests */
+    void *alloc = reallocarray(ptr, n, size);  /* NOTE(review): n * size overflow is presumably rejected by reallocarray() - confirm for the target libc */
+    return check_alloc(alloc);
 }

 char *
--- a/xmalloc.h
+++ b/xmalloc.h
@ -12,6 +12,7 @@
 void *xmalloc(size_t size) XMALLOC;
 void *xcalloc(size_t nmemb, size_t size) XMALLOC;
 void *xrealloc(void *ptr, size_t size);
+void *xreallocarray(void *ptr, size_t n, size_t size);
 char *xstrdup(const char *str) XSTRDUP;
 char *xstrndup(const char *str, size_t n) XSTRDUP;
 char *xasprintf(const char *format, ...) PRINTF(1) XMALLOC;