url-mode: use the first *sub* expression as URL

When auto-matching URLs (or custom regular expressions), use the first *subexpression* as the URL, rather than the whole regex match. This allows us to write custom regular expressions with prefix/suffix strings that should not be included in the presented match.
2026-02-04 04:06:06 -05:00 · 2025-02-03 13:56:57 +01:00 · 2025-02-03 13:56:57 +01:00 · a984531ce5
commit a984531ce5
parent 31f536ff8c
4 changed files with 51 additions and 28 deletions
--- a/config.c
+++ b/config.c
@@ -1256,6 +1256,12 @@ parse_section_url(struct context *ctx)
            return false;
        }

+        if (preg.re_nsub == 0) {
+            LOG_CONTEXTUAL_ERR("invalid regex: no marked subexpression(s)");
+            regfree(&preg);
+            return false;
+        }
+
        regfree(&conf->url.preg);
        free(conf->url.regex);

@@ -1300,6 +1306,12 @@ parse_section_regex(struct context *ctx)
            return false;
        }

+        if (preg.re_nsub == 0) {
+            LOG_CONTEXTUAL_ERR("invalid regex: no marked subexpression(s)");
+            regfree(&preg);
+            return false;
+        }
+
        if (regex == NULL) {
            tll_push_back(conf->custom_regexes,
                          ((struct custom_regex){.name = xstrdup(regex_name)}));
@@ -3426,33 +3438,37 @@ config_load(struct config *conf, const char *conf_path,
         */
        const char *url_regex_string =
            "("
-                "[a-z][[:alnum:]-]+:"       // protocol
                "("
-                    "/{1,3}|[a-z0-9%]"     // slashes (what's the OR part for?)
+                    "[a-z][[:alnum:]-]+:"       // protocol
+                    "("
+                        "/{1,3}|[a-z0-9%]"     // slashes (what's the OR part for?)
+                    ")"
+                    "|"
+                    "www[:digit:]{0,3}[.]"
+                    //"|"
+                    //"[a-z0-9.\\-]+[.][a-z]{2,4}/"  /* "looks like domain name followed by a slash" - remove? */
+                ")"
+                "("
+                    "[^[:space:](){}<>]+"
+                    "|"
+                    "\\(([^[:space:](){}<>]+|(\\([^[:space:](){}<>]+\\)))*\\)"
+                    "|"
+                    "\\[([^]\\[[:space:](){}<>]+|(\\[[^]\\[[:space:](){}<>]+\\]))*\\]"
+                ")+"
+                "("
+                    "\\(([^[:space:](){}<>]+|(\\([^[:space:](){}<>]+\\)))*\\)"
+                    "|"
+                    "\\[([^]\\[[:space:](){}<>]+|(\\[[^]\\[[:space:](){}<>]+\\]))*\\]"
+                    "|"
+                    "[^]\\[[:space:]`!(){};:'\".,<>?«»“”‘’]"
                ")"
-                "|"
-                "www[:digit:]{0,3}[.]"
-                //"|"
-                //"[a-z0-9.\\-]+[.][a-z]{2,4}/"  /* "looks like domain name followed by a slash" - remove? */
-            ")"
-            "("
-                "[^[:space:](){}<>]+"
-                "|"
-                "\\(([^[:space:](){}<>]+|(\\([^[:space:](){}<>]+\\)))*\\)"
-                "|"
-                "\\[([^]\\[[:space:](){}<>]+|(\\[[^]\\[[:space:](){}<>]+\\]))*\\]"
-            ")+"
-            "("
-                "\\(([^[:space:](){}<>]+|(\\([^[:space:](){}<>]+\\)))*\\)"
-                "|"
-                "\\[([^]\\[[:space:](){}<>]+|(\\[[^]\\[[:space:](){}<>]+\\]))*\\]"
-                "|"
-                "[^]\\[[:space:]`!(){};:'\".,<>?«»“”‘’]"
            ")"
        ;
+
        int r = regcomp(&conf->url.preg, url_regex_string, REG_EXTENDED);
        xassert(r == 0);
        conf->url.regex = xstrdup(url_regex_string);
+        xassert(conf->url.preg.re_nsub >= 1);
    }

    tll_foreach(*initial_user_notifications, it) {