diff --git a/config.c b/config.c index 82a4811f..604c0a76 100644 --- a/config.c +++ b/config.c @@ -1256,6 +1256,12 @@ parse_section_url(struct context *ctx) return false; } + if (preg.re_nsub == 0) { + LOG_CONTEXTUAL_ERR("invalid regex: no marked subexpression(s)"); + regfree(&preg); + return false; + } + regfree(&conf->url.preg); free(conf->url.regex); @@ -1300,6 +1306,12 @@ parse_section_regex(struct context *ctx) return false; } + if (preg.re_nsub == 0) { + LOG_CONTEXTUAL_ERR("invalid regex: no marked subexpression(s)"); + regfree(&preg); + return false; + } + if (regex == NULL) { tll_push_back(conf->custom_regexes, ((struct custom_regex){.name = xstrdup(regex_name)})); @@ -3426,33 +3438,37 @@ config_load(struct config *conf, const char *conf_path, */ const char *url_regex_string = "(" - "[a-z][[:alnum:]-]+:" // protocol "(" - "/{1,3}|[a-z0-9%]" // slashes (what's the OR part for?) + "[a-z][[:alnum:]-]+:" // protocol + "(" + "/{1,3}|[a-z0-9%]" // slashes (what's the OR part for?) + ")" + "|" + "www[:digit:]{0,3}[.]" + //"|" + //"[a-z0-9.\\-]+[.][a-z]{2,4}/" /* "looks like domain name followed by a slash" - remove? */ + ")" + "(" + "[^[:space:](){}<>]+" + "|" + "\\(([^[:space:](){}<>]+|(\\([^[:space:](){}<>]+\\)))*\\)" + "|" + "\\[([^]\\[[:space:](){}<>]+|(\\[[^]\\[[:space:](){}<>]+\\]))*\\]" + ")+" + "(" + "\\(([^[:space:](){}<>]+|(\\([^[:space:](){}<>]+\\)))*\\)" + "|" + "\\[([^]\\[[:space:](){}<>]+|(\\[[^]\\[[:space:](){}<>]+\\]))*\\]" + "|" + "[^]\\[[:space:]`!(){};:'\".,<>?«»“”‘’]" ")" - "|" - "www[:digit:]{0,3}[.]" - //"|" - //"[a-z0-9.\\-]+[.][a-z]{2,4}/" /* "looks like domain name followed by a slash" - remove? 
*/ - ")" - "(" - "[^[:space:](){}<>]+" - "|" - "\\(([^[:space:](){}<>]+|(\\([^[:space:](){}<>]+\\)))*\\)" - "|" - "\\[([^]\\[[:space:](){}<>]+|(\\[[^]\\[[:space:](){}<>]+\\]))*\\]" - ")+" - "(" - "\\(([^[:space:](){}<>]+|(\\([^[:space:](){}<>]+\\)))*\\)" - "|" - "\\[([^]\\[[:space:](){}<>]+|(\\[[^]\\[[:space:](){}<>]+\\]))*\\]" - "|" - "[^]\\[[:space:]`!(){};:'\".,<>?«»“”‘’]" ")" ; + int r = regcomp(&conf->url.preg, url_regex_string, REG_EXTENDED); xassert(r == 0); conf->url.regex = xstrdup(url_regex_string); + xassert(conf->url.preg.re_nsub >= 1); } tll_foreach(*initial_user_notifications, it) { diff --git a/doc/foot.ini.5.scd b/doc/foot.ini.5.scd index af355d68..742281d4 100644 --- a/doc/foot.ini.5.scd +++ b/doc/foot.ini.5.scd @@ -786,11 +786,13 @@ section. Default: _sadfjklewcmpgh_. *regex* - Regular expression to use when auto-detecting URLs. The format is - "POSIX-Extended Regular Expressions". + "POSIX-Extended Regular Expressions". Note that the first marked + subexpression is used as the URL. In other words, if you want the + whole regex match to be used as a URL, surround all of it with + parentheses: *(regex-pattern)*. 
- Default: _([a-z][[:alnum:]-]+:(/{1,3}|[a-z0-9%])|www[:digit:]{0,3}[.])([^[:space:](){}<>]+|\(([^[:space:](){}<>]+|(\([^[:space:](){}<>]+\)))\*\)|\[([^]\[[:space:](){}<>]+|(\[[^]\[[:space:](){}<>]+\]))\*\])+(\(([^[:space:](){}<>]+|(\([^[:space:](){}<>]+\)))\*\)|\[([^]\[[:space:](){}<>]+|(\[[^]\[[:space:](){}<>]+\]))\*\]|[^]\[[:space:]`!(){};:'".,<>?«»“”‘’])_ + Default: _(([a-z][[:alnum:]-]+:(/{1,3}|[a-z0-9%])|www[:digit:]{0,3}[.])([^[:space:](){}<>]+|\(([^[:space:](){}<>]+|(\([^[:space:](){}<>]+\)))\*\)|\[([^]\[[:space:](){}<>]+|(\[[^]\[[:space:](){}<>]+\]))\*\])+(\(([^[:space:](){}<>]+|(\([^[:space:](){}<>]+\)))\*\)|\[([^]\[[:space:](){}<>]+|(\[[^]\[[:space:](){}<>]+\]))\*\]|[^]\[[:space:]`!(){};:'".,<>?«»“”‘’]))_ # SECTION: regex @@ -817,7 +819,12 @@ regex-copy=[foo] Control+Mod1+Shift+q *regex* Regular expression to use when matching text. The format is - "POSIX-Extended Regular Expressions". Default: _not set_. + "POSIX-Extended Regular Expressions". Note that the first marked + subexpression is used as the URL. In other words, if you want the + whole regex match to be used as a URL, surround all of it with + parentheses: *(regex-pattern)*. + + Default: _not set_. 
# SECTION: cursor diff --git a/foot.ini b/foot.ini index 7514c02b..a9a790ac 100644 --- a/foot.ini +++ b/foot.ini @@ -69,7 +69,7 @@ # launch=xdg-open ${url} # label-letters=sadfjklewcmpgh # osc8-underline=url-mode -# regex=([a-z][[:alnum:]-]+:(/{1,3}|[a-z0-9%])|www[:digit:]{0,3}[.])([^[:space:](){}<>]+|\(([^[:space:](){}<>]+|(\([^[:space:](){}<>]+\)))*\)|\[([^]\[[:space:](){}<>]+|(\[[^]\[[:space:](){}<>]+\]))*\])+(\(([^[:space:](){}<>]+|(\([^[:space:](){}<>]+\)))*\)|\[([^]\[[:space:](){}<>]+|(\[[^]\[[:space:](){}<>]+\]))*\]|[^]\[[:space:]`!(){};:'".,<>?«»“”‘’]) +# regex=(([a-z][[:alnum:]-]+:(/{1,3}|[a-z0-9%])|www[:digit:]{0,3}[.])([^[:space:](){}<>]+|\(([^[:space:](){}<>]+|(\([^[:space:](){}<>]+\)))*\)|\[([^]\[[:space:](){}<>]+|(\[[^]\[[:space:](){}<>]+\]))*\])+(\(([^[:space:](){}<>]+|(\([^[:space:](){}<>]+\)))*\)|\[([^]\[[:space:](){}<>]+|(\[[^]\[[:space:](){}<>]+\]))*\]|[^]\[[:space:]`!(){};:'".,<>?«»“”‘’])) # You can define your own regex's, by adding a section called # 'regex:' with a 'regex' and 'launch' key. These can then be tied diff --git a/url-mode.c b/url-mode.c index 3108ec12..0101de19 100644 --- a/url-mode.c +++ b/url-mode.c @@ -400,13 +400,13 @@ regex_detected(const struct terminal *term, enum url_action action, if (r == REG_NOMATCH) break; - const size_t mlen = matches[0].rm_eo - matches[0].rm_so; - const size_t start = &search_string[matches[0].rm_so] - v->utf8; + const size_t mlen = matches[1].rm_eo - matches[1].rm_so; + const size_t start = &search_string[matches[1].rm_so] - v->utf8; const size_t end = start + mlen; LOG_DBG( "regex match at row %d: %.*srow/col = %dx%d", - matches[0].rm_so, (int)mlen, &search_string[matches[0].rm_so], + matches[1].rm_so, (int)mlen, &search_string[matches[1].rm_so], v->map[start].row, v->map[start].col); tll_push_back(