url-mode+config: wip: add url.regex option

This commit is contained in:
Daniel Eklöf 2025-01-30 12:26:23 +01:00
parent e76d8dd7af
commit d41b28bd02
No known key found for this signature in database
GPG key ID: 5BBD4992C116573F
5 changed files with 76 additions and 54 deletions

View file

@ -1234,6 +1234,26 @@ parse_section_url(struct context *ctx)
(int *)&conf->url.osc8_underline);
}
else if (streq(key, "regex")) {
    /*
     * url.regex: compile the user supplied pattern into a temporary
     * regex_t first, so that an invalid pattern is reported without
     * touching the regex that is currently configured.
     */
    regex_t compiled;
    const char *pattern = ctx->value;
    const int rc = regcomp(&compiled, pattern, REG_EXTENDED);

    if (rc == 0) {
        /* Drop the previously installed pattern, then take over the new one */
        free(conf->url.regex);
        regfree(&conf->url.preg);

        conf->url.preg = compiled;
        conf->url.regex = xstrdup(pattern);
        return true;
    }

    /* Compilation failed; translate the error code into a readable message */
    char msg[128];
    regerror(rc, &compiled, msg, sizeof(msg));
    LOG_CONTEXTUAL_ERR("invalid regex: %s", msg);
    return false;
}
else {
LOG_CONTEXTUAL_ERR("not a valid option: %s", key);
return false;
@ -3241,6 +3261,42 @@ config_load(struct config *conf, const char *conf_path,
tokenize_cmdline("--action ${action-name}=${action-label}", &conf->desktop_notifications.command_action_arg.argv.args);
tokenize_cmdline("xdg-open ${url}", &conf->url.launch.argv.args);
{
    /*
     * Default URL regex (POSIX extended syntax), compiled once here and
     * stored in conf->url.preg; the source string is kept in
     * conf->url.regex.
     *
     * Based on https://gist.github.com/gruber/249502, but modified:
     * - Do not allow {} at all
     * - Do allow matched []
     *
     * Structure: <prefix> <body>+ <final character/group>
     */
    const char *url_regex_string =
        /* Prefix: "scheme:" or "www." / "wwwNNN." */
        "("
            "[a-z][[:alnum:]-]+:"                    // protocol
            "("
                /*
                 * Either 1-3 slashes ("http://", "file:///"), or a
                 * single [a-z0-9%] character directly after the colon,
                 * for schemes written without slashes ("mailto:user").
                 */
                "/{1,3}|[a-z0-9%]"
            ")"
            "|"
            /*
             * "www", up to three digits, then a dot. The character
             * class must be nested inside a bracket expression: a bare
             * [:digit:] is a bracket expression matching the literal
             * characters ':', 'd', 'g', 'i' and 't', not digits.
             */
            "www[[:digit:]]{0,3}[.]"
            //"|"
            //"[a-z0-9.\\-]+[.][a-z]{2,4}/"  /* "looks like domain name followed by a slash" - remove? */
        ")"
        /* Body: runs of plain characters, or balanced () / [] groups (one nesting level) */
        "("
            "[^[:space:](){}<>]+"
            "|"
            "\\(([^[:space:](){}<>]+|(\\([^[:space:](){}<>]+\\)))*\\)"
            "|"
            "\\[([^]\\[[:space:](){}<>]+|(\\[[^]\\[[:space:](){}<>]+\\]))*\\]"
        ")+"
        /* End: a balanced group, or a character that is not trailing punctuation */
        "("
            "\\(([^[:space:](){}<>]+|(\\([^[:space:](){}<>]+\\)))*\\)"
            "|"
            "\\[([^]\\[[:space:](){}<>]+|(\\[[^]\\[[:space:](){}<>]+\\]))*\\]"
            "|"
            "[^]\\[[:space:]`!(){};:'\".,<>?«»“”‘’]"
        ")"
        ;

    /* The pattern is a compile-time constant; failing to compile it is a programming error */
    int r = regcomp(&conf->url.preg, url_regex_string, REG_EXTENDED);
    xassert(r == 0);
    conf->url.regex = xstrdup(url_regex_string);
}
tll_foreach(*initial_user_notifications, it) {
tll_push_back(conf->notifications, it->item);
@ -3476,6 +3532,8 @@ config_clone(const struct config *old)
conf->url.label_letters = xc32dup(old->url.label_letters);
spawn_template_clone(&conf->url.launch, &old->url.launch);
conf->url.regex = xstrdup(old->url.regex);
regcomp(&conf->url.preg, conf->url.regex, REG_EXTENDED);
key_binding_list_clone(&conf->bindings.key, &old->bindings.key);
key_binding_list_clone(&conf->bindings.search, &old->bindings.search);
@ -3556,6 +3614,8 @@ config_free(struct config *conf)
free(conf->url.label_letters);
spawn_template_free(&conf->url.launch);
regfree(&conf->url.preg);
free(conf->url.regex);
free_key_binding_list(&conf->bindings.key);
free_key_binding_list(&conf->bindings.search);

View file

@ -1,7 +1,8 @@
#pragma once
#include <stdint.h>
#include <regex.h>
#include <stdbool.h>
#include <stdint.h>
#include <uchar.h>
#include <xkbcommon/xkbcommon.h>
@ -219,6 +220,9 @@ struct config {
OSC8_UNDERLINE_URL_MODE,
OSC8_UNDERLINE_ALWAYS,
} osc8_underline;
char *regex;
regex_t preg;
} url;
struct {

View file

@ -782,6 +782,11 @@ xdgtoken=95ebdfe56e4f47ddb5bba9d7dc3a2c35
Default: _sadfjklewcmpgh_.
*regex*
Regular expression to use when auto-detecting URLs. The syntax is
POSIX Extended Regular Expressions (ERE).
Default: _TODO_
# SECTION: cursor

View file

@ -69,6 +69,8 @@
# launch=xdg-open ${url}
# label-letters=sadfjklewcmpgh
# osc8-underline=url-mode
# regex=TODO
[cursor]
# style=block

View file

@ -380,54 +380,7 @@ regex_detected(const struct terminal *term, enum url_action action, url_list_t *
}
}
/*
* Based on https://gist.github.com/gruber/249502, but modified:
* - Do not allow {} at all
* - Do allow matched []
*/
regex_t preg;
const char *regex_string =
"("
"[a-z][[:alnum:]-]+:" // protocol
"("
"/{1,3}|[a-z0-9%]" // slashes (what's the OR part for?)
")"
"|"
"www[:digit:]{0,3}[.]"
"|"
"[a-z0-9.\\-]+[.][a-z]{2,4}/"
")"
"("
"[^[:space:](){}<>]+"
"|"
"\\(([^[:space:](){}<>]+|(\\([^[:space:](){}<>]+\\)))*\\)"
"|"
"\\[([^]\\[[:space:](){}<>]+|(\\[[^]\\[[:space:](){}<>]+\\]))*\\]"
")+"
"("
"\\(([^[:space:](){}<>]+|(\\([^[:space:](){}<>]+\\)))*\\)"
"|"
"\\[([^]\\[[:space:](){}<>]+|(\\[[^]\\[[:space:](){}<>]+\\]))*\\]"
"|"
"[^]\\[[:space:]`!(){};:'\".,<>?«»“”‘’]"
")"
;
int r = regcomp(&preg, regex_string, REG_EXTENDED);
if (r != 0) {
char err_buf[1024];
regerror(r, &preg, err_buf, sizeof(err_buf));
LOG_ERR("failed to compile regular expression: %s", err_buf);
for (size_t i = 0; i < ALEN(vlines); i++) {
const struct vline *v = &vlines[i];
free(v->utf8);
free(v->map);
}
return;
}
const regex_t *preg = &term->conf->url.preg;
for (size_t i = 0; i < ALEN(vlines); i++) {
const struct vline *v = &vlines[i];
@ -436,9 +389,8 @@ regex_detected(const struct terminal *term, enum url_action action, url_list_t *
const char *search_string = v->utf8;
while (true) {
regmatch_t matches[preg.re_nsub + 1];
r = regexec(&preg, search_string, preg.re_nsub + 1, matches, 0);
regmatch_t matches[preg->re_nsub + 1];
int r = regexec(preg, search_string, preg->re_nsub + 1, matches, 0);
if (r == REG_NOMATCH)
break;
@ -470,9 +422,8 @@ regex_detected(const struct terminal *term, enum url_action action, url_list_t *
free(v->utf8);
free(v->map);
}
regfree(&preg);
}
static void
osc8_uris(const struct terminal *term, enum url_action action, url_list_t *urls)
{