mirror of
https://codeberg.org/dnkl/foot.git
synced 2026-02-05 04:06:08 -05:00
config: update default URL regex
The old one is in some cases too liberal. The new one is stricter in two ways:

1. The protocol list is now explicit, rather than matching anything://
2. Allowed characters are now limited to the "safe character set", the "reserved character set", and some from the "unsafe character set".

Furthermore, some of the characters are restricted in how/when they are allowed:

1. Periods, commas, question marks etc. are allowed inside a URL, but not at the end.
2. [ ], ( ), " " and ' ' are allowed, but only when balanced. This allows us to match e.g. [http://foo.bar/foo[bar]] correctly.

Closes #2016
This commit is contained in:
parent
a50f78c599
commit
1760cb6ab8
4 changed files with 45 additions and 33 deletions
|
|
@ -79,9 +79,14 @@
|
||||||
`Mod4` etc) in key bindings are now recognized as being virtual, and
|
`Mod4` etc) in key bindings are now recognized as being virtual, and
|
||||||
are automatically mapped to the corresponding real modifier. This
|
are automatically mapped to the corresponding real modifier. This
|
||||||
means you can use e.g. `Alt+b` instead of `Mod1+b`.
|
means you can use e.g. `Alt+b` instead of `Mod1+b`.
|
||||||
|
* Default URL regex changed to a much stricter variant
|
||||||
|
([#2016][2016]). You can manually set the [old
|
||||||
|
one](https://codeberg.org/dnkl/foot/src/tag/1.21.0/foot.ini#L72), if
|
||||||
|
you prefer it over the new regex.
|
||||||
|
|
||||||
[2006]: https://codeberg.org/dnkl/foot/issues/2006
|
[2006]: https://codeberg.org/dnkl/foot/issues/2006
|
||||||
[2009]: https://codeberg.org/dnkl/foot/issues/2009
|
[2009]: https://codeberg.org/dnkl/foot/issues/2009
|
||||||
|
[2016]: https://codeberg.org/dnkl/foot/issues/2016
|
||||||
|
|
||||||
|
|
||||||
### Deprecated
|
### Deprecated
|
||||||
|
|
|
||||||
69
config.c
69
config.c
|
|
@ -3446,39 +3446,46 @@ config_load(struct config *conf, const char *conf_path,
|
||||||
tokenize_cmdline("xdg-open ${url}", &conf->url.launch.argv.args);
|
tokenize_cmdline("xdg-open ${url}", &conf->url.launch.argv.args);
|
||||||
|
|
||||||
{
|
{
|
||||||
/*
|
const char *url_regex_string =
|
||||||
* Based on https://gist.github.com/gruber/249502, but modified:
|
"("
|
||||||
* - Do not allow {} at all
|
|
||||||
* - Do allow matched []
|
|
||||||
*/
|
|
||||||
const char *url_regex_string =
|
|
||||||
"("
|
"("
|
||||||
"("
|
"(https?://|mailto:|ftp://|file:|ssh:|ssh://|git://|tel:|magnet:|ipfs://|ipns://|gemini://|gopher://|news:)"
|
||||||
"[a-z][[:alnum:]-]+:" // protocol
|
"|"
|
||||||
"("
|
"www\\."
|
||||||
"/{1,3}|[a-z0-9%]" // slashes (what's the OR part for?)
|
|
||||||
")"
|
|
||||||
"|"
|
|
||||||
"www[:digit:]{0,3}[.]"
|
|
||||||
//"|"
|
|
||||||
//"[a-z0-9.\\-]+[.][a-z]{2,4}/" /* "looks like domain name followed by a slash" - remove? */
|
|
||||||
")"
|
|
||||||
"("
|
|
||||||
"[^[:space:](){}<>]+"
|
|
||||||
"|"
|
|
||||||
"\\(([^[:space:](){}<>]+|(\\([^[:space:](){}<>]+\\)))*\\)"
|
|
||||||
"|"
|
|
||||||
"\\[([^]\\[[:space:](){}<>]+|(\\[[^]\\[[:space:](){}<>]+\\]))*\\]"
|
|
||||||
")+"
|
|
||||||
"("
|
|
||||||
"\\(([^[:space:](){}<>]+|(\\([^[:space:](){}<>]+\\)))*\\)"
|
|
||||||
"|"
|
|
||||||
"\\[([^]\\[[:space:](){}<>]+|(\\[[^]\\[[:space:](){}<>]+\\]))*\\]"
|
|
||||||
"|"
|
|
||||||
"[^]\\[[:space:]`!(){};:'\".,<>?«»“”‘’]"
|
|
||||||
")"
|
|
||||||
")"
|
")"
|
||||||
;
|
"("
|
||||||
|
/* Safe + reserved + some unsafe characters; parentheses, brackets and quotes are omitted (we only allow them when balanced) */
|
||||||
|
"[0-9a-zA-Z:/?#@!$&*+,;=.~_%^\\-]+"
|
||||||
|
"|"
|
||||||
|
/* Balanced "(...)". Content is same as above, plus all _other_ characters we require to be balanced */
|
||||||
|
"\\([]\\[\"0-9a-zA-Z:/?#@!$&'*+,;=.~_%^\\-]*\\)"
|
||||||
|
"|"
|
||||||
|
/* Balanced "[...]". Content is same as above, plus all _other_ characters we require to be balanced */
|
||||||
|
"\\[[\\(\\)\"0-9a-zA-Z:/?#@!$&'*+,;=.~_%^\\-]*\\]"
|
||||||
|
"|"
|
||||||
|
/* Balanced '"..."'. Content is same as above, plus all _other_ characters we require to be balanced */
|
||||||
|
"\"[]\\[\\(\\)0-9a-zA-Z:/?#@!$&'*+,;=.~_%^\\-]*\""
|
||||||
|
"|"
|
||||||
|
/* Balanced "'...'". Content is same as above, plus all _other_ characters we require to be balanced */
|
||||||
|
"'[]\\[\\(\\)0-9a-zA-Z:/?#@!$&*+,;=.~_%^\\-]*'"
|
||||||
|
")+"
|
||||||
|
"("
|
||||||
|
/* Same as above, except :?!,;. are excluded */
|
||||||
|
"[0-9a-zA-Z/#@$&*+=~_%^\\-]"
|
||||||
|
"|"
|
||||||
|
/* Balanced "(...)". Content is same as above, plus all _other_ characters we require to be balanced */
|
||||||
|
"\\([]\\[\"0-9a-zA-Z:/?#@!$&'*+,;=.~_%^\\-]*\\)"
|
||||||
|
"|"
|
||||||
|
/* Balanced "[...]". Content is same as above, plus all _other_ characters we require to be balanced */
|
||||||
|
"\\[[\\(\\)\"0-9a-zA-Z:/?#@!$&'*+,;=.~_%^\\-]*\\]"
|
||||||
|
"|"
|
||||||
|
/* Balanced '"..."'. Content is same as above, plus all _other_ characters we require to be balanced */
|
||||||
|
"\"[]\\[\\(\\)0-9a-zA-Z:/?#@!$&'*+,;=.~_%^\\-]*\""
|
||||||
|
"|"
|
||||||
|
/* Balanced "'...'". Content is same as above, plus all _other_ characters we require to be balanced */
|
||||||
|
"'[]\\[\\(\\)0-9a-zA-Z:/?#@!$&*+,;=.~_%^\\-]*'"
|
||||||
|
")"
|
||||||
|
")";
|
||||||
|
|
||||||
int r = regcomp(&conf->url.preg, url_regex_string, REG_EXTENDED);
|
int r = regcomp(&conf->url.preg, url_regex_string, REG_EXTENDED);
|
||||||
xassert(r == 0);
|
xassert(r == 0);
|
||||||
|
|
|
||||||
|
|
@ -828,7 +828,7 @@ section.
|
||||||
whole regex match to be used as a URL, surround all of it with
|
whole regex match to be used as a URL, surround all of it with
|
||||||
parentheses: *(regex-pattern)*.
|
parentheses: *(regex-pattern)*.
|
||||||
|
|
||||||
Default: _(([a-z][[:alnum:]-]+:(/{1,3}|[a-z0-9%])|www[:digit:]{0,3}[.])([^[:space:](){}<>]+|\(([^[:space:](){}<>]+|(\([^[:space:](){}<>]+\)))\*\)|\[([^]\[[:space:](){}<>]+|(\[[^]\[[:space:](){}<>]+\]))\*\])+(\(([^[:space:](){}<>]+|(\([^[:space:](){}<>]+\)))\*\)|\[([^]\[[:space:](){}<>]+|(\[[^]\[[:space:](){}<>]+\]))\*\]|[^]\[[:space:]`!(){};:'".,<>?«»“”‘’]))_
|
Default: _(((https?://|mailto:|ftp://|file:|ssh:|ssh://|git://|tel:|magnet:|ipfs://|ipns://|gemini://|gopher://|news:)|www\.)([0-9a-zA-Z:/?#@!$&\*+,;=.~\_%^\-]+|\([]\["0-9a-zA-Z:/?#@!$&'\*+,;=.~\_%^\-]\*\)|\[[\(\)"0-9a-zA-Z:/?#@!$&'\*+,;=.~\_%^\-]\*\]|"[]\[\(\)0-9a-zA-Z:/?#@!$&'\*+,;=.~\_%^\-]\*"|'[]\[\(\)0-9a-zA-Z:/?#@!$&\*+,;=.~\_%^\-]\*')+([0-9a-zA-Z/#@$&\*+=~\_%^\-]|\([]\["0-9a-zA-Z:/?#@!$&'\*+,;=.~\_%^\-]\*\)|\[[\(\)"0-9a-zA-Z:/?#@!$&'\*+,;=.~\_%^\-]\*\]|"[]\[\(\)0-9a-zA-Z:/?#@!$&'\*+,;=.~\_%^\-]\*"|'[]\[\(\)0-9a-zA-Z:/?#@!$&\*+,;=.~\_%^\-]\*'))_
|
||||||
|
|
||||||
# SECTION: regex
|
# SECTION: regex
|
||||||
|
|
||||||
|
|
|
||||||
2
foot.ini
2
foot.ini
|
|
@ -69,7 +69,7 @@
|
||||||
# launch=xdg-open ${url}
|
# launch=xdg-open ${url}
|
||||||
# label-letters=sadfjklewcmpgh
|
# label-letters=sadfjklewcmpgh
|
||||||
# osc8-underline=url-mode
|
# osc8-underline=url-mode
|
||||||
# regex=(([a-z][[:alnum:]-]+:(/{1,3}|[a-z0-9%])|www[:digit:]{0,3}[.])([^[:space:](){}<>]+|\(([^[:space:](){}<>]+|(\([^[:space:](){}<>]+\)))*\)|\[([^]\[[:space:](){}<>]+|(\[[^]\[[:space:](){}<>]+\]))*\])+(\(([^[:space:](){}<>]+|(\([^[:space:](){}<>]+\)))*\)|\[([^]\[[:space:](){}<>]+|(\[[^]\[[:space:](){}<>]+\]))*\]|[^]\[[:space:]`!(){};:'".,<>?«»“”‘’]))
|
# regex=(((https?://|mailto:|ftp://|file:|ssh:|ssh://|git://|tel:|magnet:|ipfs://|ipns://|gemini://|gopher://|news:)|www\.)([0-9a-zA-Z:/?#@!$&*+,;=.~_%^\-]+|\([]\["0-9a-zA-Z:/?#@!$&'*+,;=.~_%^\-]*\)|\[[\(\)"0-9a-zA-Z:/?#@!$&'*+,;=.~_%^\-]*\]|"[]\[\(\)0-9a-zA-Z:/?#@!$&'*+,;=.~_%^\-]*"|'[]\[\(\)0-9a-zA-Z:/?#@!$&*+,;=.~_%^\-]*')+([0-9a-zA-Z/#@$&*+=~_%^\-]|\([]\["0-9a-zA-Z:/?#@!$&'*+,;=.~_%^\-]*\)|\[[\(\)"0-9a-zA-Z:/?#@!$&'*+,;=.~_%^\-]*\]|"[]\[\(\)0-9a-zA-Z:/?#@!$&'*+,;=.~_%^\-]*"|'[]\[\(\)0-9a-zA-Z:/?#@!$&*+,;=.~_%^\-]*'))
|
||||||
|
|
||||||
# You can define your own regexes by adding a section called
|
# You can define your own regexes by adding a section called
|
||||||
# 'regex:<ID>' with a 'regex' and 'launch' key. These can then be tied
|
# 'regex:<ID>' with a 'regex' and 'launch' key. These can then be tied
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue