mirror of
https://codeberg.org/dnkl/foot.git
synced 2026-02-10 04:27:45 -05:00
url-mode: wip: convert to regex matching for auto-detection
This commit is contained in:
parent
32919b1049
commit
1c15ee940d
1 changed files with 145 additions and 0 deletions
145
url-mode.c
145
url-mode.c
|
|
@ -4,6 +4,7 @@
|
|||
#include <string.h>
|
||||
#include <wctype.h>
|
||||
#include <unistd.h>
|
||||
#include <regex.h>
|
||||
|
||||
#include <sys/stat.h>
|
||||
#include <fcntl.h>
|
||||
|
|
@ -291,6 +292,149 @@ urls_input(struct seat *seat, struct terminal *term,
|
|||
}
|
||||
}
|
||||
|
||||
struct charmap {
|
||||
const struct row *row;
|
||||
int col;
|
||||
};
|
||||
|
||||
struct vline {
|
||||
char *utf8;
|
||||
size_t len;
|
||||
size_t sz;
|
||||
struct charmap *map;
|
||||
};
|
||||
|
||||
static void
|
||||
regex_detected(const struct terminal *term, enum url_action action, url_list_t *urls)
|
||||
{
|
||||
struct vline vlines[term->rows];
|
||||
size_t vline_idx = 0;
|
||||
|
||||
memset(vlines, 0, sizeof(vlines));
|
||||
struct vline *vline = &vlines[vline_idx];
|
||||
|
||||
mbstate_t ps = {0};
|
||||
|
||||
for (int row_no = 0; row_no < term->rows; row_no++) {
|
||||
const struct row *row = grid_row_in_view(term->grid, row_no);
|
||||
|
||||
for (int c = 0; c < term->cols; c++) {
|
||||
const struct cell *cell = &row->cells[c];
|
||||
const char32_t *wc = &cell->wc;
|
||||
size_t wc_count = 1;
|
||||
|
||||
if (wc[0] >= CELL_COMB_CHARS_LO && wc[0] <= CELL_COMB_CHARS_HI) {
|
||||
const struct composed *composed =
|
||||
composed_lookup(term->composed, wc[0] - CELL_COMB_CHARS_LO);
|
||||
xassert(composed != NULL);
|
||||
|
||||
wc = composed->chars;
|
||||
wc_count = composed->count;
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < wc_count; i++) {
|
||||
char buf[16];
|
||||
size_t char_len = c32rtomb(buf, wc[i], &ps);
|
||||
if (char_len == (size_t)-1)
|
||||
continue;
|
||||
|
||||
if (vline->len == 0 && char_len == 1 && buf[0] == 0)
|
||||
continue;
|
||||
|
||||
for (size_t j = 0; j < char_len; j++) {
|
||||
if (vline->len + char_len > vline->sz) {
|
||||
/* TODO: grow dynamically */
|
||||
size_t new_count = (vline->len + char_len) * 2;
|
||||
vline->utf8 = xreallocarray(vline->utf8, new_count, 1);
|
||||
vline->map = xreallocarray(vline->map, new_count, sizeof(vline->map[0]));
|
||||
}
|
||||
|
||||
vline->utf8[vline->len + j] = buf[j];
|
||||
vline->map[vline->len + j].col = c;
|
||||
vline->map[vline->len + j].row = row;
|
||||
}
|
||||
|
||||
vline->len += char_len;
|
||||
}
|
||||
}
|
||||
|
||||
if (row->linebreak) {
|
||||
if (vline->len > 0) {
|
||||
vline->utf8[vline->len++] = '\0';
|
||||
ps = (mbstate_t){0};
|
||||
|
||||
vline_idx++;
|
||||
vline = &vlines[vline_idx];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// https://gist.github.com/gruber/249502
|
||||
regex_t preg;
|
||||
const char *foo =
|
||||
"("
|
||||
"[a-z][[:alpha:]-]+:" // protocol
|
||||
"("
|
||||
"/{1,3}|[a-z0-9%]" // slashes (what's the OR part for?)
|
||||
")"
|
||||
"|"
|
||||
"www[:digit:]{0,3}[.]"
|
||||
"|"
|
||||
"[a-z0-9.\\-]+[.][a-z]{2,4}/"
|
||||
")"
|
||||
"("
|
||||
"[^[:space:]()<>]+"
|
||||
"|"
|
||||
"\\(([^[:space:]()<>]+|(\\([^[:space:]()<>]+\\)))*\\)"
|
||||
")+"
|
||||
"("
|
||||
"\\(([^[:space:]()<>]+|(\\([^[:space:]()<>]+\\)))*\\)"
|
||||
"|"
|
||||
// TODO: figure out how to add \\] to the expression below...
|
||||
"[^[:space:]`!()\\[{};:'\".,<>?«»“”‘’]"
|
||||
")"
|
||||
;
|
||||
|
||||
LOG_ERR("foo=%s", foo);
|
||||
|
||||
int r = regcomp(&preg, foo, REG_EXTENDED);
|
||||
|
||||
if (r != 0) {
|
||||
char err_buf[1024];
|
||||
regerror(r, &preg, err_buf, sizeof(err_buf));
|
||||
LOG_ERR("regcomp: %s", err_buf);
|
||||
} else {
|
||||
size_t i = 0;
|
||||
while (true) {
|
||||
const struct vline *v = &vlines[i++];
|
||||
if (v->utf8 == NULL)
|
||||
break;
|
||||
|
||||
|
||||
regmatch_t matches[preg.re_nsub + 1];
|
||||
r = regexec(&preg, v->utf8, preg.re_nsub + 1, matches, 0);
|
||||
|
||||
if (r == REG_NOMATCH)
|
||||
continue;
|
||||
|
||||
size_t mlen = matches[0].rm_eo - matches[0].rm_so;
|
||||
LOG_WARN("MATCH at %d: %.*s (%zu)", matches[0].rm_so, (int)mlen, &v->utf8[matches[0].rm_so], mlen);
|
||||
}
|
||||
regfree(&preg);
|
||||
}
|
||||
|
||||
size_t i = 0;
|
||||
while (true) {
|
||||
const struct vline *v = &vlines[i++];
|
||||
if (v->utf8 == NULL)
|
||||
break;
|
||||
|
||||
LOG_WARN("%.*s", (int)v->len, v->utf8);
|
||||
free(v->utf8);
|
||||
free(v->map);
|
||||
}
|
||||
}
|
||||
|
||||
static int
|
||||
c32cmp_single(const void *_a, const void *_b)
|
||||
{
|
||||
|
|
@ -634,6 +778,7 @@ urls_collect(const struct terminal *term, enum url_action action, url_list_t *ur
|
|||
xassert(tll_length(term->urls) == 0);
|
||||
osc8_uris(term, action, urls);
|
||||
auto_detected(term, action, urls);
|
||||
regex_detected(term, action, urls);
|
||||
remove_overlapping(urls, term->grid->num_cols);
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue