foot/extract.c
Daniel Eklöf e0227266ca
fcft: adapt to API changes in fcft-3.x
Fcft no longer uses wchar_t, but plain uint32_t to represent
codepoints.

Since we do a fair amount of string operations in foot, it still makes
sense to use something that actually _is_ a string (or character),
rather than an array of uint32_t.

For this reason, we switch out all wchar_t usage in foot to
char32_t. We also verify, at compile-time, that char32_t used
UTF-32 (which is what fcft expects).

Unfortunately, there are no string functions for char32_t. To avoid
having to re-implement all wcs*() functions, we add a small wrapper
layer of c32*() functions.

These wrapper functions take char32_t arguments, but then simply call
the corresponding wcs*() function.

For this to work, wcs*() must _also_ be UTF-32 compatible. We can
check for the presence of the  __STDC_ISO_10646__ macro. If set,
wchar_t is at least 4 bytes and its internal representation is UTF-32.

FreeBSD does *not* define this macro, because its internal wchar_t
representation depends on the current locale. It _does_ use UTF-32
_if_ the current locale is UTF-8.

Since foot enforces UTF-8, we simply need to check if __FreeBSD__ is
defined.

Other fcft API changes:

* fcft_glyph_rasterize() -> fcft_codepoint_rasterize()
* font.space_advance has been removed
* ‘tags’ have been removed from fcft_grapheme_rasterize()
* ‘fcft_log_init()’ removed
* ‘fcft_init()’ and ‘fcft_fini()’ must be explicitly called
2022-02-05 17:00:54 +01:00

271 lines
6.7 KiB
C

#include "extract.h"
#include <string.h>
#define LOG_MODULE "extract"
#define LOG_ENABLE_DBG 1
#include "log.h"
#include "char32.h"
struct extraction_context {
char32_t *buf;
size_t size;
size_t idx;
size_t tab_spaces_left;
size_t empty_count;
size_t newline_count;
bool strip_trailing_empty;
bool failed;
const struct row *last_row;
const struct cell *last_cell;
enum selection_kind selection_kind;
};
struct extraction_context *
extract_begin(enum selection_kind kind, bool strip_trailing_empty)
{
struct extraction_context *ctx = malloc(sizeof(*ctx));
if (unlikely(ctx == NULL)) {
LOG_ERRNO("malloc() failed");
return NULL;
}
*ctx = (struct extraction_context){
.selection_kind = kind,
.strip_trailing_empty = strip_trailing_empty,
};
return ctx;
}
static bool
ensure_size(struct extraction_context *ctx, size_t additional_chars)
{
while (ctx->size < ctx->idx + additional_chars) {
size_t new_size = ctx->size == 0 ? 512 : ctx->size * 2;
char32_t *new_buf = realloc(ctx->buf, new_size * sizeof(new_buf[0]));
if (new_buf == NULL)
return false;
ctx->buf = new_buf;
ctx->size = new_size;
}
xassert(ctx->size >= ctx->idx + additional_chars);
return true;
}
bool
extract_finish_wide(struct extraction_context *ctx, char32_t **text, size_t *len)
{
if (text == NULL)
return false;
*text = NULL;
if (len != NULL)
*len = 0;
if (ctx->failed)
goto err;
if (!ctx->strip_trailing_empty) {
/* Insert pending newlines, and replace empty cells with spaces */
if (!ensure_size(ctx, ctx->newline_count + ctx->empty_count))
goto err;
for (size_t i = 0; i < ctx->newline_count; i++)
ctx->buf[ctx->idx++] = U'\n';
for (size_t i = 0; i < ctx->empty_count; i++)
ctx->buf[ctx->idx++] = U' ';
}
if (ctx->idx == 0) {
/* Selection of empty cells only */
if (!ensure_size(ctx, 1))
goto err;
ctx->buf[ctx->idx++] = U'\0';
} else {
xassert(ctx->idx > 0);
xassert(ctx->idx <= ctx->size);
switch (ctx->selection_kind) {
default:
if (ctx->buf[ctx->idx - 1] == U'\n')
ctx->buf[ctx->idx - 1] = U'\0';
break;
case SELECTION_LINE_WISE:
if (ctx->buf[ctx->idx - 1] != U'\n') {
if (!ensure_size(ctx, 1))
goto err;
ctx->buf[ctx->idx++] = U'\n';
}
break;
}
if (ctx->buf[ctx->idx - 1] != U'\0') {
if (!ensure_size(ctx, 1))
goto err;
ctx->buf[ctx->idx++] = U'\0';
}
}
*text = ctx->buf;
if (len != NULL)
*len = ctx->idx - 1;
free(ctx);
return true;
err:
free(ctx->buf);
free(ctx);
return false;
}
bool
extract_finish(struct extraction_context *ctx, char **text, size_t *len)
{
if (text == NULL)
return false;
if (len != NULL)
*len = 0;
char32_t *wtext;
if (!extract_finish_wide(ctx, &wtext, NULL))
return false;
bool ret = false;
*text = ac32tombs(wtext);
if (*text == NULL) {
LOG_ERR("failed to convert selection to UTF-8");
goto out;
}
if (len != NULL)
*len = strlen(*text);
ret = true;
out:
free(wtext);
return ret;
}
bool
extract_one(const struct terminal *term, const struct row *row,
const struct cell *cell, int col, void *context)
{
struct extraction_context *ctx = context;
if (cell->wc >= CELL_SPACER)
return true;
if (ctx->last_row != NULL && row != ctx->last_row) {
/* New row - determine if we should insert a newline or not */
if (ctx->selection_kind != SELECTION_BLOCK) {
if (ctx->last_row->linebreak ||
ctx->empty_count > 0 ||
cell->wc == 0)
{
/* Row has a hard linebreak, or either last cell or
* current cell is empty */
/* Don't emit newline just yet - only if there are
* non-empty cells following it */
ctx->newline_count++;
if (!ctx->strip_trailing_empty) {
if (!ensure_size(ctx, ctx->empty_count))
goto err;
for (size_t i = 0; i < ctx->empty_count; i++)
ctx->buf[ctx->idx++] = U' ';
}
ctx->empty_count = 0;
}
} else {
/* Always insert a linebreak */
if (!ensure_size(ctx, 1))
goto err;
ctx->buf[ctx->idx++] = U'\n';
if (!ctx->strip_trailing_empty) {
if (!ensure_size(ctx, ctx->empty_count))
goto err;
for (size_t i = 0; i < ctx->empty_count; i++)
ctx->buf[ctx->idx++] = U' ';
}
ctx->empty_count = 0;
}
ctx->tab_spaces_left = 0;
}
if (cell->wc == U' ' && ctx->tab_spaces_left > 0) {
ctx->tab_spaces_left--;
return true;
}
ctx->tab_spaces_left = 0;
if (cell->wc == 0) {
ctx->empty_count++;
ctx->last_row = row;
ctx->last_cell = cell;
return true;
}
/* Insert pending newlines, and replace empty cells with spaces */
if (!ensure_size(ctx, ctx->newline_count + ctx->empty_count))
goto err;
for (size_t i = 0; i < ctx->newline_count; i++)
ctx->buf[ctx->idx++] = U'\n';
for (size_t i = 0; i < ctx->empty_count; i++)
ctx->buf[ctx->idx++] = U' ';
ctx->newline_count = 0;
ctx->empty_count = 0;
if (cell->wc >= CELL_COMB_CHARS_LO && cell->wc <= CELL_COMB_CHARS_HI)
{
const struct composed *composed = composed_lookup(
term->composed, cell->wc - CELL_COMB_CHARS_LO);
if (!ensure_size(ctx, composed->count))
goto err;
for (size_t i = 0; i < composed->count; i++)
ctx->buf[ctx->idx++] = composed->chars[i];
}
else {
if (!ensure_size(ctx, 1))
goto err;
ctx->buf[ctx->idx++] = cell->wc;
if (cell->wc == U'\t') {
int next_tab_stop = term->cols - 1;
tll_foreach(term->tab_stops, it) {
if (it->item > col) {
next_tab_stop = it->item;
break;
}
}
xassert(next_tab_stop >= col);
ctx->tab_spaces_left = next_tab_stop - col;
}
}
ctx->last_row = row;
ctx->last_cell = cell;
return true;
err:
ctx->failed = true;
return false;
}