foot/char32.c
Daniel Eklöf e0227266ca
fcft: adapt to API changes in fcft-3.x
Fcft no longer uses wchar_t, but plain uint32_t to represent
codepoints.

Since we do a fair amount of string operations in foot, it still makes
sense to use something that actually _is_ a string (or character),
rather than an array of uint32_t.

For this reason, we switch out all wchar_t usage in foot to
char32_t. We also verify, at compile-time, that char32_t uses
UTF-32 (which is what fcft expects).

Unfortunately, there are no string functions for char32_t. To avoid
having to re-implement all wcs*() functions, we add a small wrapper
layer of c32*() functions.

These wrapper functions take char32_t arguments, but then simply call
the corresponding wcs*() function.

For this to work, wcs*() must _also_ be UTF-32 compatible. We can
check for the presence of the __STDC_ISO_10646__ macro. If it is set,
wchar_t is at least 4 bytes and its internal representation is UTF-32.

FreeBSD does *not* define this macro, because its internal wchar_t
representation depends on the current locale. It _does_ use UTF-32
_if_ the current locale is UTF-8.

Since foot enforces UTF-8, we simply need to check if __FreeBSD__ is
defined.

Other fcft API changes:

* fcft_glyph_rasterize() -> fcft_codepoint_rasterize()
* font.space_advance has been removed
* ‘tags’ have been removed from fcft_grapheme_rasterize()
* ‘fcft_log_init()’ removed
* ‘fcft_init()’ and ‘fcft_fini()’ must be explicitly called
2022-02-05 17:00:54 +01:00

505 lines
10 KiB
C
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#include "char32.h"
#include <stdlib.h>
#include <string.h>
#include <locale.h>
#include <wctype.h>
#include <wchar.h>
#define LOG_MODULE "char32"
#define LOG_ENABLE_DBG 0
#include "log.h"
#include "debug.h"
#include "macros.h"
#include "xmalloc.h"
/*
* For now, assume we can map directly to the corresponding wchar_t
* functions. This is true if:
*
* - both data types have the same size
* - both use the same encoding (though we require that encoding to be UTF-32)
*
* The compile-time checks below enforce these assumptions, since the
* c32*() wrappers in this file cast char32_t pointers to wchar_t
* pointers and call the wcs*() functions directly.
*/
/* Same size: required for the pointer casts between the two types. */
_Static_assert(
sizeof(wchar_t) == sizeof(char32_t), "wchar_t vs. char32_t size mismatch");
/* Refuse to build unless __STDC_UTF_32__ is defined and non-zero. */
#if !defined(__STDC_UTF_32__) || !__STDC_UTF_32__
#error "char32_t does not use UTF-32"
#endif
/*
* Refuse to build unless __STDC_ISO_10646__ is defined and non-zero.
* FreeBSD is exempted from this check.
*/
#if (!defined(__STDC_ISO_10646__) || !__STDC_ISO_10646__) && !defined(__FreeBSD__)
#error "wchar_t does not use UTF-32"
#endif
/*
 * char32_t counterpart of wcslen().
 *
 * Reinterprets 's' as a wchar_t string and returns whatever wcslen()
 * reports for it.
 */
size_t
c32len(const char32_t *s)
{
    const wchar_t *const wide = (const wchar_t *)s;

    return wcslen(wide);
}
UNITTEST
{
    /* Input string paired with the length c32len() must report */
    static const struct {
        const char32_t *str;
        size_t len;
    } cases[] = {
        {U"", 0},
        {U"foobar", 6},
    };

    for (size_t i = 0; i < sizeof(cases) / sizeof(cases[0]); i++)
        xassert(c32len(cases[i].str) == cases[i].len);
}
/*
 * char32_t counterpart of wcscmp().
 *
 * Both strings are reinterpreted as wchar_t strings; the return value
 * is the one produced by wcscmp().
 */
int
c32cmp(const char32_t *s1, const char32_t *s2)
{
    const wchar_t *const lhs = (const wchar_t *)s1;
    const wchar_t *const rhs = (const wchar_t *)s2;

    return wcscmp(lhs, rhs);
}
UNITTEST
{
    /* Operand pairs and the expected sign (-1, 0, 1) of c32cmp() */
    static const struct {
        const char32_t *lhs;
        const char32_t *rhs;
        int sign;
    } cases[] = {
        {U"foobar", U"foobar", 0},
        {U"foo", U"foobar", -1},
        {U"foobar", U"foo", 1},
        {U"a", U"b", -1},
        {U"b", U"a", 1},
    };

    for (size_t i = 0; i < sizeof(cases) / sizeof(cases[0]); i++) {
        const int r = c32cmp(cases[i].lhs, cases[i].rhs);
        xassert((r > 0) - (r < 0) == cases[i].sign);
    }
}
/*
 * char32_t counterpart of wcsncpy().
 *
 * 'dst' and 'src' are reinterpreted as wchar_t strings and handed to
 * wcsncpy() together with 'n'; its return value is passed back as a
 * char32_t pointer.
 */
char32_t *
c32ncpy(char32_t *dst, const char32_t *src, size_t n)
{
    wchar_t *const wdst = (wchar_t *)dst;
    const wchar_t *const wsrc = (const wchar_t *)src;

    return (char32_t *)wcsncpy(wdst, wsrc, n);
}
UNITTEST
{
    /* "foobar" followed by zeroes in every remaining element */
    static const char32_t expected[16] = U"foobar";

    char32_t buf[16];
    xassert(c32ncpy(buf, U"foobar", 16) == buf);

    /* All 16 elements must match: six characters plus ten zeroes */
    xassert(memcmp(buf, expected, sizeof(buf)) == 0);
}
/*
 * char32_t counterpart of wcscpy().
 *
 * Reinterprets both arguments as wchar_t strings, calls wcscpy() and
 * returns its result as a char32_t pointer.
 */
char32_t *
c32cpy(char32_t *dst, const char32_t *src)
{
    wchar_t *const wdst = (wchar_t *)dst;
    const wchar_t *const wsrc = (const wchar_t *)src;

    return (char32_t *)wcscpy(wdst, wsrc);
}
UNITTEST
{
    /* Six characters plus the terminator: seven elements */
    static const char32_t expected[] = U"foobar";

    /* Pre-fill with a known pattern to detect writes past the NUL */
    char32_t buf[16];
    memset(buf, 0x55, sizeof(buf));

    xassert(c32cpy(buf, U"foobar") == buf);

    /* The string and its terminator were copied... */
    xassert(memcmp(buf, expected, sizeof(expected)) == 0);

    /* ...and every byte after the terminator still holds the pattern */
    const unsigned char *tail = (const unsigned char *)&buf[7];
    for (size_t i = 0; i < (16 - 6 - 1) * sizeof(buf[0]); i++)
        xassert(tail[i] == 0x55);
}
/*
 * char32_t counterpart of wcschr().
 *
 * Searches 's' (reinterpreted as a wchar_t string) for 'c' using
 * wcschr() and returns its result. As with wcschr(), the returned
 * pointer is not const-qualified even though 's' is.
 */
char32_t *
c32chr(const char32_t *s, char32_t c)
{
    const wchar_t *const haystack = (const wchar_t *)s;

    return (char32_t *)wcschr(haystack, c);
}
/*
 * char32_t counterpart of wcscasecmp().
 *
 * Both strings are reinterpreted as wchar_t strings; the return value
 * is the one produced by wcscasecmp().
 */
int
c32casecmp(const char32_t *s1, const char32_t *s2)
{
    const wchar_t *const lhs = (const wchar_t *)s1;
    const wchar_t *const rhs = (const wchar_t *)s2;

    return wcscasecmp(lhs, rhs);
}
UNITTEST
{
    /* Operand pairs and the expected sign (-1, 0, 1) of c32casecmp() */
    static const struct {
        const char32_t *lhs;
        const char32_t *rhs;
        int sign;
    } cases[] = {
        {U"foobar", U"FOOBAR", 0},
        {U"foo", U"FOOO", -1},
        {U"FOOO", U"foo", 1},
        {U"a", U"B", -1},
        {U"B", U"a", 1},
    };

    for (size_t i = 0; i < sizeof(cases) / sizeof(cases[0]); i++) {
        const int r = c32casecmp(cases[i].lhs, cases[i].rhs);
        xassert((r > 0) - (r < 0) == cases[i].sign);
    }
}
/*
 * char32_t counterpart of wcsncasecmp().
 *
 * Both strings are reinterpreted as wchar_t strings and compared with
 * wcsncasecmp(), limited to 'n' characters; its result is returned
 * unchanged.
 */
int
c32ncasecmp(const char32_t *s1, const char32_t *s2, size_t n)
{
    const wchar_t *const lhs = (const wchar_t *)s1;
    const wchar_t *const rhs = (const wchar_t *)s2;

    return wcsncasecmp(lhs, rhs, n);
}
UNITTEST
{
    /* Operands, character limit and expected sign of c32ncasecmp() */
    static const struct {
        const char32_t *lhs;
        const char32_t *rhs;
        size_t n;
        int sign;
    } cases[] = {
        {U"foo", U"FOObar", 3, 0},
        {U"foo", U"FOOO", 4, -1},
        {U"FOOO", U"foo", 4, 1},
        {U"a", U"BB", 1, -1},
        {U"BB", U"a", 1, 1},
    };

    for (size_t i = 0; i < sizeof(cases) / sizeof(cases[0]); i++) {
        const int r = c32ncasecmp(cases[i].lhs, cases[i].rhs, cases[i].n);
        xassert((r > 0) - (r < 0) == cases[i].sign);
    }
}
/*
 * char32_t counterpart of wcsncat().
 *
 * 'dst' and 'src' are reinterpreted as wchar_t strings and handed to
 * wcsncat() together with 'n'; its return value is passed back as a
 * char32_t pointer.
 */
char32_t *
c32ncat(char32_t *dst, const char32_t *src, size_t n)
{
    wchar_t *const wdst = (wchar_t *)dst;
    const wchar_t *const wsrc = (const wchar_t *)src;

    return (char32_t *)wcsncat(wdst, wsrc, n);
}
UNITTEST
{
    /* The source is longer than the limit; only 8 characters may be used */
    const char32_t *const suffix = U"12345678XXXXXXXXX";
    char32_t buf[32] = U"foobar";

    xassert(c32ncat(buf, suffix, 8) == buf);
    xassert(c32cmp(buf, U"foobar12345678") == 0);
}
/*
 * char32_t counterpart of wcscat().
 *
 * Reinterprets both arguments as wchar_t strings, calls wcscat() and
 * returns its result as a char32_t pointer.
 */
char32_t *
c32cat(char32_t *dst, const char32_t *src)
{
    wchar_t *const wdst = (wchar_t *)dst;
    const wchar_t *const wsrc = (const wchar_t *)src;

    return (char32_t *)wcscat(wdst, wsrc);
}
UNITTEST
{
    /* Appending must return the destination and yield the joined string */
    const char32_t *const suffix = U"12345678";
    char32_t buf[32] = U"foobar";

    xassert(c32cat(buf, suffix) == buf);
    xassert(c32cmp(buf, U"foobar12345678") == 0);
}
/*
 * char32_t counterpart of wcsdup().
 *
 * Reinterprets 's' as a wchar_t string and returns the pointer
 * produced by wcsdup(), cast to char32_t. The result is released with
 * free().
 */
char32_t *
c32dup(const char32_t *s)
{
    const wchar_t *const original = (const wchar_t *)s;
    wchar_t *const copy = wcsdup(original);

    return (char32_t *)copy;
}
UNITTEST
{
    /* A non-empty and an empty string, in that order */
    static const char32_t *const inputs[] = {U"foobar", U""};

    for (size_t i = 0; i < sizeof(inputs) / sizeof(inputs[0]); i++) {
        /* The duplicate must compare equal to its source */
        char32_t *copy = c32dup(inputs[i]);
        xassert(c32cmp(copy, inputs[i]) == 0);
        free(copy);
    }
}
/*
 * Converts a multibyte string to char32_t, one character at a time,
 * using mbrtoc32() and a fresh conversion state.
 *
 * dst: output buffer, or NULL to only count characters (in which case
 *      'len' is ignored).
 * src: multibyte input.
 * nms: maximum number of bytes to read from 'src'.
 * len: maximum number of characters to store in 'dst'.
 *
 * Conversion stops when 'len' characters have been stored, when 'nms'
 * bytes have been consumed, or when a NUL character has been
 * converted. In the last case, the NUL has been handed to 'dst' by
 * mbrtoc32(), but is not included in the returned count.
 *
 * Returns the number of characters converted, or (size_t)-1 if
 * mbrtoc32() reports an invalid or incomplete sequence.
 */
size_t
mbsntoc32(char32_t *dst, const char *src, size_t nms, size_t len)
{
    mbstate_t ps = {0};

    char32_t *out = dst;
    const char *in = src;

    size_t consumed = 0;
    size_t chars = 0;

    while ((out == NULL || chars < len) && consumed < nms) {
        const size_t rc = mbrtoc32(out, in, nms - consumed, &ps);

        /* NUL converted: end of string */
        if (rc == 0)
            break;

        /*
         * The error value must be (size_t)-1; (char32_t)-1 would be
         * 0xffffffff, which differs from (size_t)-1 whenever size_t
         * is wider than 32 bits.
         */
        if (rc == (size_t)-1 || rc == (size_t)-2 || rc == (size_t)-3)
            return (size_t)-1;

        in += rc;
        consumed += rc;
        chars++;

        if (out != NULL)
            out++;
    }

    return chars;
}
UNITTEST
{
    const char input[] = "foobarzoo";

    /* Nine characters plus the terminator: ten elements */
    static const char32_t expected[] = U"foobarzoo";

    char32_t out[32];

    /* Count only: no destination buffer */
    xassert(mbsntoc32(NULL, input, sizeof(input), 0) == 9);

    /* Whole string; the terminator is stored, the rest is untouched */
    memset(out, 0x55, sizeof(out));
    xassert(mbsntoc32(out, input, sizeof(input), 32) == 9);
    xassert(memcmp(out, expected, sizeof(expected)) == 0);
    xassert(out[10] == 0x55555555);

    /* Input limited to a single byte */
    memset(out, 0x55, sizeof(out));
    xassert(mbsntoc32(out, input, 1, 32) == 1);
    xassert(out[0] == U'f');
    xassert(out[1] == 0x55555555);

    /* Output limited to a single character */
    memset(out, 0x55, sizeof(out));
    xassert(mbsntoc32(out, input, sizeof(input), 1) == 1);
    xassert(out[0] == U'f');
    xassert(out[1] == 0x55555555);
}
/*
 * Converts the NUL-terminated multibyte string 'src' to char32_t.
 *
 * Thin wrapper around mbsntoc32(): the byte limit is the length of
 * 'src' including its terminating NUL. 'dst' and 'len' are passed
 * through, and the return value is that of mbsntoc32().
 */
size_t
mbstoc32(char32_t *dst, const char *src, size_t len)
{
    /* +1: let the conversion see the terminating NUL byte too */
    const size_t src_bytes = strlen(src) + 1;

    return mbsntoc32(dst, src, src_bytes, len);
}
UNITTEST
{
    const char input[] = "foobarzoo";

    /* Nine characters plus the terminator: ten elements */
    static const char32_t expected[] = U"foobarzoo";

    char32_t out[32];

    /* Count only: no destination buffer */
    xassert(mbstoc32(NULL, input, 0) == 9);

    /* Whole string; the terminator is stored, the rest is untouched */
    memset(out, 0x55, sizeof(out));
    xassert(mbstoc32(out, input, 32) == 9);
    xassert(memcmp(out, expected, sizeof(expected)) == 0);
    xassert(out[10] == 0x55555555);

    /* Output limited to a single character */
    memset(out, 0x55, sizeof(out));
    xassert(mbstoc32(out, input, 1) == 1);
    xassert(out[0] == U'f');
    xassert(out[1] == 0x55555555);
}
/*
 * Allocating multibyte -> char32_t conversion.
 *
 * Converts the NUL-terminated multibyte string 'src' with mbrtoc32()
 * and returns a newly allocated, NUL-terminated char32_t string that
 * the caller releases with free().
 *
 * Returns NULL if 'src' is NULL, or if mbrtoc32() reports an invalid
 * or incomplete sequence.
 */
char32_t *
ambstoc32(const char *src)
{
    if (src == NULL)
        return NULL;

    const size_t nbytes = strlen(src);
    const char *const stop = src + nbytes + 1;

    /* One element per input byte, plus the terminator, is the upper bound */
    char32_t *buf = xmalloc((nbytes + 1) * sizeof(buf[0]));

    mbstate_t state = {0};
    size_t count = 0;

    for (const char *p = src;;) {
        const size_t n = mbrtoc32(&buf[count], p, stop - p, &state);

        /* Terminating NUL reached */
        if (n == 0)
            break;

        if (n == (size_t)-1 || n == (size_t)-2 || n == (size_t)-3) {
            free(buf);
            return NULL;
        }

        p += n;
        count++;
    }

    buf[count] = U'\0';

    /* Trim the allocation down to what was actually used */
    buf = xrealloc(buf, (count + 1) * sizeof(buf[0]));
    return buf;
}
UNITTEST
{
    /* The conversion depends on LC_CTYPE; "C" is restored at the end */
    setlocale(LC_CTYPE, "en_US.UTF-8");

    /* Plain ASCII */
    char32_t *hello = ambstoc32(u8"hello");
    xassert(hello != NULL);
    xassert(hello[0] == U'h');
    xassert(hello[1] == U'e');
    xassert(hello[2] == U'l');
    xassert(hello[3] == U'l');
    xassert(hello[4] == U'o');
    xassert(hello[5] == U'\0');
    free(hello);

    /* Two-byte UTF-8 sequences */
    char32_t *swedish = ambstoc32(u8"åäö");
    xassert(swedish != NULL);
    xassert(swedish[0] == U'å');
    xassert(swedish[1] == U'ä');
    xassert(swedish[2] == U'ö');
    xassert(swedish[3] == U'\0');
    free(swedish);

    /*
     * Family emoji: MAN, WOMAN, GIRL and BOY, joined by ZERO WIDTH
     * JOINER (U+200D). Spelled with universal character names so that
     * the otherwise invisible joiners are explicit in the source.
     */
    char32_t *emoji = ambstoc32(
        u8"\U0001F468\u200D\U0001F469\u200D\U0001F467\u200D\U0001F466");
    xassert(emoji != NULL);
    xassert(emoji[0] == U'\U0001F468');
    xassert(emoji[1] == U'\u200D');
    xassert(emoji[2] == U'\U0001F469');
    xassert(emoji[3] == U'\u200D');
    xassert(emoji[4] == U'\U0001F467');
    xassert(emoji[5] == U'\u200D');
    xassert(emoji[6] == U'\U0001F466');
    xassert(emoji[7] == U'\0');
    free(emoji);

    /* NULL in, NULL out */
    xassert(ambstoc32(NULL) == NULL);

    setlocale(LC_CTYPE, "C");
}
/*
 * Allocating char32_t -> multibyte conversion.
 *
 * Converts the NUL-terminated char32_t string 'src', terminator
 * included, with c32rtomb() and returns a newly allocated,
 * NUL-terminated multibyte string that the caller releases with
 * free().
 *
 * Returns NULL if 'src' is NULL, or if c32rtomb() fails to convert a
 * character.
 */
char *
ac32tombs(const char32_t *src)
{
    if (src == NULL)
        return NULL;

    const size_t src_len = c32len(src);

    /* Initial guess: one byte per character, plus the terminator */
    size_t allocated = src_len + 1;
    char *ret = xmalloc(allocated);

    mbstate_t ps = {0};
    size_t bytes = 0;
    char mb[MB_CUR_MAX];

    /* '<=': the terminator is converted too */
    for (size_t i = 0; i <= src_len; i++) {
        const size_t rc = c32rtomb(mb, src[i], &ps);
        if (rc == (size_t)-1)
            goto err;

        if (bytes + rc > allocated) {
            /*
             * Doubling once is not always enough: with a small
             * buffer, a single multibyte character can need more
             * than twice the current size. Keep doubling until the
             * character fits.
             */
            do {
                allocated *= 2;
            } while (bytes + rc > allocated);

            ret = xrealloc(ret, allocated);
        }

        memcpy(&ret[bytes], mb, rc);
        bytes += rc;
    }

    xassert(ret[bytes - 1] == '\0');

    /* Trim the allocation down to what was actually used */
    ret = xrealloc(ret, bytes);
    return ret;

err:
    free(ret);
    return NULL;
}
UNITTEST
{
    /* The conversion depends on LC_CTYPE; "C" is restored at the end */
    setlocale(LC_CTYPE, "en_US.UTF-8");

    /* Plain ASCII */
    char *s = ac32tombs(U"foobar");
    xassert(s != NULL);
    xassert(strcmp(s, "foobar") == 0);
    free(s);

    /* Two-byte UTF-8 sequences */
    s = ac32tombs(U"åäö");
    xassert(s != NULL);
    xassert(strcmp(s, u8"åäö") == 0);
    free(s);

    /*
     * Family emoji: MAN, WOMAN, GIRL and BOY, joined by ZERO WIDTH
     * JOINER (U+200D). Spelled with universal character names so that
     * the otherwise invisible joiners are explicit in the source.
     */
    s = ac32tombs(
        U"\U0001F468\u200D\U0001F469\u200D\U0001F467\u200D\U0001F466");
    xassert(s != NULL);
    xassert(
        strcmp(
            s,
            u8"\U0001F468\u200D\U0001F469\u200D\U0001F467\u200D\U0001F466")
        == 0);
    free(s);

    /*
     * EURO SIGN (three bytes) followed by MAN (four bytes): a short
     * string whose output needs more than a single doubling of a
     * (length + 1) byte buffer.
     */
    s = ac32tombs(U"\u20AC\U0001F468");
    xassert(s != NULL);
    xassert(strcmp(s, u8"\u20AC\U0001F468") == 0);
    free(s);

    /* NULL in, NULL out */
    xassert(ac32tombs(NULL) == NULL);

    setlocale(LC_CTYPE, "C");
}
/*
 * char32_t counterpart of towlower().
 *
 * 'c' is passed to towlower() as a wint_t and the result is converted
 * back to char32_t.
 */
char32_t
toc32lower(char32_t c)
{
    const wint_t lowered = towlower((wint_t)c);

    return (char32_t)lowered;
}
/*
 * char32_t counterpart of towupper().
 *
 * 'c' is passed to towupper() as a wint_t and the result is converted
 * back to char32_t.
 */
char32_t
toc32upper(char32_t c)
{
    const wint_t raised = towupper((wint_t)c);

    return (char32_t)raised;
}
/*
 * char32_t counterpart of iswspace().
 *
 * Returns true when iswspace() returns non-zero for 'c32'.
 */
bool
isc32space(char32_t c32)
{
    const wint_t wc = (wint_t)c32;

    return iswspace(wc) != 0;
}
/*
 * char32_t counterpart of iswprint().
 *
 * Returns true when iswprint() returns non-zero for 'c32'.
 */
bool
isc32print(char32_t c32)
{
    const wint_t wc = (wint_t)c32;

    return iswprint(wc) != 0;
}
/*
 * char32_t counterpart of iswgraph().
 *
 * Returns true when iswgraph() returns non-zero for 'c32'.
 */
bool
isc32graph(char32_t c32)
{
    const wint_t wc = (wint_t)c32;

    return iswgraph(wc) != 0;
}
/*
 * char32_t counterpart of wcwidth().
 *
 * 'c' is converted to wchar_t and passed to wcwidth(); its result is
 * returned unchanged.
 */
int
c32width(char32_t c)
{
    const wchar_t wc = (wchar_t)c;

    return wcwidth(wc);
}
/*
 * char32_t counterpart of wcswidth().
 *
 * 's' is reinterpreted as a wchar_t string and passed to wcswidth()
 * together with 'n'; its result is returned unchanged.
 */
int
c32swidth(const char32_t *s, size_t n)
{
    const wchar_t *const wide = (const wchar_t *)s;

    return wcswidth(wide, n);
}