fcft: adapt to API changes in fcft-3.x
Fcft no longer uses wchar_t, but plain uint32_t to represent
codepoints.
Since we do a fair amount of string operations in foot, it still makes
sense to use something that actually _is_ a string (or character),
rather than an array of uint32_t.
For this reason, we switch out all wchar_t usage in foot to
char32_t. We also verify, at compile-time, that char32_t uses
UTF-32 (which is what fcft expects).
Unfortunately, there are no string functions for char32_t. To avoid
having to re-implement all wcs*() functions, we add a small wrapper
layer of c32*() functions.
These wrapper functions take char32_t arguments, but then simply call
the corresponding wcs*() function.
For this to work, wcs*() must _also_ be UTF-32 compatible. We can
check for the presence of the __STDC_ISO_10646__ macro. If set,
wchar_t is at least 4 bytes and its internal representation is UTF-32.
FreeBSD does *not* define this macro, because its internal wchar_t
representation depends on the current locale. It _does_ use UTF-32
_if_ the current locale is UTF-8.
Since foot enforces UTF-8, we simply need to check if __FreeBSD__ is
defined.
Other fcft API changes:
* fcft_glyph_rasterize() -> fcft_codepoint_rasterize()
* font.space_advance has been removed
* ‘tags’ have been removed from fcft_grapheme_rasterize()
* ‘fcft_log_init()’ removed
* ‘fcft_init()’ and ‘fcft_fini()’ must be explicitly called
2021-08-21 14:50:42 +02:00
|
|
|
|
#include "char32.h"
|
|
|
|
|
|
|
|
|
|
|
|
#include <stdlib.h>
|
|
|
|
|
|
#include <string.h>
|
|
|
|
|
|
#include <locale.h>
|
|
|
|
|
|
|
|
|
|
|
|
#include <wctype.h>
|
|
|
|
|
|
#include <wchar.h>
|
|
|
|
|
|
|
2022-03-19 19:52:21 +01:00
|
|
|
|
#if defined __has_include
|
|
|
|
|
|
#if __has_include (<stdc-predef.h>)
|
|
|
|
|
|
#include <stdc-predef.h>
|
|
|
|
|
|
#endif
|
|
|
|
|
|
#endif
|
fcft: adapt to API changes in fcft-3.x
Fcft no longer uses wchar_t, but plain uint32_t to represent
codepoints.
Since we do a fair amount of string operations in foot, it still makes
sense to use something that actually _is_ a string (or character),
rather than an array of uint32_t.
For this reason, we switch out all wchar_t usage in foot to
char32_t. We also verify, at compile-time, that char32_t uses
UTF-32 (which is what fcft expects).
Unfortunately, there are no string functions for char32_t. To avoid
having to re-implement all wcs*() functions, we add a small wrapper
layer of c32*() functions.
These wrapper functions take char32_t arguments, but then simply call
the corresponding wcs*() function.
For this to work, wcs*() must _also_ be UTF-32 compatible. We can
check for the presence of the __STDC_ISO_10646__ macro. If set,
wchar_t is at least 4 bytes and its internal representation is UTF-32.
FreeBSD does *not* define this macro, because its internal wchar_t
representation depends on the current locale. It _does_ use UTF-32
_if_ the current locale is UTF-8.
Since foot enforces UTF-8, we simply need to check if __FreeBSD__ is
defined.
Other fcft API changes:
* fcft_glyph_rasterize() -> fcft_codepoint_rasterize()
* font.space_advance has been removed
* ‘tags’ have been removed from fcft_grapheme_rasterize()
* ‘fcft_log_init()’ removed
* ‘fcft_init()’ and ‘fcft_fini()’ must be explicitly called
2021-08-21 14:50:42 +02:00
|
|
|
|
|
|
|
|
|
|
#define LOG_MODULE "char32"
|
|
|
|
|
|
#define LOG_ENABLE_DBG 0
|
|
|
|
|
|
#include "log.h"
|
|
|
|
|
|
#include "debug.h"
|
|
|
|
|
|
#include "macros.h"
|
|
|
|
|
|
#include "xmalloc.h"
|
|
|
|
|
|
|
|
|
|
|
|
/*
 * The c32*() helpers are assumed to map one-to-one onto the
 * corresponding wchar_t functions. That only holds when wchar_t and
 * char32_t
 *
 *  - are the same size, and
 *  - use the same encoding (which we additionally require to be UTF-32).
 *
 * Both conditions are enforced at compile time below.
 */

_Static_assert(
    sizeof(char32_t) == sizeof(wchar_t), "wchar_t vs. char32_t size mismatch");

/* char32_t must be UTF-32 */
#if !(defined(__STDC_UTF_32__) && __STDC_UTF_32__)
 #error "char32_t does not use UTF-32"
#endif

/*
 * wchar_t must be UTF-32 as well: either the implementation says so
 * through __STDC_ISO_10646__, or we are building on FreeBSD, which is
 * exempted from the macro check.
 */
#if !defined(__FreeBSD__) && !(defined(__STDC_ISO_10646__) && __STDC_ISO_10646__)
 #error "wchar_t does not use UTF-32"
#endif
|
|
|
|
|
|
|
|
|
|
|
|
UNITTEST
|
|
|
|
|
|
{
|
|
|
|
|
|
xassert(c32len(U"") == 0);
|
|
|
|
|
|
xassert(c32len(U"foobar") == 6);
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
UNITTEST
|
|
|
|
|
|
{
|
|
|
|
|
|
xassert(c32cmp(U"foobar", U"foobar") == 0);
|
|
|
|
|
|
xassert(c32cmp(U"foo", U"foobar") < 0);
|
|
|
|
|
|
xassert(c32cmp(U"foobar", U"foo") > 0);
|
|
|
|
|
|
xassert(c32cmp(U"a", U"b") < 0);
|
|
|
|
|
|
xassert(c32cmp(U"b", U"a") > 0);
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
UNITTEST
|
|
|
|
|
|
{
|
|
|
|
|
|
char32_t copy[16];
|
|
|
|
|
|
char32_t *ret = c32ncpy(copy, U"foobar", 16);
|
|
|
|
|
|
|
|
|
|
|
|
xassert(ret == copy);
|
|
|
|
|
|
xassert(copy[0] == U'f');
|
|
|
|
|
|
xassert(copy[1] == U'o');
|
|
|
|
|
|
xassert(copy[2] == U'o');
|
|
|
|
|
|
xassert(copy[3] == U'b');
|
|
|
|
|
|
xassert(copy[4] == U'a');
|
|
|
|
|
|
xassert(copy[5] == U'r');
|
|
|
|
|
|
|
|
|
|
|
|
unsigned char zeroes[(16 - 6) * sizeof(copy[0])] = {0};
|
|
|
|
|
|
xassert(memcmp(©[6], zeroes, sizeof(zeroes)) == 0);
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
UNITTEST
|
|
|
|
|
|
{
|
|
|
|
|
|
char32_t copy[16];
|
|
|
|
|
|
memset(copy, 0x55, sizeof(copy));
|
|
|
|
|
|
|
|
|
|
|
|
char32_t *ret = c32cpy(copy, U"foobar");
|
|
|
|
|
|
|
|
|
|
|
|
xassert(ret == copy);
|
|
|
|
|
|
xassert(copy[0] == U'f');
|
|
|
|
|
|
xassert(copy[1] == U'o');
|
|
|
|
|
|
xassert(copy[2] == U'o');
|
|
|
|
|
|
xassert(copy[3] == U'b');
|
|
|
|
|
|
xassert(copy[4] == U'a');
|
|
|
|
|
|
xassert(copy[5] == U'r');
|
|
|
|
|
|
xassert(copy[6] == U'\0');
|
|
|
|
|
|
|
|
|
|
|
|
unsigned char fives[(16 - 6 - 1) * sizeof(copy[0])];
|
|
|
|
|
|
memset(fives, 0x55, sizeof(fives));
|
|
|
|
|
|
xassert(memcmp(©[7], fives, sizeof(fives)) == 0);
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
UNITTEST
|
|
|
|
|
|
{
|
|
|
|
|
|
xassert(c32casecmp(U"foobar", U"FOOBAR") == 0);
|
|
|
|
|
|
xassert(c32casecmp(U"foo", U"FOOO") < 0);
|
|
|
|
|
|
xassert(c32casecmp(U"FOOO", U"foo") > 0);
|
|
|
|
|
|
xassert(c32casecmp(U"a", U"B") < 0);
|
|
|
|
|
|
xassert(c32casecmp(U"B", U"a") > 0);
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
UNITTEST
|
|
|
|
|
|
{
|
|
|
|
|
|
xassert(c32ncasecmp(U"foo", U"FOObar", 3) == 0);
|
|
|
|
|
|
xassert(c32ncasecmp(U"foo", U"FOOO", 4) < 0);
|
|
|
|
|
|
xassert(c32ncasecmp(U"FOOO", U"foo", 4) > 0);
|
|
|
|
|
|
xassert(c32ncasecmp(U"a", U"BB", 1) < 0);
|
|
|
|
|
|
xassert(c32ncasecmp(U"BB", U"a", 1) > 0);
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
UNITTEST
|
|
|
|
|
|
{
|
|
|
|
|
|
char32_t dst[32] = U"foobar";
|
|
|
|
|
|
char32_t *ret = c32ncat(dst, U"12345678XXXXXXXXX", 8);
|
|
|
|
|
|
|
|
|
|
|
|
xassert(ret == dst);
|
|
|
|
|
|
xassert(c32cmp(dst, U"foobar12345678") == 0);
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
UNITTEST
|
|
|
|
|
|
{
|
|
|
|
|
|
char32_t dst[32] = U"foobar";
|
|
|
|
|
|
char32_t *ret = c32cat(dst, U"12345678");
|
|
|
|
|
|
|
|
|
|
|
|
xassert(ret == dst);
|
|
|
|
|
|
xassert(c32cmp(dst, U"foobar12345678") == 0);
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
UNITTEST
|
|
|
|
|
|
{
|
2024-01-25 07:03:50 +00:00
|
|
|
|
char32_t *c = xc32dup(U"foobar");
|
fcft: adapt to API changes in fcft-3.x
Fcft no longer uses wchar_t, but plain uint32_t to represent
codepoints.
Since we do a fair amount of string operations in foot, it still makes
sense to use something that actually _is_ a string (or character),
rather than an array of uint32_t.
For this reason, we switch out all wchar_t usage in foot to
char32_t. We also verify, at compile-time, that char32_t used
UTF-32 (which is what fcft expects).
Unfortunately, there are no string functions for char32_t. To avoid
having to re-implement all wcs*() functions, we add a small wrapper
layer of c32*() functions.
These wrapper functions take char32_t arguments, but then simply call
the corresponding wcs*() function.
For this to work, wcs*() must _also_ be UTF-32 compatible. We can
check for the presence of the __STDC_ISO_10646__ macro. If set,
wchar_t is at least 4 bytes and its internal representation is UTF-32.
FreeBSD does *not* define this macro, because its internal wchar_t
representation depends on the current locale. It _does_ use UTF-32
_if_ the current locale is UTF-8.
Since foot enforces UTF-8, we simply need to check if __FreeBSD__ is
defined.
Other fcft API changes:
* fcft_glyph_rasterize() -> fcft_codepoint_rasterize()
* font.space_advance has been removed
* ‘tags’ have been removed from fcft_grapheme_rasterize()
* ‘fcft_log_init()’ removed
* ‘fcft_init()’ and ‘fcft_fini()’ must be explicitly called
2021-08-21 14:50:42 +02:00
|
|
|
|
xassert(c32cmp(c, U"foobar") == 0);
|
|
|
|
|
|
free(c);
|
|
|
|
|
|
|
2024-01-25 07:03:50 +00:00
|
|
|
|
c = xc32dup(U"");
|
fcft: adapt to API changes in fcft-3.x
Fcft no longer uses wchar_t, but plain uint32_t to represent
codepoints.
Since we do a fair amount of string operations in foot, it still makes
sense to use something that actually _is_ a string (or character),
rather than an array of uint32_t.
For this reason, we switch out all wchar_t usage in foot to
char32_t. We also verify, at compile-time, that char32_t used
UTF-32 (which is what fcft expects).
Unfortunately, there are no string functions for char32_t. To avoid
having to re-implement all wcs*() functions, we add a small wrapper
layer of c32*() functions.
These wrapper functions take char32_t arguments, but then simply call
the corresponding wcs*() function.
For this to work, wcs*() must _also_ be UTF-32 compatible. We can
check for the presence of the __STDC_ISO_10646__ macro. If set,
wchar_t is at least 4 bytes and its internal representation is UTF-32.
FreeBSD does *not* define this macro, because its internal wchar_t
representation depends on the current locale. It _does_ use UTF-32
_if_ the current locale is UTF-8.
Since foot enforces UTF-8, we simply need to check if __FreeBSD__ is
defined.
Other fcft API changes:
* fcft_glyph_rasterize() -> fcft_codepoint_rasterize()
* font.space_advance has been removed
* ‘tags’ have been removed from fcft_grapheme_rasterize()
* ‘fcft_log_init()’ removed
* ‘fcft_init()’ and ‘fcft_fini()’ must be explicitly called
2021-08-21 14:50:42 +02:00
|
|
|
|
xassert(c32cmp(c, U"") == 0);
|
|
|
|
|
|
free(c);
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/*
 * Converts a multibyte string to char32_t, using mbrtoc32() and the
 * current locale.
 *
 *  dst: output buffer, or NULL to only count characters
 *  src: multibyte input
 *  nms: maximum number of bytes to read from 'src'
 *  len: maximum number of characters to store in 'dst' (ignored when
 *       'dst' is NULL)
 *
 * Conversion stops when 'nms' bytes have been consumed, 'len'
 * characters have been produced, or a NUL character has been
 * converted. The NUL is written to 'dst' by mbrtoc32() when 'dst' is
 * non-NULL, but is not included in the returned count.
 *
 * Returns the number of characters converted, or (size_t)-1 if
 * mbrtoc32() reports an invalid sequence (-1), an incomplete sequence
 * (-2), or a character stored without consuming input (-3).
 */
size_t
mbsntoc32(char32_t *dst, const char *src, size_t nms, size_t len)
{
    mbstate_t ps = {0};

    char32_t *out = dst;
    const char *in = src;

    size_t consumed = 0;
    size_t chars = 0;

    while ((out == NULL || chars < len) && consumed < nms) {
        const size_t rc = mbrtoc32(out, in, nms - consumed, &ps);

        if (rc == 0) {
            /* NUL converted; it terminates the string and is not counted */
            break;
        }

        if (rc == (size_t)-1 || rc == (size_t)-2 || rc == (size_t)-3)
            return (size_t)-1;

        in += rc;
        consumed += rc;
        chars++;

        if (out != NULL)
            out++;
    }

    return chars;
}
|
|
|
|
|
|
|
|
|
|
|
|
UNITTEST
|
|
|
|
|
|
{
|
|
|
|
|
|
const char input[] = "foobarzoo";
|
|
|
|
|
|
char32_t c32[32];
|
|
|
|
|
|
|
|
|
|
|
|
size_t ret = mbsntoc32(NULL, input, sizeof(input), 0);
|
|
|
|
|
|
xassert(ret == 9);
|
|
|
|
|
|
|
|
|
|
|
|
memset(c32, 0x55, sizeof(c32));
|
|
|
|
|
|
ret = mbsntoc32(c32, input, sizeof(input), 32);
|
|
|
|
|
|
|
|
|
|
|
|
xassert(ret == 9);
|
|
|
|
|
|
xassert(c32[0] == U'f');
|
|
|
|
|
|
xassert(c32[1] == U'o');
|
|
|
|
|
|
xassert(c32[2] == U'o');
|
|
|
|
|
|
xassert(c32[3] == U'b');
|
|
|
|
|
|
xassert(c32[4] == U'a');
|
|
|
|
|
|
xassert(c32[5] == U'r');
|
|
|
|
|
|
xassert(c32[6] == U'z');
|
|
|
|
|
|
xassert(c32[7] == U'o');
|
|
|
|
|
|
xassert(c32[8] == U'o');
|
|
|
|
|
|
xassert(c32[9] == U'\0');
|
|
|
|
|
|
xassert(c32[10] == 0x55555555);
|
|
|
|
|
|
|
|
|
|
|
|
memset(c32, 0x55, sizeof(c32));
|
|
|
|
|
|
ret = mbsntoc32(c32, input, 1, 32);
|
|
|
|
|
|
|
|
|
|
|
|
xassert(ret == 1);
|
|
|
|
|
|
xassert(c32[0] == U'f');
|
|
|
|
|
|
xassert(c32[1] == 0x55555555);
|
|
|
|
|
|
|
|
|
|
|
|
memset(c32, 0x55, sizeof(c32));
|
|
|
|
|
|
ret = mbsntoc32(c32, input, sizeof(input), 1);
|
|
|
|
|
|
|
|
|
|
|
|
xassert(ret == 1);
|
|
|
|
|
|
xassert(c32[0] == U'f');
|
|
|
|
|
|
xassert(c32[1] == 0x55555555);
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
UNITTEST
|
|
|
|
|
|
{
|
|
|
|
|
|
const char input[] = "foobarzoo";
|
|
|
|
|
|
char32_t c32[32];
|
|
|
|
|
|
|
|
|
|
|
|
size_t ret = mbstoc32(NULL, input, 0);
|
|
|
|
|
|
xassert(ret == 9);
|
|
|
|
|
|
|
|
|
|
|
|
memset(c32, 0x55, sizeof(c32));
|
|
|
|
|
|
ret = mbstoc32(c32, input, 32);
|
|
|
|
|
|
|
|
|
|
|
|
xassert(ret == 9);
|
|
|
|
|
|
xassert(c32[0] == U'f');
|
|
|
|
|
|
xassert(c32[1] == U'o');
|
|
|
|
|
|
xassert(c32[2] == U'o');
|
|
|
|
|
|
xassert(c32[3] == U'b');
|
|
|
|
|
|
xassert(c32[4] == U'a');
|
|
|
|
|
|
xassert(c32[5] == U'r');
|
|
|
|
|
|
xassert(c32[6] == U'z');
|
|
|
|
|
|
xassert(c32[7] == U'o');
|
|
|
|
|
|
xassert(c32[8] == U'o');
|
|
|
|
|
|
xassert(c32[9] == U'\0');
|
|
|
|
|
|
xassert(c32[10] == 0x55555555);
|
|
|
|
|
|
|
|
|
|
|
|
memset(c32, 0x55, sizeof(c32));
|
|
|
|
|
|
ret = mbstoc32(c32, input, 1);
|
|
|
|
|
|
|
|
|
|
|
|
xassert(ret == 1);
|
|
|
|
|
|
xassert(c32[0] == U'f');
|
|
|
|
|
|
xassert(c32[1] == 0x55555555);
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/*
 * Converts the multibyte string 'src' to a newly allocated,
 * NUL-terminated char32_t string, using mbrtoc32().
 *
 * Returns NULL if 'src' is NULL, or if mbrtoc32() reports an invalid
 * sequence (-1), an incomplete sequence (-2), or a character stored
 * without consuming input (-3). The caller frees the result.
 */
char32_t *
ambstoc32(const char *src)
{
    if (src == NULL)
        return NULL;

    /*
     * Every converted character consumes at least one byte, so the
     * byte count (terminator included) is an upper bound for the
     * number of output characters.
     */
    const size_t nbytes = strlen(src) + 1;
    char32_t *buf = xmalloc(nbytes * sizeof(buf[0]));

    mbstate_t state = {0};
    size_t offset = 0;   /* Bytes consumed from 'src' */
    size_t count = 0;    /* Characters written to 'buf' */

    for (;;) {
        const size_t rc = mbrtoc32(
            &buf[count], &src[offset], nbytes - offset, &state);

        if (rc == 0)
            break;

        if (rc == (size_t)-1 || rc == (size_t)-2 || rc == (size_t)-3) {
            free(buf);
            return NULL;
        }

        offset += rc;
        count++;
    }

    buf[count] = U'\0';

    /* Trim the allocation to what was actually used */
    buf = xrealloc(buf, (count + 1) * sizeof(buf[0]));
    return buf;
}
|
|
|
|
|
|
|
|
|
|
|
|
UNITTEST
|
|
|
|
|
|
{
|
2022-02-27 14:29:29 +01:00
|
|
|
|
const char* locale = setlocale(LC_CTYPE, "en_US.UTF-8");
|
|
|
|
|
|
if (!locale)
|
|
|
|
|
|
locale = setlocale(LC_CTYPE, "C.UTF-8");
|
|
|
|
|
|
if (!locale)
|
|
|
|
|
|
return;
|
fcft: adapt to API changes in fcft-3.x
Fcft no longer uses wchar_t, but plain uint32_t to represent
codepoints.
Since we do a fair amount of string operations in foot, it still makes
sense to use something that actually _is_ a string (or character),
rather than an array of uint32_t.
For this reason, we switch out all wchar_t usage in foot to
char32_t. We also verify, at compile-time, that char32_t used
UTF-32 (which is what fcft expects).
Unfortunately, there are no string functions for char32_t. To avoid
having to re-implement all wcs*() functions, we add a small wrapper
layer of c32*() functions.
These wrapper functions take char32_t arguments, but then simply call
the corresponding wcs*() function.
For this to work, wcs*() must _also_ be UTF-32 compatible. We can
check for the presence of the __STDC_ISO_10646__ macro. If set,
wchar_t is at least 4 bytes and its internal representation is UTF-32.
FreeBSD does *not* define this macro, because its internal wchar_t
representation depends on the current locale. It _does_ use UTF-32
_if_ the current locale is UTF-8.
Since foot enforces UTF-8, we simply need to check if __FreeBSD__ is
defined.
Other fcft API changes:
* fcft_glyph_rasterize() -> fcft_codepoint_rasterize()
* font.space_advance has been removed
* ‘tags’ have been removed from fcft_grapheme_rasterize()
* ‘fcft_log_init()’ removed
* ‘fcft_init()’ and ‘fcft_fini()’ must be explicitly called
2021-08-21 14:50:42 +02:00
|
|
|
|
|
|
|
|
|
|
char32_t *hello = ambstoc32(u8"hello");
|
|
|
|
|
|
xassert(hello != NULL);
|
|
|
|
|
|
xassert(hello[0] == U'h');
|
|
|
|
|
|
xassert(hello[1] == U'e');
|
|
|
|
|
|
xassert(hello[2] == U'l');
|
|
|
|
|
|
xassert(hello[3] == U'l');
|
|
|
|
|
|
xassert(hello[4] == U'o');
|
|
|
|
|
|
xassert(hello[5] == U'\0');
|
|
|
|
|
|
free(hello);
|
|
|
|
|
|
|
|
|
|
|
|
char32_t *swedish = ambstoc32(u8"åäö");
|
|
|
|
|
|
xassert(swedish != NULL);
|
|
|
|
|
|
xassert(swedish[0] == U'å');
|
|
|
|
|
|
xassert(swedish[1] == U'ä');
|
|
|
|
|
|
xassert(swedish[2] == U'ö');
|
|
|
|
|
|
xassert(swedish[3] == U'\0');
|
|
|
|
|
|
free(swedish);
|
|
|
|
|
|
|
|
|
|
|
|
char32_t *emoji = ambstoc32(u8"👨👩👧👦");
|
|
|
|
|
|
xassert(emoji != NULL);
|
|
|
|
|
|
xassert(emoji[0] == U'👨');
|
|
|
|
|
|
xassert(emoji[1] == U'');
|
|
|
|
|
|
xassert(emoji[2] == U'👩');
|
|
|
|
|
|
xassert(emoji[3] == U'');
|
|
|
|
|
|
xassert(emoji[4] == U'👧');
|
|
|
|
|
|
xassert(emoji[5] == U'');
|
|
|
|
|
|
xassert(emoji[6] == U'👦');
|
|
|
|
|
|
xassert(emoji[7] == U'\0');
|
|
|
|
|
|
free(emoji);
|
|
|
|
|
|
|
|
|
|
|
|
xassert(ambstoc32(NULL) == NULL);
|
2022-02-27 14:29:29 +01:00
|
|
|
|
xassert(setlocale(LC_CTYPE, "C") != NULL);
|
fcft: adapt to API changes in fcft-3.x
Fcft no longer uses wchar_t, but plain uint32_t to represent
codepoints.
Since we do a fair amount of string operations in foot, it still makes
sense to use something that actually _is_ a string (or character),
rather than an array of uint32_t.
For this reason, we switch out all wchar_t usage in foot to
char32_t. We also verify, at compile-time, that char32_t used
UTF-32 (which is what fcft expects).
Unfortunately, there are no string functions for char32_t. To avoid
having to re-implement all wcs*() functions, we add a small wrapper
layer of c32*() functions.
These wrapper functions take char32_t arguments, but then simply call
the corresponding wcs*() function.
For this to work, wcs*() must _also_ be UTF-32 compatible. We can
check for the presence of the __STDC_ISO_10646__ macro. If set,
wchar_t is at least 4 bytes and its internal representation is UTF-32.
FreeBSD does *not* define this macro, because its internal wchar_t
representation depends on the current locale. It _does_ use UTF-32
_if_ the current locale is UTF-8.
Since foot enforces UTF-8, we simply need to check if __FreeBSD__ is
defined.
Other fcft API changes:
* fcft_glyph_rasterize() -> fcft_codepoint_rasterize()
* font.space_advance has been removed
* ‘tags’ have been removed from fcft_grapheme_rasterize()
* ‘fcft_log_init()’ removed
* ‘fcft_init()’ and ‘fcft_fini()’ must be explicitly called
2021-08-21 14:50:42 +02:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/*
 * Converts the char32_t string 'src' to a newly allocated,
 * NUL-terminated multibyte string, using c32rtomb().
 *
 * Returns NULL if 'src' is NULL, or if c32rtomb() fails for any
 * character. The caller frees the result.
 */
char *
ac32tombs(const char32_t *src)
{
    if (src == NULL)
        return NULL;

    const size_t src_len = c32len(src);

    /* Initial guess: one byte per character. Grown on demand below. */
    size_t allocated = src_len + 1;
    char *ret = xmalloc(allocated);
    mbstate_t ps = {0};

    size_t bytes = 0;
    char mb[MB_CUR_MAX];

    /* '<=': the terminator is converted too, NUL-terminating the output */
    for (size_t i = 0; i <= src_len; i++) {
        const size_t rc = c32rtomb(mb, src[i], &ps);

        if (rc == (size_t)-1)
            goto err;

        if (bytes + rc > allocated) {
            /*
             * A single doubling is not always enough: with a tiny
             * buffer, one multibyte sequence can be longer than twice
             * the current allocation. Keep doubling until it fits.
             */
            do {
                allocated *= 2;
            } while (bytes + rc > allocated);

            ret = xrealloc(ret, allocated);
        }

        memcpy(&ret[bytes], mb, rc);
        bytes += rc;
    }

    xassert(ret[bytes - 1] == '\0');

    /* Trim the allocation to what was actually used */
    ret = xrealloc(ret, bytes);
    return ret;

err:
    free(ret);
    return NULL;
}
|
|
|
|
|
|
|
|
|
|
|
|
UNITTEST
|
|
|
|
|
|
{
|
2022-02-27 14:29:29 +01:00
|
|
|
|
const char* locale = setlocale(LC_CTYPE, "en_US.UTF-8");
|
|
|
|
|
|
if (!locale)
|
|
|
|
|
|
locale = setlocale(LC_CTYPE, "C.UTF-8");
|
|
|
|
|
|
if (!locale)
|
|
|
|
|
|
return;
|
fcft: adapt to API changes in fcft-3.x
Fcft no longer uses wchar_t, but plain uint32_t to represent
codepoints.
Since we do a fair amount of string operations in foot, it still makes
sense to use something that actually _is_ a string (or character),
rather than an array of uint32_t.
For this reason, we switch out all wchar_t usage in foot to
char32_t. We also verify, at compile-time, that char32_t used
UTF-32 (which is what fcft expects).
Unfortunately, there are no string functions for char32_t. To avoid
having to re-implement all wcs*() functions, we add a small wrapper
layer of c32*() functions.
These wrapper functions take char32_t arguments, but then simply call
the corresponding wcs*() function.
For this to work, wcs*() must _also_ be UTF-32 compatible. We can
check for the presence of the __STDC_ISO_10646__ macro. If set,
wchar_t is at least 4 bytes and its internal representation is UTF-32.
FreeBSD does *not* define this macro, because its internal wchar_t
representation depends on the current locale. It _does_ use UTF-32
_if_ the current locale is UTF-8.
Since foot enforces UTF-8, we simply need to check if __FreeBSD__ is
defined.
Other fcft API changes:
* fcft_glyph_rasterize() -> fcft_codepoint_rasterize()
* font.space_advance has been removed
* ‘tags’ have been removed from fcft_grapheme_rasterize()
* ‘fcft_log_init()’ removed
* ‘fcft_init()’ and ‘fcft_fini()’ must be explicitly called
2021-08-21 14:50:42 +02:00
|
|
|
|
|
|
|
|
|
|
char *s = ac32tombs(U"foobar");
|
|
|
|
|
|
xassert(s != NULL);
|
|
|
|
|
|
xassert(strcmp(s, "foobar") == 0);
|
|
|
|
|
|
free(s);
|
|
|
|
|
|
|
|
|
|
|
|
s = ac32tombs(U"åäö");
|
|
|
|
|
|
xassert(s != NULL);
|
|
|
|
|
|
xassert(strcmp(s, u8"åäö") == 0);
|
|
|
|
|
|
free(s);
|
|
|
|
|
|
|
|
|
|
|
|
s = ac32tombs(U"👨👩👧👦");
|
|
|
|
|
|
xassert(s != NULL);
|
|
|
|
|
|
xassert(strcmp(s, u8"👨👩👧👦") == 0);
|
|
|
|
|
|
free(s);
|
|
|
|
|
|
|
|
|
|
|
|
xassert(ac32tombs(NULL) == NULL);
|
2022-02-27 14:29:29 +01:00
|
|
|
|
xassert(setlocale(LC_CTYPE, "C") != NULL);
|
fcft: adapt to API changes in fcft-3.x
Fcft no longer uses wchar_t, but plain uint32_t to represent
codepoints.
Since we do a fair amount of string operations in foot, it still makes
sense to use something that actually _is_ a string (or character),
rather than an array of uint32_t.
For this reason, we switch out all wchar_t usage in foot to
char32_t. We also verify, at compile-time, that char32_t used
UTF-32 (which is what fcft expects).
Unfortunately, there are no string functions for char32_t. To avoid
having to re-implement all wcs*() functions, we add a small wrapper
layer of c32*() functions.
These wrapper functions take char32_t arguments, but then simply call
the corresponding wcs*() function.
For this to work, wcs*() must _also_ be UTF-32 compatible. We can
check for the presence of the __STDC_ISO_10646__ macro. If set,
wchar_t is at least 4 bytes and its internal representation is UTF-32.
FreeBSD does *not* define this macro, because its internal wchar_t
representation depends on the current locale. It _does_ use UTF-32
_if_ the current locale is UTF-8.
Since foot enforces UTF-8, we simply need to check if __FreeBSD__ is
defined.
Other fcft API changes:
* fcft_glyph_rasterize() -> fcft_codepoint_rasterize()
* font.space_advance has been removed
* ‘tags’ have been removed from fcft_grapheme_rasterize()
* ‘fcft_log_init()’ removed
* ‘fcft_init()’ and ‘fcft_fini()’ must be explicitly called
2021-08-21 14:50:42 +02:00
|
|
|
|
}
|