From fd4c96011ebe156aa00e7b7de77e1adc26b7fff5 Mon Sep 17 00:00:00 2001
From: Manuel Stoeckl <code@mstoeckl.com>
Date: Thu, 17 Apr 2025 18:50:38 -0400
Subject: [PATCH 1/4] protocol: specify exact multiplanar layout for wl_shm

This change calculates multiplanar buffer plane strides so that,
if the first plane is tightly packed, the other planes are also
tightly packed. Matching Vulkan's constraints on multiplanar formats,
it requires that the width/height/stride parameters be divisible as
necessary to avoid ever needing to round subsampled pixels.

This is technically a breaking change, but very few clients and
compositors implemented and used multiplanar shm formats.
For a given format, those that do either agree with the new
calculations or disagree with each other.

Signed-off-by: Manuel Stoeckl <code@mstoeckl.com>
---
 protocol/wayland.xml | 39 +++++++++++++++++++++++++++++++++++++--
 1 file changed, 37 insertions(+), 2 deletions(-)

diff --git a/protocol/wayland.xml b/protocol/wayland.xml
index 1af51d36..8d0add03 100644
--- a/protocol/wayland.xml
+++ b/protocol/wayland.xml
@@ -230,8 +230,43 @@
 	The buffer is created offset bytes into the pool and has
 	width and height as specified.  The stride argument specifies
 	the number of bytes from the beginning of one row to the beginning
-	of the next.  The format is the pixel format of the buffer and
-	must be one of those advertised through the wl_shm.format event.
+	of the next; if the pixel format has multiple planes the stride
+	applies to the first plane. The format is the pixel format of the
+	buffer and must be one of those advertised through the wl_shm.format
+	event.
+
+	When the pixel format has multiple planes, the strides and starting
+	offsets of the individual planes are derived from the provided
+	stride as follows. Denote "stride", "width", "height" as the provided
+	arguments. Let "p" be the number of planes. For the sake of
+	calculating parameters, we will require that each plane, seen as a
+	width x height grid of squares, can be decomposed into an array of
+	disjoint, tightly packed, indivisible rectangular blocks (which, to
+	make calculations easier, here encompass both subsampling and the
+	packing of subsampled pixel data together into short byte sequences).
+	For each plane index "i" between 1 and p, let "blockw[i]" be the
+	width of the blocks for plane i, "blockh[i]" the height of the blocks,
+	and "bpb[i]" the number of bytes used to encode each block.
+	(For example: for the purely subsampled two-plane format nv12,
+	blockw[2] = blockh[2] = 2 and bpb[2] = 2, because each Cr:Cb plane
+	entry corresponds (roughly; the interpretation may be more
+	complicated) to a 2x2 region of pixels, while for the packed single
+	plane format y210, blockw[1] = 2, blockh[1] = 1, and bpb[1] = 8.
+	For p030, which has both 3x1 packing and 2x2 subsampling,
+	blockw[2] = 6, blockh[2] = 2, and bpb[2] = 8.)
+
+	Parameters are valid only if, for each plane i, width % blockw[i] = 0
+	and height % blockh[i] = 0. Furthermore, stride % bpb[1] = 0 is needed.
+	Let ext_width = stride / bpb[1]. For each plane i, ext_width must
+	satisfy ext_width % blockw[i] = 0. Then define the stride of the
+	ith plane, "stride[i]", to be ext_width * bpb[i] / blockw[i].
+	The offset of the ith plane is
+	offset + sum_{j = 1}^{i - 1} stride[j] * (height / blockh[j]); this
+	evaluates to just offset when i = 1.
+
+	Formats (like yuv420_10bit or vuy101010) whose description does not
+	match the above multiplanar, linear layout model have unspecified
+	interpretation.
 
 	A buffer will keep a reference to the pool it was created from
 	so it is valid to destroy the pool immediately after creating

From 77730f10a0eaac1c654d1bdc689783292bdb5f2d Mon Sep 17 00:00:00 2001
From: Kyle Brenneman <kbrenneman@nvidia.com>
Date: Tue, 17 Sep 2024 17:27:37 -0600
Subject: [PATCH 2/4] connection: Add a function to parse WAYLAND_DEBUG tokens

Add a new function, wl_check_env_token, to scan for a token in a
comma-separated string.

Change wl_display_create in wayland-server.c and
wl_display_connect_to_fd in wayland-client.c to use that instead of a
simple substring search.

This means that WAYLAND_DEBUG will accept a value like "client,server"
but not "clientserver". In exchange, it becomes easier to add other
tokens without worrying about overlap between them.

Signed-off-by: Kyle Brenneman <kbrenneman@nvidia.com>
---
 src/connection.c      | 42 ++++++++++++++++++++++++++++++++++++++++++
 src/wayland-client.c  |  2 +-
 src/wayland-private.h |  3 +++
 src/wayland-server.c  |  2 +-
 4 files changed, 47 insertions(+), 2 deletions(-)

diff --git a/src/connection.c b/src/connection.c
index 593f52f3..9c6a6b01 100644
--- a/src/connection.c
+++ b/src/connection.c
@@ -1491,6 +1491,48 @@ wl_closure_queue(struct wl_closure *closure, struct wl_connection *connection)
 	return result;
 }
 
+bool
+wl_check_env_token(const char *env, const char *token)
+{
+	const char *ptr = env;
+	size_t token_len;
+
+	if (env == NULL)
+		return false;
+
+	token_len = strlen(token);
+
+	// Scan the string for comma-separated tokens and look for a match.
+	while (true) {
+		const char *end;
+		size_t len;
+
+		// Skip over any leading separators.
+		while (*ptr == ',')
+			ptr++;
+
+		if (*ptr == '\x00')
+			return false;
+
+		end = strchr(ptr + 1, ',');
+
+		// If there isn't another separator, then the rest of the string
+		// is one token.
+		if (end == NULL)
+			return (strcmp(ptr, token) == 0);
+
+		len = end - ptr;
+		if (len == token_len && memcmp(ptr, token, len) == 0) {
+			return true;
+		}
+
+		// Skip to the next token.
+		ptr += len;
+	}
+
+	return false;
+}
+
 void
 wl_closure_print(struct wl_closure *closure, struct wl_object *target,
 		 int send, int discarded, uint32_t (*n_parse)(union wl_argument *arg),
diff --git a/src/wayland-client.c b/src/wayland-client.c
index c8633046..c0b361f0 100644
--- a/src/wayland-client.c
+++ b/src/wayland-client.c
@@ -1236,7 +1236,7 @@ wl_display_connect_to_fd(int fd)
 	no_color = getenv("NO_COLOR");
 	force_color = getenv("FORCE_COLOR");
 	debug = getenv("WAYLAND_DEBUG");
-	if (debug && (strstr(debug, "client") || strstr(debug, "1"))) {
+	if (debug && (wl_check_env_token(debug, "client") || wl_check_env_token(debug, "1"))) {
 		debug_client = 1;
 		if (isatty(fileno(stderr)))
 			debug_color = 1;
diff --git a/src/wayland-private.h b/src/wayland-private.h
index d7ba9dae..d0e4cfc6 100644
--- a/src/wayland-private.h
+++ b/src/wayland-private.h
@@ -237,6 +237,9 @@ wl_closure_send(struct wl_closure *closure, struct wl_connection *connection);
 int
 wl_closure_queue(struct wl_closure *closure, struct wl_connection *connection);
 
+bool
+wl_check_env_token(const char *env, const char *token);
+
 void
 wl_closure_print(struct wl_closure *closure,
 		 struct wl_object *target, int send, int discarded,
diff --git a/src/wayland-server.c b/src/wayland-server.c
index 482743b3..c81d98f1 100644
--- a/src/wayland-server.c
+++ b/src/wayland-server.c
@@ -1198,7 +1198,7 @@ wl_display_create(void)
 	no_color = getenv("NO_COLOR");
 	force_color = getenv("FORCE_COLOR");
 	debug = getenv("WAYLAND_DEBUG");
-	if (debug && (strstr(debug, "server") || strstr(debug, "1"))) {
+	if (debug && (wl_check_env_token(debug, "server") || wl_check_env_token(debug, "1"))) {
 		debug_server = 1;
 		if (isatty(fileno(stderr)))
 			debug_color = 1;

From 4673ef7e9ce5de21051b64c39816a98187611966 Mon Sep 17 00:00:00 2001
From: Kyle Brenneman <kbrenneman@nvidia.com>
Date: Tue, 10 Sep 2024 14:36:06 -0600
Subject: [PATCH 3/4] connection: Add a thread ID to WAYLAND_DEBUG output.

If WAYLAND_DEBUG contains the token "thread_id", and gettid() is
available, then include the current thread ID in the output from
wl_closure_print.

If multiple threads are sending requests, then those requests can get
interleaved. That's usually fine, but for wl_surface requests and
commits, that can cause problems ranging from incorrect behavior to
protocol errors.

Being able to see which requests are sent by different threads would
make such problems much easier to diagnose.

Signed-off-by: Kyle Brenneman <kbrenneman@nvidia.com>
---
 meson.build      |  1 +
 src/connection.c | 17 +++++++++++++++++
 2 files changed, 18 insertions(+)

diff --git a/meson.build b/meson.build
index 37c14687..ce386a4c 100644
--- a/meson.build
+++ b/meson.build
@@ -46,6 +46,7 @@ have_funcs = [
 	'memfd_create',
 	'mremap',
 	'strndup',
+	'gettid',
 ]
 foreach f: have_funcs
 	config_h.set('HAVE_' + f.underscorify().to_upper(), cc.has_function(f))
diff --git a/src/connection.c b/src/connection.c
index 9c6a6b01..2d1e8d1d 100644
--- a/src/connection.c
+++ b/src/connection.c
@@ -26,6 +26,8 @@
 
 #define _GNU_SOURCE
 
+#include "../config.h"
+
 #include <math.h>
 #include <stdlib.h>
 #include <stdint.h>
@@ -1538,6 +1540,9 @@ wl_closure_print(struct wl_closure *closure, struct wl_object *target,
 		 int send, int discarded, uint32_t (*n_parse)(union wl_argument *arg),
 		 const char *queue_name, int color)
 {
+#if defined(HAVE_GETTID)
+	static int include_tid = -1;
+#endif // defined(HAVE_GETTID)
 	int i;
 	struct argument_details arg;
 	const char *signature = closure->message->signature;
@@ -1558,6 +1563,18 @@ wl_closure_print(struct wl_closure *closure, struct wl_object *target,
 		color ? WL_DEBUG_COLOR_GREEN : "",
 		time / 1000, time % 1000);
 
+#if defined(HAVE_GETTID)
+	if (include_tid < 0) {
+		include_tid = wl_check_env_token(getenv("WAYLAND_DEBUG"), "thread_id");
+	}
+
+	if (include_tid) {
+		fprintf(f, "%sTID#%d ",
+			color ? WL_DEBUG_COLOR_CYAN : "",
+			(int) gettid());
+	}
+#endif
+
 	if (queue_name) {
 		fprintf(f, "%s{%s} ",
 			color ? WL_DEBUG_COLOR_YELLOW : "",

From d81525a235e48cc5de3e4005a16ddb1fbdfd9d7c Mon Sep 17 00:00:00 2001
From: Isaac Freund <mail@isaacfreund.com>
Date: Wed, 2 Jul 2025 12:15:33 +0200
Subject: [PATCH 4/4] client: add wl_display_dispatch_pending_single

As well as wl_display_dispatch_queue_pending_single.

The motivation is writing libwayland bindings for a dynamic language
with exceptions/non-local returns. Since it is invalid for a
wl_dispatcher_func_t callback provided to libwayland to not return,
there is no way to prevent dispatching of further events in the case of
an exception in the dynamic language event handler.

Furthermore, since creating/destroying Wayland objects in an event
handler affects the dispatching of subsequent events by libwayland,
it is not possible to collect Wayland events in a queue outside
libwayland and dispatch them one-by-one after
wl_display_dispatch_pending() returns.

Adding libwayland API to dispatch at most one pending event solves this
problem cleanly. The bindings can have libwayland dispatch a single
event, wait for wl_display_dispatch_pending_single() to return, run the
dynamic language event handler (which may longjmp away), and continue
the loop for as long as there are more events to dispatch.

References: https://codeberg.org/ifreund/janet-wayland
Signed-off-by: Isaac Freund <mail@isaacfreund.com>
---
 src/wayland-client-core.h |  7 ++++
 src/wayland-client.c      | 75 +++++++++++++++++++++++++++++++++++++++
 tests/display-test.c      | 69 +++++++++++++++++++++++++++++++++++
 3 files changed, 151 insertions(+)

diff --git a/src/wayland-client-core.h b/src/wayland-client-core.h
index 970e6254..e0523e49 100644
--- a/src/wayland-client-core.h
+++ b/src/wayland-client-core.h
@@ -268,9 +268,16 @@ int
 wl_display_dispatch_queue_pending(struct wl_display *display,
 				  struct wl_event_queue *queue);
 
+int
+wl_display_dispatch_queue_pending_single(struct wl_display *display,
+					 struct wl_event_queue *queue);
+
 int
 wl_display_dispatch_pending(struct wl_display *display);
 
+int
+wl_display_dispatch_pending_single(struct wl_display *display);
+
 int
 wl_display_get_error(struct wl_display *display);
 
diff --git a/src/wayland-client.c b/src/wayland-client.c
index c0b361f0..ed686b5c 100644
--- a/src/wayland-client.c
+++ b/src/wayland-client.c
@@ -1882,6 +1882,34 @@ err:
 	return -1;
 }
 
+
+static int
+dispatch_queue_single(struct wl_display *display, struct wl_event_queue *queue)
+{
+	if (display->last_error)
+		goto err;
+
+	while (!wl_list_empty(&display->display_queue.event_list)) {
+		dispatch_event(display, &display->display_queue);
+		if (display->last_error)
+			goto err;
+	}
+
+	if (!wl_list_empty(&queue->event_list)) {
+		dispatch_event(display, queue);
+		if (display->last_error)
+			goto err;
+		return 1;
+	} else {
+		return 0;
+	}
+
+err:
+	errno = display->last_error;
+
+	return -1;
+}
+
 /** Prepare to read events from the display's file descriptor to a queue
  *
  * \param display The display context object
@@ -2212,6 +2240,34 @@ wl_display_dispatch_queue_pending(struct wl_display *display,
 	return ret;
 }
 
+/** Dispatch at most one pending event in an event queue
+ *
+ * \param display The display context object
+ * \param queue The event queue to dispatch
+ * \return The number of dispatched events (0 or 1) on success or -1 on failure
+ *
+ * Dispatch at most one pending event for objects assigned to the given
+ * event queue. On failure -1 is returned and errno set appropriately.
+ * If there are no events queued, this function returns immediately.
+ *
+ * \memberof wl_display
+ * \since 1.25.0
+ */
+WL_EXPORT int
+wl_display_dispatch_queue_pending_single(struct wl_display *display,
+					 struct wl_event_queue *queue)
+{
+	int ret;
+
+	pthread_mutex_lock(&display->mutex);
+
+	ret = dispatch_queue_single(display, queue);
+
+	pthread_mutex_unlock(&display->mutex);
+
+	return ret;
+}
+
 /** Process incoming events
  *
  * \param display The display context object
@@ -2272,6 +2328,25 @@ wl_display_dispatch_pending(struct wl_display *display)
 						 &display->default_queue);
 }
 
+/** Dispatch at most one pending event in the default event queue.
+ *
+ * \param display The display context object
+ * \return The number of dispatched events (0 or 1) on success or -1 on failure
+ *
+ * Dispatch at most one pending event for objects assigned to the default
+ * event queue. On failure -1 is returned and errno set appropriately.
+ * If there are no events queued, this function returns immediately.
+ *
+ * \memberof wl_display
+ * \since 1.25.0
+ */
+WL_EXPORT int
+wl_display_dispatch_pending_single(struct wl_display *display)
+{
+	return wl_display_dispatch_queue_pending_single(display,
+			                             &display->default_queue);
+}
+
 /** Retrieve the last error that occurred on a display
  *
  * \param display The display context object
diff --git a/tests/display-test.c b/tests/display-test.c
index 89606c73..fe78b521 100644
--- a/tests/display-test.c
+++ b/tests/display-test.c
@@ -1695,6 +1695,75 @@ TEST(global_remove)
 	display_destroy(d);
 }
 
+static void
+dispatch_single_read_events(struct wl_display *d)
+{
+	if (wl_display_prepare_read(d) < 0) {
+		return;
+	}
+
+	int ret = 0;
+	do {
+		ret = wl_display_flush(d);
+	} while (ret < 0 && (errno == EINTR || errno == EAGAIN));
+	assert(ret >= 0);
+
+	struct pollfd pfd[1];
+	pfd[0].fd = wl_display_get_fd(d);
+	pfd[0].events = POLLIN;
+
+	do {
+		ret = poll(pfd, 1, -1);
+	} while (ret < 0 && errno == EINTR);
+	assert(ret > 0);
+
+	wl_display_read_events(d);
+}
+
+static void
+dispatch_single_client(void)
+{
+	struct client *c = client_connect();
+
+	assert(wl_display_dispatch_pending_single(c->wl_display) == 0);
+
+	struct wl_registry *registry = wl_display_get_registry(c->wl_display);
+
+	dispatch_single_read_events(c->wl_display);
+
+	// [1815110.061] {Default Queue} wl_registry#3.global(1, "test", 1)
+	assert(wl_display_dispatch_pending_single(c->wl_display) == 1);
+
+	dispatch_single_read_events(c->wl_display);
+
+	// [1815110.067] {Default Queue} wl_registry#3.global(2, "wl_seat", 1)
+	assert(wl_display_dispatch_pending_single(c->wl_display) == 1);
+
+	// No more events
+	assert(wl_display_dispatch_pending_single(c->wl_display) == 0);
+
+	wl_registry_destroy(registry);
+
+	client_disconnect(c);
+}
+
+TEST(dispatch_single)
+{
+	struct display *d = display_create();
+
+	struct wl_global *global = wl_global_create(d->wl_display,
+						    &wl_seat_interface,
+						    1, d, bind_seat);
+
+	client_create_noarg(d, dispatch_single_client);
+
+	display_run(d);
+
+	wl_global_destroy(global);
+
+	display_destroy(d);
+}
+
 static void
 terminate_display(void *arg)
 {