Merge branch 'dmabuf' into 'master'

Allow scanning out fullscreen surfaces on secondary GPUs

See merge request wlroots/wlroots!4055
This commit is contained in:
Austin Shafer 2024-03-14 15:15:48 +00:00
commit 772265a81a
18 changed files with 782 additions and 76 deletions

View file

@ -53,9 +53,7 @@ static void backend_destroy(struct wlr_backend *backend) {
wl_list_remove(&drm->dev_change.link);
wl_list_remove(&drm->dev_remove.link);
if (drm->parent) {
finish_drm_renderer(&drm->mgpu_renderer);
}
finish_drm_renderer(&drm->mgpu_renderer);
finish_drm_resources(drm);
@ -210,22 +208,20 @@ struct wlr_backend *wlr_drm_backend_create(struct wlr_session *session,
goto error_event;
}
if (drm->parent) {
if (!init_drm_renderer(drm, &drm->mgpu_renderer)) {
wlr_log(WLR_ERROR, "Failed to initialize renderer");
goto error_resources;
}
// We'll perform a multi-GPU copy for all submitted buffers, we need
// to be able to texture from them
struct wlr_renderer *renderer = drm->mgpu_renderer.wlr_rend;
const struct wlr_drm_format_set *texture_formats =
wlr_renderer_get_dmabuf_texture_formats(renderer);
if (texture_formats == NULL) {
wlr_log(WLR_ERROR, "Failed to query renderer texture formats");
goto error_mgpu_renderer;
}
if (!init_drm_renderer(drm, &drm->mgpu_renderer)) {
wlr_log(WLR_ERROR, "Failed to initialize renderer");
goto error_resources;
}
// We'll perform a multi-GPU copy for all submitted buffers, we need
// to be able to texture from them
struct wlr_renderer *renderer = drm->mgpu_renderer.wlr_rend;
const struct wlr_drm_format_set *texture_formats =
wlr_renderer_get_dmabuf_texture_formats(renderer);
// Some configurations (alpine CI job) will have a renderer here that does not
// support dmabuf formats. We don't want to fail creation of the drm backend
// as a result of this, we simply don't populate the format set in that case.
if (texture_formats) {
// Forbid implicit modifiers, because their meaning changes from one
// GPU to another.
for (size_t i = 0; i < texture_formats->len; i++) {
@ -245,8 +241,6 @@ struct wlr_backend *wlr_drm_backend_create(struct wlr_session *session,
return &drm->backend;
error_mgpu_renderer:
finish_drm_renderer(&drm->mgpu_renderer);
error_resources:
finish_drm_resources(drm);
error_event:

View file

@ -576,6 +576,7 @@ static void drm_connector_state_finish(struct wlr_drm_connector_state *state) {
static bool drm_connector_state_update_primary_fb(struct wlr_drm_connector *conn,
struct wlr_drm_connector_state *state) {
bool ok;
struct wlr_drm_backend *drm = conn->backend;
assert(state->base->committed & WLR_OUTPUT_STATE_BUFFER);
@ -585,34 +586,63 @@ static bool drm_connector_state_update_primary_fb(struct wlr_drm_connector *conn
struct wlr_drm_plane *plane = crtc->primary;
struct wlr_buffer *source_buf = state->base->buffer;
struct wlr_buffer *local_buf = wlr_buffer_lock(source_buf);
struct wlr_buffer *local_buf;
if (drm->parent) {
/*
* First try to import the buffer. We can have a decent degree of
* confidence this will work for a couple reasons:
* 1. Apps running on the dGPU in PRIME setups will be submitting
* buffers with linear modifiers, so that they can be imported
* on the primary GPU. This means they are directly importable
* here as well. This gives a nice FPS boost.
* 2. When the dGPU app supports reacting to dmabuf feedback it will
* be using dGPU modifiers, again meaning it can be imported into
* the dGPU directly for an additional nice perf boost.
*
* The fallback drm_surface_blit path will only be hit when the
* app is running fullscreen with dGPU (non-linear) modifiers and
* we start using rendered composition again. For a frame we will
* do the fallback before the app reallocs its buffers back to
* linear to be compatible with the primary GPU.
*/
ok = drm_fb_import(&state->primary_fb, drm, local_buf,
&crtc->primary->formats);
/*
* If trying to import this buffer directly didn't work then try
* to perform a blit to a mgpu drm surface and import that instead.
*/
if (!ok && drm->parent) {
struct wlr_drm_format format = {0};
if (!drm_plane_pick_render_format(plane, &format, &drm->mgpu_renderer)) {
wlr_log(WLR_ERROR, "Failed to pick primary plane format");
return false;
ok = false;
goto release_buf;
}
// TODO: fallback to modifier-less buffer allocation
bool ok = init_drm_surface(&plane->mgpu_surf, &drm->mgpu_renderer,
ok = init_drm_surface(&plane->mgpu_surf, &drm->mgpu_renderer,
source_buf->width, source_buf->height, &format);
wlr_drm_format_finish(&format);
if (!ok) {
return false;
ok = false;
goto release_buf;
}
local_buf = drm_surface_blit(&plane->mgpu_surf, source_buf);
if (local_buf == NULL) {
return false;
struct wlr_buffer *drm_buf = drm_surface_blit(&plane->mgpu_surf,
&drm->parent->mgpu_renderer, source_buf);
if (drm_buf == NULL) {
ok = false;
goto release_buf;
}
} else {
local_buf = wlr_buffer_lock(source_buf);
ok = drm_fb_import(&state->primary_fb, drm, drm_buf,
&plane->formats);
wlr_buffer_unlock(drm_buf);
}
bool ok = drm_fb_import(&state->primary_fb, drm, local_buf,
&plane->formats);
release_buf:
wlr_buffer_unlock(local_buf);
if (!ok) {
wlr_drm_conn_log(conn, WLR_DEBUG,
"Failed to import buffer for scan-out");
@ -1010,7 +1040,7 @@ static bool drm_connector_set_cursor(struct wlr_output *output,
return false;
}
local_buf = drm_surface_blit(&plane->mgpu_surf, buffer);
local_buf = drm_surface_blit(&plane->mgpu_surf, &drm->parent->mgpu_renderer, buffer);
if (local_buf == NULL) {
return false;
}

View file

@ -3,6 +3,7 @@
#include <wlr/render/swapchain.h>
#include <wlr/render/wlr_renderer.h>
#include <wlr/util/log.h>
#include "backend/backend.h"
#include "backend/drm/drm.h"
#include "backend/drm/fb.h"
#include "backend/drm/renderer.h"
@ -74,7 +75,7 @@ bool init_drm_surface(struct wlr_drm_surface *surf,
}
struct wlr_buffer *drm_surface_blit(struct wlr_drm_surface *surf,
struct wlr_buffer *buffer) {
struct wlr_drm_renderer *parent_renderer, struct wlr_buffer *buffer) {
struct wlr_renderer *renderer = surf->renderer->wlr_rend;
if (surf->swapchain->width != buffer->width ||
@ -83,11 +84,23 @@ struct wlr_buffer *drm_surface_blit(struct wlr_drm_surface *surf,
return NULL;
}
struct wlr_texture *tex = wlr_texture_from_buffer(renderer, buffer);
if (tex == NULL) {
wlr_log(WLR_ERROR, "Failed to import source buffer into multi-GPU renderer");
struct wlr_texture_set *set = wlr_texture_set_create(renderer, NULL);
if (set == NULL) {
wlr_log(WLR_ERROR, "Failed to import source buffer multi-GPU texture set");
return NULL;
}
/* Add the parent renderer so the texture set can use it for copies */
wlr_texture_set_add_renderer(set, parent_renderer->wlr_rend, parent_renderer->allocator);
if (!wlr_texture_set_import_buffer(set, buffer)) {
wlr_log(WLR_ERROR, "Failed to import source buffer multi-GPU texture set");
goto error_tex;
}
struct wlr_texture *tex = wlr_texture_set_get_tex_for_renderer(set, renderer);
if (tex == NULL) {
wlr_log(WLR_ERROR, "Failed to export source buffer for multi-GPU renderer");
goto error_tex;
}
struct wlr_buffer *dst = wlr_swapchain_acquire(surf->swapchain, NULL);
if (!dst) {
@ -110,14 +123,14 @@ struct wlr_buffer *drm_surface_blit(struct wlr_drm_surface *surf,
goto error_dst;
}
wlr_texture_destroy(tex);
wlr_texture_set_destroy(set);
return dst;
error_dst:
wlr_buffer_unlock(dst);
error_tex:
wlr_texture_destroy(tex);
wlr_texture_set_destroy(set);
return NULL;
}

View file

@ -2,11 +2,15 @@
#include <stdbool.h>
#include <stdlib.h>
#include <time.h>
#include <xf86drm.h>
#include <fcntl.h>
#include <wlr/backend/interface.h>
#include <wlr/types/wlr_buffer.h>
#include <wlr/util/log.h>
#include "render/wlr_renderer.h"
#include "backend/backend.h"
#include "backend/multi.h"
#include "render/allocator/allocator.h"
struct subbackend_state {
struct wlr_backend *backend;
@ -58,6 +62,7 @@ static void multi_backend_destroy(struct wlr_backend *wlr_backend) {
wl_container_of(backend->backends.next, sub, link);
wlr_backend_destroy(sub->backend);
}
wlr_multi_gpu_destroy(backend->multi_gpu);
free(backend);
}
@ -118,6 +123,7 @@ struct wlr_backend *wlr_multi_backend_create(struct wl_event_loop *loop) {
}
wl_list_init(&backend->backends);
backend->multi_gpu = wlr_multi_gpu_create();
wlr_backend_init(&backend->backend, &backend_impl);
wl_signal_init(&backend->events.backend_add);
@ -225,3 +231,101 @@ void wlr_multi_for_each_backend(struct wlr_backend *_backend,
callback(sub->backend, data);
}
}
/*
* Create a wlr_multi_gpu struct and populate it with a renderer and allocator for each
* device in the system. This is done by finding all DRM nodes using drmGetDevices2.
*/
/*
 * Create a wlr_multi_gpu struct and populate it with a renderer and allocator
 * for each GPU in the system. Devices are discovered by enumerating DRM render
 * nodes with drmGetDevices2.
 *
 * Returns NULL on allocation failure, if drmGetDevices2 fails, or if a
 * renderer/allocator cannot be created for a discovered render node. Render
 * nodes that cannot be opened (e.g. due to permissions) are logged and
 * skipped rather than aborting the whole scan.
 */
struct wlr_multi_gpu *wlr_multi_gpu_create(void) {
	int flags = 0;
	struct wlr_multi_gpu *multi_gpu = NULL;

	// First call with a NULL array only counts the available DRM devices
	int devices_len = drmGetDevices2(flags, NULL, 0);
	if (devices_len < 0) {
		wlr_log(WLR_ERROR, "drmGetDevices2 failed: %s", strerror(-devices_len));
		return NULL;
	}

	drmDevice **devices = calloc(devices_len, sizeof(*devices));
	if (devices == NULL) {
		// Return directly: jumping to "out" here would walk a NULL array
		wlr_log_errno(WLR_ERROR, "Allocation failed");
		return NULL;
	}
	devices_len = drmGetDevices2(flags, devices, devices_len);
	if (devices_len < 0) {
		wlr_log(WLR_ERROR, "drmGetDevices2 failed: %s", strerror(-devices_len));
		devices_len = 0; // nothing was populated; skip the free loop below
		goto out;
	}

	multi_gpu = calloc(1, sizeof(struct wlr_multi_gpu));
	if (!multi_gpu) {
		wlr_log_errno(WLR_ERROR, "Allocation failed");
		goto out;
	}
	wl_list_init(&multi_gpu->devices);

	for (int i = 0; i < devices_len; i++) {
		drmDevice *dev = devices[i];
		if (!(dev->available_nodes & (1 << DRM_NODE_RENDER))) {
			continue;
		}

		const char *name = dev->nodes[DRM_NODE_RENDER];
		wlr_log(WLR_DEBUG, "Opening DRM render node '%s'", name);
		int fd = open(name, O_RDWR | O_CLOEXEC);
		if (fd < 0) {
			// Skip this node instead of truncating the scan: a single
			// unopenable node should not hide the remaining GPUs
			wlr_log_errno(WLR_ERROR, "Failed to open '%s'", name);
			continue;
		}

		// Create a renderer/allocator and add it as a new device
		struct wlr_renderer *renderer = renderer_autocreate_with_drm_fd(fd);
		if (!renderer) {
			wlr_log(WLR_ERROR, "Failed to create multi-GPU renderer");
			goto fail;
		}
		struct wlr_allocator *allocator =
			allocator_autocreate_with_drm_fd(WLR_BUFFER_CAP_DMABUF, renderer, fd);
		if (!allocator) {
			wlr_log(WLR_ERROR, "Failed to create multi-GPU allocator");
			wlr_renderer_destroy(renderer);
			goto fail;
		}

		struct wlr_multi_gpu_device *device = calloc(1, sizeof(*device));
		if (!device) {
			wlr_allocator_destroy(allocator);
			wlr_renderer_destroy(renderer);
			goto fail;
		}
		device->renderer = renderer;
		device->allocator = allocator;
		wl_list_insert(&multi_gpu->devices, &device->link);
	}

	goto out;

fail:
	// Tears down every device created so far
	wlr_multi_gpu_destroy(multi_gpu);
	multi_gpu = NULL;
out:
	for (int i = 0; i < devices_len; i++) {
		drmFreeDevice(&devices[i]);
	}
	free(devices); // free(NULL) is a no-op; no guard needed
	return multi_gpu;
}
/*
 * Destroy a wlr_multi_gpu struct, tearing down the renderer and allocator of
 * every device it tracks. Accepts NULL (wlr_multi_gpu_create may fail, and
 * the multi backend destroy path passes its pointer unconditionally).
 */
void wlr_multi_gpu_destroy(struct wlr_multi_gpu *multi_gpu) {
	if (multi_gpu == NULL) {
		return;
	}

	// Use the _safe iteration variant: each element is unlinked and freed
	// while walking the list, so the plain variant would read freed memory
	// to find the next node.
	struct wlr_multi_gpu_device *device, *tmp;
	wl_list_for_each_safe(device, tmp, &multi_gpu->devices, link) {
		wl_list_remove(&device->link);
		wlr_allocator_destroy(device->allocator);
		wlr_renderer_destroy(device->renderer);
		free(device);
	}
	free(multi_gpu);
}