Merge branch 'dmabuf' into 'master'

Allow scanning out fullscreen surfaces on secondary GPUs

See merge request wlroots/wlroots!4055
This commit is contained in:
Austin Shafer 2024-03-14 15:15:48 +00:00
commit 772265a81a
18 changed files with 782 additions and 76 deletions

View file

@ -53,9 +53,7 @@ static void backend_destroy(struct wlr_backend *backend) {
wl_list_remove(&drm->dev_change.link);
wl_list_remove(&drm->dev_remove.link);
if (drm->parent) {
finish_drm_renderer(&drm->mgpu_renderer);
}
finish_drm_renderer(&drm->mgpu_renderer);
finish_drm_resources(drm);
@ -210,22 +208,20 @@ struct wlr_backend *wlr_drm_backend_create(struct wlr_session *session,
goto error_event;
}
if (drm->parent) {
if (!init_drm_renderer(drm, &drm->mgpu_renderer)) {
wlr_log(WLR_ERROR, "Failed to initialize renderer");
goto error_resources;
}
// We'll perform a multi-GPU copy for all submitted buffers, we need
// to be able to texture from them
struct wlr_renderer *renderer = drm->mgpu_renderer.wlr_rend;
const struct wlr_drm_format_set *texture_formats =
wlr_renderer_get_dmabuf_texture_formats(renderer);
if (texture_formats == NULL) {
wlr_log(WLR_ERROR, "Failed to query renderer texture formats");
goto error_mgpu_renderer;
}
if (!init_drm_renderer(drm, &drm->mgpu_renderer)) {
wlr_log(WLR_ERROR, "Failed to initialize renderer");
goto error_resources;
}
// We'll perform a multi-GPU copy for all submitted buffers, we need
// to be able to texture from them
struct wlr_renderer *renderer = drm->mgpu_renderer.wlr_rend;
const struct wlr_drm_format_set *texture_formats =
wlr_renderer_get_dmabuf_texture_formats(renderer);
// Some configurations (alpine CI job) will have a renderer here that does not
// support dmabuf formats. We don't want to fail creation of the drm backend
// as a result of this, we simply don't populate the format set in that case.
if (texture_formats) {
// Forbid implicit modifiers, because their meaning changes from one
// GPU to another.
for (size_t i = 0; i < texture_formats->len; i++) {
@ -245,8 +241,6 @@ struct wlr_backend *wlr_drm_backend_create(struct wlr_session *session,
return &drm->backend;
error_mgpu_renderer:
finish_drm_renderer(&drm->mgpu_renderer);
error_resources:
finish_drm_resources(drm);
error_event:

View file

@ -576,6 +576,7 @@ static void drm_connector_state_finish(struct wlr_drm_connector_state *state) {
static bool drm_connector_state_update_primary_fb(struct wlr_drm_connector *conn,
struct wlr_drm_connector_state *state) {
bool ok;
struct wlr_drm_backend *drm = conn->backend;
assert(state->base->committed & WLR_OUTPUT_STATE_BUFFER);
@ -585,34 +586,63 @@ static bool drm_connector_state_update_primary_fb(struct wlr_drm_connector *conn
struct wlr_drm_plane *plane = crtc->primary;
struct wlr_buffer *source_buf = state->base->buffer;
struct wlr_buffer *local_buf = wlr_buffer_lock(source_buf);
struct wlr_buffer *local_buf;
if (drm->parent) {
/*
* First try to import the buffer. We can have a decent degree of
* confidence this will work for a couple reasons:
* 1. Apps running on the dGPU in PRIME setups will be submitting
* buffers with linear modifiers, so that they can be imported
* on the primary GPU. This means they are directly importable
* here as well. This gives a nice FPS boost.
* 2. When the dGPU app supports reacting to dmabuf feedback it will
* be using dGPU modifiers, again meaning it can be imported into
* the dGPU directly for an additional nice perf boost.
*
* The fallback drm_surface_blit path will only be hit when the
* app is running fullscreen with dGPU (non-linear) modifiers and
* we start using rendered composition again. For a frame we will
* do the fallback before the app reallocs its buffers back to
* linear to be compatible with the primary GPU.
*/
ok = drm_fb_import(&state->primary_fb, drm, local_buf,
&crtc->primary->formats);
/*
* If trying to import this buffer directly didn't work then try
* to perform a blit to a mgpu drm surface and import that instead.
*/
if (!ok && drm->parent) {
struct wlr_drm_format format = {0};
if (!drm_plane_pick_render_format(plane, &format, &drm->mgpu_renderer)) {
wlr_log(WLR_ERROR, "Failed to pick primary plane format");
return false;
ok = false;
goto release_buf;
}
// TODO: fallback to modifier-less buffer allocation
bool ok = init_drm_surface(&plane->mgpu_surf, &drm->mgpu_renderer,
ok = init_drm_surface(&plane->mgpu_surf, &drm->mgpu_renderer,
source_buf->width, source_buf->height, &format);
wlr_drm_format_finish(&format);
if (!ok) {
return false;
ok = false;
goto release_buf;
}
local_buf = drm_surface_blit(&plane->mgpu_surf, source_buf);
if (local_buf == NULL) {
return false;
struct wlr_buffer *drm_buf = drm_surface_blit(&plane->mgpu_surf,
&drm->parent->mgpu_renderer, source_buf);
if (drm_buf == NULL) {
ok = false;
goto release_buf;
}
} else {
local_buf = wlr_buffer_lock(source_buf);
ok = drm_fb_import(&state->primary_fb, drm, drm_buf,
&plane->formats);
wlr_buffer_unlock(drm_buf);
}
bool ok = drm_fb_import(&state->primary_fb, drm, local_buf,
&plane->formats);
release_buf:
wlr_buffer_unlock(local_buf);
if (!ok) {
wlr_drm_conn_log(conn, WLR_DEBUG,
"Failed to import buffer for scan-out");
@ -1010,7 +1040,7 @@ static bool drm_connector_set_cursor(struct wlr_output *output,
return false;
}
local_buf = drm_surface_blit(&plane->mgpu_surf, buffer);
local_buf = drm_surface_blit(&plane->mgpu_surf, &drm->parent->mgpu_renderer, buffer);
if (local_buf == NULL) {
return false;
}

View file

@ -3,6 +3,7 @@
#include <wlr/render/swapchain.h>
#include <wlr/render/wlr_renderer.h>
#include <wlr/util/log.h>
#include "backend/backend.h"
#include "backend/drm/drm.h"
#include "backend/drm/fb.h"
#include "backend/drm/renderer.h"
@ -74,7 +75,7 @@ bool init_drm_surface(struct wlr_drm_surface *surf,
}
struct wlr_buffer *drm_surface_blit(struct wlr_drm_surface *surf,
struct wlr_buffer *buffer) {
struct wlr_drm_renderer *parent_renderer, struct wlr_buffer *buffer) {
struct wlr_renderer *renderer = surf->renderer->wlr_rend;
if (surf->swapchain->width != buffer->width ||
@ -83,11 +84,23 @@ struct wlr_buffer *drm_surface_blit(struct wlr_drm_surface *surf,
return NULL;
}
struct wlr_texture *tex = wlr_texture_from_buffer(renderer, buffer);
if (tex == NULL) {
wlr_log(WLR_ERROR, "Failed to import source buffer into multi-GPU renderer");
struct wlr_texture_set *set = wlr_texture_set_create(renderer, NULL);
if (set == NULL) {
wlr_log(WLR_ERROR, "Failed to import source buffer multi-GPU texture set");
return NULL;
}
/* Add the parent renderer so the texture set can use it for copies */
wlr_texture_set_add_renderer(set, parent_renderer->wlr_rend, parent_renderer->allocator);
if (!wlr_texture_set_import_buffer(set, buffer)) {
wlr_log(WLR_ERROR, "Failed to import source buffer multi-GPU texture set");
goto error_tex;
}
struct wlr_texture *tex = wlr_texture_set_get_tex_for_renderer(set, renderer);
if (tex == NULL) {
wlr_log(WLR_ERROR, "Failed to export source buffer for multi-GPU renderer");
goto error_tex;
}
struct wlr_buffer *dst = wlr_swapchain_acquire(surf->swapchain, NULL);
if (!dst) {
@ -110,14 +123,14 @@ struct wlr_buffer *drm_surface_blit(struct wlr_drm_surface *surf,
goto error_dst;
}
wlr_texture_destroy(tex);
wlr_texture_set_destroy(set);
return dst;
error_dst:
wlr_buffer_unlock(dst);
error_tex:
wlr_texture_destroy(tex);
wlr_texture_set_destroy(set);
return NULL;
}

View file

@ -2,11 +2,15 @@
#include <stdbool.h>
#include <stdlib.h>
#include <time.h>
#include <xf86drm.h>
#include <fcntl.h>
#include <wlr/backend/interface.h>
#include <wlr/types/wlr_buffer.h>
#include <wlr/util/log.h>
#include "render/wlr_renderer.h"
#include "backend/backend.h"
#include "backend/multi.h"
#include "render/allocator/allocator.h"
struct subbackend_state {
struct wlr_backend *backend;
@ -58,6 +62,7 @@ static void multi_backend_destroy(struct wlr_backend *wlr_backend) {
wl_container_of(backend->backends.next, sub, link);
wlr_backend_destroy(sub->backend);
}
wlr_multi_gpu_destroy(backend->multi_gpu);
free(backend);
}
@ -118,6 +123,7 @@ struct wlr_backend *wlr_multi_backend_create(struct wl_event_loop *loop) {
}
wl_list_init(&backend->backends);
backend->multi_gpu = wlr_multi_gpu_create();
wlr_backend_init(&backend->backend, &backend_impl);
wl_signal_init(&backend->events.backend_add);
@ -225,3 +231,101 @@ void wlr_multi_for_each_backend(struct wlr_backend *_backend,
callback(sub->backend, data);
}
}
/*
* Create a wlr_multi_gpu struct and populate it with a renderer and allocator for each
* device in the system. This is done by finding all DRM nodes using drmGetDevices2.
*/
/*
 * Create a wlr_multi_gpu struct and populate it with a renderer and allocator
 * for each GPU in the system. Devices are discovered by enumerating DRM render
 * nodes with drmGetDevices2.
 *
 * Returns NULL on allocation failure, if drmGetDevices2 fails, or if a
 * renderer/allocator cannot be created for a discovered render node. Render
 * nodes that cannot be opened (e.g. due to permissions) are logged and
 * skipped rather than aborting the whole scan.
 */
struct wlr_multi_gpu *wlr_multi_gpu_create(void) {
	int flags = 0;
	struct wlr_multi_gpu *multi_gpu = NULL;

	// First call with a NULL array only counts the available DRM devices
	int devices_len = drmGetDevices2(flags, NULL, 0);
	if (devices_len < 0) {
		wlr_log(WLR_ERROR, "drmGetDevices2 failed: %s", strerror(-devices_len));
		return NULL;
	}

	drmDevice **devices = calloc(devices_len, sizeof(*devices));
	if (devices == NULL) {
		// Return directly: jumping to "out" here would walk a NULL array
		wlr_log_errno(WLR_ERROR, "Allocation failed");
		return NULL;
	}
	devices_len = drmGetDevices2(flags, devices, devices_len);
	if (devices_len < 0) {
		wlr_log(WLR_ERROR, "drmGetDevices2 failed: %s", strerror(-devices_len));
		devices_len = 0; // nothing was populated; skip the free loop below
		goto out;
	}

	multi_gpu = calloc(1, sizeof(struct wlr_multi_gpu));
	if (!multi_gpu) {
		wlr_log_errno(WLR_ERROR, "Allocation failed");
		goto out;
	}
	wl_list_init(&multi_gpu->devices);

	for (int i = 0; i < devices_len; i++) {
		drmDevice *dev = devices[i];
		if (!(dev->available_nodes & (1 << DRM_NODE_RENDER))) {
			continue;
		}

		const char *name = dev->nodes[DRM_NODE_RENDER];
		wlr_log(WLR_DEBUG, "Opening DRM render node '%s'", name);
		int fd = open(name, O_RDWR | O_CLOEXEC);
		if (fd < 0) {
			// Skip this node instead of truncating the scan: a single
			// unopenable node should not hide the remaining GPUs
			wlr_log_errno(WLR_ERROR, "Failed to open '%s'", name);
			continue;
		}

		// Create a renderer/allocator and add it as a new device
		struct wlr_renderer *renderer = renderer_autocreate_with_drm_fd(fd);
		if (!renderer) {
			wlr_log(WLR_ERROR, "Failed to create multi-GPU renderer");
			goto fail;
		}
		struct wlr_allocator *allocator =
			allocator_autocreate_with_drm_fd(WLR_BUFFER_CAP_DMABUF, renderer, fd);
		if (!allocator) {
			wlr_log(WLR_ERROR, "Failed to create multi-GPU allocator");
			wlr_renderer_destroy(renderer);
			goto fail;
		}

		struct wlr_multi_gpu_device *device = calloc(1, sizeof(*device));
		if (!device) {
			wlr_allocator_destroy(allocator);
			wlr_renderer_destroy(renderer);
			goto fail;
		}
		device->renderer = renderer;
		device->allocator = allocator;
		wl_list_insert(&multi_gpu->devices, &device->link);
	}

	goto out;

fail:
	// Tears down every device created so far
	wlr_multi_gpu_destroy(multi_gpu);
	multi_gpu = NULL;
out:
	for (int i = 0; i < devices_len; i++) {
		drmFreeDevice(&devices[i]);
	}
	free(devices); // free(NULL) is a no-op; no guard needed
	return multi_gpu;
}
/*
 * Destroy a wlr_multi_gpu struct, tearing down the renderer and allocator of
 * every device it tracks. Accepts NULL (wlr_multi_gpu_create may fail, and
 * the multi backend destroy path passes its pointer unconditionally).
 */
void wlr_multi_gpu_destroy(struct wlr_multi_gpu *multi_gpu) {
	if (multi_gpu == NULL) {
		return;
	}

	// Use the _safe iteration variant: each element is unlinked and freed
	// while walking the list, so the plain variant would read freed memory
	// to find the next node.
	struct wlr_multi_gpu_device *device, *tmp;
	wl_list_for_each_safe(device, tmp, &multi_gpu->devices, link) {
		wl_list_remove(&device->link);
		wlr_allocator_destroy(device->allocator);
		wlr_renderer_destroy(device->renderer);
		free(device);
	}
	free(multi_gpu);
}