diff --git a/backend/drm/backend.c b/backend/drm/backend.c index 27e5585dc..848f1046e 100644 --- a/backend/drm/backend.c +++ b/backend/drm/backend.c @@ -53,9 +53,7 @@ static void backend_destroy(struct wlr_backend *backend) { wl_list_remove(&drm->dev_change.link); wl_list_remove(&drm->dev_remove.link); - if (drm->parent) { - finish_drm_renderer(&drm->mgpu_renderer); - } + finish_drm_renderer(&drm->mgpu_renderer); finish_drm_resources(drm); @@ -210,22 +208,20 @@ struct wlr_backend *wlr_drm_backend_create(struct wlr_session *session, goto error_event; } - if (drm->parent) { - if (!init_drm_renderer(drm, &drm->mgpu_renderer)) { - wlr_log(WLR_ERROR, "Failed to initialize renderer"); - goto error_resources; - } - - // We'll perform a multi-GPU copy for all submitted buffers, we need - // to be able to texture from them - struct wlr_renderer *renderer = drm->mgpu_renderer.wlr_rend; - const struct wlr_drm_format_set *texture_formats = - wlr_renderer_get_dmabuf_texture_formats(renderer); - if (texture_formats == NULL) { - wlr_log(WLR_ERROR, "Failed to query renderer texture formats"); - goto error_mgpu_renderer; - } + if (!init_drm_renderer(drm, &drm->mgpu_renderer)) { + wlr_log(WLR_ERROR, "Failed to initialize renderer"); + goto error_resources; + } + // We'll perform a multi-GPU copy for all submitted buffers, we need + // to be able to texture from them + struct wlr_renderer *renderer = drm->mgpu_renderer.wlr_rend; + const struct wlr_drm_format_set *texture_formats = + wlr_renderer_get_dmabuf_texture_formats(renderer); + // Some configurations (alpine CI job) will have a renderer here that does not + // support dmabuf formats. We don't want to fail creation of the drm backend + // as a result of this, we simply don't populate the format set in that case. + if (texture_formats) { // Forbid implicit modifiers, because their meaning changes from one // GPU to another. 
+ for (size_t i = 0; i < texture_formats->len; i++) { @@ -245,8 +241,6 @@ struct wlr_backend *wlr_drm_backend_create(struct wlr_session *session, return &drm->backend; -error_mgpu_renderer: - finish_drm_renderer(&drm->mgpu_renderer); error_resources: finish_drm_resources(drm); error_event: diff --git a/backend/drm/drm.c b/backend/drm/drm.c index 3a71f393d..936b439dd 100644 --- a/backend/drm/drm.c +++ b/backend/drm/drm.c @@ -576,6 +576,7 @@ static void drm_connector_state_finish(struct wlr_drm_connector_state *state) { static bool drm_connector_state_update_primary_fb(struct wlr_drm_connector *conn, struct wlr_drm_connector_state *state) { + bool ok; struct wlr_drm_backend *drm = conn->backend; assert(state->base->committed & WLR_OUTPUT_STATE_BUFFER); @@ -585,34 +586,63 @@ static bool drm_connector_state_update_primary_fb(struct wlr_drm_connector *conn struct wlr_drm_plane *plane = crtc->primary; struct wlr_buffer *source_buf = state->base->buffer; + struct wlr_buffer *local_buf = wlr_buffer_lock(source_buf); - struct wlr_buffer *local_buf; - if (drm->parent) { + /* + * First try to import the buffer. We can have a decent degree of + * confidence this will work for a couple reasons: + * 1. Apps running on the dGPU in PRIME setups will be submitting + * buffers with linear modifiers, so that they can be imported + * on the primary GPU. This means they are directly importable + * here as well. This gives a nice FPS boost. + * 2. When the dGPU app supports reacting to dmabuf feedback it will + * be using dGPU modifiers, again meaning it can be imported into + * the dGPU directly for an additional nice perf boost. + * + * The fallback drm_surface_blit path will only be hit when the + * app is running fullscreen with dGPU (non-linear) modifiers and + * we start using rendered composition again. For a frame we will + * do the fallback before the app reallocs its buffers back to + * linear to be compatible with the primary GPU. 
+ */ + ok = drm_fb_import(&state->primary_fb, drm, local_buf, + &crtc->primary->formats); + + /* + * If trying to import this buffer directly didn't work then try + * to perform a blit to a mgpu drm surface and import that instead. + */ + if (!ok && drm->parent) { struct wlr_drm_format format = {0}; if (!drm_plane_pick_render_format(plane, &format, &drm->mgpu_renderer)) { wlr_log(WLR_ERROR, "Failed to pick primary plane format"); - return false; + ok = false; + goto release_buf; } // TODO: fallback to modifier-less buffer allocation - bool ok = init_drm_surface(&plane->mgpu_surf, &drm->mgpu_renderer, + ok = init_drm_surface(&plane->mgpu_surf, &drm->mgpu_renderer, source_buf->width, source_buf->height, &format); wlr_drm_format_finish(&format); if (!ok) { - return false; + ok = false; + goto release_buf; } - local_buf = drm_surface_blit(&plane->mgpu_surf, source_buf); - if (local_buf == NULL) { - return false; + struct wlr_buffer *drm_buf = drm_surface_blit(&plane->mgpu_surf, + &drm->parent->mgpu_renderer, source_buf); + if (drm_buf == NULL) { + ok = false; + goto release_buf; } - } else { - local_buf = wlr_buffer_lock(source_buf); + ok = drm_fb_import(&state->primary_fb, drm, drm_buf, + &plane->formats); + wlr_buffer_unlock(drm_buf); } - bool ok = drm_fb_import(&state->primary_fb, drm, local_buf, - &plane->formats); +release_buf: wlr_buffer_unlock(local_buf); + if (!ok) { wlr_drm_conn_log(conn, WLR_DEBUG, "Failed to import buffer for scan-out"); @@ -1010,7 +1040,7 @@ static bool drm_connector_set_cursor(struct wlr_output *output, return false; } - local_buf = drm_surface_blit(&plane->mgpu_surf, buffer); + local_buf = drm_surface_blit(&plane->mgpu_surf, &drm->parent->mgpu_renderer, buffer); if (local_buf == NULL) { return false; } diff --git a/backend/drm/renderer.c b/backend/drm/renderer.c index e4aadc106..ace2d1559 100644 --- a/backend/drm/renderer.c +++ b/backend/drm/renderer.c @@ -3,6 +3,7 @@ #include #include #include +#include "backend/backend.h" #include 
"backend/drm/drm.h" #include "backend/drm/fb.h" #include "backend/drm/renderer.h" @@ -74,7 +75,7 @@ bool init_drm_surface(struct wlr_drm_surface *surf, } struct wlr_buffer *drm_surface_blit(struct wlr_drm_surface *surf, - struct wlr_buffer *buffer) { + struct wlr_drm_renderer *parent_renderer, struct wlr_buffer *buffer) { struct wlr_renderer *renderer = surf->renderer->wlr_rend; if (surf->swapchain->width != buffer->width || @@ -83,11 +84,23 @@ struct wlr_buffer *drm_surface_blit(struct wlr_drm_surface *surf, return NULL; } - struct wlr_texture *tex = wlr_texture_from_buffer(renderer, buffer); - if (tex == NULL) { - wlr_log(WLR_ERROR, "Failed to import source buffer into multi-GPU renderer"); + struct wlr_texture_set *set = wlr_texture_set_create(renderer, NULL); + if (set == NULL) { + wlr_log(WLR_ERROR, "Failed to import source buffer multi-GPU texture set"); return NULL; } + /* Add the parent renderer so the texture set can use it for copies */ + wlr_texture_set_add_renderer(set, parent_renderer->wlr_rend, parent_renderer->allocator); + if (!wlr_texture_set_import_buffer(set, buffer)) { + wlr_log(WLR_ERROR, "Failed to import source buffer multi-GPU texture set"); + goto error_tex; + } + + struct wlr_texture *tex = wlr_texture_set_get_tex_for_renderer(set, renderer); + if (tex == NULL) { + wlr_log(WLR_ERROR, "Failed to export source buffer for multi-GPU renderer"); + goto error_tex; + } struct wlr_buffer *dst = wlr_swapchain_acquire(surf->swapchain, NULL); if (!dst) { @@ -110,14 +123,14 @@ struct wlr_buffer *drm_surface_blit(struct wlr_drm_surface *surf, goto error_dst; } - wlr_texture_destroy(tex); + wlr_texture_set_destroy(set); return dst; error_dst: wlr_buffer_unlock(dst); error_tex: - wlr_texture_destroy(tex); + wlr_texture_set_destroy(set); return NULL; } diff --git a/backend/multi/backend.c b/backend/multi/backend.c index 740e1d6fa..caa6089b1 100644 --- a/backend/multi/backend.c +++ b/backend/multi/backend.c @@ -2,11 +2,15 @@ #include #include #include 
+#include +#include #include #include #include +#include "render/wlr_renderer.h" #include "backend/backend.h" #include "backend/multi.h" +#include "render/allocator/allocator.h" struct subbackend_state { struct wlr_backend *backend; @@ -58,6 +62,7 @@ static void multi_backend_destroy(struct wlr_backend *wlr_backend) { wl_container_of(backend->backends.next, sub, link); wlr_backend_destroy(sub->backend); } + wlr_multi_gpu_destroy(backend->multi_gpu); free(backend); } @@ -118,6 +123,7 @@ struct wlr_backend *wlr_multi_backend_create(struct wl_event_loop *loop) { } wl_list_init(&backend->backends); + backend->multi_gpu = wlr_multi_gpu_create(); wlr_backend_init(&backend->backend, &backend_impl); wl_signal_init(&backend->events.backend_add); @@ -225,3 +231,101 @@ void wlr_multi_for_each_backend(struct wlr_backend *_backend, callback(sub->backend, data); } } + +/* + * Create a wlr_multi_gpu struct and populate it with a renderer and allocator for each + * device in the system. This is done by finding all DRM nodes using drmGetDevices2. 
+ */ +struct wlr_multi_gpu *wlr_multi_gpu_create(void) { + int flags = 0; + struct wlr_multi_gpu *multi_gpu = NULL; + int devices_len = drmGetDevices2(flags, NULL, 0); + + if (devices_len < 0) { + wlr_log(WLR_ERROR, "drmGetDevices2 failed: %s", strerror(-devices_len)); + return NULL; + } + drmDevice **devices = calloc(devices_len, sizeof(*devices)); + if (devices == NULL) { + wlr_log_errno(WLR_ERROR, "Allocation failed"); + goto out; + } + devices_len = drmGetDevices2(flags, devices, devices_len); + if (devices_len < 0) { + wlr_log(WLR_ERROR, "drmGetDevices2 failed: %s", strerror(-devices_len)); + goto out; + } + + multi_gpu = calloc(1, sizeof(struct wlr_multi_gpu)); + if (!multi_gpu) { + goto out; + } + wl_list_init(&multi_gpu->devices); + + for (int i = 0; i < devices_len; i++) { + drmDevice *dev = devices[i]; + if (dev->available_nodes & (1 << DRM_NODE_RENDER)) { + const char *name = dev->nodes[DRM_NODE_RENDER]; + wlr_log(WLR_DEBUG, "Opening DRM render node '%s'", name); + int fd = open(name, O_RDWR | O_CLOEXEC); + if (fd < 0) { + wlr_log_errno(WLR_ERROR, "Failed to open '%s'", name); + goto out; + } + + // Create a renderer/allocator and add it as a new device + struct wlr_renderer *renderer = renderer_autocreate_with_drm_fd(fd); + if (!renderer) { + wlr_log(WLR_ERROR, "Failed to create multi-GPU renderer"); + goto fail; + } + + struct wlr_allocator *allocator = + allocator_autocreate_with_drm_fd(WLR_BUFFER_CAP_DMABUF, renderer, fd); + if (!allocator) { + wlr_log(WLR_ERROR, "Failed to create multi-GPU allocator"); + wlr_renderer_destroy(renderer); + goto fail; + } + + struct wlr_multi_gpu_device *device = calloc(1, sizeof(struct wlr_multi_gpu_device)); + if (!device) { + wlr_allocator_destroy(allocator); + wlr_renderer_destroy(renderer); + goto fail; + } + wl_list_insert(&multi_gpu->devices, &device->link); + device->renderer = renderer; + device->allocator = allocator; + } + } + + goto out; + +fail: + wlr_multi_gpu_destroy(multi_gpu); + multi_gpu = NULL; + 
+out: + for (int i = 0; i < devices_len; i++) { + drmFreeDevice(&devices[i]); + } + if (devices) { + free(devices); + } + + return multi_gpu; +} + +void wlr_multi_gpu_destroy(struct wlr_multi_gpu *multi_gpu) { + struct wlr_multi_gpu_device *device; + // Remove and destroy all devices + wl_list_for_each(device, &multi_gpu->devices, link) { + wlr_allocator_destroy(device->allocator); + wlr_renderer_destroy(device->renderer); + wl_list_remove(&device->link); + free(device); + } + + free(multi_gpu); +} diff --git a/include/backend/drm/drm.h b/include/backend/drm/drm.h index 675c5db26..1c4d66056 100644 --- a/include/backend/drm/drm.h +++ b/include/backend/drm/drm.h @@ -107,6 +107,7 @@ struct wlr_drm_backend { /* Only initialized on multi-GPU setups */ struct wlr_drm_renderer mgpu_renderer; + struct wlr_multi_gpu *multi_gpu; struct wlr_session *session; diff --git a/include/backend/drm/renderer.h b/include/backend/drm/renderer.h index f53f720bc..04710c61e 100644 --- a/include/backend/drm/renderer.h +++ b/include/backend/drm/renderer.h @@ -32,7 +32,7 @@ bool init_drm_surface(struct wlr_drm_surface *surf, void finish_drm_surface(struct wlr_drm_surface *surf); struct wlr_buffer *drm_surface_blit(struct wlr_drm_surface *surf, - struct wlr_buffer *buffer); + struct wlr_drm_renderer *parent_renderer, struct wlr_buffer *buffer); bool drm_plane_pick_render_format(struct wlr_drm_plane *plane, struct wlr_drm_format *fmt, struct wlr_drm_renderer *renderer); diff --git a/include/backend/multi.h b/include/backend/multi.h index 3ffd81406..993ccbe4b 100644 --- a/include/backend/multi.h +++ b/include/backend/multi.h @@ -4,10 +4,31 @@ #include #include #include +#include +#include + +struct wlr_multi_gpu_device { + struct wlr_renderer *renderer; + struct wlr_allocator *allocator; + struct wl_list link; +}; + +/* + * Helper struct for tracking multiple renderers. 
This solves the + * problem of us having many renderers (primary, plus individual + * secondary GPU drm renderers) but not tracking them in one location. + * We can use this struct to access renderers for each GPU in + * the system all from one place. Will be populated by the renderer + * the compositor makes, plus every time a drm mgpu renderer is made. + */ +struct wlr_multi_gpu { + struct wl_list devices; +}; struct wlr_multi_backend { struct wlr_backend backend; + struct wlr_multi_gpu *multi_gpu; struct wl_list backends; struct wl_listener event_loop_destroy; diff --git a/include/wlr/backend/multi.h b/include/wlr/backend/multi.h index c4322d98b..8ae5e4763 100644 --- a/include/wlr/backend/multi.h +++ b/include/wlr/backend/multi.h @@ -32,4 +32,7 @@ bool wlr_multi_is_empty(struct wlr_backend *backend); void wlr_multi_for_each_backend(struct wlr_backend *backend, void (*callback)(struct wlr_backend *backend, void *data), void *data); +struct wlr_multi_gpu *wlr_multi_gpu_create(void); +void wlr_multi_gpu_destroy(struct wlr_multi_gpu *multi_gpu); + #endif diff --git a/include/wlr/render/wlr_renderer.h b/include/wlr/render/wlr_renderer.h index 08333a529..5a31ca7bb 100644 --- a/include/wlr/render/wlr_renderer.h +++ b/include/wlr/render/wlr_renderer.h @@ -14,13 +14,10 @@ #include #include #include +#include -struct wlr_backend; struct wlr_renderer_impl; -struct wlr_drm_format_set; struct wlr_buffer; -struct wlr_box; -struct wlr_fbox; /** * A renderer for basic 2D operations. 
@@ -39,6 +36,9 @@ struct wlr_renderer { // private state const struct wlr_renderer_impl *impl; + + /* The GPU list we are a part of, may be null if not created from multi backend */ + struct wlr_multi_gpu *multi_gpu; }; /** diff --git a/include/wlr/render/wlr_texture.h b/include/wlr/render/wlr_texture.h index 1e352c6e6..af243fb79 100644 --- a/include/wlr/render/wlr_texture.h +++ b/include/wlr/render/wlr_texture.h @@ -18,6 +18,7 @@ struct wlr_buffer; struct wlr_renderer; struct wlr_texture_impl; +struct wlr_multi_gpu; struct wlr_texture { const struct wlr_texture_impl *impl; @@ -82,4 +83,123 @@ void wlr_texture_destroy(struct wlr_texture *texture); struct wlr_texture *wlr_texture_from_buffer(struct wlr_renderer *renderer, struct wlr_buffer *buffer); +struct wlr_texture_renderer_pair { + struct wlr_renderer *renderer; + struct wlr_texture *texture; + struct wlr_allocator *allocator; +}; + +/** + * The texture set provides a mapping between renderers and the texture + * imported into them. You can use it to query a texture for a particular + * renderer and it will handle importing and any blitting that needs to + * take place. + */ +struct wlr_texture_set { + /* The buffer this texture set was made from */ + struct wlr_buffer *buffer; + struct wl_listener buffer_release; + + /** + * Index into pairings of the device that this texture directly + * imports into. This texture is "native" to that device, and + * will have to be blitted to other gpus. + * + * This will be -1 if no buffer has been imported yet. + */ + int32_t native_pair; + struct wlr_multi_gpu *multi_gpu; + /* + * This will cache the result of creating a linear-layout version of + * this texture on the native device. This can then be imported into + * the other GPUs. + */ + uint32_t format; + void *pixel_data; + + uint32_t width; + uint32_t height; + + /* This is the size of the pairings array */ + int pairing_count; + struct wlr_texture_renderer_pair *pairings; +}; + +/** + * Create an empty texture set. 
When setting up our wlr_multi_gpu struct we put + * all renderers into a list. This lets us iterate them from here. If this + * request is made on a renderer not in the multi-GPU set, then the list will + * be of length 1, and the renderer will be the only entry in the set. + * + * A buffer must be imported for this set to be used. + */ +struct wlr_texture_set *wlr_texture_set_create(struct wlr_renderer *renderer, + struct wlr_allocator *allocator); + +/** + * Add a renderer to the set. This adds an entry to the set tracking this renderer + * in the set's internal list. No texture is created for this renderer. + */ +void wlr_texture_set_add_renderer(struct wlr_texture_set *set, struct wlr_renderer *renderer, + struct wlr_allocator *allocator); + +/* + * Imports a buffer into the texture set. This initializes the native_pair + * internal state and returns true if the buffer was imported on at least one + * of the renderers in the set. + * + * This should only be called once per texture set initialization. + */ +bool wlr_texture_set_import_buffer(struct wlr_texture_set *set, struct wlr_buffer *buffer); + +/** + * Create a new texture set from a DMA-BUF. The returned texture is immutable. + * The dmabuf will be imported on only one of the mgpu renderers in the system, + * no copies will be made. Returns NULL if the dmabuf could not be imported into + * any renderer. + */ +struct wlr_texture_set *wlr_texture_set_from_dmabuf(struct wlr_renderer *renderer, + struct wlr_dmabuf_attributes *attribs); + +/** + * Create a new texture set from a buffer. + */ +struct wlr_texture_set *wlr_texture_set_from_buffer(struct wlr_renderer *renderer, + struct wlr_buffer *buffer); + +/** + * Request a wlr_texture for this resource that is compatible with the given + * renderer. This allows for on-demand cross-GPU blits in multi-GPU setups. + * The texture will have been imported into the renderer that corresponds to + * its native device. 
If a texture is requested with a different renderer, + * this function will perform a blit and return the appropriate texture. + * + * Textures are cached, so if multiple requests with a non-native renderer + * are made there will be only one blit. + */ +struct wlr_texture *wlr_texture_set_get_tex_for_renderer(struct wlr_texture_set *set, + struct wlr_renderer *renderer); + +/** + * Get the wlr_texture corresponding to the texture's local GPU. This is the GPU it + * is directly importable into. + */ +struct wlr_texture *wlr_texture_set_get_native_texture(struct wlr_texture_set *set); + +/** + * Get the linear pixel data for the backing texture. + */ +void *wlr_texture_set_get_linear_data(struct wlr_texture_set *set); + +/** + * Update all textures in a set with the contents of the next buffer. This will call + * wlr_texture_update_from_buffer for each texture in the set. + */ +bool wlr_texture_set_update_from_buffer(struct wlr_texture_set *set, + struct wlr_buffer *next, const pixman_region32_t *damage); + +/** + * Destroys the texture set and all textures held inside it. + */ +void wlr_texture_set_destroy(struct wlr_texture_set *set); #endif diff --git a/include/wlr/types/wlr_buffer.h b/include/wlr/types/wlr_buffer.h index de3aeec3d..95d5aa51e 100644 --- a/include/wlr/types/wlr_buffer.h +++ b/include/wlr/types/wlr_buffer.h @@ -142,7 +142,7 @@ struct wlr_client_buffer { * The buffer's texture, if any. A buffer will not have a texture if the * client destroys the buffer before it has been released. */ - struct wlr_texture *texture; + struct wlr_texture_set *texture_set; /** + * The buffer this client buffer was created from. NULL if destroyed. 
*/ diff --git a/include/wlr/types/wlr_linux_dmabuf_v1.h b/include/wlr/types/wlr_linux_dmabuf_v1.h index cf967f952..92106314f 100644 --- a/include/wlr/types/wlr_linux_dmabuf_v1.h +++ b/include/wlr/types/wlr_linux_dmabuf_v1.h @@ -63,6 +63,9 @@ struct wlr_linux_dmabuf_v1 { int main_device_fd; // to sanity check FDs sent by clients, -1 if unavailable + // This is only set when the compositor isn't providing a custom renderer. + struct wlr_renderer *main_renderer; + struct wl_listener display_destroy; bool (*check_dmabuf_callback)(struct wlr_dmabuf_attributes *attribs, void *data); diff --git a/render/wlr_renderer.c b/render/wlr_renderer.c index 513fecbd7..d20366a95 100644 --- a/render/wlr_renderer.c +++ b/render/wlr_renderer.c @@ -25,6 +25,7 @@ #endif // WLR_HAS_VULKAN_RENDERER #include "backend/backend.h" +#include "backend/multi.h" #include "render/pixel_format.h" #include "render/wlr_renderer.h" #include "util/env.h" @@ -285,6 +286,13 @@ out: if (own_drm_fd && drm_fd >= 0) { close(drm_fd); } + // If we have a multi GPU environment, then track this renderer + // for cross-GPU imports. 
+ if (renderer && backend && wlr_backend_is_multi(backend)) { + struct wlr_multi_backend *multi = (struct wlr_multi_backend *)backend; + renderer->multi_gpu = multi->multi_gpu; + } + return renderer; } diff --git a/render/wlr_texture.c b/render/wlr_texture.c index 3526ee140..1b3339edf 100644 --- a/render/wlr_texture.c +++ b/render/wlr_texture.c @@ -3,10 +3,17 @@ #include #include #include +#include #include #include #include "render/pixel_format.h" +#include +#include #include "types/wlr_buffer.h" +#include "backend/multi.h" +#include "backend/drm/drm.h" +#include "render/drm_format_set.h" +#include "render/wlr_renderer.h" void wlr_texture_init(struct wlr_texture *texture, struct wlr_renderer *renderer, const struct wlr_texture_impl *impl, uint32_t width, uint32_t height) { @@ -116,6 +123,26 @@ struct wlr_texture *wlr_texture_from_buffer(struct wlr_renderer *renderer, if (!renderer->impl->texture_from_buffer) { return NULL; } + + struct wlr_dmabuf_attributes dmabuf; + /* + * If this is a dmabuf backed buffer then get the format/modifier for it and + * compare it with the set supported by the renderer + */ + if (wlr_buffer_get_dmabuf(buffer, &dmabuf)) { + const struct wlr_drm_format_set *formats = wlr_renderer_get_dmabuf_texture_formats(renderer); + if (!formats) { + wlr_log(WLR_DEBUG, "Could not get DRM format set for renderer"); + return NULL; + } + + if (!wlr_drm_format_set_has(formats, dmabuf.format, dmabuf.modifier)) { + wlr_log(WLR_DEBUG, "Renderer could not import buffer with format 0x%x and modifier 0x%lx", + dmabuf.format, dmabuf.modifier); + return NULL; + } + } + return renderer->impl->texture_from_buffer(renderer, buffer); } @@ -135,3 +162,362 @@ bool wlr_texture_update_from_buffer(struct wlr_texture *texture, } return texture->impl->update_from_buffer(texture, buffer, damage); } + +struct wlr_texture_set *wlr_texture_set_from_dmabuf(struct wlr_renderer *renderer, + struct wlr_dmabuf_attributes *attribs) { + struct wlr_dmabuf_buffer *buffer = 
dmabuf_buffer_create(attribs); + if (buffer == NULL) { + return NULL; + } + + struct wlr_texture_set *set = + wlr_texture_set_from_buffer(renderer, &buffer->base); + + // By this point, the renderer should have locked the buffer if it still + // needs to access it in the future. + dmabuf_buffer_drop(buffer); + + return set; +} + +static void texture_set_handle_buffer_release(struct wl_listener *listener, void *data) { + struct wlr_texture_set *set = wl_container_of(listener, set, buffer_release); + set->buffer = NULL; + wl_list_remove(&set->buffer_release.link); +} + +static void wlr_texture_set_add_pair(struct wlr_texture_set *set, struct wlr_renderer *renderer, + struct wlr_allocator *allocator) { + + set->pairings = realloc(set->pairings, + sizeof(struct wlr_texture_renderer_pair) * (set->pairing_count + 1)); + if (!set->pairings) { + return; + } + + memset(&set->pairings[set->pairing_count], 0, sizeof(struct wlr_texture_renderer_pair)); + set->pairings[set->pairing_count].renderer = renderer; + set->pairings[set->pairing_count].allocator = allocator; + set->pairing_count++; +} + +void wlr_texture_set_add_renderer(struct wlr_texture_set *set, struct wlr_renderer *renderer, + struct wlr_allocator *allocator) { + if (!renderer) { + return; + } + + wlr_texture_set_add_pair(set, renderer, allocator); + + if (renderer->multi_gpu) { + set->multi_gpu = renderer->multi_gpu; + /* Now add each mgpu renderer to the set */ + struct wlr_multi_gpu_device *device; + wl_list_for_each(device, &renderer->multi_gpu->devices, link) { + wlr_texture_set_add_pair(set, device->renderer, device->allocator); + } + } +} + +/* + * When setting up our wlr_multi_gpu struct we put all renderers into a list. This lets us + * iterate them from here. If this request is made on a renderer not in the multi-GPU set, + * then the list will be of length 1, and the renderer will be the only entry in the set. 
+ */ +struct wlr_texture_set *wlr_texture_set_create(struct wlr_renderer *renderer, + struct wlr_allocator *allocator) { + struct wlr_texture_set *set = calloc(1, sizeof(struct wlr_texture_set)); + if (!set) { + return NULL; + } + set->native_pair = -1; + + wlr_texture_set_add_renderer(set, renderer, allocator); + + return set; +} + +/* + * Helper for importing a buffer into the texture set. This initializes + * the native_pair internal state. + */ +bool wlr_texture_set_import_buffer(struct wlr_texture_set *set, struct wlr_buffer *buffer) { + set->buffer = buffer; + // Don't lock our buffer since it gets in the way of releasing shm buffers immediately + // Instead keep a reference to the buffer but register a handler to notify us when + // it is released and clear the pointer. + set->buffer_release.notify = texture_set_handle_buffer_release; + wl_signal_add(&set->buffer->events.release, &set->buffer_release); + + buffer = wlr_buffer_lock(buffer); + bool ret = false; + + /* + * For each renderer, try to create a texture. Go in order, since the first + * entry is always the "primary" renderer that the user created this texture set with. + * The odds are highest that it is importable into that renderer, so start with that + * one. 
+ */ + for (int i = 0; i < set->pairing_count; i++) { + assert(!set->pairings[i].texture); + set->pairings[i].texture = wlr_texture_from_buffer(set->pairings[i].renderer, buffer); + /* If we got a match, mark this renderer as the "native" one the buffer is local to */ + if (set->pairings[i].texture) { + /* Cache the width and height so other places don't have to search for it in pairings */ + set->width = set->pairings[i].texture->width; + set->height = set->pairings[i].texture->height; + set->native_pair = i; + ret = true; + goto buffer_unlock; + } + } + +buffer_unlock: + wlr_buffer_unlock(buffer); + return ret; +} + +struct wlr_texture_set *wlr_texture_set_from_buffer(struct wlr_renderer *renderer, + struct wlr_buffer *buffer) { + /* Get an empty texture set */ + struct wlr_texture_set *set = wlr_texture_set_create(renderer, NULL); + if (!set) { + return NULL; + } + + if (!wlr_texture_set_import_buffer(set, buffer)) { + goto fail; + } + + return set; + +fail: + /* If the buffer couldn't be imported into any renderer in the system, return NULL */ + wlr_texture_set_destroy(set); + return NULL; +} + +static struct wlr_buffer *texture_set_blit_gpu_buffer(struct wlr_texture_set *set, + struct wlr_renderer *renderer) { + struct wlr_renderer *native_renderer = set->pairings[set->native_pair].renderer; + struct wlr_allocator *native_allocator = set->pairings[set->native_pair].allocator; + struct wlr_texture *native_texture = set->pairings[set->native_pair].texture; + assert(native_texture); + + // If the user didn't give us an allocator for this renderer then this path can't be used. 
+ if (!native_allocator) { + return NULL; + } + + // Now intersect our DRM formats + const struct wlr_drm_format_set *src_formats = wlr_renderer_get_render_formats(native_renderer); + if (!src_formats) { + wlr_log(WLR_ERROR, "Failed to get primary renderer DRM formats"); + return NULL; + } + + const struct wlr_drm_format_set *dst_formats = wlr_renderer_get_dmabuf_texture_formats(renderer); + if (!dst_formats) { + wlr_log(WLR_ERROR, "Failed to get destination renderer DRM formats"); + return NULL; + } + + // Get the argb8 mods to use for our new buffer + struct wlr_drm_format argb_format = {0}; + if (!wlr_drm_format_intersect(&argb_format, + wlr_drm_format_set_get(dst_formats, DRM_FORMAT_ARGB8888), + wlr_drm_format_set_get(src_formats, DRM_FORMAT_ARGB8888)) + || argb_format.len == 0) { + wlr_log(WLR_ERROR, "Failed to intersect DRM formats"); + return NULL; + } + + // Allocate a new buffer on the source renderer, we will blit the original texture + // to this and then return it so the caller can import it. 
+ struct wlr_buffer *buffer = wlr_allocator_create_buffer( + native_allocator, set->width, set->height, &argb_format); + wlr_drm_format_finish(&argb_format); + if (!buffer) { + wlr_log(WLR_ERROR, "Failed to allocate buffer on source GPU"); + return NULL; + } + + struct wlr_render_pass *pass = wlr_renderer_begin_buffer_pass(native_renderer, buffer, NULL); + if (!pass) { + wlr_log(WLR_ERROR, "Failed to create a render pass"); + goto drop_buffer; + } + + wlr_render_pass_add_texture(pass, &(struct wlr_render_texture_options) { + .texture = native_texture, + }); + + if (!wlr_render_pass_submit(pass)) { + wlr_log(WLR_ERROR, "Failed to render to buffer"); + goto drop_buffer; + } + + return buffer; + +drop_buffer: + wlr_buffer_drop(buffer); + return NULL; +} + +void *wlr_texture_set_get_linear_data(struct wlr_texture_set *set) { + struct wlr_renderer *native_renderer = set->pairings[set->native_pair].renderer; + struct wlr_texture *native_texture = set->pairings[set->native_pair].texture; + assert(native_texture); + int stride = native_texture->width * 4; + + if (set->pixel_data) { + return set->pixel_data; + } + + set->pixel_data = malloc(native_texture->height * stride); + if (!set->pixel_data) { + return NULL; + } + + struct wlr_buffer *buffer = set->buffer; + if (!set->buffer) { + // If the buffer this set was created with has already been released, blit ourselves + // a new one. 
+ buffer = texture_set_blit_gpu_buffer(set, native_renderer); + if (!buffer) { + wlr_log(WLR_DEBUG, "Cannot get linear data, wlr_texture_set's buffer was released"); + return NULL; + } + } + wlr_buffer_lock(buffer); + + /* Make a buffer with a linear layout and the same format */ + set->format = wlr_texture_preferred_read_format(native_texture); + if (set->format == DRM_FORMAT_INVALID) { + wlr_buffer_unlock(buffer); + return NULL; + } + + bool result = wlr_texture_read_pixels(native_texture, &(struct wlr_texture_read_pixels_options) { + .format = DRM_FORMAT_ARGB8888, + .stride = stride, + .data = set->pixel_data, + }); + wlr_buffer_unlock(buffer); + if (!result) { + return NULL; + } + + wlr_log(WLR_DEBUG, "Copied GPU vidmem buffer to linear sysmem buffer"); + return set->pixel_data; +} + +struct wlr_texture *wlr_texture_set_get_tex_for_renderer(struct wlr_texture_set *set, + struct wlr_renderer *renderer) { + /* Find the entry for this renderer */ + struct wlr_texture_renderer_pair *pair = NULL; + for (int i = 0; i < set->pairing_count; i++) { + if (set->pairings[i].renderer == renderer) { + pair = &set->pairings[i]; + } + } + + /* + * If we have not seen this renderer then add an entry for it so + * we can cache the results of this copy. + */ + if (!pair) { + wlr_texture_set_add_pair(set, renderer, NULL); + pair = &set->pairings[set->pairing_count - 1]; + } + + /* If we already have a texture for this renderer, return it */ + if (pair->texture) { + return pair->texture; + } + + /* + * First try to directly import the texture. We must have a valid buffer + * to lock in order to do this. If the buffer has been released (as is the + * case with shm buffers) then we will have to perform a fallback copy. + */ + if (set->buffer) { + wlr_buffer_lock(set->buffer); + pair->texture = wlr_texture_from_buffer(renderer, set->buffer); + wlr_buffer_unlock(set->buffer); + if (pair->texture) { + return pair->texture; + } + } + + /* + * Directly importing didn't work. 
The next thing to try is blitting to a compatible + GPU texture and then importing that. + */ + struct wlr_buffer *buffer = texture_set_blit_gpu_buffer(set, renderer); + if (buffer) { + pair->texture = wlr_texture_from_buffer(renderer, buffer); + wlr_buffer_drop(buffer); + if (pair->texture) { + return pair->texture; + } + } + + /* + * If the above didn't work then we can try a CPU fallback. This is much more expensive + * but should always work. The reason we need this is that sometimes we have to copy + * from GPU A to GPU B, but GPU A can't render to any modifiers that GPU B supports. This + * happens on NVIDIA (among others) where you cannot render to a linear texture, but need + * to convert to linear so that you can import it anywhere. + * + * Get our linear pixel data so we can import it into the target renderer. + */ + void *pixel_data = wlr_texture_set_get_linear_data(set); + if (!pixel_data) { + return NULL; + } + + /* Import the linear texture into our renderer */ + uint32_t stride = set->width * 4; + pair->texture = wlr_texture_from_pixels(renderer, DRM_FORMAT_ARGB8888, stride, set->width, + set->height, pixel_data); + + return pair->texture; +} + +struct wlr_texture *wlr_texture_set_get_native_texture(struct wlr_texture_set *set) { + return set->pairings[set->native_pair].texture; +} + +bool wlr_texture_set_update_from_buffer(struct wlr_texture_set *set, + struct wlr_buffer *next, const pixman_region32_t *damage) { + /* Call wlr_texture_update_from_buffer on each valid texture in the set */ + for (int i = 0; i < set->pairing_count; i++) { + if (set->pairings[i].texture) { + if (!wlr_texture_update_from_buffer(set->pairings[i].texture, + next, damage)) { + return false; + } + } + } + + return true; +} + +void wlr_texture_set_destroy(struct wlr_texture_set *set) { + if (set->buffer) { + wl_list_remove(&set->buffer_release.link); + } + free(set->pixel_data); + + for (int i = 0; i < set->pairing_count; i++) { + if (set->pairings[i].texture) { + 
wlr_texture_destroy(set->pairings[i].texture); + } + } + + if (set) { + free(set->pairings); + free(set); + } +} diff --git a/types/buffer/client.c b/types/buffer/client.c index 4cfa57a89..68a233d59 100644 --- a/types/buffer/client.c +++ b/types/buffer/client.c @@ -25,7 +25,7 @@ static struct wlr_client_buffer *client_buffer_from_buffer( static void client_buffer_destroy(struct wlr_buffer *buffer) { struct wlr_client_buffer *client_buffer = client_buffer_from_buffer(buffer); wl_list_remove(&client_buffer->source_destroy.link); - wlr_texture_destroy(client_buffer->texture); + wlr_texture_set_destroy(client_buffer->texture_set); free(client_buffer); } @@ -56,21 +56,21 @@ static void client_buffer_handle_source_destroy(struct wl_listener *listener, struct wlr_client_buffer *wlr_client_buffer_create(struct wlr_buffer *buffer, struct wlr_renderer *renderer) { - struct wlr_texture *texture = wlr_texture_from_buffer(renderer, buffer); - if (texture == NULL) { + struct wlr_texture_set *texture_set = wlr_texture_set_from_buffer(renderer, buffer); + if (texture_set == NULL) { wlr_log(WLR_ERROR, "Failed to create texture"); return NULL; } struct wlr_client_buffer *client_buffer = calloc(1, sizeof(*client_buffer)); if (client_buffer == NULL) { - wlr_texture_destroy(texture); + wlr_texture_set_destroy(texture_set); return NULL; } wlr_buffer_init(&client_buffer->base, &client_buffer_impl, - texture->width, texture->height); + buffer->width, buffer->height); client_buffer->source = buffer; - client_buffer->texture = texture; + client_buffer->texture_set = texture_set; wl_signal_add(&buffer->events.destroy, &client_buffer->source_destroy); client_buffer->source_destroy.notify = client_buffer_handle_source_destroy; @@ -89,5 +89,5 @@ bool wlr_client_buffer_apply_damage(struct wlr_client_buffer *client_buffer, return false; } - return wlr_texture_update_from_buffer(client_buffer->texture, next, damage); + return wlr_texture_set_update_from_buffer(client_buffer->texture_set, next, 
damage); } diff --git a/types/scene/wlr_scene.c b/types/scene/wlr_scene.c index 9b7e1e980..ff634dfad 100644 --- a/types/scene/wlr_scene.c +++ b/types/scene/wlr_scene.c @@ -882,7 +882,8 @@ static struct wlr_texture *scene_buffer_get_texture( struct wlr_client_buffer *client_buffer = wlr_client_buffer_get(scene_buffer->buffer); if (client_buffer != NULL) { - return client_buffer->texture; + return wlr_texture_set_get_tex_for_renderer(client_buffer->texture_set, + renderer); } scene_buffer->texture = diff --git a/types/wlr_compositor.c b/types/wlr_compositor.c index c5044ce44..a3e052c92 100644 --- a/types/wlr_compositor.c +++ b/types/wlr_compositor.c @@ -445,7 +445,11 @@ static void surface_apply_damage(struct wlr_surface *surface) { } static void surface_update_opaque_region(struct wlr_surface *surface) { - if (!wlr_surface_has_buffer(surface)) { + /* + * The surface's client_buffer may not have a texture imported yet, + * but if it has a texture set it is tracking a valid buffer. + */ + if (!wlr_surface_has_buffer(surface) || !surface->buffer->texture_set) { pixman_region32_clear(&surface->opaque_region); return; } @@ -802,7 +806,8 @@ struct wlr_texture *wlr_surface_get_texture(struct wlr_surface *surface) { if (surface->buffer == NULL) { return NULL; } - return surface->buffer->texture; + return wlr_texture_set_get_tex_for_renderer(surface->buffer->texture_set, + surface->renderer); } bool wlr_surface_has_buffer(struct wlr_surface *surface) { diff --git a/types/wlr_linux_dmabuf_v1.c b/types/wlr_linux_dmabuf_v1.c index 13e82760c..d695acd04 100644 --- a/types/wlr_linux_dmabuf_v1.c +++ b/types/wlr_linux_dmabuf_v1.c @@ -211,16 +211,39 @@ static bool check_import_dmabuf(struct wlr_dmabuf_attributes *attribs, void *dat return true; } - // TODO: check number of planes - for (int i = 0; i < attribs->n_planes; i++) { - uint32_t handle = 0; - if (drmPrimeFDToHandle(linux_dmabuf->main_device_fd, attribs->fd[i], &handle) != 0) { - wlr_log_errno(WLR_DEBUG, "Failed to import 
DMA-BUF FD"); + /* + * Some compositors will be using this linux dmabuf manager with custom renderers, + * while others will use a wlroots-managed wlr_renderer. When checking if a dmabuf + * is valid for import we should treat these differently. In the first case we just + * need to check if the dmabuf is importable into the DRM device, in the wlroots-managed + * renderer case we should check if this dmabuf can be imported into the renderer. + * + * In the case where we have a wlr_renderer we need to check if a texture set can + * be created in order to handle multi-gpu systems. The texture set will handle ensuring + * that the dmabuf is importable on one GPU in the system, instead of only checking + * the main device. + */ + if (linux_dmabuf->main_renderer) { + struct wlr_texture_set *set= + wlr_texture_set_from_dmabuf(linux_dmabuf->main_renderer, attribs); + if (!set) { return false; } - if (drmCloseBufferHandle(linux_dmabuf->main_device_fd, handle) != 0) { - wlr_log_errno(WLR_ERROR, "Failed to close buffer handle"); - return false; + // We can import the image, good. No need to keep it since wlr_surface will + // import it again on commit. 
+ wlr_texture_set_destroy(set); + } else { + // TODO: check number of planes + for (int i = 0; i < attribs->n_planes; i++) { + uint32_t handle = 0; + if (drmPrimeFDToHandle(linux_dmabuf->main_device_fd, attribs->fd[i], &handle) != 0) { + wlr_log_errno(WLR_DEBUG, "Failed to import DMA-BUF FD"); + return false; + } + if (drmCloseBufferHandle(linux_dmabuf->main_device_fd, handle) != 0) { + wlr_log_errno(WLR_ERROR, "Failed to close buffer handle"); + return false; + } } } return true; @@ -1001,6 +1024,9 @@ struct wlr_linux_dmabuf_v1 *wlr_linux_dmabuf_v1_create_with_renderer(struct wl_d struct wlr_linux_dmabuf_v1 *linux_dmabuf = wlr_linux_dmabuf_v1_create(display, version, &feedback); wlr_linux_dmabuf_feedback_v1_finish(&feedback); + + linux_dmabuf->main_renderer = renderer; + return linux_dmabuf; } @@ -1070,15 +1096,6 @@ static bool devid_from_fd(int fd, dev_t *devid) { return true; } -static bool is_secondary_drm_backend(struct wlr_backend *backend) { -#if WLR_HAS_DRM_BACKEND - return wlr_backend_is_drm(backend) && - wlr_drm_backend_get_parent(backend) != NULL; -#else - return false; -#endif -} - bool wlr_linux_dmabuf_feedback_v1_init_with_options(struct wlr_linux_dmabuf_feedback_v1 *feedback, const struct wlr_linux_dmabuf_feedback_v1_init_options *options) { assert(options->main_renderer != NULL); @@ -1121,8 +1138,7 @@ bool wlr_linux_dmabuf_feedback_v1_init_with_options(struct wlr_linux_dmabuf_feed wlr_log(WLR_ERROR, "Failed to intersect renderer and scanout formats"); goto error; } - } else if (options->scanout_primary_output != NULL && - !is_secondary_drm_backend(options->scanout_primary_output->backend)) { + } else if (options->scanout_primary_output != NULL) { int backend_drm_fd = wlr_backend_get_drm_fd(options->scanout_primary_output->backend); if (backend_drm_fd < 0) { wlr_log(WLR_ERROR, "Failed to get backend DRM FD"); @@ -1148,8 +1164,9 @@ bool wlr_linux_dmabuf_feedback_v1_init_with_options(struct wlr_linux_dmabuf_feed tranche->target_device = backend_dev; 
tranche->flags = ZWP_LINUX_DMABUF_FEEDBACK_V1_TRANCHE_FLAGS_SCANOUT; - if (!wlr_drm_format_set_intersect(&tranche->formats, scanout_formats, renderer_formats)) { - wlr_log(WLR_ERROR, "Failed to intersect renderer and scanout formats"); + // Copy our scanout formats to the scanout tranche + if (!wlr_drm_format_set_copy(&tranche->formats, scanout_formats)) { + wlr_log(WLR_ERROR, "Failed to copy scanout formats"); goto error; } }