render/vulkan: New staging buffer implementation

Implement a ring-buffer that uses timeline points to track and release
allocated spans. We add larger ring-buffers when the current one fills, and
remove them when they have been unused for many collection cycles.

Signed-off-by: Kenny Levinsen <kl@kl.wtf>
This commit is contained in:
Kenny Levinsen 2026-04-12 17:10:40 +02:00 committed by Félix Poisot
parent 02abf1cd28
commit 4ee896af99
4 changed files with 211 additions and 155 deletions

View file

@ -595,14 +595,7 @@ static bool render_pass_submit(struct wlr_render_pass *wlr_pass) {
free(render_wait);
struct wlr_vk_shared_buffer *stage_buf, *stage_buf_tmp;
wl_list_for_each_safe(stage_buf, stage_buf_tmp, &renderer->stage.buffers, link) {
if (stage_buf->allocs.size == 0) {
continue;
}
wl_list_remove(&stage_buf->link);
wl_list_insert(&stage_cb->stage_buffers, &stage_buf->link);
}
vulkan_stage_mark_submit(renderer, render_timeline_point);
if (!vulkan_sync_render_pass_release(renderer, pass)) {
wlr_log(WLR_ERROR, "Failed to sync render buffer");
@ -1056,13 +1049,13 @@ static bool create_3d_lut_image(struct wlr_vk_renderer *renderer,
size_t size = dim_len * dim_len * dim_len * bytes_per_block;
struct wlr_vk_buffer_span span = vulkan_get_stage_span(renderer,
size, bytes_per_block);
if (!span.buffer || span.alloc.size != size) {
if (!span.buffer || span.size != size) {
wlr_log(WLR_ERROR, "Failed to retrieve staging buffer");
goto fail_imageview;
}
float sample_range = 1.0f / (dim_len - 1);
char *map = (char *)span.buffer->cpu_mapping + span.alloc.start;
char *map = (char *)span.buffer->cpu_mapping + span.offset;
float *dst = (float *)map;
for (size_t b_index = 0; b_index < dim_len; b_index++) {
for (size_t g_index = 0; g_index < dim_len; g_index++) {
@ -1092,7 +1085,7 @@ static bool create_3d_lut_image(struct wlr_vk_renderer *renderer,
VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, VK_PIPELINE_STAGE_TRANSFER_BIT,
VK_ACCESS_TRANSFER_WRITE_BIT);
VkBufferImageCopy copy = {
.bufferOffset = span.alloc.start,
.bufferOffset = span.offset,
.imageExtent.width = dim_len,
.imageExtent.height = dim_len,
.imageExtent.depth = dim_len,

View file

@ -1,6 +1,5 @@
#include <assert.h>
#include <fcntl.h>
#include <math.h>
#include <poll.h>
#include <stdlib.h>
#include <stdint.h>
@ -8,6 +7,7 @@
#include <unistd.h>
#include <drm_fourcc.h>
#include <vulkan/vulkan.h>
#include <wayland-util.h>
#include <wlr/render/color.h>
#include <wlr/render/interface.h>
#include <wlr/types/wlr_drm.h>
@ -26,11 +26,9 @@
#include "render/vulkan/shaders/texture.frag.h"
#include "render/vulkan/shaders/quad.frag.h"
#include "render/vulkan/shaders/output.frag.h"
#include "types/wlr_buffer.h"
#include "util/time.h"
#include "util/array.h"
// TODO:
// - simplify stage allocation, don't track allocations but use ringbuffer-like
// - use a pipeline cache (not sure when to save though, after every pipeline
// creation?)
// - create pipelines as derivatives of each other
@ -187,18 +185,13 @@ static void destroy_render_format_setup(struct wlr_vk_renderer *renderer,
free(setup);
}
static void shared_buffer_destroy(struct wlr_vk_renderer *r,
struct wlr_vk_shared_buffer *buffer) {
static void stage_buffer_destroy(struct wlr_vk_renderer *r,
struct wlr_vk_stage_buffer *buffer) {
if (!buffer) {
return;
}
if (buffer->allocs.size > 0) {
wlr_log(WLR_ERROR, "shared_buffer_finish: %zu allocations left",
buffer->allocs.size / sizeof(struct wlr_vk_allocation));
}
wl_array_release(&buffer->allocs);
wl_array_release(&buffer->watermarks);
if (buffer->cpu_mapping) {
vkUnmapMemory(r->dev->dev, buffer->memory);
buffer->cpu_mapping = NULL;
@ -214,75 +207,12 @@ static void shared_buffer_destroy(struct wlr_vk_renderer *r,
free(buffer);
}
struct wlr_vk_buffer_span vulkan_get_stage_span(struct wlr_vk_renderer *r,
VkDeviceSize size, VkDeviceSize alignment) {
// try to find free span
// simple greedy allocation algorithm - should be enough for this usecase
// since all allocations are freed together after the frame
struct wlr_vk_shared_buffer *buf;
wl_list_for_each_reverse(buf, &r->stage.buffers, link) {
VkDeviceSize start = 0u;
if (buf->allocs.size > 0) {
const struct wlr_vk_allocation *allocs = buf->allocs.data;
size_t allocs_len = buf->allocs.size / sizeof(struct wlr_vk_allocation);
const struct wlr_vk_allocation *last = &allocs[allocs_len - 1];
start = last->start + last->size;
}
assert(start <= buf->buf_size);
// ensure the proposed start is a multiple of alignment
start += alignment - 1 - ((start + alignment - 1) % alignment);
if (buf->buf_size - start < size) {
continue;
}
struct wlr_vk_allocation *a = wl_array_add(&buf->allocs, sizeof(*a));
if (a == NULL) {
wlr_log_errno(WLR_ERROR, "Allocation failed");
goto error_alloc;
}
*a = (struct wlr_vk_allocation){
.start = start,
.size = size,
};
return (struct wlr_vk_buffer_span) {
.buffer = buf,
.alloc = *a,
};
}
if (size > max_stage_size) {
wlr_log(WLR_ERROR, "cannot vulkan stage buffer: "
"requested size (%zu bytes) exceeds maximum (%zu bytes)",
(size_t)size, (size_t)max_stage_size);
goto error_alloc;
}
// we didn't find a free buffer - create one
// size = clamp(max(size * 2, prev_size * 2), min_size, max_size)
VkDeviceSize bsize = size * 2;
bsize = bsize < min_stage_size ? min_stage_size : bsize;
if (!wl_list_empty(&r->stage.buffers)) {
struct wl_list *last_link = r->stage.buffers.prev;
struct wlr_vk_shared_buffer *prev = wl_container_of(
last_link, prev, link);
VkDeviceSize last_size = 2 * prev->buf_size;
bsize = bsize < last_size ? last_size : bsize;
}
if (bsize > max_stage_size) {
wlr_log(WLR_INFO, "vulkan stage buffers have reached max size");
bsize = max_stage_size;
}
// create buffer
buf = calloc(1, sizeof(*buf));
static struct wlr_vk_stage_buffer *stage_buffer_create(
struct wlr_vk_renderer *r, VkDeviceSize bsize) {
struct wlr_vk_stage_buffer *buf = calloc(1, sizeof(*buf));
if (!buf) {
wlr_log_errno(WLR_ERROR, "Allocation failed");
goto error_alloc;
return NULL;
}
wl_list_init(&buf->link);
@ -319,7 +249,7 @@ struct wlr_vk_buffer_span vulkan_get_stage_span(struct wlr_vk_renderer *r,
};
res = vkAllocateMemory(r->dev->dev, &mem_info, NULL, &buf->memory);
if (res != VK_SUCCESS) {
wlr_vk_error("vkAllocatorMemory", res);
wlr_vk_error("vkAllocateMemory", res);
goto error;
}
@ -335,34 +265,162 @@ struct wlr_vk_buffer_span vulkan_get_stage_span(struct wlr_vk_renderer *r,
goto error;
}
struct wlr_vk_allocation *a = wl_array_add(&buf->allocs, sizeof(*a));
if (a == NULL) {
wlr_log_errno(WLR_ERROR, "Allocation failed");
buf->buf_size = bsize;
return buf;
error:
stage_buffer_destroy(r, buf);
return NULL;
}
void vulkan_stage_buffer_reclaim(struct wlr_vk_stage_buffer *buf,
uint64_t current_point) {
size_t completed = 0;
struct wlr_vk_stage_watermark *mark;
wl_array_for_each(mark, &buf->watermarks) {
if (mark->timeline_point > current_point) {
break;
}
buf->tail = mark->head;
completed++;
}
if (completed > 0) {
completed *= sizeof(struct wlr_vk_stage_watermark);
if (completed == buf->watermarks.size) {
buf->watermarks.size = 0;
} else {
array_remove_at(&buf->watermarks, 0, completed);
}
}
}
// Carve a span of `size` bytes, aligned to `alignment`, out of the
// ring-buffer. Returns the byte offset of the span, or (VkDeviceSize)-1 when
// the buffer cannot currently fit it.
//
// head == tail means the buffer is empty; the strict `<` comparisons below
// keep an allocation from making head land exactly on tail, so the full and
// empty states stay distinguishable.
VkDeviceSize vulkan_stage_buffer_alloc(struct wlr_vk_stage_buffer *buf,
		VkDeviceSize size, VkDeviceSize alignment) {
	// Round the candidate offset up to the next multiple of alignment.
	VkDeviceSize offset = buf->head;
	VkDeviceSize misalign = offset % alignment;
	if (misalign != 0) {
		offset += alignment - misalign;
	}

	// When the head is at or past the tail, the usable region runs to the
	// end of the buffer; otherwise it stops at the tail.
	VkDeviceSize limit;
	if (offset >= buf->tail) {
		limit = buf->buf_size;
	} else {
		limit = buf->tail;
	}

	if (offset + size < limit) {
		// The span fits in place.
		buf->head = offset + size;
		return offset;
	}
	if (offset >= buf->tail && size < buf->tail) {
		// Wrap around: place the span at the very start of the buffer.
		buf->head = size;
		return 0;
	}
	return (VkDeviceSize)-1; // No room right now.
}
struct wlr_vk_buffer_span vulkan_get_stage_span(struct wlr_vk_renderer *r,
VkDeviceSize size, VkDeviceSize alignment) {
if (size >= max_stage_size) {
wlr_log(WLR_ERROR, "cannot allocate stage buffer: "
"requested size (%zu bytes) exceeds maximum (%zu bytes)",
(size_t)size, (size_t)max_stage_size-1);
goto error;
}
buf->buf_size = bsize;
wl_list_insert(&r->stage.buffers, &buf->link);
VkDeviceSize max_buf_size = min_stage_size / 2;
struct wlr_vk_stage_buffer *buf;
wl_list_for_each(buf, &r->stage.buffers, link) {
VkDeviceSize offset = vulkan_stage_buffer_alloc(buf, size, alignment);
if (offset != (VkDeviceSize)-1) {
return (struct wlr_vk_buffer_span) {
.buffer = buf,
.offset = offset,
.size = size,
};
}
if (buf->buf_size > max_buf_size) {
max_buf_size = buf->buf_size;
}
}
VkDeviceSize bsize = max_buf_size * 2;
while (size * 2 > bsize) {
bsize *= 2;
}
if (bsize > max_stage_size) {
wlr_log(WLR_INFO, "vulkan stage buffer has reached max size");
bsize = max_stage_size;
}
struct wlr_vk_stage_buffer *new_buf = stage_buffer_create(r, bsize);
if (new_buf == NULL) {
goto error;
}
wl_list_insert(r->stage.buffers.prev, &new_buf->link);
VkDeviceSize offset = vulkan_stage_buffer_alloc(new_buf, size, alignment);
assert(offset != (VkDeviceSize)-1);
*a = (struct wlr_vk_allocation){
.start = 0,
.size = size,
};
return (struct wlr_vk_buffer_span) {
.buffer = buf,
.alloc = *a,
.buffer = new_buf,
.offset = offset,
.size = size,
};
error:
shared_buffer_destroy(r, buf);
error_alloc:
return (struct wlr_vk_buffer_span) {
.buffer = NULL,
.alloc = (struct wlr_vk_allocation) {0, 0},
.offset = 0,
.size = 0,
};
}
// Record a watermark on every stage buffer holding pending data, tying the
// current head position to the given timeline point. Once the GPU signals
// that point, vulkan_stage_buffer_reclaim() may advance the tail up to the
// recorded head.
void vulkan_stage_mark_submit(struct wlr_vk_renderer *renderer,
		uint64_t timeline_point) {
	struct wlr_vk_stage_buffer *stage_buf;
	wl_list_for_each(stage_buf, &renderer->stage.buffers, link) {
		if (stage_buf->head == stage_buf->tail) {
			// Nothing was staged in this buffer since the last mark.
			continue;
		}

		struct wlr_vk_stage_watermark *watermark =
			wl_array_add(&stage_buf->watermarks, sizeof(*watermark));
		if (watermark == NULL) {
			wlr_log_errno(WLR_ERROR, "Allocation failed");
			continue;
		}
		*watermark = (struct wlr_vk_stage_watermark){
			.head = stage_buf->head,
			.timeline_point = timeline_point,
		};
	}
}
// Reclaim completed spans on active stage buffers and destroy buffers that
// have stayed empty for many collection cycles. Uses the _safe list walk
// because buffers may be destroyed mid-iteration.
static void stage_buffer_gc(struct wlr_vk_renderer *renderer, uint64_t current_point) {
	struct wlr_vk_stage_buffer *buf, *tmp;
	wl_list_for_each_safe(buf, tmp, &renderer->stage.buffers, link) {
		if (buf->head != buf->tail) {
			// Still holds in-flight data: reclaim whatever the GPU has
			// finished with and reset the idle counter.
			buf->empty_gc_cnt = 0;
			vulkan_stage_buffer_reclaim(buf, current_point);
			continue;
		}

		// Never deallocate the minimum-sized (first) buffer.
		if (buf->buf_size <= min_stage_size) {
			continue;
		}

		// Empty again this cycle; destroy the buffer once it has gone
		// unused for enough consecutive collections.
		if (++buf->empty_gc_cnt >= 1000) {
			stage_buffer_destroy(renderer, buf);
		}
	}
}
VkCommandBuffer vulkan_record_stage_cb(struct wlr_vk_renderer *renderer) {
if (renderer->stage.cb == NULL) {
renderer->stage.cb = vulkan_acquire_command_buffer(renderer);
@ -463,16 +521,21 @@ bool vulkan_submit_stage_wait(struct wlr_vk_renderer *renderer, int wait_sync_fi
submit_info.pWaitDstStageMask = &wait_stage;
}
vulkan_stage_mark_submit(renderer, timeline_point);
VkResult res = vkQueueSubmit(renderer->dev->queue, 1, &submit_info, VK_NULL_HANDLE);
if (res != VK_SUCCESS) {
wlr_vk_error("vkQueueSubmit", res);
return false;
}
// NOTE: don't release stage allocations here since they may still be
// used for reading. Will be done next frame.
if (!vulkan_wait_command_buffer(cb, renderer)) {
return false;
}
return vulkan_wait_command_buffer(cb, renderer);
// We did a blocking wait so this is now the current point
stage_buffer_gc(renderer, timeline_point);
return true;
}
struct wlr_vk_format_props *vulkan_format_props_from_drm(
@ -506,7 +569,6 @@ static bool init_command_buffer(struct wlr_vk_command_buffer *cb,
.vk = vk_cb,
};
wl_list_init(&cb->destroy_textures);
wl_list_init(&cb->stage_buffers);
return true;
}
@ -532,7 +594,7 @@ bool vulkan_wait_command_buffer(struct wlr_vk_command_buffer *cb,
}
static void release_command_buffer_resources(struct wlr_vk_command_buffer *cb,
struct wlr_vk_renderer *renderer, int64_t now) {
struct wlr_vk_renderer *renderer) {
struct wlr_vk_texture *texture, *texture_tmp;
wl_list_for_each_safe(texture, texture_tmp, &cb->destroy_textures, destroy_link) {
wl_list_remove(&texture->destroy_link);
@ -540,15 +602,6 @@ static void release_command_buffer_resources(struct wlr_vk_command_buffer *cb,
wlr_texture_destroy(&texture->wlr_texture);
}
struct wlr_vk_shared_buffer *buf, *buf_tmp;
wl_list_for_each_safe(buf, buf_tmp, &cb->stage_buffers, link) {
buf->allocs.size = 0;
buf->last_used_ms = now;
wl_list_remove(&buf->link);
wl_list_insert(&renderer->stage.buffers, &buf->link);
}
if (cb->color_transform) {
wlr_color_transform_unref(cb->color_transform);
cb->color_transform = NULL;
@ -567,22 +620,14 @@ static struct wlr_vk_command_buffer *get_command_buffer(
return NULL;
}
// Garbage collect any buffers that have remained unused for too long
int64_t now = get_current_time_msec();
struct wlr_vk_shared_buffer *buf, *buf_tmp;
wl_list_for_each_safe(buf, buf_tmp, &renderer->stage.buffers, link) {
if (buf->allocs.size == 0 && buf->last_used_ms + 10000 < now) {
shared_buffer_destroy(renderer, buf);
}
}
stage_buffer_gc(renderer, current_point);
// Destroy textures for completed command buffers
for (size_t i = 0; i < VULKAN_COMMAND_BUFFERS_CAP; i++) {
struct wlr_vk_command_buffer *cb = &renderer->command_buffers[i];
if (cb->vk != VK_NULL_HANDLE && !cb->recording &&
cb->timeline_point <= current_point) {
release_command_buffer_resources(cb, renderer, now);
release_command_buffer_resources(cb, renderer);
}
}
@ -1185,7 +1230,7 @@ static void vulkan_destroy(struct wlr_renderer *wlr_renderer) {
if (cb->vk == VK_NULL_HANDLE) {
continue;
}
release_command_buffer_resources(cb, renderer, 0);
release_command_buffer_resources(cb, renderer);
if (cb->binary_semaphore != VK_NULL_HANDLE) {
vkDestroySemaphore(renderer->dev->dev, cb->binary_semaphore, NULL);
}
@ -1197,9 +1242,9 @@ static void vulkan_destroy(struct wlr_renderer *wlr_renderer) {
}
// stage.cb automatically freed with command pool
struct wlr_vk_shared_buffer *buf, *tmp_buf;
struct wlr_vk_stage_buffer *buf, *tmp_buf;
wl_list_for_each_safe(buf, tmp_buf, &renderer->stage.buffers, link) {
shared_buffer_destroy(renderer, buf);
stage_buffer_destroy(renderer, buf);
}
struct wlr_vk_texture *tex, *tex_tmp;

View file

@ -72,16 +72,16 @@ static bool write_pixels(struct wlr_vk_texture *texture,
// get staging buffer
struct wlr_vk_buffer_span span = vulkan_get_stage_span(renderer, bsize, format_info->bytes_per_block);
if (!span.buffer || span.alloc.size != bsize) {
if (!span.buffer || span.size != bsize) {
wlr_log(WLR_ERROR, "Failed to retrieve staging buffer");
free(copies);
return false;
}
char *map = (char*)span.buffer->cpu_mapping + span.alloc.start;
char *map = (char*)span.buffer->cpu_mapping + span.offset;
// upload data
uint32_t buf_off = span.alloc.start;
uint32_t buf_off = span.offset;
for (int i = 0; i < rects_len; i++) {
pixman_box32_t rect = rects[i];
uint32_t width = rect.x2 - rect.x1;