render/vulkan: New staging buffer implementation

Implement a ring buffer that uses timeline points to track and release
allocated spans, growing the buffer when it fills and shrinking it once it
has been 4x larger than needed for many garbage-collection runs.
This commit is contained in:
Kenny Levinsen 2026-04-12 17:10:40 +02:00
parent e8c03e9ce9
commit 1ec740692f
4 changed files with 267 additions and 155 deletions

View file

@ -1,6 +1,5 @@
#include <assert.h>
#include <fcntl.h>
#include <math.h>
#include <poll.h>
#include <stdlib.h>
#include <stdint.h>
@ -8,6 +7,7 @@
#include <unistd.h>
#include <drm_fourcc.h>
#include <vulkan/vulkan.h>
#include <wayland-util.h>
#include <wlr/render/color.h>
#include <wlr/render/interface.h>
#include <wlr/types/wlr_drm.h>
@ -26,11 +26,9 @@
#include "render/vulkan/shaders/texture.frag.h"
#include "render/vulkan/shaders/quad.frag.h"
#include "render/vulkan/shaders/output.frag.h"
#include "types/wlr_buffer.h"
#include "util/time.h"
#include "util/array.h"
// TODO:
// - simplify stage allocation, don't track allocations but use ringbuffer-like
// - use a pipeline cache (not sure when to save though, after every pipeline
// creation?)
// - create pipelines as derivatives of each other
@ -187,18 +185,13 @@ static void destroy_render_format_setup(struct wlr_vk_renderer *renderer,
free(setup);
}
static void shared_buffer_destroy(struct wlr_vk_renderer *r,
struct wlr_vk_shared_buffer *buffer) {
static void stage_buffer_destroy(struct wlr_vk_renderer *r,
struct wlr_vk_stage_buffer *buffer) {
if (!buffer) {
return;
}
if (buffer->allocs.size > 0) {
wlr_log(WLR_ERROR, "shared_buffer_finish: %zu allocations left",
buffer->allocs.size / sizeof(struct wlr_vk_allocation));
}
wl_array_release(&buffer->allocs);
wl_array_release(&buffer->watermarks);
if (buffer->cpu_mapping) {
vkUnmapMemory(r->dev->dev, buffer->memory);
buffer->cpu_mapping = NULL;
@ -214,75 +207,12 @@ static void shared_buffer_destroy(struct wlr_vk_renderer *r,
free(buffer);
}
struct wlr_vk_buffer_span vulkan_get_stage_span(struct wlr_vk_renderer *r,
VkDeviceSize size, VkDeviceSize alignment) {
// try to find free span
// simple greedy allocation algorithm - should be enough for this usecase
// since all allocations are freed together after the frame
struct wlr_vk_shared_buffer *buf;
wl_list_for_each_reverse(buf, &r->stage.buffers, link) {
VkDeviceSize start = 0u;
if (buf->allocs.size > 0) {
const struct wlr_vk_allocation *allocs = buf->allocs.data;
size_t allocs_len = buf->allocs.size / sizeof(struct wlr_vk_allocation);
const struct wlr_vk_allocation *last = &allocs[allocs_len - 1];
start = last->start + last->size;
}
assert(start <= buf->buf_size);
// ensure the proposed start is a multiple of alignment
start += alignment - 1 - ((start + alignment - 1) % alignment);
if (buf->buf_size - start < size) {
continue;
}
struct wlr_vk_allocation *a = wl_array_add(&buf->allocs, sizeof(*a));
if (a == NULL) {
wlr_log_errno(WLR_ERROR, "Allocation failed");
goto error_alloc;
}
*a = (struct wlr_vk_allocation){
.start = start,
.size = size,
};
return (struct wlr_vk_buffer_span) {
.buffer = buf,
.alloc = *a,
};
}
if (size > max_stage_size) {
wlr_log(WLR_ERROR, "cannot vulkan stage buffer: "
"requested size (%zu bytes) exceeds maximum (%zu bytes)",
(size_t)size, (size_t)max_stage_size);
goto error_alloc;
}
// we didn't find a free buffer - create one
// size = clamp(max(size * 2, prev_size * 2), min_size, max_size)
VkDeviceSize bsize = size * 2;
bsize = bsize < min_stage_size ? min_stage_size : bsize;
if (!wl_list_empty(&r->stage.buffers)) {
struct wl_list *last_link = r->stage.buffers.prev;
struct wlr_vk_shared_buffer *prev = wl_container_of(
last_link, prev, link);
VkDeviceSize last_size = 2 * prev->buf_size;
bsize = bsize < last_size ? last_size : bsize;
}
if (bsize > max_stage_size) {
wlr_log(WLR_INFO, "vulkan stage buffers have reached max size");
bsize = max_stage_size;
}
// create buffer
buf = calloc(1, sizeof(*buf));
static struct wlr_vk_stage_buffer *stage_buffer_create(
struct wlr_vk_renderer *r, VkDeviceSize bsize) {
struct wlr_vk_stage_buffer *buf = calloc(1, sizeof(*buf));
if (!buf) {
wlr_log_errno(WLR_ERROR, "Allocation failed");
goto error_alloc;
return NULL;
}
wl_list_init(&buf->link);
@ -319,7 +249,7 @@ struct wlr_vk_buffer_span vulkan_get_stage_span(struct wlr_vk_renderer *r,
};
res = vkAllocateMemory(r->dev->dev, &mem_info, NULL, &buf->memory);
if (res != VK_SUCCESS) {
wlr_vk_error("vkAllocatorMemory", res);
wlr_vk_error("vkAllocateMemory", res);
goto error;
}
@ -335,34 +265,209 @@ struct wlr_vk_buffer_span vulkan_get_stage_span(struct wlr_vk_renderer *r,
goto error;
}
struct wlr_vk_allocation *a = wl_array_add(&buf->allocs, sizeof(*a));
if (a == NULL) {
wlr_log_errno(WLR_ERROR, "Allocation failed");
buf->active = true;
buf->buf_size = bsize;
return buf;
error:
stage_buffer_destroy(r, buf);
return NULL;
}
// Returns true if the buffer is fully drained.
// Releases ring-buffer space whose GPU work has completed: advances the
// tail past every watermark whose timeline point has been reached by
// current_point, and records peak occupancy so the GC can later detect
// a persistently underutilized buffer.
bool vulkan_stage_buffer_reclaim(struct wlr_vk_stage_buffer *buf,
uint64_t current_point) {
// Update utilization metrics before cleaning
// The second arm accounts for the head having wrapped around past the
// end of the ring buffer (head < tail).
VkDeviceSize occupied = buf->head >= buf->tail
? buf->head - buf->tail
: buf->buf_size - buf->tail + buf->head;
if (occupied > buf->peak_utilization) {
buf->peak_utilization = occupied;
}
// Each watermark records the head position at submit time; once its
// timeline point has signaled, everything up to that head is reusable.
// Watermarks are appended in submission order, so timeline points are
// non-decreasing and we can stop at the first unfinished one.
size_t completed = 0;
struct wlr_vk_stage_watermark *mark;
wl_array_for_each(mark, &buf->watermarks) {
if (mark->timeline_point > current_point) {
break;
}
buf->tail = mark->head;
completed++;
}
if (completed > 0) {
// Drop the consumed watermark entries from the front of the array;
// clearing in place avoids a memmove when all of them completed.
completed *= sizeof(struct wlr_vk_stage_watermark);
if (completed == buf->watermarks.size) {
buf->watermarks.size = 0;
} else {
array_remove_at(&buf->watermarks, 0, completed);
}
}
// Drained when the head has caught up with the tail (ring is empty)
return buf->head == buf->tail;
}
// Carves `size` bytes (aligned to `alignment`) out of the ring buffer.
// Returns the offset of the allocation, or (VkDeviceSize)-1 if the
// buffer does not currently have enough contiguous free space.
VkDeviceSize vulkan_stage_buffer_alloc(struct wlr_vk_stage_buffer *buf,
		VkDeviceSize size, VkDeviceSize alignment) {
	// Align the current head position upwards to the requested alignment
	VkDeviceSize pos = buf->head;
	VkDeviceSize misalign = pos % alignment;
	if (misalign != 0) {
		pos += alignment - misalign;
	}

	// The contiguous region ends at the buffer end while head leads
	// tail, and at tail once head has wrapped around behind it.
	VkDeviceSize limit = (pos >= buf->tail) ? buf->buf_size : buf->tail;
	if (pos + size < limit) {
		// Regular allocation from the aligned head
		buf->head = pos + size;
		return pos;
	}

	// No room at the current head; try wrapping back to the start of
	// the buffer (offset 0 satisfies any alignment). The strict `<`
	// keeps head from ever meeting tail, which would read as "empty".
	if (pos >= buf->tail && size < buf->tail) {
		buf->head = size;
		return 0;
	}

	return (VkDeviceSize)-1;
}
// Allocates a span of `size` bytes, aligned to `alignment`, from a stage
// buffer, creating a new larger buffer when no active buffer has room.
// On failure the returned span has a NULL buffer.
//
// Fix: the block contained interleaved remnants of the deleted
// implementation (`*a = (struct wlr_vk_allocation){...}`, `.alloc = *a`,
// `shared_buffer_destroy(r, buf)`, a stray `error_alloc:` label and stale
// `buf->buf_size`/`wl_list_insert` lines) referencing names that no
// longer exist; they have been removed.
struct wlr_vk_buffer_span vulkan_get_stage_span(struct wlr_vk_renderer *r,
		VkDeviceSize size, VkDeviceSize alignment) {
	if (size > max_stage_size) {
		wlr_log(WLR_ERROR, "cannot allocate stage buffer: "
			"requested size (%zu bytes) exceeds maximum (%zu bytes)",
			(size_t)size, (size_t)max_stage_size);
		goto error;
	}

	// Try to reclaim and allocate from the active buffer
	struct wlr_vk_stage_buffer *buf;
	VkDeviceSize max_buf_size = min_stage_size;
	wl_list_for_each(buf, &r->stage.buffers, link) {
		if (!buf->active) {
			continue;
		}
		VkDeviceSize offset = vulkan_stage_buffer_alloc(buf, size, alignment);
		if (offset != (VkDeviceSize)-1) {
			return (struct wlr_vk_buffer_span) {
				.buffer = buf,
				.offset = offset,
				.size = size,
			};
		}
		if (buf->buf_size > max_buf_size) {
			max_buf_size = buf->buf_size;
		}
		// Buffer is full, retire it; the GC frees it once drained
		buf->active = false;
	}

	// No active buffer had room: create a new one, at least twice the
	// size of the largest existing buffer and large enough for `size`.
	VkDeviceSize bsize = max_buf_size * 2;
	while (size * 2 > bsize) {
		bsize *= 2;
	}
	if (bsize > max_stage_size) {
		wlr_log(WLR_INFO, "vulkan stage buffer has reached max size");
		bsize = max_stage_size;
	}

	struct wlr_vk_stage_buffer *new_buf = stage_buffer_create(r, bsize);
	if (new_buf == NULL) {
		goto error;
	}
	wl_list_insert(&r->stage.buffers, &new_buf->link);

	// A fresh, empty buffer sized for at least `size` cannot fail here
	VkDeviceSize offset = vulkan_stage_buffer_alloc(new_buf, size, alignment);
	assert(offset != (VkDeviceSize)-1);

	return (struct wlr_vk_buffer_span) {
		.buffer = new_buf,
		.offset = offset,
		.size = size,
	};

error:
	return (struct wlr_vk_buffer_span) {
		.buffer = NULL,
		.offset = 0,
		.size = 0,
	};
}
// Gives back the unused trailing `return_size` bytes of a span. This only
// works when the span is still the most recent allocation, i.e. the
// buffer head sits exactly at the end of the span; otherwise a later
// allocation already claimed the space after it and nothing is returned.
void vulkan_return_stage_span(struct wlr_vk_buffer_span *span, VkDeviceSize return_size) {
	assert(return_size <= span->size);

	VkDeviceSize span_end = span->offset + span->size;
	if (span->buffer->head != span_end) {
		return;
	}

	// Shrink the span and move the head back accordingly
	span->size -= return_size;
	span->buffer->head = span->offset + span->size;
}
// Records a watermark on every stage buffer that has pending data,
// associating the current head position with `timeline_point` so that
// vulkan_stage_buffer_reclaim can release the space once the point
// signals.
void vulkan_stage_mark_submit(struct wlr_vk_renderer *renderer,
		uint64_t timeline_point) {
	struct wlr_vk_stage_buffer *buf;
	wl_list_for_each(buf, &renderer->stage.buffers, link) {
		// Nothing pending on this buffer, no watermark needed
		if (buf->head == buf->tail) {
			continue;
		}

		struct wlr_vk_stage_watermark *mark =
			wl_array_add(&buf->watermarks, sizeof(*mark));
		if (mark == NULL) {
			// Best effort: the space is simply reclaimed later
			wlr_log_errno(WLR_ERROR, "Allocation failed");
			continue;
		}
		mark->head = buf->head;
		mark->timeline_point = timeline_point;
	}
}
// Garbage-collects stage buffers: reclaims completed allocations on every
// buffer, frees retired (non-active) buffers once drained, and halves an
// active buffer that has been persistently underutilized.
static void vulkan_stage_buffer_gc(struct wlr_vk_renderer *renderer, uint64_t current_point) {
struct wlr_vk_stage_buffer *buf, *buf_tmp;
wl_list_for_each_safe(buf, buf_tmp, &renderer->stage.buffers, link) {
if (!vulkan_stage_buffer_reclaim(buf, current_point)) {
// There are active allocations on this buffer
continue;
}
// Buffer is fully drained from here on
if (!buf->active) {
// Retired buffer that was replaced by a larger one; free it
stage_buffer_destroy(renderer, buf);
continue;
}
if (buf->buf_size < min_stage_size * 2) {
// We will neither shrink nor deallocate the first buffer
continue;
}
// Note: We use 1/4th as the underutilization threshold, and when
// underutilized for 100 GC runs we cut the buffer size in half
if (buf->peak_utilization > buf->buf_size / 4) {
buf->underutil_count = 0;
} else {
buf->underutil_count++;
}
// Peak utilization is measured per GC interval
buf->peak_utilization = 0;
if (buf->underutil_count < 100) {
continue;
}
// Replace the oversized buffer with one half its size; the old
// buffer is drained, so it can be destroyed immediately
struct wlr_vk_stage_buffer *shrunk = stage_buffer_create(renderer, buf->buf_size / 2);
if (shrunk == NULL) {
// We'll just keep using the old buffer for now
continue;
}
wl_list_insert(&renderer->stage.buffers, &shrunk->link);
stage_buffer_destroy(renderer, buf);
}
}
VkCommandBuffer vulkan_record_stage_cb(struct wlr_vk_renderer *renderer) {
if (renderer->stage.cb == NULL) {
renderer->stage.cb = vulkan_acquire_command_buffer(renderer);
@ -465,16 +570,21 @@ bool vulkan_submit_stage_wait(struct wlr_vk_renderer *renderer, int wait_sync_fi
submit_info.pWaitDstStageMask = &wait_stage;
}
vulkan_stage_mark_submit(renderer, timeline_point);
VkResult res = vkQueueSubmit(renderer->dev->queue, 1, &submit_info, VK_NULL_HANDLE);
if (res != VK_SUCCESS) {
wlr_vk_error("vkQueueSubmit", res);
return false;
}
// NOTE: don't release stage allocations here since they may still be
// used for reading. Will be done next frame.
if (!vulkan_wait_command_buffer(cb, renderer)) {
return false;
}
return vulkan_wait_command_buffer(cb, renderer);
// We did a blocking wait so this is now the current point
vulkan_stage_buffer_gc(renderer, timeline_point);
return true;
}
struct wlr_vk_format_props *vulkan_format_props_from_drm(
@ -508,7 +618,6 @@ static bool init_command_buffer(struct wlr_vk_command_buffer *cb,
.vk = vk_cb,
};
wl_list_init(&cb->destroy_textures);
wl_list_init(&cb->stage_buffers);
return true;
}
@ -534,7 +643,7 @@ bool vulkan_wait_command_buffer(struct wlr_vk_command_buffer *cb,
}
static void release_command_buffer_resources(struct wlr_vk_command_buffer *cb,
struct wlr_vk_renderer *renderer, int64_t now) {
struct wlr_vk_renderer *renderer) {
struct wlr_vk_texture *texture, *texture_tmp;
wl_list_for_each_safe(texture, texture_tmp, &cb->destroy_textures, destroy_link) {
wl_list_remove(&texture->destroy_link);
@ -542,15 +651,6 @@ static void release_command_buffer_resources(struct wlr_vk_command_buffer *cb,
wlr_texture_destroy(&texture->wlr_texture);
}
struct wlr_vk_shared_buffer *buf, *buf_tmp;
wl_list_for_each_safe(buf, buf_tmp, &cb->stage_buffers, link) {
buf->allocs.size = 0;
buf->last_used_ms = now;
wl_list_remove(&buf->link);
wl_list_insert(&renderer->stage.buffers, &buf->link);
}
if (cb->color_transform) {
wlr_color_transform_unref(cb->color_transform);
cb->color_transform = NULL;
@ -569,22 +669,14 @@ static struct wlr_vk_command_buffer *get_command_buffer(
return NULL;
}
// Garbage collect any buffers that have remained unused for too long
int64_t now = get_current_time_msec();
struct wlr_vk_shared_buffer *buf, *buf_tmp;
wl_list_for_each_safe(buf, buf_tmp, &renderer->stage.buffers, link) {
if (buf->allocs.size == 0 && buf->last_used_ms + 10000 < now) {
shared_buffer_destroy(renderer, buf);
}
}
vulkan_stage_buffer_gc(renderer, current_point);
// Destroy textures for completed command buffers
for (size_t i = 0; i < VULKAN_COMMAND_BUFFERS_CAP; i++) {
struct wlr_vk_command_buffer *cb = &renderer->command_buffers[i];
if (cb->vk != VK_NULL_HANDLE && !cb->recording &&
cb->timeline_point <= current_point) {
release_command_buffer_resources(cb, renderer, now);
release_command_buffer_resources(cb, renderer);
}
}
@ -1187,7 +1279,7 @@ static void vulkan_destroy(struct wlr_renderer *wlr_renderer) {
if (cb->vk == VK_NULL_HANDLE) {
continue;
}
release_command_buffer_resources(cb, renderer, 0);
release_command_buffer_resources(cb, renderer);
if (cb->binary_semaphore != VK_NULL_HANDLE) {
vkDestroySemaphore(renderer->dev->dev, cb->binary_semaphore, NULL);
}
@ -1199,9 +1291,9 @@ static void vulkan_destroy(struct wlr_renderer *wlr_renderer) {
}
// stage.cb automatically freed with command pool
struct wlr_vk_shared_buffer *buf, *tmp_buf;
struct wlr_vk_stage_buffer *buf, *tmp_buf;
wl_list_for_each_safe(buf, tmp_buf, &renderer->stage.buffers, link) {
shared_buffer_destroy(renderer, buf);
stage_buffer_destroy(renderer, buf);
}
struct wlr_vk_texture *tex, *tex_tmp;