render/vulkan: Upgrade stage span allocator

The old stage span allocator had two primary flaws:

1. If a shared buffer was used by one command buffer, the entire buffer
   would be held, leaving any remaining regions unusable until that
   command buffer completed.

2. Allocated shared buffers would never be released.

Instead, have command buffers hold only the specific spans they use
rather than whole buffers, and release any buffers that have gone unused
across a certain number of command buffer fetches.
Kenny Levinsen 2024-07-01 01:13:30 +02:00
parent 67b88e46b0
commit cc30acfd4e
4 changed files with 165 additions and 99 deletions
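
To make the scheme above concrete before diving into the diff, here is a minimal editorial sketch of the idea in plain C. It is not the code from this commit: the names (stage_buffer, stage_span, get_span, return_span, should_destroy_buffer, MAX_UNUSED_FETCHES) are invented for illustration, Vulkan objects are reduced to plain byte counts, and the real implementation uses wl_list instead of hand-rolled prev/next pointers.

#include <stdbool.h>
#include <stddef.h>
#include <stdlib.h>

#define MAX_UNUSED_FETCHES 1024 /* plays the role of max_stage_unused_frames */

struct stage_buffer {
	size_t size;             /* total capacity of the shared staging buffer */
	size_t active;           /* bytes currently handed out as spans */
	unsigned unused_fetches; /* consecutive cb fetches with active == 0 */
};

struct stage_span {
	struct stage_buffer *buf;
	size_t start, size;
	bool free;
	struct stage_span *prev, *next; /* spans ordered by buffer and offset */
};

/* Hand out the first free span that fits, splitting off the unused tail
 * as a new free span so the rest of the buffer stays available. */
static struct stage_span *get_span(struct stage_span *head, size_t size) {
	for (struct stage_span *s = head; s != NULL; s = s->next) {
		if (!s->free || s->size < size) {
			continue;
		}
		if (s->size > size) {
			struct stage_span *tail = calloc(1, sizeof(*tail));
			if (tail == NULL) {
				return NULL;
			}
			*tail = (struct stage_span){
				.buf = s->buf,
				.start = s->start + size,
				.size = s->size - size,
				.free = true,
				.prev = s,
				.next = s->next,
			};
			if (s->next != NULL) {
				s->next->prev = tail;
			}
			s->next = tail;
			s->size = size;
		}
		s->free = false;
		s->buf->active += s->size;
		return s;
	}
	return NULL; /* no fit: the real code would create a new shared buffer */
}

/* Release a span once its command buffer has completed, merging it with
 * free neighbours on the same buffer to fight fragmentation. */
static void return_span(struct stage_span *s) {
	s->buf->active -= s->size;
	s->free = true;
	if (s->next != NULL && s->next->free && s->next->buf == s->buf) {
		struct stage_span *n = s->next;
		s->size += n->size;
		s->next = n->next;
		if (n->next != NULL) {
			n->next->prev = s;
		}
		free(n);
	}
	if (s->prev != NULL && s->prev->free && s->prev->buf == s->buf) {
		struct stage_span *p = s->prev;
		p->size += s->size;
		p->next = s->next;
		if (s->next != NULL) {
			s->next->prev = p;
		}
		free(s);
	}
}

/* Called on every command buffer fetch: a buffer that stays completely
 * unused for too many fetches in a row is destroyed instead of lingering. */
static bool should_destroy_buffer(struct stage_buffer *buf) {
	if (buf->active > 0) {
		buf->unused_fetches = 0;
		return false;
	}
	return ++buf->unused_fetches > MAX_UNUSED_FETCHES;
}

The design choice mirrored here is first-fit allocation with splitting, coalescing of neighbouring free spans on release, and a per-buffer counter of consecutive command buffer fetches during which the buffer stayed idle, which is what finally lets unused staging buffers be freed (flaw 2 above).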


@@ -247,7 +247,7 @@ struct wlr_vk_command_buffer {
// Textures to destroy after the command buffer completes
struct wl_list destroy_textures; // wlr_vk_texture.destroy_link
// Staging shared buffers to release after the command buffer completes
struct wl_list stage_buffers; // wlr_vk_shared_buffer.link
struct wl_list stage_spans; // wlr_vk_stage_span.link
// Color transform to unref after the command buffer completes
struct wlr_color_transform *color_transform;
@@ -313,6 +313,7 @@ struct wlr_vk_renderer {
struct wlr_vk_command_buffer *cb;
uint64_t last_timeline_point;
struct wl_list buffers; // wlr_vk_shared_buffer.link
struct wl_list spans; // wlr_vk_stage_span.link
} stage;
struct {
@@ -387,9 +388,10 @@ struct wlr_vk_render_pass *vulkan_begin_render_pass(struct wlr_vk_renderer *rend
// and used as staging buffer. The allocation is implicitly released when the
// stage cb has finished execution. The start of the span will be a multiple
// of the given alignment.
struct wlr_vk_buffer_span vulkan_get_stage_span(
struct wlr_vk_stage_span *vulkan_get_stage_span(
struct wlr_vk_renderer *renderer, VkDeviceSize size,
VkDeviceSize alignment);
void vulkan_return_stage_span(struct wlr_vk_renderer *r, struct wlr_vk_stage_span *span);
// Tries to allocate a texture descriptor set. Will additionally
// return the pool it was allocated from when successful (for freeing it later).
@@ -471,29 +473,32 @@ struct wlr_vk_descriptor_pool {
struct wl_list link; // wlr_vk_renderer.descriptor_pools
};
struct wlr_vk_allocation {
struct wlr_vk_stage_span {
struct wl_list link; // wlr_vk_renderer.stage.spans
// usage_link is a reference from the command buffer using the span.
// Separate from the main link to not mess up ordering.
struct wl_list usage_link; // wlr_vk_command_buffer.stage_spans
struct wlr_vk_shared_buffer *buffer;
VkDeviceSize start;
VkDeviceSize size;
bool free;
};
// List of suballocated staging buffers.
// Used to upload to/read from device local images.
struct wlr_vk_shared_buffer {
struct wl_list link; // wlr_vk_renderer.stage.buffers or wlr_vk_command_buffer.stage_buffers
struct wl_list link; // wlr_vk_renderer.stage.buffers
VkBuffer buffer;
VkDeviceMemory memory;
VkDeviceSize buf_size;
void *cpu_mapping;
struct wl_array allocs; // struct wlr_vk_allocation
};
// Suballocated range on a buffer.
struct wlr_vk_buffer_span {
struct wlr_vk_shared_buffer *buffer;
struct wlr_vk_allocation alloc;
VkDeviceSize active;
uint64_t unused_counter;
};
// Lookup table for a color transform
struct wlr_vk_color_transform {
struct wlr_addon addon; // owned by: wlr_vk_renderer


@@ -430,15 +430,6 @@ static bool render_pass_submit(struct wlr_render_pass *wlr_pass) {
free(render_wait);
struct wlr_vk_shared_buffer *stage_buf, *stage_buf_tmp;
wl_list_for_each_safe(stage_buf, stage_buf_tmp, &renderer->stage.buffers, link) {
if (stage_buf->allocs.size == 0) {
continue;
}
wl_list_remove(&stage_buf->link);
wl_list_insert(&stage_cb->stage_buffers, &stage_buf->link);
}
if (!vulkan_sync_render_buffer(renderer, render_buffer, render_cb)) {
wlr_log(WLR_ERROR, "Failed to sync render buffer");
}
@@ -815,14 +806,14 @@ static bool create_3d_lut_image(struct wlr_vk_renderer *renderer,
size_t bytes_per_block = 4 * sizeof(float);
size_t size = lut_3d->dim_len * lut_3d->dim_len * lut_3d->dim_len * bytes_per_block;
struct wlr_vk_buffer_span span = vulkan_get_stage_span(renderer,
struct wlr_vk_stage_span *span = vulkan_get_stage_span(renderer,
size, bytes_per_block);
if (!span.buffer || span.alloc.size != size) {
if (span == NULL) {
wlr_log(WLR_ERROR, "Failed to retrieve staging buffer");
goto fail_imageview;
}
char *map = (char*)span.buffer->cpu_mapping + span.alloc.start;
char *map = (char*)span->buffer->cpu_mapping + span->start;
float *dst = (float*)map;
size_t dim_len = lut_3d->dim_len;
for (size_t b_index = 0; b_index < dim_len; b_index++) {
@@ -840,19 +831,21 @@ static bool create_3d_lut_image(struct wlr_vk_renderer *renderer,
}
VkCommandBuffer cb = vulkan_record_stage_cb(renderer);
wl_list_insert(&renderer->stage.cb->stage_spans, &span->usage_link);
vulkan_change_layout(cb, *image,
VK_IMAGE_LAYOUT_UNDEFINED, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0,
VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, VK_PIPELINE_STAGE_TRANSFER_BIT,
VK_ACCESS_TRANSFER_WRITE_BIT);
VkBufferImageCopy copy = {
.bufferOffset = span.alloc.start,
.bufferOffset = span->start,
.imageExtent.width = lut_3d->dim_len,
.imageExtent.height = lut_3d->dim_len,
.imageExtent.depth = lut_3d->dim_len,
.imageSubresource.layerCount = 1,
.imageSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
};
vkCmdCopyBufferToImage(cb, span.buffer->buffer, *image,
vkCmdCopyBufferToImage(cb, span->buffer->buffer, *image,
VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &copy);
vulkan_change_layout(cb, *image,
VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, VK_PIPELINE_STAGE_TRANSFER_BIT,


@@ -39,7 +39,9 @@
// might still be a good idea.
static const VkDeviceSize min_stage_size = 1024 * 1024; // 1MB
static const VkDeviceSize special_stage_size = 16 * min_stage_size; // 16MB
static const VkDeviceSize max_stage_size = 256 * min_stage_size; // 256MB
static const uint64_t max_stage_unused_frames = 1024;
static const size_t start_descriptor_pool_size = 256u;
static bool default_debug = true;
@@ -174,13 +176,21 @@ static void shared_buffer_destroy(struct wlr_vk_renderer *r,
if (!buffer) {
return;
}
wlr_log(WLR_DEBUG, "Destroying vk staging buffer of size %" PRIu64, buffer->buf_size);
if (buffer->allocs.size > 0) {
wlr_log(WLR_ERROR, "shared_buffer_finish: %zu allocations left",
buffer->allocs.size / sizeof(struct wlr_vk_allocation));
if (buffer->active > 0) {
wlr_log(WLR_ERROR, "shared_buffer_destroy: spans still in use");
}
struct wlr_vk_stage_span *span, *span_tmp;
wl_list_for_each_safe(span, span_tmp, &r->stage.spans, link) {
if (span->buffer != buffer) {
continue;
}
wl_list_remove(&span->usage_link);
wl_list_remove(&span->link);
free(span);
}
wl_array_release(&buffer->allocs);
if (buffer->cpu_mapping) {
vkUnmapMemory(r->dev->dev, buffer->memory);
buffer->cpu_mapping = NULL;
@@ -196,44 +206,71 @@ static void shared_buffer_destroy(struct wlr_vk_renderer *r,
free(buffer);
}
struct wlr_vk_buffer_span vulkan_get_stage_span(struct wlr_vk_renderer *r,
void vulkan_return_stage_span(struct wlr_vk_renderer *r, struct wlr_vk_stage_span *span) {
span->buffer->active -= span->size;
span->free = true;
// Merge next free span into this one, if one exists
if (span->link.next != &r->stage.spans) {
struct wlr_vk_stage_span *next = wl_container_of(span->link.next, next, link);
if (next->free && next->buffer == span->buffer) {
span->size += next->size;
wl_list_remove(&next->link);
free(next);
}
}
// Merge this free span into the previous one, if one exists
if (span->link.prev != &r->stage.spans) {
struct wlr_vk_stage_span *prev = wl_container_of(span->link.prev, prev, link);
if (prev->free && prev->buffer == span->buffer) {
prev->size += span->size;
wl_list_remove(&span->link);
free(span);
}
}
}
struct wlr_vk_stage_span *vulkan_get_stage_span(struct wlr_vk_renderer *r,
VkDeviceSize size, VkDeviceSize alignment) {
// try to find free span
// simple greedy allocation algorithm - should be enough for this usecase
// since all allocations are freed together after the frame
struct wlr_vk_shared_buffer *buf;
wl_list_for_each_reverse(buf, &r->stage.buffers, link) {
VkDeviceSize start = 0u;
if (buf->allocs.size > 0) {
const struct wlr_vk_allocation *allocs = buf->allocs.data;
size_t allocs_len = buf->allocs.size / sizeof(struct wlr_vk_allocation);
const struct wlr_vk_allocation *last = &allocs[allocs_len - 1];
start = last->start + last->size;
}
assert(start <= buf->buf_size);
// ensure the proposed start is a multiple of alignment
start += alignment - 1 - ((start + alignment - 1) % alignment);
if (buf->buf_size - start < size) {
struct wlr_vk_stage_span *span;
wl_list_for_each_reverse(span, &r->stage.spans, link) {
if (!span->free || span->size < size) {
continue;
}
struct wlr_vk_allocation *a = wl_array_add(&buf->allocs, sizeof(*a));
if (a == NULL) {
wlr_log_errno(WLR_ERROR, "Allocation failed");
goto error_alloc;
if (size <= special_stage_size && span->buffer->buf_size > special_stage_size) {
// Avoid accidentally holding on to big buffers
continue;
}
*a = (struct wlr_vk_allocation){
.start = start,
.size = size,
};
return (struct wlr_vk_buffer_span) {
.buffer = buf,
.alloc = *a,
span->free = false;
span->buffer->active += size;
if (span->size == size) {
// Perfect fit
return span;
}
// Cleave the span
struct wlr_vk_stage_span *free_span = malloc(sizeof(*free_span));
if (free_span == NULL) {
span->free = true;
span->buffer->active -= size;
return NULL;
}
*free_span = (struct wlr_vk_stage_span){
.buffer = span->buffer,
.size = span->size - size,
.start = span->start + size,
.free = true,
};
wl_list_init(&free_span->usage_link);
wl_list_insert(&span->link, &free_span->link);
span->size = size;
return span;
}
if (size > max_stage_size) {
@@ -243,21 +280,28 @@ struct wlr_vk_buffer_span vulkan_get_stage_span(struct wlr_vk_renderer *r,
goto error_alloc;
}
// we didn't find a free buffer - create one
// size = clamp(max(size * 2, prev_size * 2), min_size, max_size)
VkDeviceSize bsize = size * 2;
bsize = bsize < min_stage_size ? min_stage_size : bsize;
if (!wl_list_empty(&r->stage.buffers)) {
struct wl_list *last_link = r->stage.buffers.prev;
struct wlr_vk_shared_buffer *prev = wl_container_of(
last_link, prev, link);
VkDeviceSize last_size = 2 * prev->buf_size;
bsize = bsize < last_size ? last_size : bsize;
}
// Pick the next bucket size. If the size is below our "special" threshold,
// double the last bucket size. Otherwise allocate the requested size
// directly.
VkDeviceSize bsize = min_stage_size;
struct wlr_vk_shared_buffer *buf;
if (bsize > max_stage_size) {
wlr_log(WLR_INFO, "vulkan stage buffers have reached max size");
bsize = max_stage_size;
if (size > special_stage_size) {
// The size is too big for our buckets, allocate directly
bsize = size;
} else {
bsize = min_stage_size;
// We start by picking the last bucket size * 2
wl_list_for_each_reverse(buf, &r->stage.buffers, link) {
if (buf->buf_size < special_stage_size && buf->buf_size * 2 > bsize) {
bsize = buf->buf_size * 2;
}
}
// If double the last bucket is not enough, keep doubling until we hit the
// size for dedicated allocations.
while (bsize < size && bsize < special_stage_size) {
bsize *= 2;
}
}
// create buffer
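
As a side note on the bucket policy just described: the following tiny standalone sketch (editorial, not part of the commit; pick_bucket_size and last_bucket are invented names, and uint64_t stands in for VkDeviceSize) shows how the constants and the doubling rule interact.

#include <stdint.h>

/* Simplified: assumes the caller tracks the largest existing
 * non-dedicated bucket as last_bucket (0 if there is none). */
static uint64_t pick_bucket_size(uint64_t size, uint64_t last_bucket) {
	const uint64_t min_stage = 1024 * 1024;        /* mirrors min_stage_size, 1MB */
	const uint64_t special_stage = 16 * min_stage; /* mirrors special_stage_size, 16MB */

	if (size > special_stage) {
		return size; /* too big for the buckets: allocate the request directly */
	}
	uint64_t bsize = min_stage;
	if (last_bucket < special_stage && last_bucket * 2 > bsize) {
		bsize = last_bucket * 2; /* start from double the last bucket */
	}
	while (bsize < size && bsize < special_stage) {
		bsize *= 2; /* keep doubling until the request fits */
	}
	return bsize;
}

/* Example: last_bucket = 2MB, size = 5MB  -> 8MB bucket
 *          last_bucket = 8MB, size = 20MB -> 20MB dedicated buffer */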
@@ -266,6 +310,8 @@ struct wlr_vk_buffer_span vulkan_get_stage_span(struct wlr_vk_renderer *r,
wlr_log_errno(WLR_ERROR, "Allocation failed");
goto error_alloc;
}
buf->buf_size = bsize;
wl_list_insert(&r->stage.buffers, &buf->link);
VkResult res;
VkBufferCreateInfo buf_info = {
@@ -315,33 +361,41 @@ struct wlr_vk_buffer_span vulkan_get_stage_span(struct wlr_vk_renderer *r,
goto error;
}
struct wlr_vk_allocation *a = wl_array_add(&buf->allocs, sizeof(*a));
if (a == NULL) {
wlr_log_errno(WLR_ERROR, "Allocation failed");
span = malloc(sizeof(*span));
struct wlr_vk_stage_span *free_span = malloc(sizeof(*free_span));
if (span == NULL || free_span == NULL) {
free(span);
free(free_span);
goto error;
}
*free_span = (struct wlr_vk_stage_span){
.buffer = buf,
.start = size,
.size = bsize - size,
.free = true,
};
wl_list_init(&free_span->usage_link);
wl_list_insert(&r->stage.spans, &free_span->link);
wlr_log(WLR_DEBUG, "Created new vk staging buffer of size %" PRIu64, bsize);
buf->buf_size = bsize;
wl_list_insert(&r->stage.buffers, &buf->link);
*a = (struct wlr_vk_allocation){
*span = (struct wlr_vk_stage_span){
.buffer = buf,
.start = 0,
.size = size,
.free = false,
};
return (struct wlr_vk_buffer_span) {
.buffer = buf,
.alloc = *a,
};
wl_list_init(&span->usage_link);
wl_list_insert(&r->stage.spans, &span->link);
buf->active = size;
wlr_log(WLR_DEBUG, "Created new vk staging buffer of size %" PRIu64, bsize);
return span;
error:
shared_buffer_destroy(r, buf);
error_alloc:
return (struct wlr_vk_buffer_span) {
.buffer = NULL,
.alloc = (struct wlr_vk_allocation) {0, 0},
};
return NULL;
}
VkCommandBuffer vulkan_record_stage_cb(struct wlr_vk_renderer *renderer) {
@@ -429,7 +483,7 @@ static bool init_command_buffer(struct wlr_vk_command_buffer *cb,
.vk = vk_cb,
};
wl_list_init(&cb->destroy_textures);
wl_list_init(&cb->stage_buffers);
wl_list_init(&cb->stage_spans);
return true;
}
@@ -463,12 +517,11 @@ static void release_command_buffer_resources(struct wlr_vk_command_buffer *cb,
wlr_texture_destroy(&texture->wlr_texture);
}
struct wlr_vk_shared_buffer *buf, *buf_tmp;
wl_list_for_each_safe(buf, buf_tmp, &cb->stage_buffers, link) {
buf->allocs.size = 0;
wl_list_remove(&buf->link);
wl_list_insert(&renderer->stage.buffers, &buf->link);
struct wlr_vk_stage_span *span, *span_tmp;
wl_list_for_each_safe(span, span_tmp, &cb->stage_spans, usage_link) {
wl_list_remove(&span->usage_link);
wl_list_init(&span->usage_link);
vulkan_return_stage_span(renderer, span);
}
if (cb->color_transform) {
@@ -489,6 +542,18 @@ static struct wlr_vk_command_buffer *get_command_buffer(
return NULL;
}
struct wlr_vk_shared_buffer *buf, *buf_tmp;
wl_list_for_each_safe(buf, buf_tmp, &renderer->stage.buffers, link) {
if (buf->active > 0) {
buf->unused_counter = 0;
continue;
}
buf->unused_counter++;
if (buf->unused_counter > max_stage_unused_frames) {
shared_buffer_destroy(renderer, buf);
}
}
// Destroy textures for completed command buffers
for (size_t i = 0; i < VULKAN_COMMAND_BUFFERS_CAP; i++) {
struct wlr_vk_command_buffer *cb = &renderer->command_buffers[i];
@@ -2414,6 +2479,7 @@ struct wlr_renderer *vulkan_renderer_create_for_device(struct wlr_vk_device *dev
wlr_renderer_init(&renderer->wlr_renderer, &renderer_impl, WLR_BUFFER_CAP_DMABUF);
renderer->wlr_renderer.features.output_color_transform = true;
wl_list_init(&renderer->stage.buffers);
wl_list_init(&renderer->stage.spans);
wl_list_init(&renderer->foreign_textures);
wl_list_init(&renderer->textures);
wl_list_init(&renderer->descriptor_pools);


@@ -71,17 +71,17 @@ static bool write_pixels(struct wlr_vk_texture *texture,
}
// get staging buffer
struct wlr_vk_buffer_span span = vulkan_get_stage_span(renderer, bsize, format_info->bytes_per_block);
if (!span.buffer || span.alloc.size != bsize) {
struct wlr_vk_stage_span *span = vulkan_get_stage_span(renderer, bsize, format_info->bytes_per_block);
if (span == NULL) {
wlr_log(WLR_ERROR, "Failed to retrieve staging buffer");
free(copies);
return false;
}
char *map = (char*)span.buffer->cpu_mapping + span.alloc.start;
char *map = (char*)span->buffer->cpu_mapping + span->start;
// upload data
uint32_t buf_off = span.alloc.start;
uint32_t buf_off = span->start;
for (int i = 0; i < rects_len; i++) {
pixman_box32_t rect = rects[i];
uint32_t width = rect.x2 - rect.x1;
@@ -130,6 +130,7 @@ static bool write_pixels(struct wlr_vk_texture *texture,
// will be executed before next frame
VkCommandBuffer cb = vulkan_record_stage_cb(renderer);
if (cb == VK_NULL_HANDLE) {
vulkan_return_stage_span(renderer, span);
free(copies);
return false;
}
@@ -139,7 +140,7 @@ static bool write_pixels(struct wlr_vk_texture *texture,
VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, VK_PIPELINE_STAGE_TRANSFER_BIT,
VK_ACCESS_TRANSFER_WRITE_BIT);
vkCmdCopyBufferToImage(cb, span.buffer->buffer, texture->image,
vkCmdCopyBufferToImage(cb, span->buffer->buffer, texture->image,
VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, (uint32_t)rects_len, copies);
vulkan_change_layout(cb, texture->image,
VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, VK_PIPELINE_STAGE_TRANSFER_BIT,
@@ -147,6 +148,7 @@ static bool write_pixels(struct wlr_vk_texture *texture,
VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT, VK_ACCESS_SHADER_READ_BIT);
texture->last_used_cb = renderer->stage.cb;
wl_list_insert(&renderer->stage.cb->stage_spans, &span->usage_link);
free(copies);