Merge branch 'vulkan-stage-allocator' into 'master'

Draft: render/vulkan: Upgrade stage span allocator

See merge request wlroots/wlroots!4733
This commit is contained in:
Kenny Levinsen 2024-10-28 17:52:40 +00:00
commit c034ead5ac
4 changed files with 165 additions and 99 deletions

View file

@ -437,15 +437,6 @@ static bool render_pass_submit(struct wlr_render_pass *wlr_pass) {
free(render_wait);
struct wlr_vk_shared_buffer *stage_buf, *stage_buf_tmp;
wl_list_for_each_safe(stage_buf, stage_buf_tmp, &renderer->stage.buffers, link) {
if (stage_buf->allocs.size == 0) {
continue;
}
wl_list_remove(&stage_buf->link);
wl_list_insert(&stage_cb->stage_buffers, &stage_buf->link);
}
if (!vulkan_sync_render_buffer(renderer, render_buffer, render_cb)) {
wlr_log(WLR_ERROR, "Failed to sync render buffer");
}
@ -816,14 +807,14 @@ static bool create_3d_lut_image(struct wlr_vk_renderer *renderer,
size_t bytes_per_block = 4 * sizeof(float);
size_t size = lut_3d->dim_len * lut_3d->dim_len * lut_3d->dim_len * bytes_per_block;
struct wlr_vk_buffer_span span = vulkan_get_stage_span(renderer,
struct wlr_vk_stage_span *span = vulkan_get_stage_span(renderer,
size, bytes_per_block);
if (!span.buffer || span.alloc.size != size) {
if (span == NULL) {
wlr_log(WLR_ERROR, "Failed to retrieve staging buffer");
goto fail_imageview;
}
char *map = (char*)span.buffer->cpu_mapping + span.alloc.start;
char *map = (char*)span->buffer->cpu_mapping + span->start;
float *dst = (float*)map;
size_t dim_len = lut_3d->dim_len;
for (size_t b_index = 0; b_index < dim_len; b_index++) {
@ -841,19 +832,21 @@ static bool create_3d_lut_image(struct wlr_vk_renderer *renderer,
}
VkCommandBuffer cb = vulkan_record_stage_cb(renderer);
wl_list_insert(&renderer->stage.cb->stage_spans, &span->usage_link);
vulkan_change_layout(cb, *image,
VK_IMAGE_LAYOUT_UNDEFINED, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0,
VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, VK_PIPELINE_STAGE_TRANSFER_BIT,
VK_ACCESS_TRANSFER_WRITE_BIT);
VkBufferImageCopy copy = {
.bufferOffset = span.alloc.start,
.bufferOffset = span->start,
.imageExtent.width = lut_3d->dim_len,
.imageExtent.height = lut_3d->dim_len,
.imageExtent.depth = lut_3d->dim_len,
.imageSubresource.layerCount = 1,
.imageSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
};
vkCmdCopyBufferToImage(cb, span.buffer->buffer, *image,
vkCmdCopyBufferToImage(cb, span->buffer->buffer, *image,
VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &copy);
vulkan_change_layout(cb, *image,
VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, VK_PIPELINE_STAGE_TRANSFER_BIT,

View file

@ -39,7 +39,9 @@
// might still be a good idea.
static const VkDeviceSize min_stage_size = 1024 * 1024; // 1MB
static const VkDeviceSize special_stage_size = 16 * min_stage_size; // 16MB
static const VkDeviceSize max_stage_size = 256 * min_stage_size; // 256MB
static const uint64_t max_stage_unused_frames = 1024;
static const size_t start_descriptor_pool_size = 256u;
static bool default_debug = true;
@ -174,13 +176,21 @@ static void shared_buffer_destroy(struct wlr_vk_renderer *r,
if (!buffer) {
return;
}
wlr_log(WLR_DEBUG, "Destroying vk staging buffer of size %" PRIu64, buffer->buf_size);
if (buffer->allocs.size > 0) {
wlr_log(WLR_ERROR, "shared_buffer_finish: %zu allocations left",
buffer->allocs.size / sizeof(struct wlr_vk_allocation));
if (buffer->active > 0) {
wlr_log(WLR_ERROR, "shared_buffer_destroy: spans still in use");
}
struct wlr_vk_stage_span *span, *span_tmp;
wl_list_for_each_safe(span, span_tmp, &r->stage.spans, link) {
if (span->buffer != buffer) {
continue;
}
wl_list_remove(&span->usage_link);
wl_list_remove(&span->link);
free(span);
}
wl_array_release(&buffer->allocs);
if (buffer->cpu_mapping) {
vkUnmapMemory(r->dev->dev, buffer->memory);
buffer->cpu_mapping = NULL;
@ -196,44 +206,71 @@ static void shared_buffer_destroy(struct wlr_vk_renderer *r,
free(buffer);
}
struct wlr_vk_buffer_span vulkan_get_stage_span(struct wlr_vk_renderer *r,
// Return a stage span to the allocator: mark it free, deduct its size from
// the owning buffer's count of in-use bytes, and coalesce it with any
// directly adjacent free span belonging to the same buffer so the free list
// does not fragment over time.
// NOTE(review): assumes the caller has already unlinked span->usage_link
// from the command buffer's stage_spans list — confirm at call sites.
void vulkan_return_stage_span(struct wlr_vk_renderer *r, struct wlr_vk_stage_span *span) {
span->buffer->active -= span->size;
span->free = true;
// Merge next free span into this one, if one exists
if (span->link.next != &r->stage.spans) {
struct wlr_vk_stage_span *next = wl_container_of(span->link.next, next, link);
// Only merge spans from the same backing buffer; spans of different
// buffers may sit next to each other on the renderer-wide list.
if (next->free && next->buffer == span->buffer) {
span->size += next->size;
wl_list_remove(&next->link);
free(next);
}
}
// Merge this free span into the previous one, if one exists
if (span->link.prev != &r->stage.spans) {
struct wlr_vk_stage_span *prev = wl_container_of(span->link.prev, prev, link);
if (prev->free && prev->buffer == span->buffer) {
prev->size += span->size;
wl_list_remove(&span->link);
// `span` is absorbed into `prev`; it must not be touched after this.
free(span);
}
}
}
struct wlr_vk_stage_span *vulkan_get_stage_span(struct wlr_vk_renderer *r,
VkDeviceSize size, VkDeviceSize alignment) {
// try to find free span
// simple greedy allocation algorithm - should be enough for this usecase
// since all allocations are freed together after the frame
struct wlr_vk_shared_buffer *buf;
wl_list_for_each_reverse(buf, &r->stage.buffers, link) {
VkDeviceSize start = 0u;
if (buf->allocs.size > 0) {
const struct wlr_vk_allocation *allocs = buf->allocs.data;
size_t allocs_len = buf->allocs.size / sizeof(struct wlr_vk_allocation);
const struct wlr_vk_allocation *last = &allocs[allocs_len - 1];
start = last->start + last->size;
}
assert(start <= buf->buf_size);
// ensure the proposed start is a multiple of alignment
start += alignment - 1 - ((start + alignment - 1) % alignment);
if (buf->buf_size - start < size) {
struct wlr_vk_stage_span *span;
wl_list_for_each_reverse(span, &r->stage.spans, link) {
if (!span->free || span->size < size) {
continue;
}
struct wlr_vk_allocation *a = wl_array_add(&buf->allocs, sizeof(*a));
if (a == NULL) {
wlr_log_errno(WLR_ERROR, "Allocation failed");
goto error_alloc;
if (size <= special_stage_size && span->buffer->buf_size > special_stage_size) {
// Avoid accidentally holding on to big buffers
continue;
}
*a = (struct wlr_vk_allocation){
.start = start,
.size = size,
};
return (struct wlr_vk_buffer_span) {
.buffer = buf,
.alloc = *a,
span->free = false;
span->buffer->active += size;
if (span->size == size) {
// Perfect fit
return span;
}
// Cleave the span
struct wlr_vk_stage_span *free_span = malloc(sizeof(*free_span));
if (free_span == NULL) {
span->free = true;
span->buffer->active -= size;
return NULL;
}
*free_span = (struct wlr_vk_stage_span){
.buffer = span->buffer,
.size = span->size - size,
.start = span->start + size,
.free = true,
};
wl_list_init(&free_span->usage_link);
wl_list_insert(&span->link, &free_span->link);
span->size = size;
return span;
}
if (size > max_stage_size) {
@ -243,21 +280,28 @@ struct wlr_vk_buffer_span vulkan_get_stage_span(struct wlr_vk_renderer *r,
goto error_alloc;
}
// we didn't find a free buffer - create one
// size = clamp(max(size * 2, prev_size * 2), min_size, max_size)
VkDeviceSize bsize = size * 2;
bsize = bsize < min_stage_size ? min_stage_size : bsize;
if (!wl_list_empty(&r->stage.buffers)) {
struct wl_list *last_link = r->stage.buffers.prev;
struct wlr_vk_shared_buffer *prev = wl_container_of(
last_link, prev, link);
VkDeviceSize last_size = 2 * prev->buf_size;
bsize = bsize < last_size ? last_size : bsize;
}
// Pick the next bucket size. If the size is below our "special" threshold,
// double the last bucket size. Otherwise allocate the requested size
// directly.
VkDeviceSize bsize = min_stage_size;
struct wlr_vk_shared_buffer *buf;
if (bsize > max_stage_size) {
wlr_log(WLR_INFO, "vulkan stage buffers have reached max size");
bsize = max_stage_size;
if (size > special_stage_size) {
// The size is too big for our buckets, allocate directly
bsize = size;
} else {
bsize = min_stage_size;
// We start by picking the last bucket size * 2
wl_list_for_each_reverse(buf, &r->stage.buffers, link) {
if (buf->buf_size < special_stage_size && buf->buf_size * 2 > bsize) {
bsize = buf->buf_size * 2;
}
}
// If double the last bucket is not enough, keep doubling until we hit the
// size for dedicated allocations.
while (bsize < size && bsize < special_stage_size) {
bsize *= 2;
}
}
// create buffer
@ -266,6 +310,8 @@ struct wlr_vk_buffer_span vulkan_get_stage_span(struct wlr_vk_renderer *r,
wlr_log_errno(WLR_ERROR, "Allocation failed");
goto error_alloc;
}
buf->buf_size = bsize;
wl_list_insert(&r->stage.buffers, &buf->link);
VkResult res;
VkBufferCreateInfo buf_info = {
@ -315,33 +361,41 @@ struct wlr_vk_buffer_span vulkan_get_stage_span(struct wlr_vk_renderer *r,
goto error;
}
struct wlr_vk_allocation *a = wl_array_add(&buf->allocs, sizeof(*a));
if (a == NULL) {
wlr_log_errno(WLR_ERROR, "Allocation failed");
span = malloc(sizeof(*span));
struct wlr_vk_stage_span *free_span = malloc(sizeof(*free_span));
if (span == NULL || free_span == NULL) {
free(span);
free(free_span);
goto error;
}
*free_span = (struct wlr_vk_stage_span){
.buffer = buf,
.start = size,
.size = bsize - size,
.free = true,
};
wl_list_init(&free_span->usage_link);
wl_list_insert(&r->stage.spans, &free_span->link);
wlr_log(WLR_DEBUG, "Created new vk staging buffer of size %" PRIu64, bsize);
buf->buf_size = bsize;
wl_list_insert(&r->stage.buffers, &buf->link);
*a = (struct wlr_vk_allocation){
*span = (struct wlr_vk_stage_span){
.buffer = buf,
.start = 0,
.size = size,
.free = false,
};
return (struct wlr_vk_buffer_span) {
.buffer = buf,
.alloc = *a,
};
wl_list_init(&span->usage_link);
wl_list_insert(&r->stage.spans, &span->link);
buf->active = size;
wlr_log(WLR_DEBUG, "Created new vk staging buffer of size %" PRIu64, bsize);
return span;
error:
shared_buffer_destroy(r, buf);
error_alloc:
return (struct wlr_vk_buffer_span) {
.buffer = NULL,
.alloc = (struct wlr_vk_allocation) {0, 0},
};
return NULL;
}
VkCommandBuffer vulkan_record_stage_cb(struct wlr_vk_renderer *renderer) {
@ -429,7 +483,7 @@ static bool init_command_buffer(struct wlr_vk_command_buffer *cb,
.vk = vk_cb,
};
wl_list_init(&cb->destroy_textures);
wl_list_init(&cb->stage_buffers);
wl_list_init(&cb->stage_spans);
return true;
}
@ -463,12 +517,11 @@ static void release_command_buffer_resources(struct wlr_vk_command_buffer *cb,
wlr_texture_destroy(&texture->wlr_texture);
}
struct wlr_vk_shared_buffer *buf, *buf_tmp;
wl_list_for_each_safe(buf, buf_tmp, &cb->stage_buffers, link) {
buf->allocs.size = 0;
wl_list_remove(&buf->link);
wl_list_insert(&renderer->stage.buffers, &buf->link);
struct wlr_vk_stage_span *span, *span_tmp;
wl_list_for_each_safe(span, span_tmp, &cb->stage_spans, usage_link) {
wl_list_remove(&span->usage_link);
wl_list_init(&span->usage_link);
vulkan_return_stage_span(renderer, span);
}
if (cb->color_transform) {
@ -489,6 +542,18 @@ static struct wlr_vk_command_buffer *get_command_buffer(
return NULL;
}
struct wlr_vk_shared_buffer *buf, *buf_tmp;
wl_list_for_each_safe(buf, buf_tmp, &renderer->stage.buffers, link) {
if (buf->active > 0) {
buf->unused_counter = 0;
continue;
}
buf->unused_counter++;
if (buf->unused_counter > max_stage_unused_frames) {
shared_buffer_destroy(renderer, buf);
}
}
// Destroy textures for completed command buffers
for (size_t i = 0; i < VULKAN_COMMAND_BUFFERS_CAP; i++) {
struct wlr_vk_command_buffer *cb = &renderer->command_buffers[i];
@ -2427,6 +2492,7 @@ struct wlr_renderer *vulkan_renderer_create_for_device(struct wlr_vk_device *dev
wlr_renderer_init(&renderer->wlr_renderer, &renderer_impl, WLR_BUFFER_CAP_DMABUF);
renderer->wlr_renderer.features.output_color_transform = true;
wl_list_init(&renderer->stage.buffers);
wl_list_init(&renderer->stage.spans);
wl_list_init(&renderer->foreign_textures);
wl_list_init(&renderer->textures);
wl_list_init(&renderer->descriptor_pools);

View file

@ -71,17 +71,17 @@ static bool write_pixels(struct wlr_vk_texture *texture,
}
// get staging buffer
struct wlr_vk_buffer_span span = vulkan_get_stage_span(renderer, bsize, format_info->bytes_per_block);
if (!span.buffer || span.alloc.size != bsize) {
struct wlr_vk_stage_span *span = vulkan_get_stage_span(renderer, bsize, format_info->bytes_per_block);
if (span == NULL) {
wlr_log(WLR_ERROR, "Failed to retrieve staging buffer");
free(copies);
return false;
}
char *map = (char*)span.buffer->cpu_mapping + span.alloc.start;
char *map = (char*)span->buffer->cpu_mapping + span->start;
// upload data
uint32_t buf_off = span.alloc.start;
uint32_t buf_off = span->start;
for (int i = 0; i < rects_len; i++) {
pixman_box32_t rect = rects[i];
uint32_t width = rect.x2 - rect.x1;
@ -130,6 +130,7 @@ static bool write_pixels(struct wlr_vk_texture *texture,
// will be executed before next frame
VkCommandBuffer cb = vulkan_record_stage_cb(renderer);
if (cb == VK_NULL_HANDLE) {
vulkan_return_stage_span(renderer, span);
free(copies);
return false;
}
@ -139,7 +140,7 @@ static bool write_pixels(struct wlr_vk_texture *texture,
VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, VK_PIPELINE_STAGE_TRANSFER_BIT,
VK_ACCESS_TRANSFER_WRITE_BIT);
vkCmdCopyBufferToImage(cb, span.buffer->buffer, texture->image,
vkCmdCopyBufferToImage(cb, span->buffer->buffer, texture->image,
VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, (uint32_t)rects_len, copies);
vulkan_change_layout(cb, texture->image,
VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, VK_PIPELINE_STAGE_TRANSFER_BIT,
@ -147,6 +148,7 @@ static bool write_pixels(struct wlr_vk_texture *texture,
VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT, VK_ACCESS_SHADER_READ_BIT);
texture->last_used_cb = renderer->stage.cb;
wl_list_insert(&renderer->stage.cb->stage_spans, &span->usage_link);
free(copies);