diff --git a/include/render/vulkan.h b/include/render/vulkan.h index 29403f01f..0543bb0a6 100644 --- a/include/render/vulkan.h +++ b/include/render/vulkan.h @@ -247,7 +247,7 @@ struct wlr_vk_command_buffer { // Textures to destroy after the command buffer completes struct wl_list destroy_textures; // wlr_vk_texture.destroy_link // Staging shared buffers to release after the command buffer completes - struct wl_list stage_buffers; // wlr_vk_shared_buffer.link + struct wl_list stage_spans; // wlr_vk_stage_span.link // Color transform to unref after the command buffer completes struct wlr_color_transform *color_transform; @@ -313,6 +313,7 @@ struct wlr_vk_renderer { struct wlr_vk_command_buffer *cb; uint64_t last_timeline_point; struct wl_list buffers; // wlr_vk_shared_buffer.link + struct wl_list spans; // wlr_vk_stage_span.link } stage; struct { @@ -387,9 +388,10 @@ struct wlr_vk_render_pass *vulkan_begin_render_pass(struct wlr_vk_renderer *rend // and used as staging buffer. The allocation is implicitly released when the // stage cb has finished execution. The start of the span will be a multiple // of the given alignment. -struct wlr_vk_buffer_span vulkan_get_stage_span( +struct wlr_vk_stage_span *vulkan_get_stage_span( struct wlr_vk_renderer *renderer, VkDeviceSize size, VkDeviceSize alignment); +void vulkan_return_stage_span(struct wlr_vk_renderer *r, struct wlr_vk_stage_span *span); // Tries to allocate a texture descriptor set. Will additionally // return the pool it was allocated from when successful (for freeing it later). @@ -471,29 +473,32 @@ struct wlr_vk_descriptor_pool { struct wl_list link; // wlr_vk_renderer.descriptor_pools }; -struct wlr_vk_allocation { +struct wlr_vk_stage_span { + struct wl_list link; // wlr_vk_renderer.stage.spans + + // usage_link is a reference from the command buffer using the span. + // Separate from the main link to not mess up ordering. 
+ struct wl_list usage_link; // wlr_vk_command_buffer.stage_spans + struct wlr_vk_shared_buffer *buffer; + VkDeviceSize start; VkDeviceSize size; + bool free; }; // List of suballocated staging buffers. // Used to upload to/read from device local images. struct wlr_vk_shared_buffer { - struct wl_list link; // wlr_vk_renderer.stage.buffers or wlr_vk_command_buffer.stage_buffers + struct wl_list link; // wlr_vk_renderer.stage.buffers VkBuffer buffer; VkDeviceMemory memory; VkDeviceSize buf_size; void *cpu_mapping; - struct wl_array allocs; // struct wlr_vk_allocation -}; -// Suballocated range on a buffer. -struct wlr_vk_buffer_span { - struct wlr_vk_shared_buffer *buffer; - struct wlr_vk_allocation alloc; + VkDeviceSize active; + uint64_t unused_counter; }; - // Lookup table for a color transform struct wlr_vk_color_transform { struct wlr_addon addon; // owned by: wlr_vk_renderer diff --git a/render/vulkan/pass.c b/render/vulkan/pass.c index c879b71c8..a1b671258 100644 --- a/render/vulkan/pass.c +++ b/render/vulkan/pass.c @@ -430,15 +430,6 @@ static bool render_pass_submit(struct wlr_render_pass *wlr_pass) { free(render_wait); - struct wlr_vk_shared_buffer *stage_buf, *stage_buf_tmp; - wl_list_for_each_safe(stage_buf, stage_buf_tmp, &renderer->stage.buffers, link) { - if (stage_buf->allocs.size == 0) { - continue; - } - wl_list_remove(&stage_buf->link); - wl_list_insert(&stage_cb->stage_buffers, &stage_buf->link); - } - if (!vulkan_sync_render_buffer(renderer, render_buffer, render_cb)) { wlr_log(WLR_ERROR, "Failed to sync render buffer"); } @@ -815,14 +806,14 @@ static bool create_3d_lut_image(struct wlr_vk_renderer *renderer, size_t bytes_per_block = 4 * sizeof(float); size_t size = lut_3d->dim_len * lut_3d->dim_len * lut_3d->dim_len * bytes_per_block; - struct wlr_vk_buffer_span span = vulkan_get_stage_span(renderer, + struct wlr_vk_stage_span *span = vulkan_get_stage_span(renderer, size, bytes_per_block); - if (!span.buffer || span.alloc.size != size) { + if 
(span == NULL) { wlr_log(WLR_ERROR, "Failed to retrieve staging buffer"); goto fail_imageview; } - char *map = (char*)span.buffer->cpu_mapping + span.alloc.start; + char *map = (char*)span->buffer->cpu_mapping + span->start; float *dst = (float*)map; size_t dim_len = lut_3d->dim_len; for (size_t b_index = 0; b_index < dim_len; b_index++) { @@ -840,19 +831,21 @@ static bool create_3d_lut_image(struct wlr_vk_renderer *renderer, } VkCommandBuffer cb = vulkan_record_stage_cb(renderer); + wl_list_insert(&renderer->stage.cb->stage_spans, &span->usage_link); + vulkan_change_layout(cb, *image, VK_IMAGE_LAYOUT_UNDEFINED, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_TRANSFER_WRITE_BIT); VkBufferImageCopy copy = { - .bufferOffset = span.alloc.start, + .bufferOffset = span->start, .imageExtent.width = lut_3d->dim_len, .imageExtent.height = lut_3d->dim_len, .imageExtent.depth = lut_3d->dim_len, .imageSubresource.layerCount = 1, .imageSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, }; - vkCmdCopyBufferToImage(cb, span.buffer->buffer, *image, + vkCmdCopyBufferToImage(cb, span->buffer->buffer, *image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, ©); vulkan_change_layout(cb, *image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, VK_PIPELINE_STAGE_TRANSFER_BIT, diff --git a/render/vulkan/renderer.c b/render/vulkan/renderer.c index 32effb5a7..07ad8549c 100644 --- a/render/vulkan/renderer.c +++ b/render/vulkan/renderer.c @@ -39,7 +39,9 @@ // might still be a good idea. 
static const VkDeviceSize min_stage_size = 1024 * 1024; // 1MB +static const VkDeviceSize special_stage_size = 16 * min_stage_size; // 16MB static const VkDeviceSize max_stage_size = 256 * min_stage_size; // 256MB +static const uint64_t max_stage_unused_frames = 1024; static const size_t start_descriptor_pool_size = 256u; static bool default_debug = true; @@ -174,13 +176,21 @@ static void shared_buffer_destroy(struct wlr_vk_renderer *r, if (!buffer) { return; } + wlr_log(WLR_DEBUG, "Destroying vk staging buffer of size %" PRIu64, buffer->buf_size); - if (buffer->allocs.size > 0) { - wlr_log(WLR_ERROR, "shared_buffer_finish: %zu allocations left", - buffer->allocs.size / sizeof(struct wlr_vk_allocation)); + if (buffer->active > 0) { + wlr_log(WLR_ERROR, "shared_buffer_destroy: spans still in use"); + } + struct wlr_vk_stage_span *span, *span_tmp; + wl_list_for_each_safe(span, span_tmp, &r->stage.spans, link) { + if (span->buffer != buffer) { + continue; + } + wl_list_remove(&span->usage_link); + wl_list_remove(&span->link); + free(span); } - wl_array_release(&buffer->allocs); if (buffer->cpu_mapping) { vkUnmapMemory(r->dev->dev, buffer->memory); buffer->cpu_mapping = NULL; @@ -196,44 +206,71 @@ static void shared_buffer_destroy(struct wlr_vk_renderer *r, free(buffer); } -struct wlr_vk_buffer_span vulkan_get_stage_span(struct wlr_vk_renderer *r, +void vulkan_return_stage_span(struct wlr_vk_renderer *r, struct wlr_vk_stage_span *span) { + span->buffer->active -= span->size; + span->free = true; + + // Merge next free span into this one, if one exists + if (span->link.next != &r->stage.spans) { + struct wlr_vk_stage_span *next = wl_container_of(span->link.next, next, link); + if (next->free && next->buffer == span->buffer) { + span->size += next->size; + wl_list_remove(&next->link); + free(next); + } + } + + // Merge this free span into the previous one, if one exists + if (span->link.prev != &r->stage.spans) { + struct wlr_vk_stage_span *prev = 
wl_container_of(span->link.prev, prev, link); + if (prev->free && prev->buffer == span->buffer) { + prev->size += span->size; + wl_list_remove(&span->link); + free(span); + } + } +} + +struct wlr_vk_stage_span *vulkan_get_stage_span(struct wlr_vk_renderer *r, VkDeviceSize size, VkDeviceSize alignment) { // try to find free span // simple greedy allocation algorithm - should be enough for this usecase // since all allocations are freed together after the frame - struct wlr_vk_shared_buffer *buf; - wl_list_for_each_reverse(buf, &r->stage.buffers, link) { - VkDeviceSize start = 0u; - if (buf->allocs.size > 0) { - const struct wlr_vk_allocation *allocs = buf->allocs.data; - size_t allocs_len = buf->allocs.size / sizeof(struct wlr_vk_allocation); - const struct wlr_vk_allocation *last = &allocs[allocs_len - 1]; - start = last->start + last->size; - } - - assert(start <= buf->buf_size); - - // ensure the proposed start is a multiple of alignment - start += alignment - 1 - ((start + alignment - 1) % alignment); - - if (buf->buf_size - start < size) { + struct wlr_vk_stage_span *span; + wl_list_for_each_reverse(span, &r->stage.spans, link) { + if (!span->free || span->size < size) { continue; } - struct wlr_vk_allocation *a = wl_array_add(&buf->allocs, sizeof(*a)); - if (a == NULL) { - wlr_log_errno(WLR_ERROR, "Allocation failed"); - goto error_alloc; + if (size <= special_stage_size && span->buffer->buf_size > special_stage_size) { + // Avoid accidentally holding on to big buffers + continue; } - *a = (struct wlr_vk_allocation){ - .start = start, - .size = size, - }; - return (struct wlr_vk_buffer_span) { - .buffer = buf, - .alloc = *a, + span->free = false; + span->buffer->active += size; + if (span->size == size) { + // Perfect fit + return span; + } + + // Cleave the span + struct wlr_vk_stage_span *free_span = malloc(sizeof(*free_span)); + if (free_span == NULL) { + span->free = true; + span->buffer->active -= size; + return NULL; + } + *free_span = (struct 
wlr_vk_stage_span){ + .buffer = span->buffer, + .size = span->size - size, + .start = span->start + size, + .free = true, }; + wl_list_init(&free_span->usage_link); + wl_list_insert(&span->link, &free_span->link); + span->size = size; + return span; } if (size > max_stage_size) { @@ -243,21 +280,28 @@ struct wlr_vk_buffer_span vulkan_get_stage_span(struct wlr_vk_renderer *r, goto error_alloc; } - // we didn't find a free buffer - create one - // size = clamp(max(size * 2, prev_size * 2), min_size, max_size) - VkDeviceSize bsize = size * 2; - bsize = bsize < min_stage_size ? min_stage_size : bsize; - if (!wl_list_empty(&r->stage.buffers)) { - struct wl_list *last_link = r->stage.buffers.prev; - struct wlr_vk_shared_buffer *prev = wl_container_of( - last_link, prev, link); - VkDeviceSize last_size = 2 * prev->buf_size; - bsize = bsize < last_size ? last_size : bsize; - } + // Pick the next bucket size. If the size is below our "special" threshold, + // double the last bucket size. Otherwise allocate the requested size + // directly. + VkDeviceSize bsize = min_stage_size; + struct wlr_vk_shared_buffer *buf; - if (bsize > max_stage_size) { - wlr_log(WLR_INFO, "vulkan stage buffers have reached max size"); - bsize = max_stage_size; + if (size > special_stage_size) { + // The size is too big for our buckets, allocate directly + bsize = size; + } else { + bsize = min_stage_size; + // We start by picking the last bucket size * 2 + wl_list_for_each_reverse(buf, &r->stage.buffers, link) { + if (buf->buf_size < special_stage_size && buf->buf_size * 2 > bsize) { + bsize = buf->buf_size * 2; + } + } + // If double the last bucket is not enough, keep doubling until we hit the + // size for dedicated allocations. 
+ while (bsize < size && bsize < special_stage_size) { + bsize *= 2; + } } // create buffer @@ -266,6 +310,8 @@ struct wlr_vk_buffer_span vulkan_get_stage_span(struct wlr_vk_renderer *r, wlr_log_errno(WLR_ERROR, "Allocation failed"); goto error_alloc; } + buf->buf_size = bsize; + wl_list_insert(&r->stage.buffers, &buf->link); VkResult res; VkBufferCreateInfo buf_info = { @@ -315,33 +361,41 @@ struct wlr_vk_buffer_span vulkan_get_stage_span(struct wlr_vk_renderer *r, goto error; } - struct wlr_vk_allocation *a = wl_array_add(&buf->allocs, sizeof(*a)); - if (a == NULL) { - wlr_log_errno(WLR_ERROR, "Allocation failed"); + span = malloc(sizeof(*span)); + struct wlr_vk_stage_span *free_span = malloc(sizeof(*free_span)); + if (span == NULL || free_span == NULL) { + free(span); + free(free_span); goto error; } + *free_span = (struct wlr_vk_stage_span){ + .buffer = buf, + .start = size, + .size = bsize - size, + .free = true, + }; + wl_list_init(&free_span->usage_link); + wl_list_insert(&r->stage.spans, &free_span->link); - wlr_log(WLR_DEBUG, "Created new vk staging buffer of size %" PRIu64, bsize); - buf->buf_size = bsize; - wl_list_insert(&r->stage.buffers, &buf->link); - - *a = (struct wlr_vk_allocation){ + *span = (struct wlr_vk_stage_span){ + .buffer = buf, .start = 0, .size = size, + .free = false, }; - return (struct wlr_vk_buffer_span) { - .buffer = buf, - .alloc = *a, - }; + wl_list_init(&span->usage_link); + wl_list_insert(&r->stage.spans, &span->link); + buf->active = size; + + wlr_log(WLR_DEBUG, "Created new vk staging buffer of size %" PRIu64, bsize); + + return span; error: shared_buffer_destroy(r, buf); error_alloc: - return (struct wlr_vk_buffer_span) { - .buffer = NULL, - .alloc = (struct wlr_vk_allocation) {0, 0}, - }; + return NULL; } VkCommandBuffer vulkan_record_stage_cb(struct wlr_vk_renderer *renderer) { @@ -429,7 +483,7 @@ static bool init_command_buffer(struct wlr_vk_command_buffer *cb, .vk = vk_cb, }; wl_list_init(&cb->destroy_textures); - 
wl_list_init(&cb->stage_buffers); + wl_list_init(&cb->stage_spans); return true; } @@ -463,12 +517,11 @@ static void release_command_buffer_resources(struct wlr_vk_command_buffer *cb, wlr_texture_destroy(&texture->wlr_texture); } - struct wlr_vk_shared_buffer *buf, *buf_tmp; - wl_list_for_each_safe(buf, buf_tmp, &cb->stage_buffers, link) { - buf->allocs.size = 0; - - wl_list_remove(&buf->link); - wl_list_insert(&renderer->stage.buffers, &buf->link); + struct wlr_vk_stage_span *span, *span_tmp; + wl_list_for_each_safe(span, span_tmp, &cb->stage_spans, usage_link) { + wl_list_remove(&span->usage_link); + wl_list_init(&span->usage_link); + vulkan_return_stage_span(renderer, span); } if (cb->color_transform) { @@ -489,6 +542,18 @@ static struct wlr_vk_command_buffer *get_command_buffer( return NULL; } + struct wlr_vk_shared_buffer *buf, *buf_tmp; + wl_list_for_each_safe(buf, buf_tmp, &renderer->stage.buffers, link) { + if (buf->active > 0) { + buf->unused_counter = 0; + continue; + } + buf->unused_counter++; + if (buf->unused_counter > max_stage_unused_frames) { + shared_buffer_destroy(renderer, buf); + } + } + // Destroy textures for completed command buffers for (size_t i = 0; i < VULKAN_COMMAND_BUFFERS_CAP; i++) { struct wlr_vk_command_buffer *cb = &renderer->command_buffers[i]; @@ -2414,6 +2479,7 @@ struct wlr_renderer *vulkan_renderer_create_for_device(struct wlr_vk_device *dev wlr_renderer_init(&renderer->wlr_renderer, &renderer_impl, WLR_BUFFER_CAP_DMABUF); renderer->wlr_renderer.features.output_color_transform = true; wl_list_init(&renderer->stage.buffers); + wl_list_init(&renderer->stage.spans); wl_list_init(&renderer->foreign_textures); wl_list_init(&renderer->textures); wl_list_init(&renderer->descriptor_pools); diff --git a/render/vulkan/texture.c b/render/vulkan/texture.c index c40533133..d3635731e 100644 --- a/render/vulkan/texture.c +++ b/render/vulkan/texture.c @@ -71,17 +71,17 @@ static bool write_pixels(struct wlr_vk_texture *texture, } // get staging 
buffer - struct wlr_vk_buffer_span span = vulkan_get_stage_span(renderer, bsize, format_info->bytes_per_block); - if (!span.buffer || span.alloc.size != bsize) { + struct wlr_vk_stage_span *span = vulkan_get_stage_span(renderer, bsize, format_info->bytes_per_block); + if (span == NULL) { wlr_log(WLR_ERROR, "Failed to retrieve staging buffer"); free(copies); return false; } - char *map = (char*)span.buffer->cpu_mapping + span.alloc.start; + char *map = (char*)span->buffer->cpu_mapping + span->start; // upload data - uint32_t buf_off = span.alloc.start; + uint32_t buf_off = span->start; for (int i = 0; i < rects_len; i++) { pixman_box32_t rect = rects[i]; uint32_t width = rect.x2 - rect.x1; @@ -130,6 +130,7 @@ static bool write_pixels(struct wlr_vk_texture *texture, // will be executed before next frame VkCommandBuffer cb = vulkan_record_stage_cb(renderer); if (cb == VK_NULL_HANDLE) { + vulkan_return_stage_span(renderer, span); free(copies); return false; } @@ -139,7 +140,7 @@ static bool write_pixels(struct wlr_vk_texture *texture, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_TRANSFER_WRITE_BIT); - vkCmdCopyBufferToImage(cb, span.buffer->buffer, texture->image, + vkCmdCopyBufferToImage(cb, span->buffer->buffer, texture->image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, (uint32_t)rects_len, copies); vulkan_change_layout(cb, texture->image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, VK_PIPELINE_STAGE_TRANSFER_BIT, @@ -147,6 +148,7 @@ static bool write_pixels(struct wlr_vk_texture *texture, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT, VK_ACCESS_SHADER_READ_BIT); texture->last_used_cb = renderer->stage.cb; + wl_list_insert(&renderer->stage.cb->stage_spans, &span->usage_link); free(copies);