diff --git a/include/render/vulkan.h b/include/render/vulkan.h index 021749c27..12a7ff173 100644 --- a/include/render/vulkan.h +++ b/include/render/vulkan.h @@ -284,8 +284,6 @@ struct wlr_vk_command_buffer { uint64_t timeline_point; // Textures to destroy after the command buffer completes struct wl_list destroy_textures; // wlr_vk_texture.destroy_link - // Staging shared buffers to release after the command buffer completes - struct wl_list stage_buffers; // wlr_vk_shared_buffer.link // Color transform to unref after the command buffer completes struct wlr_color_transform *color_transform; @@ -352,7 +350,7 @@ struct wlr_vk_renderer { struct { struct wlr_vk_command_buffer *cb; uint64_t last_timeline_point; - struct wl_list buffers; // wlr_vk_shared_buffer.link + struct wl_list buffers; // wlr_vk_stage_buffer.link } stage; struct { @@ -453,14 +451,27 @@ struct wlr_vk_render_pass { struct wlr_vk_render_pass *vulkan_begin_render_pass(struct wlr_vk_renderer *renderer, struct wlr_vk_render_buffer *buffer, const struct wlr_buffer_pass_options *options); -// Suballocates a buffer span with the given size that can be mapped -// and used as staging buffer. The allocation is implicitly released when the -// stage cb has finished execution. The start of the span will be a multiple -// of the given alignment. +// Suballocates a buffer span with the given size from the staging ring buffer +// that is mapped for CPU access. vulkan_stage_mark_submit must be called after +// allocations are made to mark the timeline point after which the allocations +// will be released. The start of the span will be a multiple of alignment. struct wlr_vk_buffer_span vulkan_get_stage_span( struct wlr_vk_renderer *renderer, VkDeviceSize size, VkDeviceSize alignment); +// Returns unused bytes at the end of a buffer span back to the ring buffer. +// This allows the caller to allocate for worst-case consumption and return the +// unused remainder. 
This must not be called after vulkan_stage_mark_submit, +// and only works for the last made allocation. +void vulkan_return_stage_span(struct wlr_vk_buffer_span *span, + VkDeviceSize return_size); + +// Records a watermark on all staging buffers with new allocations with the +// specified timeline point. Once the timeline point is passed, the span will +// be reclaimed by vulkan_stage_buffer_reclaim. +void vulkan_stage_mark_submit(struct wlr_vk_renderer *renderer, + uint64_t timeline_point); + // Tries to allocate a texture descriptor set. Will additionally // return the pool it was allocated from when successful (for freeing it later). struct wlr_vk_descriptor_pool *vulkan_alloc_texture_ds( @@ -544,29 +555,45 @@ struct wlr_vk_descriptor_pool { struct wl_list link; // wlr_vk_renderer.descriptor_pools }; -struct wlr_vk_allocation { - VkDeviceSize start; - VkDeviceSize size; +struct wlr_vk_stage_watermark { + VkDeviceSize head; + uint64_t timeline_point; }; -// List of suballocated staging buffers. -// Used to upload to/read from device local images. -struct wlr_vk_shared_buffer { - struct wl_list link; // wlr_vk_renderer.stage.buffers or wlr_vk_command_buffer.stage_buffers +// Ring buffer for staging transfers +struct wlr_vk_stage_buffer { + struct wl_list link; // wlr_vk_renderer.stage.buffers + bool active; VkBuffer buffer; VkDeviceMemory memory; VkDeviceSize buf_size; void *cpu_mapping; - struct wl_array allocs; // struct wlr_vk_allocation - int64_t last_used_ms; + + VkDeviceSize head; + VkDeviceSize tail; + + struct wl_array watermarks; // struct wlr_vk_stage_watermark + VkDeviceSize peak_utilization; + int underutil_count; }; -// Suballocated range on a buffer. +// Suballocated range on a staging ring buffer. 
struct wlr_vk_buffer_span { - struct wlr_vk_shared_buffer *buffer; - struct wlr_vk_allocation alloc; + struct wlr_vk_stage_buffer *buffer; + VkDeviceSize offset; + VkDeviceSize size; }; +// Suballocate a span of size bytes from a staging ring buffer, with the +// returned offset rounded up to the given alignment. Returns the byte offset +// of the allocation, or (VkDeviceSize)-1 if the buffer is too full to fit it. +VkDeviceSize vulkan_stage_buffer_alloc(struct wlr_vk_stage_buffer *buf, + VkDeviceSize size, VkDeviceSize alignment); + +// Free all allocations covered by watermarks whose timeline point has been +// reached. Returns true if the buffer is now fully drained. +bool vulkan_stage_buffer_reclaim(struct wlr_vk_stage_buffer *buf, + uint64_t current_point); // Prepared form for a color transform struct wlr_vk_color_transform { diff --git a/render/vulkan/pass.c b/render/vulkan/pass.c index 01e8fbd7a..2dca9d0f3 100644 --- a/render/vulkan/pass.c +++ b/render/vulkan/pass.c @@ -2,7 +2,9 @@ #include #include #include +#include #include +#include #include #include @@ -285,6 +287,20 @@ static bool render_pass_submit(struct wlr_render_pass *wlr_pass) { int clip_rects_len; const pixman_box32_t *clip_rects = pixman_region32_rectangles( clip, &clip_rects_len); + + float identity[4] = { 0.0f, 0.0f, 1.0f, 1.0f }; + struct wlr_vk_buffer_span span = vulkan_get_stage_span(renderer, + sizeof(identity), sizeof(identity)); + if (!span.buffer) { + pass->failed = true; + goto error; + } + + memcpy((char *)span.buffer->cpu_mapping + span.offset, identity, sizeof(identity)); + + VkDeviceSize vb_offset = span.offset; + vkCmdBindVertexBuffers(render_cb->vk, 0, 1, &span.buffer->buffer, &vb_offset); + for (int i = 0; i < clip_rects_len; i++) { VkRect2D rect; convert_pixman_box_to_vk_rect(&clip_rects[i], &rect); @@ -595,14 +611,7 @@ static bool render_pass_submit(struct wlr_render_pass *wlr_pass) { free(render_wait); - struct wlr_vk_shared_buffer *stage_buf, *stage_buf_tmp; - 
wl_list_for_each_safe(stage_buf, stage_buf_tmp, &renderer->stage.buffers, link) { - if (stage_buf->allocs.size == 0) { - continue; - } - wl_list_remove(&stage_buf->link); - wl_list_insert(&stage_cb->stage_buffers, &stage_buf->link); - } + vulkan_stage_mark_submit(renderer, render_timeline_point); if (!vulkan_sync_render_pass_release(renderer, pass)) { wlr_log(WLR_ERROR, "Failed to sync render buffer"); @@ -663,20 +672,6 @@ static void render_pass_add_rect(struct wlr_render_pass *wlr_pass, int clip_rects_len; const pixman_box32_t *clip_rects = pixman_region32_rectangles(&clip, &clip_rects_len); - // Record regions possibly updated for use in second subpass - for (int i = 0; i < clip_rects_len; i++) { - struct wlr_box clip_box = { - .x = clip_rects[i].x1, - .y = clip_rects[i].y1, - .width = clip_rects[i].x2 - clip_rects[i].x1, - .height = clip_rects[i].y2 - clip_rects[i].y1, - }; - struct wlr_box intersection; - if (!wlr_box_intersection(&intersection, &options->box, &clip_box)) { - continue; - } - render_pass_mark_box_updated(pass, &intersection); - } struct wlr_box box; wlr_render_rect_options_get_box(options, pass->render_buffer->wlr_buffer, &box); @@ -699,6 +694,45 @@ static void render_pass_add_rect(struct wlr_render_pass *wlr_pass, break; } + if (clip_rects_len == 0) { + break; + } + + const VkDeviceSize instance_size = 4 * sizeof(float); + struct wlr_vk_buffer_span span = vulkan_get_stage_span(pass->renderer, + clip_rects_len * instance_size, 16); + if (!span.buffer) { + pass->failed = true; + break; + } + float *instance_data = (float *)((char *)span.buffer->cpu_mapping + span.offset); + int instance_count = 0; + for (int i = 0; i < clip_rects_len; i++) { + struct wlr_box clip_box = { + .x = clip_rects[i].x1, + .y = clip_rects[i].y1, + .width = clip_rects[i].x2 - clip_rects[i].x1, + .height = clip_rects[i].y2 - clip_rects[i].y1, + }; + struct wlr_box intersection; + if (!wlr_box_intersection(&intersection, &box, &clip_box)) { + continue; + } + 
render_pass_mark_box_updated(pass, &intersection); + instance_data[instance_count * 4 + 0] = (float)(intersection.x - box.x) / box.width; + instance_data[instance_count * 4 + 1] = (float)(intersection.y - box.y) / box.height; + instance_data[instance_count * 4 + 2] = (float)intersection.width / box.width; + instance_data[instance_count * 4 + 3] = (float)intersection.height / box.height; + instance_count++; + } + if (instance_count < clip_rects_len) { + vulkan_return_stage_span(&span, + (clip_rects_len - instance_count) * instance_size); + if (instance_count == 0) { + break; + } + } + struct wlr_vk_vert_pcr_data vert_pcr_data = { .uv_off = { 0, 0 }, .uv_size = { 1, 1 }, @@ -712,12 +746,17 @@ static void render_pass_add_rect(struct wlr_render_pass *wlr_pass, VK_SHADER_STAGE_FRAGMENT_BIT, sizeof(vert_pcr_data), sizeof(float) * 4, linear_color); - for (int i = 0; i < clip_rects_len; i++) { - VkRect2D rect; - convert_pixman_box_to_vk_rect(&clip_rects[i], &rect); - vkCmdSetScissor(cb, 0, 1, &rect); - vkCmdDraw(cb, 4, 1, 0, 0); - } + VkDeviceSize vb_offset = span.offset; + vkCmdBindVertexBuffers(cb, 0, 1, &span.buffer->buffer, &vb_offset); + + VkRect2D full_scissor = { + .extent = { + .width = pass->render_buffer->wlr_buffer->width, + .height = pass->render_buffer->wlr_buffer->height, + }, + }; + vkCmdSetScissor(cb, 0, 1, &full_scissor); + vkCmdDraw(cb, 4, instance_count, 0, 0); break; case WLR_RENDER_BLEND_MODE_NONE:; VkClearAttachment clear_att = { @@ -734,6 +773,18 @@ static void render_pass_add_rect(struct wlr_render_pass *wlr_pass, .layerCount = 1, }; for (int i = 0; i < clip_rects_len; i++) { + struct wlr_box clip_box = { + .x = clip_rects[i].x1, + .y = clip_rects[i].y1, + .width = clip_rects[i].x2 - clip_rects[i].x1, + .height = clip_rects[i].y2 - clip_rects[i].y1, + }; + struct wlr_box intersection; + if (!wlr_box_intersection(&intersection, &options->box, &clip_box)) { + continue; + } + render_pass_mark_box_updated(pass, &intersection); + 
convert_pixman_box_to_vk_rect(&clip_rects[i], &clear_rect.rect); vkCmdClearAttachments(cb, 1, &clear_att, 1, &clear_rect); } @@ -895,12 +946,23 @@ static void render_pass_add_texture(struct wlr_render_pass *wlr_pass, int clip_rects_len; const pixman_box32_t *clip_rects = pixman_region32_rectangles(&clip, &clip_rects_len); - for (int i = 0; i < clip_rects_len; i++) { - VkRect2D rect; - convert_pixman_box_to_vk_rect(&clip_rects[i], &rect); - vkCmdSetScissor(cb, 0, 1, &rect); - vkCmdDraw(cb, 4, 1, 0, 0); + if (clip_rects_len == 0) { + goto out; + } + + const VkDeviceSize instance_size = 4 * sizeof(float); + struct wlr_vk_buffer_span span = vulkan_get_stage_span(renderer, + clip_rects_len * instance_size, 16); + if (!span.buffer) { + pass->failed = true; + goto out; + } + float *instance_data = (float *)((char *)span.buffer->cpu_mapping + span.offset); + int instance_count = 0; + enum wl_output_transform inv_transform = + wlr_output_transform_invert(options->transform); + for (int i = 0; i < clip_rects_len; i++) { struct wlr_box clip_box = { .x = clip_rects[i].x1, .y = clip_rects[i].y1, @@ -912,8 +974,44 @@ static void render_pass_add_texture(struct wlr_render_pass *wlr_pass, continue; } render_pass_mark_box_updated(pass, &intersection); + + struct wlr_fbox norm = { + .x = (double)(intersection.x - dst_box.x) / dst_box.width, + .y = (double)(intersection.y - dst_box.y) / dst_box.height, + .width = (double)intersection.width / dst_box.width, + .height = (double)intersection.height / dst_box.height, + }; + + if (options->transform != WL_OUTPUT_TRANSFORM_NORMAL) { + wlr_fbox_transform(&norm, &norm, inv_transform, 1.0, 1.0); + } + + instance_data[instance_count * 4 + 0] = (float)norm.x; + instance_data[instance_count * 4 + 1] = (float)norm.y; + instance_data[instance_count * 4 + 2] = (float)norm.width; + instance_data[instance_count * 4 + 3] = (float)norm.height; + instance_count++; + } + if (instance_count < clip_rects_len) { + vulkan_return_stage_span(&span, + 
(clip_rects_len - instance_count) * instance_size); } + if (instance_count > 0) { + VkDeviceSize vb_offset = span.offset; + vkCmdBindVertexBuffers(cb, 0, 1, &span.buffer->buffer, &vb_offset); + + VkRect2D full_scissor = { + .extent = { + .width = pass->render_buffer->wlr_buffer->width, + .height = pass->render_buffer->wlr_buffer->height, + }, + }; + vkCmdSetScissor(cb, 0, 1, &full_scissor); + vkCmdDraw(cb, 4, instance_count, 0, 0); + } + +out: texture->last_used_cb = pass->command_buffer; pixman_region32_fini(&clip); @@ -1056,13 +1154,13 @@ static bool create_3d_lut_image(struct wlr_vk_renderer *renderer, size_t size = dim_len * dim_len * dim_len * bytes_per_block; struct wlr_vk_buffer_span span = vulkan_get_stage_span(renderer, size, bytes_per_block); - if (!span.buffer || span.alloc.size != size) { + if (!span.buffer || span.size != size) { wlr_log(WLR_ERROR, "Failed to retrieve staging buffer"); goto fail_imageview; } float sample_range = 1.0f / (dim_len - 1); - char *map = (char *)span.buffer->cpu_mapping + span.alloc.start; + char *map = (char *)span.buffer->cpu_mapping + span.offset; float *dst = (float *)map; for (size_t b_index = 0; b_index < dim_len; b_index++) { for (size_t g_index = 0; g_index < dim_len; g_index++) { @@ -1092,7 +1190,7 @@ static bool create_3d_lut_image(struct wlr_vk_renderer *renderer, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_TRANSFER_WRITE_BIT); VkBufferImageCopy copy = { - .bufferOffset = span.alloc.start, + .bufferOffset = span.offset, .imageExtent.width = dim_len, .imageExtent.height = dim_len, .imageExtent.depth = dim_len, diff --git a/render/vulkan/renderer.c b/render/vulkan/renderer.c index 434ab4769..38e8ac9f4 100644 --- a/render/vulkan/renderer.c +++ b/render/vulkan/renderer.c @@ -1,6 +1,5 @@ #include #include -#include #include #include #include @@ -8,6 +7,7 @@ #include #include #include +#include #include #include #include @@ -26,11 +26,9 @@ #include 
"render/vulkan/shaders/texture.frag.h" #include "render/vulkan/shaders/quad.frag.h" #include "render/vulkan/shaders/output.frag.h" -#include "types/wlr_buffer.h" -#include "util/time.h" +#include "util/array.h" // TODO: -// - simplify stage allocation, don't track allocations but use ringbuffer-like // - use a pipeline cache (not sure when to save though, after every pipeline // creation?) // - create pipelines as derivatives of each other @@ -187,18 +185,13 @@ static void destroy_render_format_setup(struct wlr_vk_renderer *renderer, free(setup); } -static void shared_buffer_destroy(struct wlr_vk_renderer *r, - struct wlr_vk_shared_buffer *buffer) { +static void stage_buffer_destroy(struct wlr_vk_renderer *r, + struct wlr_vk_stage_buffer *buffer) { if (!buffer) { return; } - if (buffer->allocs.size > 0) { - wlr_log(WLR_ERROR, "shared_buffer_finish: %zu allocations left", - buffer->allocs.size / sizeof(struct wlr_vk_allocation)); - } - - wl_array_release(&buffer->allocs); + wl_array_release(&buffer->watermarks); if (buffer->cpu_mapping) { vkUnmapMemory(r->dev->dev, buffer->memory); buffer->cpu_mapping = NULL; @@ -214,75 +207,12 @@ static void shared_buffer_destroy(struct wlr_vk_renderer *r, free(buffer); } -struct wlr_vk_buffer_span vulkan_get_stage_span(struct wlr_vk_renderer *r, - VkDeviceSize size, VkDeviceSize alignment) { - // try to find free span - // simple greedy allocation algorithm - should be enough for this usecase - // since all allocations are freed together after the frame - struct wlr_vk_shared_buffer *buf; - wl_list_for_each_reverse(buf, &r->stage.buffers, link) { - VkDeviceSize start = 0u; - if (buf->allocs.size > 0) { - const struct wlr_vk_allocation *allocs = buf->allocs.data; - size_t allocs_len = buf->allocs.size / sizeof(struct wlr_vk_allocation); - const struct wlr_vk_allocation *last = &allocs[allocs_len - 1]; - start = last->start + last->size; - } - - assert(start <= buf->buf_size); - - // ensure the proposed start is a multiple of 
alignment - start += alignment - 1 - ((start + alignment - 1) % alignment); - - if (buf->buf_size - start < size) { - continue; - } - - struct wlr_vk_allocation *a = wl_array_add(&buf->allocs, sizeof(*a)); - if (a == NULL) { - wlr_log_errno(WLR_ERROR, "Allocation failed"); - goto error_alloc; - } - - *a = (struct wlr_vk_allocation){ - .start = start, - .size = size, - }; - return (struct wlr_vk_buffer_span) { - .buffer = buf, - .alloc = *a, - }; - } - - if (size > max_stage_size) { - wlr_log(WLR_ERROR, "cannot vulkan stage buffer: " - "requested size (%zu bytes) exceeds maximum (%zu bytes)", - (size_t)size, (size_t)max_stage_size); - goto error_alloc; - } - - // we didn't find a free buffer - create one - // size = clamp(max(size * 2, prev_size * 2), min_size, max_size) - VkDeviceSize bsize = size * 2; - bsize = bsize < min_stage_size ? min_stage_size : bsize; - if (!wl_list_empty(&r->stage.buffers)) { - struct wl_list *last_link = r->stage.buffers.prev; - struct wlr_vk_shared_buffer *prev = wl_container_of( - last_link, prev, link); - VkDeviceSize last_size = 2 * prev->buf_size; - bsize = bsize < last_size ? 
last_size : bsize; - } - - if (bsize > max_stage_size) { - wlr_log(WLR_INFO, "vulkan stage buffers have reached max size"); - bsize = max_stage_size; - } - - // create buffer - buf = calloc(1, sizeof(*buf)); +static struct wlr_vk_stage_buffer *stage_buffer_create( + struct wlr_vk_renderer *r, VkDeviceSize bsize) { + struct wlr_vk_stage_buffer *buf = calloc(1, sizeof(*buf)); if (!buf) { wlr_log_errno(WLR_ERROR, "Allocation failed"); - goto error_alloc; + return NULL; } wl_list_init(&buf->link); @@ -292,7 +222,8 @@ struct wlr_vk_buffer_span vulkan_get_stage_span(struct wlr_vk_renderer *r, .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, .size = bsize, .usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT | - VK_BUFFER_USAGE_TRANSFER_SRC_BIT, + VK_BUFFER_USAGE_TRANSFER_SRC_BIT | + VK_BUFFER_USAGE_VERTEX_BUFFER_BIT, .sharingMode = VK_SHARING_MODE_EXCLUSIVE, }; res = vkCreateBuffer(r->dev->dev, &buf_info, NULL, &buf->buffer); @@ -319,7 +250,7 @@ struct wlr_vk_buffer_span vulkan_get_stage_span(struct wlr_vk_renderer *r, }; res = vkAllocateMemory(r->dev->dev, &mem_info, NULL, &buf->memory); if (res != VK_SUCCESS) { - wlr_vk_error("vkAllocatorMemory", res); + wlr_vk_error("vkAllocateMemory", res); goto error; } @@ -335,34 +266,209 @@ struct wlr_vk_buffer_span vulkan_get_stage_span(struct wlr_vk_renderer *r, goto error; } - struct wlr_vk_allocation *a = wl_array_add(&buf->allocs, sizeof(*a)); - if (a == NULL) { - wlr_log_errno(WLR_ERROR, "Allocation failed"); + buf->active = true; + buf->buf_size = bsize; + return buf; + +error: + stage_buffer_destroy(r, buf); + return NULL; +} + +// Returns true if the buffer is fully drained. +bool vulkan_stage_buffer_reclaim(struct wlr_vk_stage_buffer *buf, + uint64_t current_point) { + + // Update utilization metrics before cleaning + VkDeviceSize occupied = buf->head >= buf->tail + ? 
buf->head - buf->tail + : buf->buf_size - buf->tail + buf->head; + if (occupied > buf->peak_utilization) { + buf->peak_utilization = occupied; + } + + size_t completed = 0; + struct wlr_vk_stage_watermark *mark; + wl_array_for_each(mark, &buf->watermarks) { + if (mark->timeline_point > current_point) { + break; + } + buf->tail = mark->head; + completed++; + } + + if (completed > 0) { + completed *= sizeof(struct wlr_vk_stage_watermark); + if (completed == buf->watermarks.size) { + buf->watermarks.size = 0; + } else { + array_remove_at(&buf->watermarks, 0, completed); + } + } + + return buf->head == buf->tail; +} + +VkDeviceSize vulkan_stage_buffer_alloc(struct wlr_vk_stage_buffer *buf, + VkDeviceSize size, VkDeviceSize alignment) { + VkDeviceSize head = buf->head; + + // Round up to the next multiple of alignment + VkDeviceSize rem = head % alignment; + if (rem != 0) { + head += alignment - rem; + } + + VkDeviceSize end = head >= buf->tail ? buf->buf_size : buf->tail; + if (head + size < end) { + // Regular allocation head till end of available space + buf->head = head + size; + return head; + } else if (size < buf->tail && head >= buf->tail) { + // First allocation after wrap-around + buf->head = size; + return 0; + } + + return (VkDeviceSize)-1; +} + +struct wlr_vk_buffer_span vulkan_get_stage_span(struct wlr_vk_renderer *r, + VkDeviceSize size, VkDeviceSize alignment) { + if (size > max_stage_size) { + wlr_log(WLR_ERROR, "cannot allocate stage buffer: " + "requested size (%zu bytes) exceeds maximum (%zu bytes)", + (size_t)size, (size_t)max_stage_size); goto error; } - buf->buf_size = bsize; - wl_list_insert(&r->stage.buffers, &buf->link); + // Try to reclaim and allocate from the active buffer + struct wlr_vk_stage_buffer *buf; + VkDeviceSize max_buf_size = min_stage_size; + wl_list_for_each(buf, &r->stage.buffers, link) { + if (!buf->active) { + continue; + } + VkDeviceSize offset = vulkan_stage_buffer_alloc(buf, size, alignment); + if (offset != 
(VkDeviceSize)-1) { + return (struct wlr_vk_buffer_span) { + .buffer = buf, + .offset = offset, + .size = size, + }; + } + if (buf->buf_size > max_buf_size) { + max_buf_size = buf->buf_size; + } + + // Buffer is full, retire it + buf->active = false; + } + + VkDeviceSize bsize = max_buf_size * 2; + while (size * 2 > bsize) { + bsize *= 2; + } + if (bsize > max_stage_size) { + wlr_log(WLR_INFO, "vulkan stage buffer has reached max size"); + bsize = max_stage_size; + } + + struct wlr_vk_stage_buffer *new_buf = stage_buffer_create(r, bsize); + if (new_buf == NULL) { + goto error; + } + + wl_list_insert(&r->stage.buffers, &new_buf->link); + + VkDeviceSize offset = vulkan_stage_buffer_alloc(new_buf, size, alignment); + assert(offset != (VkDeviceSize)-1); - *a = (struct wlr_vk_allocation){ - .start = 0, - .size = size, - }; return (struct wlr_vk_buffer_span) { - .buffer = buf, - .alloc = *a, + .buffer = new_buf, + .offset = offset, + .size = size, }; error: - shared_buffer_destroy(r, buf); - -error_alloc: return (struct wlr_vk_buffer_span) { .buffer = NULL, - .alloc = (struct wlr_vk_allocation) {0, 0}, + .offset = 0, + .size = 0, }; } +void vulkan_return_stage_span(struct wlr_vk_buffer_span *span, VkDeviceSize return_size) { + assert(return_size <= span->size); + if (span->buffer->head == span->offset + span->size) { + // Only the most recent allocation can be shrunk: check that the buffer + // head still points at the end of our span, then move it back + span->size -= return_size; + span->buffer->head = span->offset + span->size; + } +} + +void vulkan_stage_mark_submit(struct wlr_vk_renderer *renderer, + uint64_t timeline_point) { + struct wlr_vk_stage_buffer *buf; + wl_list_for_each(buf, &renderer->stage.buffers, link) { + if (buf->head == buf->tail) { + continue; + } + + struct wlr_vk_stage_watermark *mark = wl_array_add( + &buf->watermarks, sizeof(*mark)); + if (mark == NULL) { + wlr_log_errno(WLR_ERROR, "Allocation failed"); + continue; + } + + *mark = (struct wlr_vk_stage_watermark){ + .head = buf->head, + .timeline_point = 
timeline_point, + }; + } +} + +static void vulkan_stage_buffer_gc(struct wlr_vk_renderer *renderer, uint64_t current_point) { + struct wlr_vk_stage_buffer *buf, *buf_tmp; + wl_list_for_each_safe(buf, buf_tmp, &renderer->stage.buffers, link) { + if (!vulkan_stage_buffer_reclaim(buf, current_point)) { + // There are active allocations on this buffer + continue; + } + if (!buf->active) { + stage_buffer_destroy(renderer, buf); + continue; + } + if (buf->buf_size < min_stage_size * 2) { + // We will neither shrink nor deallocate the first buffer + continue; + } + + // Note: We use 1/4th as the underutilization threshold, and when + // underutilized for 100 GC runs we cut the buffer size in half + if (buf->peak_utilization > buf->buf_size / 4) { + buf->underutil_count = 0; + } else { + buf->underutil_count++; + } + buf->peak_utilization = 0; + + if (buf->underutil_count < 100) { + continue; + } + + struct wlr_vk_stage_buffer *shrunk = stage_buffer_create(renderer, buf->buf_size / 2); + if (shrunk == NULL) { + // We'll just keep using the old buffer for now + continue; + } + + wl_list_insert(&renderer->stage.buffers, &shrunk->link); + stage_buffer_destroy(renderer, buf); + } +} + VkCommandBuffer vulkan_record_stage_cb(struct wlr_vk_renderer *renderer) { if (renderer->stage.cb == NULL) { renderer->stage.cb = vulkan_acquire_command_buffer(renderer); @@ -465,16 +571,21 @@ bool vulkan_submit_stage_wait(struct wlr_vk_renderer *renderer, int wait_sync_fi submit_info.pWaitDstStageMask = &wait_stage; } + vulkan_stage_mark_submit(renderer, timeline_point); + VkResult res = vkQueueSubmit(renderer->dev->queue, 1, &submit_info, VK_NULL_HANDLE); if (res != VK_SUCCESS) { wlr_vk_error("vkQueueSubmit", res); return false; } - // NOTE: don't release stage allocations here since they may still be - // used for reading. Will be done next frame. 
+	if (!vulkan_wait_command_buffer(cb, renderer)) {
+		return false;
+	}
 
-	return vulkan_wait_command_buffer(cb, renderer);
+	// We did a blocking wait so this is now the current point
+	vulkan_stage_buffer_gc(renderer, timeline_point);
+	return true;
 }
 
 struct wlr_vk_format_props *vulkan_format_props_from_drm(
@@ -508,7 +619,6 @@ static bool init_command_buffer(struct wlr_vk_command_buffer *cb,
 		.vk = vk_cb,
 	};
 	wl_list_init(&cb->destroy_textures);
-	wl_list_init(&cb->stage_buffers);
 	return true;
 }
 
@@ -534,7 +644,7 @@ bool vulkan_wait_command_buffer(struct wlr_vk_command_buffer *cb,
 }
 
 static void release_command_buffer_resources(struct wlr_vk_command_buffer *cb,
-		struct wlr_vk_renderer *renderer, int64_t now) {
+		struct wlr_vk_renderer *renderer) {
 	struct wlr_vk_texture *texture, *texture_tmp;
 	wl_list_for_each_safe(texture, texture_tmp, &cb->destroy_textures, destroy_link) {
 		wl_list_remove(&texture->destroy_link);
@@ -542,15 +652,6 @@ static void release_command_buffer_resources(struct wlr_vk_command_buffer *cb,
 		wlr_texture_destroy(&texture->wlr_texture);
 	}
 
-	struct wlr_vk_shared_buffer *buf, *buf_tmp;
-	wl_list_for_each_safe(buf, buf_tmp, &cb->stage_buffers, link) {
-		buf->allocs.size = 0;
-		buf->last_used_ms = now;
-
-		wl_list_remove(&buf->link);
-		wl_list_insert(&renderer->stage.buffers, &buf->link);
-	}
-
 	if (cb->color_transform) {
 		wlr_color_transform_unref(cb->color_transform);
 		cb->color_transform = NULL;
@@ -569,22 +670,14 @@ static struct wlr_vk_command_buffer *get_command_buffer(
 
 		return NULL;
 	}
-
-	// Garbage collect any buffers that have remained unused for too long
-	int64_t now = get_current_time_msec();
-	struct wlr_vk_shared_buffer *buf, *buf_tmp;
-	wl_list_for_each_safe(buf, buf_tmp, &renderer->stage.buffers, link) {
-		if (buf->allocs.size == 0 && buf->last_used_ms + 10000 < now) {
-			shared_buffer_destroy(renderer, buf);
-		}
-	}
+	vulkan_stage_buffer_gc(renderer, current_point);
 
 	// Destroy textures for completed command buffers
 	for (size_t i = 0; i < VULKAN_COMMAND_BUFFERS_CAP; i++) {
 		struct wlr_vk_command_buffer *cb = &renderer->command_buffers[i];
 		if (cb->vk != VK_NULL_HANDLE && !cb->recording &&
 				cb->timeline_point <= current_point) {
-			release_command_buffer_resources(cb, renderer, now);
+			release_command_buffer_resources(cb, renderer);
 		}
 	}
 
@@ -1187,7 +1280,7 @@ static void vulkan_destroy(struct wlr_renderer *wlr_renderer) {
 		if (cb->vk == VK_NULL_HANDLE) {
 			continue;
 		}
-		release_command_buffer_resources(cb, renderer, 0);
+		release_command_buffer_resources(cb, renderer);
 		if (cb->binary_semaphore != VK_NULL_HANDLE) {
 			vkDestroySemaphore(renderer->dev->dev, cb->binary_semaphore, NULL);
 		}
@@ -1199,9 +1292,9 @@ static void vulkan_destroy(struct wlr_renderer *wlr_renderer) {
 	}
 
 	// stage.cb automatically freed with command pool
-	struct wlr_vk_shared_buffer *buf, *tmp_buf;
+	struct wlr_vk_stage_buffer *buf, *tmp_buf;
 	wl_list_for_each_safe(buf, tmp_buf, &renderer->stage.buffers, link) {
-		shared_buffer_destroy(renderer, buf);
+		stage_buffer_destroy(renderer, buf);
 	}
 
 	struct wlr_vk_texture *tex, *tex_tmp;
@@ -1838,6 +1931,25 @@ static bool pipeline_key_equals(const struct wlr_vk_pipeline_key *a,
 	return true;
 }
 
+static const VkVertexInputBindingDescription instance_vert_binding = {
+	.binding = 0,
+	.stride = sizeof(float) * 4,
+	.inputRate = VK_VERTEX_INPUT_RATE_INSTANCE,
+};
+static const VkVertexInputAttributeDescription instance_vert_attr = {
+	.location = 0,
+	.binding = 0,
+	.format = VK_FORMAT_R32G32B32A32_SFLOAT,
+	.offset = 0,
+};
+static const VkPipelineVertexInputStateCreateInfo instance_vert_input = {
+	.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
+	.vertexBindingDescriptionCount = 1,
+	.pVertexBindingDescriptions = &instance_vert_binding,
+	.vertexAttributeDescriptionCount = 1,
+	.pVertexAttributeDescriptions = &instance_vert_attr,
+};
+
 // Initializes the pipeline for rendering textures and using the given
 // VkRenderPass and VkPipelineLayout.
 struct wlr_vk_pipeline *setup_get_or_create_pipeline(
@@ -1969,10 +2081,6 @@ struct wlr_vk_pipeline *setup_get_or_create_pipeline(
 		.dynamicStateCount = sizeof(dyn_states) / sizeof(dyn_states[0]),
 	};
 
-	VkPipelineVertexInputStateCreateInfo vertex = {
-		.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
-	};
-
 	VkGraphicsPipelineCreateInfo pinfo = {
 		.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
 		.layout = pipeline_layout->vk,
@@ -1987,7 +2095,7 @@ struct wlr_vk_pipeline *setup_get_or_create_pipeline(
 		.pMultisampleState = &multisample,
 		.pViewportState = &viewport,
 		.pDynamicState = &dynamic,
-		.pVertexInputState = &vertex,
+		.pVertexInputState = &instance_vert_input,
 	};
 
 	VkPipelineCache cache = VK_NULL_HANDLE;
@@ -2086,10 +2194,6 @@ static bool init_blend_to_output_pipeline(struct wlr_vk_renderer *renderer,
 		.dynamicStateCount = sizeof(dyn_states) / sizeof(dyn_states[0]),
 	};
 
-	VkPipelineVertexInputStateCreateInfo vertex = {
-		.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
-	};
-
 	VkGraphicsPipelineCreateInfo pinfo = {
 		.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
 		.pNext = NULL,
@@ -2104,7 +2208,7 @@ static bool init_blend_to_output_pipeline(struct wlr_vk_renderer *renderer,
 		.pMultisampleState = &multisample,
 		.pViewportState = &viewport,
 		.pDynamicState = &dynamic,
-		.pVertexInputState = &vertex,
+		.pVertexInputState = &instance_vert_input,
 	};
 
 	VkPipelineCache cache = VK_NULL_HANDLE;
diff --git a/render/vulkan/shaders/common.vert b/render/vulkan/shaders/common.vert
index f1579790d..82ea9658c 100644
--- a/render/vulkan/shaders/common.vert
+++ b/render/vulkan/shaders/common.vert
@@ -8,11 +8,14 @@ layout(push_constant, row_major) uniform UBO {
 	vec2 uv_size;
 } data;
 
+layout(location = 0) in vec4 inst_rect;
+
 layout(location = 0) out vec2 uv;
 
 void main() {
 	vec2 pos = vec2(float((gl_VertexIndex + 1) & 2) * 0.5f,
 		float(gl_VertexIndex & 2) * 0.5f);
+	pos = inst_rect.xy + pos * inst_rect.zw;
 	uv = data.uv_offset + pos * data.uv_size;
 	gl_Position = data.proj * vec4(pos, 0.0, 1.0);
 }
diff --git a/render/vulkan/texture.c b/render/vulkan/texture.c
index c6365c90b..9298de804 100644
--- a/render/vulkan/texture.c
+++ b/render/vulkan/texture.c
@@ -72,16 +72,16 @@ static bool write_pixels(struct wlr_vk_texture *texture,
 	// get staging buffer
 	struct wlr_vk_buffer_span span = vulkan_get_stage_span(renderer, bsize,
 		format_info->bytes_per_block);
-	if (!span.buffer || span.alloc.size != bsize) {
+	if (!span.buffer || span.size != bsize) {
 		wlr_log(WLR_ERROR, "Failed to retrieve staging buffer");
 		free(copies);
 		return false;
 	}
 
-	char *map = (char*)span.buffer->cpu_mapping + span.alloc.start;
+	char *map = (char*)span.buffer->cpu_mapping + span.offset;
 
 	// upload data
-	uint32_t buf_off = span.alloc.start;
+	uint32_t buf_off = span.offset;
 	for (int i = 0; i < rects_len; i++) {
 		pixman_box32_t rect = rects[i];
 		uint32_t width = rect.x2 - rect.x1;
diff --git a/test/meson.build b/test/meson.build
index f51b2c02c..9c622e3ef 100644
--- a/test/meson.build
+++ b/test/meson.build
@@ -1,8 +1,30 @@
+# Used to test internal symbols
+lib_wlr_internal = static_library(
+	versioned_name + '-internal',
+	objects: lib_wlr.extract_all_objects(recursive: false),
+	dependencies: wlr_deps,
+	include_directories: [wlr_inc],
+	install: false,
+)
+
 test(
 	'box',
 	executable('test-box', 'test_box.c', dependencies: wlroots),
 )
 
+if features.get('vulkan-renderer')
+	test(
+		'vulkan_stage_buffer',
+		executable(
+			'test-vulkan-stage-buffer',
+			'test_vulkan_stage_buffer.c',
+			link_with: lib_wlr_internal,
+			dependencies: wlr_deps,
+			include_directories: wlr_inc,
+		),
+	)
+endif
+
 benchmark(
 	'scene',
 	executable('bench-scene', 'bench_scene.c', dependencies: wlroots),
diff --git a/test/test_vulkan_stage_buffer.c b/test/test_vulkan_stage_buffer.c
new file mode 100644
index 000000000..cceefa8de
--- /dev/null
+++ b/test/test_vulkan_stage_buffer.c
@@ -0,0 +1,234 @@
+#include <assert.h>
+#include <stdint.h>
+#include <stdio.h>
+
+#include "render/vulkan.h"
+
+#define BUF_SIZE 1024
+#define ALLOC_FAIL ((VkDeviceSize)-1)
+
+static void stage_buffer_init(struct wlr_vk_stage_buffer *buf) {
+	*buf = (struct wlr_vk_stage_buffer){
+		.buf_size = BUF_SIZE,
+	};
+	wl_array_init(&buf->watermarks);
+}
+
+static void stage_buffer_finish(struct wlr_vk_stage_buffer *buf) {
+	wl_array_release(&buf->watermarks);
+}
+
+static void push_watermark(struct wlr_vk_stage_buffer *buf,
+		uint64_t timeline_point) {
+	struct wlr_vk_stage_watermark *mark = wl_array_add(
+		&buf->watermarks, sizeof(*mark));
+	assert(mark != NULL);
+	*mark = (struct wlr_vk_stage_watermark){
+		.head = buf->head,
+		.timeline_point = timeline_point,
+	};
+}
+
+static size_t watermark_count(const struct wlr_vk_stage_buffer *buf) {
+	return buf->watermarks.size / sizeof(struct wlr_vk_stage_watermark);
+}
+
+static void test_alloc_simple(void) {
+	struct wlr_vk_stage_buffer buf;
+	stage_buffer_init(&buf);
+
+	assert(vulkan_stage_buffer_alloc(&buf, 100, 1) == 0);
+	assert(buf.head == 100);
+	assert(vulkan_stage_buffer_alloc(&buf, 200, 1) == 100);
+	assert(buf.head == 300);
+	assert(buf.tail == 0);
+
+	stage_buffer_finish(&buf);
+}
+
+static void test_alloc_alignment(void) {
+	struct wlr_vk_stage_buffer buf;
+	stage_buffer_init(&buf);
+
+	assert(vulkan_stage_buffer_alloc(&buf, 7, 1) == 0);
+	assert(buf.head == 7);
+
+	assert(vulkan_stage_buffer_alloc(&buf, 4, 16) == 16);
+	assert(buf.head == 20);
+
+	assert(vulkan_stage_buffer_alloc(&buf, 8, 8) == 24);
+	assert(buf.head == 32);
+
+	stage_buffer_finish(&buf);
+}
+
+static void test_alloc_limit(void) {
+	struct wlr_vk_stage_buffer buf;
+	stage_buffer_init(&buf);
+
+	// We do not allow allocations that would cause head to equal tail
+	assert(vulkan_stage_buffer_alloc(&buf, BUF_SIZE, 1) == ALLOC_FAIL);
+	assert(buf.head == 0);
+
+	assert(vulkan_stage_buffer_alloc(&buf, BUF_SIZE-1, 1) == 0);
+	assert(buf.head == BUF_SIZE-1);
+
+	stage_buffer_finish(&buf);
+}
+
+static void test_alloc_wrap(void) {
+	struct wlr_vk_stage_buffer buf;
+	stage_buffer_init(&buf);
+
+	// Fill the first 924 bytes
+	assert(vulkan_stage_buffer_alloc(&buf, BUF_SIZE - 100, 1) == 0);
+	push_watermark(&buf, 1);
+
+	// Fill the end of the buffer
+	assert(vulkan_stage_buffer_alloc(&buf, 50, 1) == 924);
+	push_watermark(&buf, 2);
+
+	// First, check that we don't wrap prematurely
+	assert(vulkan_stage_buffer_alloc(&buf, 50, 1) == ALLOC_FAIL);
+	assert(vulkan_stage_buffer_alloc(&buf, 100, 1) == ALLOC_FAIL);
+
+	// Free the beginning of the buffer and try to wrap again
+	vulkan_stage_buffer_reclaim(&buf, 1);
+	assert(vulkan_stage_buffer_alloc(&buf, 50, 1) == 0);
+	assert(buf.tail == 924);
+	assert(buf.head == 50);
+
+	// Check that freeing from the end of the buffer still works
+	vulkan_stage_buffer_reclaim(&buf, 2);
+	assert(buf.tail == 974);
+	assert(buf.head == 50);
+
+	// Check that allocations still work
+	assert(vulkan_stage_buffer_alloc(&buf, 100, 1) == 50);
+	assert(buf.tail == 974);
+	assert(buf.head == 150);
+
+	stage_buffer_finish(&buf);
+}
+
+static void test_reclaim_empty(void) {
+	struct wlr_vk_stage_buffer buf;
+	stage_buffer_init(&buf);
+
+	// Fresh buffer with no watermarks and head == tail == 0 is drained.
+	assert(vulkan_stage_buffer_reclaim(&buf, 0));
+	assert(buf.tail == 0);
+
+	stage_buffer_finish(&buf);
+}
+
+static void test_reclaim_pending_not_completed(void) {
+	struct wlr_vk_stage_buffer buf;
+	stage_buffer_init(&buf);
+
+	assert(vulkan_stage_buffer_alloc(&buf, 100, 1) == 0);
+	push_watermark(&buf, 1);
+
+	// current point hasn't reached the watermark yet.
+	assert(!vulkan_stage_buffer_reclaim(&buf, 0));
+	assert(buf.tail == 0);
+	assert(watermark_count(&buf) == 1);
+
+	stage_buffer_finish(&buf);
+}
+
+static void test_reclaim_partial(void) {
+	struct wlr_vk_stage_buffer buf;
+	stage_buffer_init(&buf);
+
+	assert(vulkan_stage_buffer_alloc(&buf, 100, 1) == 0);
+	push_watermark(&buf, 1);
+	assert(vulkan_stage_buffer_alloc(&buf, 100, 1) == 100);
+	push_watermark(&buf, 2);
+
+	// Only the first watermark is reached.
+	assert(!vulkan_stage_buffer_reclaim(&buf, 1));
+	assert(buf.tail == 100);
+	assert(watermark_count(&buf) == 1);
+
+	const struct wlr_vk_stage_watermark *remaining = buf.watermarks.data;
+	assert(remaining[0].head == 200);
+	assert(remaining[0].timeline_point == 2);
+
+	stage_buffer_finish(&buf);
+}
+
+static void test_reclaim_all(void) {
+	struct wlr_vk_stage_buffer buf;
+	stage_buffer_init(&buf);
+
+	assert(vulkan_stage_buffer_alloc(&buf, 100, 1) == 0);
+	push_watermark(&buf, 1);
+	assert(vulkan_stage_buffer_alloc(&buf, 100, 1) == 100);
+	push_watermark(&buf, 2);
+	assert(vulkan_stage_buffer_alloc(&buf, 100, 1) == 200);
+	push_watermark(&buf, 3);
+
+	assert(vulkan_stage_buffer_reclaim(&buf, 100));
+	assert(buf.tail == 300);
+	assert(watermark_count(&buf) == 0);
+
+	stage_buffer_finish(&buf);
+}
+
+
+static void test_peak_utilization(void) {
+	struct wlr_vk_stage_buffer buf;
+	stage_buffer_init(&buf);
+
+	assert(buf.peak_utilization == 0);
+	assert(vulkan_stage_buffer_alloc(&buf, 100, 1) == 0);
+	assert(vulkan_stage_buffer_alloc(&buf, 200, 1) == 100);
+	vulkan_stage_buffer_reclaim(&buf, 0);
+	assert(buf.peak_utilization == 300);
+
+
+	stage_buffer_finish(&buf);
+}
+
+static void test_peak_utilization_wrap(void) {
+	struct wlr_vk_stage_buffer buf;
+	stage_buffer_init(&buf);
+
+	// 200 bytes used, 100 bytes from wrap
+	buf.head = BUF_SIZE - 100;
+	buf.tail = buf.head - 200;
+
+	// With 100 bytes left, we wrap to front and waste 100 bytes
+	assert(vulkan_stage_buffer_alloc(&buf, 200, 1) == 0);
+	vulkan_stage_buffer_reclaim(&buf, 0);
+	assert(buf.head == 200);
+	assert(buf.tail == BUF_SIZE - 300);
+
+	// 200 bytes initial + 100 bytes wasted + 200 bytes allocated = 500
+	assert(buf.peak_utilization == 500);
+
+	stage_buffer_finish(&buf);
+}
+
+int main(void) {
+#ifdef NDEBUG
+	fprintf(stderr, "NDEBUG must be disabled for tests\n");
+	return 1;
+#endif
+
+	test_alloc_simple();
+	test_alloc_alignment();
+	test_alloc_limit();
+	test_alloc_wrap();
+
+	test_reclaim_empty();
+	test_reclaim_pending_not_completed();
+	test_reclaim_partial();
+	test_reclaim_all();
+
+	test_peak_utilization();
+	test_peak_utilization_wrap();
+
+	return 0;
+}