render/vulkan: Use instanced draws instead of scissors

Similar to what we have already done for gles2. To simplify things we
use the staging ring buffer for the vertex buffers by extending the
usage bits, rather than introducing a separate pool.

Signed-off-by: Kenny Levinsen <kl@kl.wtf>
This commit is contained in:
Kenny Levinsen 2026-04-12 17:47:40 +02:00 committed by Félix Poisot
parent 439258a43b
commit 8abe53d1d2
3 changed files with 113 additions and 39 deletions

View file

@ -2,7 +2,9 @@
#include <drm_fourcc.h>
#include <stdlib.h>
#include <unistd.h>
#include <wlr/util/box.h>
#include <wlr/util/log.h>
#include <wlr/util/transform.h>
#include <wlr/render/color.h>
#include <wlr/render/drm_syncobj.h>
@ -191,11 +193,13 @@ static bool render_pass_submit(struct wlr_render_pass *wlr_pass) {
int width = pass->render_buffer->wlr_buffer->width;
int height = pass->render_buffer->wlr_buffer->height;
float final_matrix[9] = {
width, 0, -1,
0, height, -1,
0, 0, 0,
};
struct wlr_box output_box = { 0, 0, width, height };
float proj[9], final_matrix[9];
wlr_matrix_identity(proj);
wlr_matrix_project_box(final_matrix, &output_box,
WL_OUTPUT_TRANSFORM_NORMAL, proj);
wlr_matrix_multiply(final_matrix, pass->projection, final_matrix);
struct wlr_vk_vert_pcr_data vert_pcr_data = {
.uv_off = { 0, 0 },
.uv_size = { 1, 1 },
@ -274,11 +278,28 @@ static bool render_pass_submit(struct wlr_render_pass *wlr_pass) {
int clip_rects_len;
const pixman_box32_t *clip_rects = pixman_region32_rectangles(
clip, &clip_rects_len);
if (clip_rects_len > 0) {
const VkDeviceSize instance_size = 4 * sizeof(float);
struct wlr_vk_buffer_span span = vulkan_get_stage_span(renderer,
clip_rects_len * instance_size, 16);
if (!span.buffer) {
pass->failed = true;
goto error;
}
float *instance_data = (float *)((char *)span.buffer->cpu_mapping + span.offset);
for (int i = 0; i < clip_rects_len; i++) {
VkRect2D rect;
convert_pixman_box_to_vk_rect(&clip_rects[i], &rect);
vkCmdSetScissor(render_cb->vk, 0, 1, &rect);
vkCmdDraw(render_cb->vk, 4, 1, 0, 0);
const pixman_box32_t *b = &clip_rects[i];
instance_data[i * 4 + 0] = (float)b->x1 / width;
instance_data[i * 4 + 1] = (float)b->y1 / height;
instance_data[i * 4 + 2] = (float)(b->x2 - b->x1) / width;
instance_data[i * 4 + 3] = (float)(b->y2 - b->y1) / height;
}
VkDeviceSize vb_offset = span.offset;
vkCmdBindVertexBuffers(render_cb->vk, 0, 1, &span.buffer->buffer, &vb_offset);
vkCmdDraw(render_cb->vk, 4, clip_rects_len, 0, 0);
}
}
@ -653,11 +674,6 @@ static void render_pass_add_rect(struct wlr_render_pass *wlr_pass,
return;
}
// Record regions possibly updated for use in second subpass
for (int i = 0; i < clip_rects_len; i++) {
render_pass_mark_box_updated(pass, &clip_rects[i]);
}
switch (options->blend_mode) {
case WLR_RENDER_BLEND_MODE_PREMULTIPLIED:;
float proj[9], matrix[9];
@ -676,6 +692,23 @@ static void render_pass_add_rect(struct wlr_render_pass *wlr_pass,
break;
}
const VkDeviceSize instance_size = 4 * sizeof(float);
struct wlr_vk_buffer_span span = vulkan_get_stage_span(pass->renderer,
clip_rects_len * instance_size, 16);
if (!span.buffer) {
pass->failed = true;
break;
}
float *instance_data = (float *)((char *)span.buffer->cpu_mapping + span.offset);
for (int i = 0; i < clip_rects_len; i++) {
const pixman_box32_t *rect = &clip_rects[i];
render_pass_mark_box_updated(pass, rect);
instance_data[i * 4 + 0] = (float)(rect->x1 - box.x) / box.width;
instance_data[i * 4 + 1] = (float)(rect->y1 - box.y) / box.height;
instance_data[i * 4 + 2] = (float)(rect->x2 - rect->x1) / box.width;
instance_data[i * 4 + 3] = (float)(rect->y2 - rect->y1) / box.height;
}
struct wlr_vk_vert_pcr_data vert_pcr_data = {
.uv_off = { 0, 0 },
.uv_size = { 1, 1 },
@ -689,12 +722,9 @@ static void render_pass_add_rect(struct wlr_render_pass *wlr_pass,
VK_SHADER_STAGE_FRAGMENT_BIT, sizeof(vert_pcr_data), sizeof(float) * 4,
linear_color);
for (int i = 0; i < clip_rects_len; i++) {
VkRect2D rect;
convert_pixman_box_to_vk_rect(&clip_rects[i], &rect);
vkCmdSetScissor(cb, 0, 1, &rect);
vkCmdDraw(cb, 4, 1, 0, 0);
}
VkDeviceSize vb_offset = span.offset;
vkCmdBindVertexBuffers(cb, 0, 1, &span.buffer->buffer, &vb_offset);
vkCmdDraw(cb, 4, clip_rects_len, 0, 0);
break;
case WLR_RENDER_BLEND_MODE_NONE:;
VkClearAttachment clear_att = {
@ -711,7 +741,9 @@ static void render_pass_add_rect(struct wlr_render_pass *wlr_pass,
.layerCount = 1,
};
for (int i = 0; i < clip_rects_len; i++) {
convert_pixman_box_to_vk_rect(&clip_rects[i], &clear_rect.rect);
const pixman_box32_t *rect = &clip_rects[i];
render_pass_mark_box_updated(pass, rect);
convert_pixman_box_to_vk_rect(rect, &clear_rect.rect);
vkCmdClearAttachments(cb, 1, &clear_att, 1, &clear_rect);
}
break;
@ -769,6 +801,15 @@ static void render_pass_add_texture(struct wlr_render_pass *wlr_pass,
return;
}
const VkDeviceSize instance_size = 4 * sizeof(float);
struct wlr_vk_buffer_span span = vulkan_get_stage_span(renderer,
clip_rects_len * instance_size, 16);
if (!span.buffer) {
pixman_region32_fini(&clip);
pass->failed = true;
return;
}
struct wlr_vk_vert_pcr_data vert_pcr_data = {
.uv_off = {
src_box.x / options->texture->width,
@ -885,17 +926,34 @@ static void render_pass_add_texture(struct wlr_render_pass *wlr_pass,
VK_SHADER_STAGE_FRAGMENT_BIT, sizeof(vert_pcr_data),
sizeof(frag_pcr_data), &frag_pcr_data);
float *instance_data = (float *)((char *)span.buffer->cpu_mapping + span.offset);
for (int i = 0; i < clip_rects_len; i++) {
VkRect2D rect;
convert_pixman_box_to_vk_rect(&clip_rects[i], &rect);
vkCmdSetScissor(cb, 0, 1, &rect);
vkCmdDraw(cb, 4, 1, 0, 0);
render_pass_mark_box_updated(pass, &clip_rects[i]);
const pixman_box32_t *rect = &clip_rects[i];
render_pass_mark_box_updated(pass, rect);
struct wlr_fbox norm = {
.x = (double)(rect->x1 - dst_box.x) / dst_box.width,
.y = (double)(rect->y1 - dst_box.y) / dst_box.height,
.width = (double)(rect->x2 - rect->x1) / dst_box.width,
.height = (double)(rect->y2 - rect->y1) / dst_box.height,
};
if (options->transform != WL_OUTPUT_TRANSFORM_NORMAL) {
wlr_fbox_transform(&norm, &norm, options->transform, 1.0, 1.0);
}
instance_data[i * 4 + 0] = (float)norm.x;
instance_data[i * 4 + 1] = (float)norm.y;
instance_data[i * 4 + 2] = (float)norm.width;
instance_data[i * 4 + 3] = (float)norm.height;
}
pixman_region32_fini(&clip);
texture->last_used_cb = pass->command_buffer;
VkDeviceSize vb_offset = span.offset;
vkCmdBindVertexBuffers(cb, 0, 1, &span.buffer->buffer, &vb_offset);
vkCmdDraw(cb, 4, clip_rects_len, 0, 0);
texture->last_used_cb = pass->command_buffer;
if (texture->dmabuf_imported || (options != NULL && options->wait_timeline != NULL)) {
struct wlr_vk_render_pass_texture *pass_texture =
@ -1290,6 +1348,7 @@ struct wlr_vk_render_pass *vulkan_begin_render_pass(struct wlr_vk_renderer *rend
.height = height,
.maxDepth = 1,
});
vkCmdSetScissor(cb->vk, 0, 1, &rect);
// matrix_projection() assumes a GL coordinate system so we need
// to pass WL_OUTPUT_TRANSFORM_FLIPPED_180 to adjust it for vulkan.

View file

@ -222,7 +222,8 @@ static struct wlr_vk_stage_buffer *stage_buffer_create(
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
.size = bsize,
.usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT |
VK_BUFFER_USAGE_TRANSFER_SRC_BIT,
VK_BUFFER_USAGE_TRANSFER_SRC_BIT |
VK_BUFFER_USAGE_VERTEX_BUFFER_BIT,
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
};
res = vkCreateBuffer(r->dev->dev, &buf_info, NULL, &buf->buffer);
@ -1881,6 +1882,25 @@ static bool pipeline_key_equals(const struct wlr_vk_pipeline_key *a,
return true;
}
// Per-instance vertex input description used for pipeline creation in this
// file. Binding 0 advances once per instance rather than once per vertex,
// and each element is four floats wide.
static const VkVertexInputBindingDescription instance_vert_binding = {
.binding = 0,
.stride = sizeof(float) * 4,
.inputRate = VK_VERTEX_INPUT_RATE_INSTANCE,
};
// Exposes the four floats of an instance as a single vec4 at shader input
// location 0, starting at the beginning of the element.
// NOTE(review): presumably this is the rectangle (x, y, width, height) that
// the vertex shader reads as `inst_rect` -- confirm against the shader.
static const VkVertexInputAttributeDescription instance_vert_attr = {
.location = 0,
.binding = 0,
.format = VK_FORMAT_R32G32B32A32_SFLOAT,
.offset = 0,
};
// Vertex input state combining the one binding and one attribute above;
// handed to VkGraphicsPipelineCreateInfo as pVertexInputState.
static const VkPipelineVertexInputStateCreateInfo instance_vert_input = {
.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
.vertexBindingDescriptionCount = 1,
.pVertexBindingDescriptions = &instance_vert_binding,
.vertexAttributeDescriptionCount = 1,
.pVertexAttributeDescriptions = &instance_vert_attr,
};
// Initializes the pipeline for rendering textures and using the given
// VkRenderPass and VkPipelineLayout.
struct wlr_vk_pipeline *setup_get_or_create_pipeline(
@ -2012,10 +2032,6 @@ struct wlr_vk_pipeline *setup_get_or_create_pipeline(
.dynamicStateCount = sizeof(dyn_states) / sizeof(dyn_states[0]),
};
VkPipelineVertexInputStateCreateInfo vertex = {
.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
};
VkGraphicsPipelineCreateInfo pinfo = {
.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
.layout = pipeline_layout->vk,
@ -2030,7 +2046,7 @@ struct wlr_vk_pipeline *setup_get_or_create_pipeline(
.pMultisampleState = &multisample,
.pViewportState = &viewport,
.pDynamicState = &dynamic,
.pVertexInputState = &vertex,
.pVertexInputState = &instance_vert_input,
};
VkPipelineCache cache = VK_NULL_HANDLE;
@ -2129,10 +2145,6 @@ static bool init_blend_to_output_pipeline(struct wlr_vk_renderer *renderer,
.dynamicStateCount = sizeof(dyn_states) / sizeof(dyn_states[0]),
};
VkPipelineVertexInputStateCreateInfo vertex = {
.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
};
VkGraphicsPipelineCreateInfo pinfo = {
.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
.pNext = NULL,
@ -2147,7 +2159,7 @@ static bool init_blend_to_output_pipeline(struct wlr_vk_renderer *renderer,
.pMultisampleState = &multisample,
.pViewportState = &viewport,
.pDynamicState = &dynamic,
.pVertexInputState = &vertex,
.pVertexInputState = &instance_vert_input,
};
VkPipelineCache cache = VK_NULL_HANDLE;

View file

@ -8,11 +8,14 @@ layout(push_constant, row_major) uniform UBO {
vec2 uv_size;
} data;
layout(location = 0) in vec4 inst_rect;
layout(location = 0) out vec2 uv;
// Vertex shader entry point: places one corner of the current instance's
// rectangle and writes the matching texture coordinate to `uv`.
void main() {
// Derive a unit-square corner from the vertex index alone:
// index 0 -> (0,0), 1 -> (1,0), 2 -> (1,1), 3 -> (0,1).
vec2 pos = vec2(float((gl_VertexIndex + 1) & 2) * 0.5f,
float(gl_VertexIndex & 2) * 0.5f);
// Map the unit corner into the per-instance rectangle:
// inst_rect.xy is the offset, inst_rect.zw is the size.
pos = inst_rect.xy + pos * inst_rect.zw;
// Texture coordinates use the same (already rect-adjusted) position,
// scaled and offset by the push-constant UV window.
uv = data.uv_offset + pos * data.uv_size;
// Project the 2D position with the push-constant matrix; z is fixed at 0.
gl_Position = data.proj * vec4(pos, 0.0, 1.0);
}