Merge branch 'vk-upload-thread' into 'master'

Draft: render/vulkan: add upload thread

See merge request wlroots/wlroots!4454
This commit is contained in:
Simon Ser 2024-01-25 11:00:38 +00:00
commit 9e53f89c1b
21 changed files with 378 additions and 88 deletions

View file

@ -46,6 +46,7 @@ wlr_files += files(
)
wlr_deps += dep_vulkan
wlr_deps += dependency('threads')
features += { 'vulkan-renderer': true }
subdir('shaders')

View file

@ -300,27 +300,38 @@ static bool render_pass_submit(struct wlr_render_pass *wlr_pass) {
.semaphore = renderer->timeline_semaphore,
.value = stage_timeline_point,
};
VkSubmitInfo2KHR stage_submit = {
.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO_2_KHR,
.commandBufferInfoCount = 1,
.pCommandBufferInfos = &stage_cb_info,
.signalSemaphoreInfoCount = 1,
.pSignalSemaphoreInfos = &stage_signal,
};
VkSemaphoreSubmitInfoKHR stage_wait;
VkSemaphoreSubmitInfoKHR stage_wait[2];
uint32_t stage_wait_len = 0;
if (renderer->upload_timeline_point > 0) {
stage_wait[stage_wait_len++] = (VkSemaphoreSubmitInfoKHR){
.sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO_KHR,
.semaphore = renderer->upload_timeline_semaphore,
.value = renderer->upload_timeline_point,
.stageMask = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT_KHR,
};
}
if (renderer->stage.last_timeline_point > 0) {
stage_wait = (VkSemaphoreSubmitInfoKHR){
stage_wait[stage_wait_len++] = (VkSemaphoreSubmitInfoKHR){
.sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO_KHR,
.semaphore = renderer->timeline_semaphore,
.value = renderer->stage.last_timeline_point,
.stageMask = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT_KHR,
};
stage_submit.waitSemaphoreInfoCount = 1;
stage_submit.pWaitSemaphoreInfos = &stage_wait;
}
VkSubmitInfo2KHR stage_submit = {
.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO_2_KHR,
.commandBufferInfoCount = 1,
.pCommandBufferInfos = &stage_cb_info,
.waitSemaphoreInfoCount = stage_wait_len,
.pWaitSemaphoreInfos = stage_wait,
.signalSemaphoreInfoCount = 1,
.pSignalSemaphoreInfos = &stage_signal,
};
renderer->stage.last_timeline_point = stage_timeline_point;
uint64_t render_timeline_point = vulkan_end_command_buffer(render_cb, renderer);

View file

@ -178,6 +178,9 @@ static void shared_buffer_destroy(struct wlr_vk_renderer *r,
}
wl_array_release(&buffer->allocs);
if (buffer->map) {
vkUnmapMemory(r->dev->dev, buffer->memory);
}
if (buffer->buffer) {
vkDestroyBuffer(r->dev->dev, buffer->buffer, NULL);
}
@ -302,6 +305,12 @@ struct wlr_vk_buffer_span vulkan_get_stage_span(struct wlr_vk_renderer *r,
goto error;
}
res = vkMapMemory(r->dev->dev, buf->memory, 0, VK_WHOLE_SIZE, 0, &buf->map);
if (res != VK_SUCCESS) {
wlr_vk_error("vkMapMemory", res);
goto error;
}
struct wlr_vk_allocation *a = wl_array_add(&buf->allocs, sizeof(*a));
if (a == NULL) {
wlr_log_errno(WLR_ERROR, "Allocation failed");
@ -360,6 +369,7 @@ bool vulkan_submit_stage_wait(struct wlr_vk_renderer *renderer) {
return false;
}
// TODO
VkTimelineSemaphoreSubmitInfoKHR timeline_submit_info = {
.sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO_KHR,
.signalSemaphoreValueCount = 1,
@ -1048,6 +1058,7 @@ static void vulkan_destroy(struct wlr_renderer *wlr_renderer) {
}
vkDestroySemaphore(dev->dev, renderer->timeline_semaphore, NULL);
vkDestroySemaphore(dev->dev, renderer->upload_timeline_semaphore, NULL);
vkDestroyPipelineLayout(dev->dev, renderer->output_pipe_layout, NULL);
vkDestroyDescriptorSetLayout(dev->dev, renderer->output_ds_layout, NULL);
vkDestroyCommandPool(dev->dev, renderer->command_pool, NULL);
@ -2145,7 +2156,8 @@ error:
return NULL;
}
struct wlr_renderer *vulkan_renderer_create_for_device(struct wlr_vk_device *dev) {
struct wlr_renderer *vulkan_renderer_create_for_device(struct wlr_vk_device *dev,
struct wl_event_loop *loop) {
struct wlr_vk_renderer *renderer;
VkResult res;
if (!(renderer = calloc(1, sizeof(*renderer)))) {
@ -2195,6 +2207,16 @@ struct wlr_renderer *vulkan_renderer_create_for_device(struct wlr_vk_device *dev
wlr_vk_error("vkCreateSemaphore", res);
goto error;
}
res = vkCreateSemaphore(dev->dev, &semaphore_info, NULL,
&renderer->upload_timeline_semaphore);
if (res != VK_SUCCESS) {
wlr_vk_error("vkCreateSemaphore", res);
goto error;
}
if (!vulkan_init_upload_worker(renderer, loop)) {
goto error;
}
return &renderer->wlr_renderer;
@ -2203,7 +2225,8 @@ error:
return NULL;
}
struct wlr_renderer *wlr_vk_renderer_create_with_drm_fd(int drm_fd) {
struct wlr_renderer *wlr_vk_renderer_create_with_drm_fd(struct wl_event_loop *loop,
int drm_fd) {
wlr_log(WLR_INFO, "The vulkan renderer is only experimental and "
"not expected to be ready for daily use");
wlr_log(WLR_INFO, "Run with VK_INSTANCE_LAYERS=VK_LAYER_KHRONOS_validation "
@ -2238,7 +2261,7 @@ struct wlr_renderer *wlr_vk_renderer_create_with_drm_fd(int drm_fd) {
return NULL;
}
return vulkan_renderer_create_for_device(dev);
return vulkan_renderer_create_for_device(dev, loop);
}
VkInstance wlr_vk_renderer_get_instance(struct wlr_renderer *renderer) {

View file

@ -2,9 +2,11 @@
#include <assert.h>
#include <drm_fourcc.h>
#include <fcntl.h>
#include <signal.h>
#include <stdbool.h>
#include <stdlib.h>
#include <string.h>
#include <sys/socket.h>
#include <sys/stat.h>
#include <unistd.h>
#include <wlr/render/wlr_texture.h>
@ -14,6 +16,8 @@
#include "render/pixel_format.h"
#include "render/vulkan.h"
#include "util/time.h"
static const struct wlr_texture_impl texture_impl;
bool wlr_texture_is_vk(struct wlr_texture *wlr_texture) {
@ -36,15 +40,202 @@ static VkImageAspectFlagBits mem_plane_aspect(unsigned i) {
}
}
// Copy the pixels covered by each rectangle of `region` from the source
// buffer `vdata` (row pitch `stride`) into the staging mapping `vmap`.
// Rows are written back-to-back using the minimal packed stride for each
// rectangle's width, so the destination is exactly `size` bytes.
static void copy_pixels(char *vmap, const char *vdata, uint32_t tex_width,
		uint32_t stride, uint32_t size, const pixman_region32_t *region,
		const struct wlr_pixel_format_info *format_info) {
	int nrects = 0;
	const pixman_box32_t *boxes = pixman_region32_rectangles(region, &nrects);
	char *dst = vmap;
	for (int r = 0; r < nrects; r++) {
		const pixman_box32_t *box = &boxes[r];
		uint32_t w = box->x2 - box->x1;
		uint32_t h = box->y2 - box->y1;
		uint32_t packed_stride =
			(uint32_t)pixel_format_info_min_stride(format_info, w);

		// Locate the top-left corner of this rectangle in the source buffer
		const char *src = vdata + stride * (uint32_t)box->y1 +
			format_info->bytes_per_block * (uint32_t)box->x1;

		if (box->x1 == 0 && w == tex_width && stride == packed_stride) {
			// Fast path: full-width rows with matching pitch are contiguous
			// in both source and destination, so copy the whole rect at once
			memcpy(dst, src, packed_stride * h);
			dst += packed_stride * h;
		} else {
			// Repack row by row to the minimal stride
			for (uint32_t y = 0; y < h; y++) {
				memcpy(dst, src, packed_stride);
				src += stride;
				dst += packed_stride;
			}
		}
	}
	// The caller sized the staging span to hold exactly the packed region
	assert((uint32_t)(dst - vmap) == size);
}
// Read one upload task from the socketpair end `fd`, retrying on EINTR and
// reassembling short reads: SOCK_STREAM sockets do not preserve message
// boundaries, so a task may legitimately arrive in multiple chunks (the
// previous code treated any partial read as a fatal protocol error).
// Returns false on EOF (peer closed its end) or on a read error.
static bool read_upload_task(struct wlr_vk_upload_task *task, int fd) {
	char *buf = (char *)task;
	size_t off = 0;
	while (off < sizeof(*task)) {
		ssize_t n = read(fd, buf + off, sizeof(*task) - off);
		if (n < 0) {
			if (errno == EINTR) {
				continue;
			}
			wlr_log_errno(WLR_ERROR, "read() failed");
			return false;
		}
		if (n == 0) {
			// EOF: clean shutdown unless it happens mid-struct
			if (off > 0) {
				wlr_log(WLR_ERROR, "Unexpected EOF in the middle of a task");
			}
			return false;
		}
		off += (size_t)n;
	}
	return true;
}
// Write one upload task to the socketpair end `fd`, retrying on EINTR and
// resuming after short writes: write() on a stream socket may transfer fewer
// bytes than requested (the previous code treated a partial write as fatal,
// which would desynchronize the task protocol).
// Returns false on a write error.
static bool write_upload_task(const struct wlr_vk_upload_task *task, int fd) {
	const char *buf = (const char *)task;
	size_t off = 0;
	while (off < sizeof(*task)) {
		ssize_t n = write(fd, buf + off, sizeof(*task) - off);
		if (n < 0) {
			if (errno == EINTR) {
				continue;
			}
			wlr_log_errno(WLR_ERROR, "write() failed");
			return false;
		}
		off += (size_t)n;
	}
	return true;
}
// Executed on the upload worker thread: copy the client's pixels into the
// mapped staging buffer, then signal the upload timeline semaphore at the
// task's point so GPU work waiting on it can proceed.
static void process_upload_task(struct wlr_vk_renderer *renderer,
		struct wlr_vk_upload_task *task) {
	struct timespec ts;
	clock_gettime(CLOCK_MONOTONIC, &ts);
	int64_t start = timespec_to_nsec(&ts);

	copy_pixels(task->dst, task->src, task->buffer->width, task->src_stride,
		task->dst_size, &task->region, task->format_info);

	clock_gettime(CLOCK_MONOTONIC, &ts);
	int64_t dur_ns = timespec_to_nsec(&ts) - start;
	wlr_log(WLR_INFO, "UPLOAD: %f ms", (double)dur_ns / 1000 / 1000);

	VkSemaphoreSignalInfoKHR signal_info = {
		.sType = VK_STRUCTURE_TYPE_SEMAPHORE_SIGNAL_INFO_KHR,
		.semaphore = renderer->upload_timeline_semaphore,
		.value = task->timeline_point,
	};
	VkResult res = renderer->dev->api.vkSignalSemaphoreKHR(renderer->dev->dev, &signal_info);
	if (res != VK_SUCCESS) {
		// Fixed error label: the failing call is vkSignalSemaphoreKHR,
		// not vkMapMemory (copy-paste mistake)
		wlr_vk_error("vkSignalSemaphoreKHR", res);
	}
}
// Upload worker thread entry point: read tasks from the worker end of the
// socketpair, process each one, and echo it back to notify the main thread
// of completion. Exits when either direction fails or the peer closes.
static void *run_uploads(void *data) {
	struct wlr_vk_renderer *renderer = data;
	struct wlr_vk_upload_task task;
	for (;;) {
		memset(&task, 0, sizeof(task));
		bool ok = read_upload_task(&task, renderer->upload.worker_fd);
		if (ok) {
			process_upload_task(renderer, &task);
			ok = write_upload_task(&task, renderer->upload.worker_fd);
		}
		if (!ok) {
			break;
		}
	}
	close(renderer->upload.worker_fd);
	return NULL;
}
// Runs on the main thread once the worker has finished copying a task:
// releases the source buffer's data-pointer access and lock, frees the
// damage region, and logs the total wall-clock duration of the upload.
static void handle_upload_task_complete(struct wlr_vk_renderer *renderer,
		struct wlr_vk_upload_task *task) {
	wlr_buffer_end_data_ptr_access(task->buffer);
	wlr_buffer_unlock(task->buffer);
	pixman_region32_fini(&task->region);

	struct timespec now;
	clock_gettime(CLOCK_MONOTONIC, &now);
	int64_t elapsed_ns = timespec_to_nsec(&now) - task->start;
	wlr_log(WLR_INFO, "TOTAL: %f ms", (double)elapsed_ns / 1000 / 1000);
}
// Event-loop callback for the main-thread (control) end of the socketpair:
// a readable event means the worker finished a task and echoed it back.
static int handle_upload_fd_event(int fd, uint32_t mask, void *data) {
	struct wlr_vk_renderer *renderer = data;

	if (mask & WL_EVENT_ERROR) {
		wlr_log(WLR_ERROR, "Upload worker FD error");
		return 0;
	}
	if (mask & WL_EVENT_HANGUP) {
		// Worker side closed; nothing left to collect
		return 0;
	}
	if (!(mask & WL_EVENT_READABLE)) {
		return 0;
	}

	struct wlr_vk_upload_task task = {0};
	if (read_upload_task(&task, fd)) {
		handle_upload_task_complete(renderer, &task);
	}
	return 0;
}
// Spawn the upload worker thread and connect it to the compositor event loop
// via a socketpair: control_fd stays on the main thread, worker_fd belongs to
// the worker. Returns false and cleans up on any failure.
bool vulkan_init_upload_worker(struct wlr_vk_renderer *renderer,
		struct wl_event_loop *loop) {
	int sockets[2];
	// SOCK_CLOEXEC so the upload fds don't leak into child processes
	if (socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, sockets) != 0) {
		// Fixed message: this is socketpair(), not pipe()
		wlr_log_errno(WLR_ERROR, "socketpair() failed");
		return false;
	}
	renderer->upload.worker_fd = sockets[0];
	renderer->upload.control_fd = sockets[1];

	renderer->upload.event_source = wl_event_loop_add_fd(loop,
		renderer->upload.control_fd, WL_EVENT_READABLE,
		handle_upload_fd_event, renderer);
	if (renderer->upload.event_source == NULL) {
		wlr_log(WLR_ERROR, "wl_event_loop_add_fd() failed");
		goto error_fds;
	}

	// Block all signals in the new thread: let the main thread handle these
	sigset_t saved_sigset, new_sigset;
	sigfillset(&new_sigset);
	pthread_sigmask(SIG_BLOCK, &new_sigset, &saved_sigset);
	int ret = pthread_create(&renderer->upload.thread, NULL, run_uploads, renderer);
	pthread_sigmask(SIG_SETMASK, &saved_sigset, NULL);
	if (ret != 0) {
		// pthread_create() reports errors via its return value and leaves
		// errno untouched, so wlr_log_errno() would print the wrong error
		wlr_log(WLR_ERROR, "pthread_create() failed: %s", strerror(ret));
		goto error_event_source;
	}

	return true;

error_event_source:
	wl_event_source_remove(renderer->upload.event_source);
error_fds:
	close(renderer->upload.worker_fd);
	close(renderer->upload.control_fd);
	return false;
}
// Will transition the texture to shaderReadOnlyOptimal layout for reading
// from fragment shader later on
static bool write_pixels(struct wlr_vk_texture *texture,
static bool start_upload(struct wlr_vk_texture *texture, struct wlr_buffer *buffer,
uint32_t stride, const pixman_region32_t *region, const void *vdata,
VkImageLayout old_layout, VkPipelineStageFlags src_stage,
VkAccessFlags src_access) {
VkResult res;
struct wlr_vk_renderer *renderer = texture->renderer;
VkDevice dev = texture->renderer->dev->dev;
struct timespec ts;
clock_gettime(CLOCK_MONOTONIC, &ts);
int64_t start = timespec_to_nsec(&ts);
const struct wlr_pixel_format_info *format_info = drm_get_pixel_format_info(texture->format->drm);
assert(format_info);
@ -81,19 +272,9 @@ static bool write_pixels(struct wlr_vk_texture *texture,
return false;
}
void *vmap;
res = vkMapMemory(dev, span.buffer->memory, span.alloc.start,
bsize, 0, &vmap);
if (res != VK_SUCCESS) {
wlr_vk_error("vkMapMemory", res);
free(copies);
return false;
}
char *map = (char *)vmap;
uint64_t timeline_point = ++renderer->upload_timeline_point;
// upload data
uint32_t buf_off = span.alloc.start + (map - (char *)vmap);
uint32_t buf_off = span.alloc.start;
for (int i = 0; i < rects_len; i++) {
pixman_box32_t rect = rects[i];
uint32_t width = rect.x2 - rect.x1;
@ -102,22 +283,6 @@ static bool write_pixels(struct wlr_vk_texture *texture,
uint32_t src_y = rect.y1;
uint32_t packed_stride = (uint32_t)pixel_format_info_min_stride(format_info, width);
// write data into staging buffer span
const char *pdata = vdata; // data iterator
pdata += stride * src_y;
pdata += format_info->bytes_per_block * src_x;
if (src_x == 0 && width == texture->wlr_texture.width &&
stride == packed_stride) {
memcpy(map, pdata, packed_stride * height);
map += packed_stride * height;
} else {
for (unsigned i = 0u; i < height; ++i) {
memcpy(map, pdata, packed_stride);
pdata += stride;
map += packed_stride;
}
}
copies[i] = (VkBufferImageCopy) {
.imageExtent.width = width,
.imageExtent.height = height,
@ -134,12 +299,34 @@ static bool write_pixels(struct wlr_vk_texture *texture,
.imageSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
};
buf_off += height * packed_stride;
}
assert((uint32_t)(map - (char *)vmap) == bsize);
vkUnmapMemory(dev, span.buffer->memory);
struct wlr_vk_upload_task task = {
.buffer = wlr_buffer_lock(buffer),
.memory = span.buffer->memory,
.timeline_point = timeline_point,
.dst = (char *)span.buffer->map + span.alloc.start,
.src = vdata,
.src_stride = stride,
.dst_size = bsize,
.format_info = format_info,
.start = start,
};
pixman_region32_init(&task.region);
pixman_region32_copy(&task.region, region);
#if 1
if (!write_upload_task(&task, renderer->upload.control_fd)) {
free(copies);
return false;
}
#else
process_upload_task(renderer, &task);
handle_upload_task_complete(renderer, &task);
#endif
clock_gettime(CLOCK_MONOTONIC, &ts);
start = timespec_to_nsec(&ts);
// record staging cb
// will be executed before next frame
@ -149,6 +336,10 @@ static bool write_pixels(struct wlr_vk_texture *texture,
return false;
}
clock_gettime(CLOCK_MONOTONIC, &ts);
int64_t dur_ns = timespec_to_nsec(&ts) - start;
wlr_log(WLR_INFO, "STARTUP: %f ms", (double)dur_ns / 1000 / 1000);
vulkan_change_layout(cb, texture->image,
old_layout, src_stage, src_access,
VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, VK_PIPELINE_STAGE_TRANSFER_BIT,
@ -156,6 +347,7 @@ static bool write_pixels(struct wlr_vk_texture *texture,
vkCmdCopyBufferToImage(cb, span.buffer->buffer, texture->image,
VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, (uint32_t)rects_len, copies);
vulkan_change_layout(cb, texture->image,
VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, VK_PIPELINE_STAGE_TRANSFER_BIT,
VK_ACCESS_TRANSFER_WRITE_BIT,
@ -180,19 +372,21 @@ static bool vulkan_texture_update_from_buffer(struct wlr_texture *wlr_texture,
return false;
}
bool ok = true;
if (format != texture->format->drm) {
ok = false;
goto out;
goto error;
}
ok = write_pixels(texture, stride, damage, data, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, VK_ACCESS_SHADER_READ_BIT);
if (!start_upload(texture, buffer, stride, damage, data,
VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, VK_ACCESS_SHADER_READ_BIT)) {
goto error;
}
out:
return true;
error:
wlr_buffer_end_data_ptr_access(buffer);
return ok;
return false;
}
void vulkan_texture_destroy(struct wlr_vk_texture *texture) {
@ -390,7 +584,8 @@ static void texture_set_format(struct wlr_vk_texture *texture,
}
static struct wlr_texture *vulkan_texture_from_pixels(
struct wlr_vk_renderer *renderer, uint32_t drm_fmt, uint32_t stride,
struct wlr_vk_renderer *renderer, struct wlr_buffer *buffer,
uint32_t drm_fmt, uint32_t stride,
uint32_t width, uint32_t height, const void *data) {
VkResult res;
VkDevice dev = renderer->dev->dev;
@ -476,7 +671,8 @@ static struct wlr_texture *vulkan_texture_from_pixels(
pixman_region32_t region;
pixman_region32_init_rect(&region, 0, 0, width, height);
if (!write_pixels(texture, stride, &region, data, VK_IMAGE_LAYOUT_UNDEFINED,
if (!start_upload(texture, buffer, stride, &region, data,
VK_IMAGE_LAYOUT_UNDEFINED,
VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0)) {
goto error;
}
@ -829,8 +1025,10 @@ struct wlr_texture *vulkan_texture_from_buffer(struct wlr_renderer *wlr_renderer
} else if (wlr_buffer_begin_data_ptr_access(buffer,
WLR_BUFFER_DATA_PTR_ACCESS_READ, &data, &format, &stride)) {
struct wlr_texture *tex = vulkan_texture_from_pixels(renderer,
format, stride, buffer->width, buffer->height, data);
wlr_buffer_end_data_ptr_access(buffer);
buffer, format, stride, buffer->width, buffer->height, data);
if (tex == NULL) {
wlr_buffer_end_data_ptr_access(buffer);
}
return tex;
} else {
return NULL;

View file

@ -617,6 +617,8 @@ struct wlr_vk_device *vulkan_device_create(struct wlr_vk_instance *ini,
load_device_proc(dev, "vkWaitSemaphoresKHR", &dev->api.vkWaitSemaphoresKHR);
load_device_proc(dev, "vkGetSemaphoreCounterValueKHR",
&dev->api.vkGetSemaphoreCounterValueKHR);
load_device_proc(dev, "vkSignalSemaphoreKHR",
&dev->api.vkSignalSemaphoreKHR);
load_device_proc(dev, "vkGetSemaphoreFdKHR", &dev->api.vkGetSemaphoreFdKHR);
load_device_proc(dev, "vkImportSemaphoreFdKHR", &dev->api.vkImportSemaphoreFdKHR);
load_device_proc(dev, "vkQueueSubmit2KHR", &dev->api.vkQueueSubmit2KHR);

View file

@ -221,7 +221,8 @@ static bool has_render_node(struct wlr_backend *backend) {
return has_render_node;
}
static struct wlr_renderer *renderer_autocreate(struct wlr_backend *backend, int drm_fd) {
static struct wlr_renderer *renderer_autocreate(struct wlr_backend *backend, int drm_fd,
struct wl_event_loop *loop) {
const char *renderer_options[] = {
"auto",
"gles2",
@ -258,7 +259,7 @@ static struct wlr_renderer *renderer_autocreate(struct wlr_backend *backend, int
log_creation_failure(is_auto, "Cannot create Vulkan renderer: no DRM FD available");
} else {
#if WLR_HAS_VULKAN_RENDERER
renderer = wlr_vk_renderer_create_with_drm_fd(drm_fd);
renderer = wlr_vk_renderer_create_with_drm_fd(loop, drm_fd);
#else
wlr_log(WLR_ERROR, "Cannot create Vulkan renderer: disabled at compile-time");
#endif
@ -289,14 +290,13 @@ out:
return renderer;
}
struct wlr_renderer *renderer_autocreate_with_drm_fd(int drm_fd) {
struct wlr_renderer *renderer_autocreate_with_drm_fd(int drm_fd, struct wl_event_loop *loop) {
assert(drm_fd >= 0);
return renderer_autocreate(NULL, drm_fd);
return renderer_autocreate(NULL, drm_fd, loop);
}
struct wlr_renderer *wlr_renderer_autocreate(struct wlr_backend *backend) {
return renderer_autocreate(backend, -1);
return renderer_autocreate(backend, -1, backend->event_loop);
}
int wlr_renderer_get_drm_fd(struct wlr_renderer *r) {