Merge branch 'render-pass-bench' into 'master'

test: Add Go-compatible render pass benchmark

See merge request wlroots/wlroots!5342
This commit is contained in:
Kenny Levinsen 2026-04-14 10:51:11 +00:00
commit d0557b55b8
2 changed files with 377 additions and 0 deletions

371
test/bench_render_pass.c Normal file
View file

@ -0,0 +1,371 @@
#include <assert.h>
#include <drm_fourcc.h>
#include <errno.h>
#include <getopt.h>
#include <limits.h>
#include <math.h>
#include <pixman.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <wlr/backend/headless.h>
#include <wlr/render/allocator.h>
#include <wlr/render/color.h>
#include <wlr/render/drm_format_set.h>
#include <wlr/render/drm_syncobj.h>
#include <wlr/render/pass.h>
#include <wlr/render/swapchain.h>
#include <wlr/render/wlr_renderer.h>
#include <wlr/render/wlr_texture.h>
#include <wlr/util/log.h>
// DRM format requested for the swapchain buffers that are rendered into.
// Switch to e.g., XRGB2101010 for the Vulkan two-pass path
#define OUTPUT_FORMAT DRM_FORMAT_XRGB8888
// Wall-clock time, in nanoseconds, that a measured run should reach (100 ms)
#define TARGET_NS 100000000
// Size in pixels of the swapchain buffers
#define OUTPUT_WIDTH 1920
#define OUTPUT_HEIGHT 1080
// Width and height in pixels of the square test texture
#define TEXTURE_SIZE 500
// Width and height in pixels of every box in the STACKED layout
#define STACKED_SIZE 500
// Rectangle count for the many-rectangle clip case; keep in sync with the
// clip table in main()
#define CLIP_MANY_ROWS 200
// Iteration count at or above which run_benchmark() stops retrying
#define MAX_ITER 10000
// Lower bound on the iteration count of a measured run
#define MIN_ITER 10
// Iteration count of the initial run, whose timings are thrown away
#define WARMUP_ITER 2
// Kind of primitive a benchmark case adds to the render pass.
enum primitive_type {
// Solid-color rectangle, added with wlr_render_pass_add_rect()
RECT,
// The shared test texture, added with wlr_render_pass_add_texture()
TEXTURE,
};
// How the primitives of a benchmark case are placed on the output.
enum layout_type {
// Every primitive uses the same STACKED_SIZE box at the origin
STACKED,
// Primitives are spread over a roughly square grid of tiles
GRID,
};
// Parameters describing one benchmark configuration.
struct bench_case {
// What gets drawn
enum primitive_type primitive;
// Where it gets drawn
enum layout_type layout;
// 1 selects a single clip rectangle covering the whole output; any other
// value is the number of one-pixel-tall clip rows built by run_benchmark()
int clips;
// Number of primitives added to each render pass
int count;
};
// Totals collected over the final measured run of a benchmark case.
struct bench_result {
// Number of render passes in the measured run
int iters;
// Sum of the per-pass CPU times, in nanoseconds
int64_t cpu_ns;
// Sum of the per-pass render timer durations, in nanoseconds; stays 0 when
// no render timer is available
int64_t gpu_ns;
};
// Long-lived objects shared by all benchmark cases. Filled in by
// bench_ctx_init() and released by bench_ctx_finish().
struct bench_ctx {
// Event loop used to create the backend and to dispatch timeline waiters
struct wl_event_loop *ev;
struct wlr_backend *backend;
struct wlr_renderer *renderer;
struct wlr_allocator *allocator;
// Source of the OUTPUT_WIDTH x OUTPUT_HEIGHT buffers rendered into
struct wlr_swapchain *swapchain;
// Only created when the renderer reports timeline support, NULL otherwise
struct wlr_drm_syncobj_timeline *timeline;
// Color transform passed to every render pass
struct wlr_color_transform *color_transform;
// Render timer passed to every render pass; may be NULL
struct wlr_render_timer *timer;
// TEXTURE_SIZE x TEXTURE_SIZE texture used by TEXTURE cases
struct wlr_texture *texture;
// Timeline point handed to the next render pass; incremented per pass
uint64_t signal_point;
};
// State for waiting on a single timeline point from the event loop.
struct render_wait {
// Embedded waiter; handle_render_ready() recovers the container from it
struct wlr_drm_syncobj_timeline_waiter waiter;
// Set to true by handle_render_ready() once the waiter fires
bool ready;
};
// Timeline waiter callback: flags the enclosing render_wait as ready so the
// dispatch loop in run_one() can stop.
static void handle_render_ready(struct wlr_drm_syncobj_timeline_waiter *waiter) {
	struct render_wait *pending = NULL;
	pending = wl_container_of(waiter, pending, waiter);
	pending->ready = true;
}
// Flatten a timespec into a single signed count of nanoseconds.
static int64_t timespec_to_ns(const struct timespec *ts) {
	const int64_t whole_seconds_ns = (int64_t)ts->tv_sec * 1000000000L;
	return whole_seconds_ns + ts->tv_nsec;
}
// Nanoseconds elapsed from *start to *end; negative when end precedes start.
static int64_t timespec_diff_ns(const struct timespec *start,
		const struct timespec *end) {
	const int64_t begin_ns = timespec_to_ns(start);
	const int64_t finish_ns = timespec_to_ns(end);
	return finish_ns - begin_ns;
}
static void bench_ctx_init(struct bench_ctx *ctx) {
ctx->ev = wl_event_loop_create();
assert(ctx->ev);
wlr_log_init(WLR_ERROR, NULL);
ctx->backend = wlr_headless_backend_create(ctx->ev);
assert(ctx->backend);
ctx->renderer = wlr_renderer_autocreate(ctx->backend);
assert(ctx->renderer);
if (ctx->renderer->features.timeline) {
int drm_fd = wlr_renderer_get_drm_fd(ctx->renderer);
assert(drm_fd >= 0);
ctx->timeline = wlr_drm_syncobj_timeline_create(drm_fd);
assert(ctx->timeline);
}
ctx->color_transform = wlr_color_transform_init_linear_to_inverse_eotf(
WLR_COLOR_TRANSFER_FUNCTION_SRGB);
assert(ctx->color_transform);
ctx->allocator = wlr_allocator_autocreate(ctx->backend, ctx->renderer);
assert(ctx->allocator);
const struct wlr_drm_format_set *formats =
wlr_renderer_get_texture_formats(ctx->renderer,
ctx->allocator->buffer_caps);
ctx->swapchain = wlr_swapchain_create(ctx->allocator,
OUTPUT_WIDTH, OUTPUT_HEIGHT,
wlr_drm_format_set_get(formats, OUTPUT_FORMAT));
assert(ctx->swapchain);
ctx->timer = wlr_render_timer_create(ctx->renderer);
size_t stride = TEXTURE_SIZE * 4;
size_t size = stride * TEXTURE_SIZE;
uint8_t *data = malloc(size);
assert(data);
for (size_t i = 0; i < size; i++) {
data[i] = i & 0xFF;
}
ctx->texture = wlr_texture_from_pixels(ctx->renderer,
DRM_FORMAT_ARGB8888, stride, TEXTURE_SIZE, TEXTURE_SIZE, data);
assert(ctx->texture);
free(data);
}
// Release everything bench_ctx_init() created. The timer and the timeline are
// optional and are only released when present.
static void bench_ctx_finish(struct bench_ctx *ctx) {
	wlr_texture_destroy(ctx->texture);

	if (ctx->timer != NULL) {
		wlr_render_timer_destroy(ctx->timer);
	}

	wlr_swapchain_destroy(ctx->swapchain);
	wlr_allocator_destroy(ctx->allocator);
	wlr_color_transform_unref(ctx->color_transform);

	if (ctx->timeline != NULL) {
		wlr_drm_syncobj_timeline_unref(ctx->timeline);
	}

	// Renderer, backend and event loop go last.
	wlr_renderer_destroy(ctx->renderer);
	wlr_backend_destroy(ctx->backend);
	wl_event_loop_destroy(ctx->ev);
}
static void run_one(struct bench_ctx *ctx, const struct bench_case *bc,
const pixman_region32_t *clip, int64_t *out_cpu_ns,
int64_t *out_gpu_ns) {
struct wlr_buffer *buffer = wlr_swapchain_acquire(ctx->swapchain);
assert(buffer);
uint64_t point = ctx->signal_point++;
struct timespec start, end;
clock_gettime(CLOCK_MONOTONIC, &start);
struct wlr_render_pass *pass = wlr_renderer_begin_buffer_pass(
ctx->renderer, buffer, &(struct wlr_buffer_pass_options){
.timer = ctx->timer,
.color_transform = ctx->color_transform,
.signal_timeline = ctx->timeline,
.signal_point = point,
});
assert(pass);
for (int i = 0; i < bc->count; i++) {
struct wlr_box box;
if (bc->layout == STACKED) {
box = (struct wlr_box){
.x = 0, .y = 0,
.width = STACKED_SIZE,
.height = STACKED_SIZE,
};
} else {
int cols = ceil(sqrt(bc->count));
int rows = (bc->count + cols - 1) / cols;
int tile_w = OUTPUT_WIDTH / cols;
int tile_h = OUTPUT_HEIGHT / rows;
box = (struct wlr_box){
.x = (i % cols) * tile_w,
.y = (i / cols) * tile_h,
.width = tile_w,
.height = tile_h,
};
}
if (bc->primitive == RECT) {
wlr_render_pass_add_rect(pass, &(struct wlr_render_rect_options){
.box = box,
.color = { .r = 0.5, .g = 0.25, .b = 0.05, .a = 0.5 },
.clip = clip,
});
} else {
wlr_render_pass_add_texture(pass, &(struct wlr_render_texture_options){
.texture = ctx->texture,
.dst_box = box,
.clip = clip,
});
}
}
wlr_render_pass_submit(pass);
clock_gettime(CLOCK_MONOTONIC, &end);
*out_cpu_ns = timespec_diff_ns(&start, &end);
if (ctx->renderer->features.timeline) {
struct render_wait wait = { .ready = false };
assert(wlr_drm_syncobj_timeline_waiter_init(&wait.waiter, ctx->timeline,
point, 0, ctx->ev, handle_render_ready));
while (!wait.ready) {
wl_event_loop_dispatch(ctx->ev, -1);
}
wlr_drm_syncobj_timeline_waiter_finish(&wait.waiter);
}
wlr_buffer_unlock(buffer);
if (ctx->timer) {
*out_gpu_ns = wlr_render_timer_get_duration_ns(ctx->timer);
}
}
// Execute `iters` render passes back to back, adding each pass's CPU and GPU
// time onto *out_cpu_ns and *out_gpu_ns. Returns the wall-clock duration of
// the whole batch in nanoseconds.
static int64_t run(struct bench_ctx *ctx, const struct bench_case *bc,
		const pixman_region32_t *clip, int64_t *out_cpu_ns,
		int64_t *out_gpu_ns, int64_t iters) {
	struct timespec began, finished;

	clock_gettime(CLOCK_MONOTONIC, &began);
	for (int64_t left = iters; left > 0; left--) {
		// Zeroed up front because run_one() may leave the GPU value unset.
		int64_t pass_cpu_ns = 0;
		int64_t pass_gpu_ns = 0;
		run_one(ctx, bc, clip, &pass_cpu_ns, &pass_gpu_ns);
		*out_cpu_ns += pass_cpu_ns;
		*out_gpu_ns += pass_gpu_ns;
	}
	clock_gettime(CLOCK_MONOTONIC, &finished);

	return timespec_diff_ns(&began, &finished);
}
/**
 * Measure one benchmark case.
 *
 * Builds the clip region for the case, performs a WARMUP_ITER-pass warm-up
 * run, then repeats measured runs with a re-estimated iteration count until
 * a run lasts at least TARGET_NS or was made with at least MAX_ITER
 * iterations. Only the totals of that final run are returned.
 *
 * Note that MAX_ITER does not cap the estimated iteration count; it only
 * stops further re-estimation.
 */
static struct bench_result run_benchmark(struct bench_ctx *ctx,
		const struct bench_case *bc) {
	pixman_region32_t clip;
	if (bc->clips == 1) {
		pixman_region32_init_rect(&clip, 0, 0, OUTPUT_WIDTH, OUTPUT_HEIGHT);
	} else {
		// Varying width ensures that pixman does not merge adjacent rows.
		pixman_region32_init(&clip);
		for (int row = 0; row < bc->clips; row++) {
			pixman_region32_union_rect(&clip, &clip,
				0, row, row + 1, 1);
		}
	}

	// Warm-up run. run() adds onto its accumulators, so they have to start
	// out initialized even though the totals are thrown away.
	int64_t iters = WARMUP_ITER;
	int64_t warmup_cpu = 0;
	int64_t warmup_gpu = 0;
	int64_t wall_ns = run(ctx, bc, &clip, &warmup_cpu, &warmup_gpu, iters);

	struct bench_result result = {0};
	for (;;) {
		// To avoid being slightly below target we aim for 10% over
		assert(wall_ns > 0);
		iters = iters * TARGET_NS * 1.1 / wall_ns + 1;
		if (iters < MIN_ITER) {
			iters = MIN_ITER;
		}

		int64_t total_cpu = 0;
		int64_t total_gpu = 0;
		wall_ns = run(ctx, bc, &clip, &total_cpu, &total_gpu, iters);
		if (wall_ns >= TARGET_NS || iters >= MAX_ITER) {
			// The test either ran long enough or we're giving up
			result.iters = iters;
			result.cpu_ns = total_cpu;
			result.gpu_ns = total_gpu;
			break;
		}
	}

	pixman_region32_fini(&clip);
	return result;
}
// Print one result line to stdout: the case name, the iteration count, the
// average CPU time per pass and, when any GPU time was recorded, the average
// GPU time per pass.
static void print_result(const struct bench_case *bc,
		const struct bench_result *r) {
	const char *primitive_name = "Texture";
	if (bc->primitive == RECT) {
		primitive_name = "Rect";
	}
	const char *layout_name = "grid";
	if (bc->layout == STACKED) {
		layout_name = "stacked";
	}

	char label[64];
	snprintf(label, sizeof(label), "Benchmark%s/%s/clip%d/%d",
		primitive_name, layout_name, bc->clips, bc->count);

	// Both averages are computed up front, even if the GPU one is not shown.
	const long long cpu_avg = r->cpu_ns / r->iters;
	const long long gpu_avg = r->gpu_ns / r->iters;

	printf("%-40s %8d %12lld cpu-ns/op", label, r->iters, cpu_avg);
	if (r->gpu_ns > 0) {
		printf(" %12lld gpu-ns/op", gpu_avg);
	}
	printf("\n");
	fflush(stdout);
}
int main(int argc, char *argv[]) {
int reruns = 1;
static const struct option long_options[] = {
{ "count", required_argument, NULL, 'c' },
{ 0, 0, 0, 0 },
};
int opt;
while ((opt = getopt_long_only(argc, argv, "", long_options, NULL)) != -1) {
switch (opt) {
case 'c':
reruns = atoi(optarg);
if (reruns <= 0) {
fprintf(stderr, "count must be positive\n");
return 1;
}
break;
default:
fprintf(stderr, "Usage: %s [-count=N]\n", argv[0]);
return 1;
}
}
struct bench_ctx ctx = {0};
bench_ctx_init(&ctx);
static const int primitives[] = { RECT, TEXTURE, -1 };
static const int layouts[] = { STACKED, GRID, -1 };
static const int clips[] = { 1, 200, -1 };
static const int counts[] = { 1, 4, 16, 64, 256, 1024, -1 };
// *art*.
for (int pi = 0; primitives[pi] != -1; pi++) {
for (int li = 0; layouts[li] != -1; li++) {
for (int ci = 0; clips[ci] != -1; ci++) {
for (int ni = 0; counts[ni] != -1; ni++) {
for (int ri = 0; ri < reruns; ri++) {
struct bench_case bc = {
.primitive = primitives[pi],
.layout = layouts[li],
.clips = clips[ci],
.count = counts[ni],
};
struct bench_result result =
run_benchmark(&ctx, &bc);
print_result(&bc, &result);
}
}
}
}
}
bench_ctx_finish(&ctx);
return 0;
}

View file

@ -8,3 +8,9 @@ benchmark(
executable('bench-scene', 'bench_scene.c', dependencies: wlroots),
timeout: 30,
)
# Builds bench_render_pass.c and registers it as the 'render-pass' benchmark
# with a 30 second timeout.
benchmark(
'render-pass',
executable('bench-render-pass', 'bench_render_pass.c', dependencies: wlroots),
timeout: 30,
)