mirror of
https://gitlab.freedesktop.org/pulseaudio/pulseaudio.git
synced 2025-10-29 05:40:23 -04:00
atomic: fix load and store for armv7 and higher
The original atomic implementation in pulseaudio based on libatomic stated that the intent was to use full memory barriers. According to [1], the load and store implementation based on gcc builtins matches sequential consistent (i.e. full memory barrier) load and store ordering only for x86. I observed random crashes in client applications using memfd srbchannel transport on an armv8-aarch64 platform (cortex-a57). In all those crashes the first read on the pstream descriptor (the size field) was wrong and looked like it contained old data. I boiled the relevant parts of the srbchannel implementation down to a simple test case and could observe random test failures. So I figured that the atomic implementation was broken for armv8 with respect to cross-cpu memory access ordering consistency. In order to come up with a minimal fix, I used the newer __atomic_load_n/__atomic_store_n builtins from gcc. With aarch64-linux-gnu-gcc (Linaro GCC 7.3-2018.05) 7.3.1 20180425 they compile to ldar and stlxr on arm64, which is correct according to [1] and [2]. The other atomic operations based on __sync builtins don't need to be touched since they already are of the full memory barrier variety. [1] https://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html [2] <https://community.arm.com/developer/ip-products/processors /b/processors-ip-blog/posts/armv8-a-architecture-2016-additions>
This commit is contained in:
parent
12bb46a768
commit
d4ff4adce2
5 changed files with 198 additions and 1 deletions
12
configure.ac
12
configure.ac
|
|
@ -246,6 +246,15 @@ fi
|
|||
# If everything else fails use libatomic_ops
|
||||
need_libatomic_ops=yes
|
||||
|
||||
AC_CACHE_CHECK([whether $CC knows __atomic_store_n()],
|
||||
pulseaudio_cv_atomic_store_n, [
|
||||
AC_LINK_IFELSE(
|
||||
[AC_LANG_PROGRAM([], [[int c = 0; __atomic_store_n(&c, 4, __ATOMIC_SEQ_CST);]])],
|
||||
[pulseaudio_cv_atomic_store_n=yes],
|
||||
[pulseaudio_cv_atomic_store_n=no])
|
||||
])
|
||||
|
||||
|
||||
AC_CACHE_CHECK([whether $CC knows __sync_bool_compare_and_swap()],
|
||||
pulseaudio_cv_sync_bool_compare_and_swap, [
|
||||
AC_LINK_IFELSE(
|
||||
|
|
@ -256,6 +265,9 @@ AC_CACHE_CHECK([whether $CC knows __sync_bool_compare_and_swap()],
|
|||
|
||||
if test "$pulseaudio_cv_sync_bool_compare_and_swap" = "yes" ; then
|
||||
AC_DEFINE([HAVE_ATOMIC_BUILTINS], 1, [Have __sync_bool_compare_and_swap() and friends.])
|
||||
if test "$pulseaudio_cv_atomic_store_n" = "yes" ; then
|
||||
AC_DEFINE([HAVE_ATOMIC_BUILTINS_MEMORY_MODEL], 1, [Have __atomic_store_n() and friends.])
|
||||
fi
|
||||
need_libatomic_ops=no
|
||||
else
|
||||
# HW specific atomic ops stuff
|
||||
|
|
|
|||
11
meson.build
11
meson.build
|
|
@ -285,12 +285,23 @@ if shm_dep.found()
|
|||
cdata.set('HAVE_SHM_OPEN', 1)
|
||||
endif
|
||||
|
||||
|
||||
atomictest = '''void func() {
|
||||
volatile int atomic = 2;
|
||||
__sync_bool_compare_and_swap (&atomic, 2, 3);
|
||||
}
|
||||
'''
|
||||
if cc.compiles(atomictest)
|
||||
newatomictest = '''void func() {
|
||||
int c = 0;
|
||||
__atomic_store_n(&c, 4, __ATOMIC_SEQ_CST);
|
||||
}
|
||||
'''
|
||||
|
||||
if(cc.compiles(newatomictest))
|
||||
cdata.set('HAVE_ATOMIC_BUILTINS_MEMORY_MODEL', true)
|
||||
endif
|
||||
|
||||
cdata.set('HAVE_ATOMIC_BUILTINS', true)
|
||||
else
|
||||
# FIXME: check if we need libatomic_ops
|
||||
|
|
|
|||
|
|
@ -293,7 +293,8 @@ TESTS_norun = \
|
|||
sig2str-test \
|
||||
stripnul \
|
||||
echo-cancel-test \
|
||||
lo-latency-test
|
||||
lo-latency-test \
|
||||
atomic-test
|
||||
|
||||
# These tests need a running pulseaudio daemon
|
||||
TESTS_daemon = \
|
||||
|
|
@ -412,6 +413,11 @@ srbchannel_test_CFLAGS = $(AM_CFLAGS) $(LIBCHECK_CFLAGS)
|
|||
srbchannel_test_LDADD = $(AM_LDADD) libpulse.la libpulsecommon-@PA_MAJORMINOR@.la
|
||||
srbchannel_test_LDFLAGS = $(AM_LDFLAGS) $(BINLDFLAGS) $(LIBCHECK_LIBS)
|
||||
|
||||
atomic_test_SOURCES = tests/atomic-test.c
|
||||
atomic_test_CFLAGS = $(AM_CFLAGS) $(LIBCHECK_CFLAGS)
|
||||
atomic_test_LDADD = $(AM_LDADD) libpulsecommon-@PA_MAJORMINOR@.la libpulse.la
|
||||
atomic_test_LDFLAGS = $(AM_LDFLAGS) $(BINLDFLAGS) $(LIBCHECK_LIBS)
|
||||
|
||||
get_binary_name_test_SOURCES = tests/get-binary-name-test.c
|
||||
get_binary_name_test_CFLAGS = $(AM_CFLAGS) $(LIBCHECK_CFLAGS)
|
||||
get_binary_name_test_LDADD = $(AM_LDADD) libpulse.la libpulsecommon-@PA_MAJORMINOR@.la
|
||||
|
|
|
|||
|
|
@ -50,6 +50,20 @@ typedef struct pa_atomic {
|
|||
|
||||
#define PA_ATOMIC_INIT(v) { .value = (v) }
|
||||
|
||||
#ifdef HAVE_ATOMIC_BUILTINS_MEMORY_MODEL
|
||||
|
||||
/* __atomic based implementation */
|
||||
|
||||
static inline int pa_atomic_load(const pa_atomic_t *a) {
|
||||
return __atomic_load_n(&a->value, __ATOMIC_SEQ_CST);
|
||||
}
|
||||
|
||||
static inline void pa_atomic_store(pa_atomic_t *a, int i) {
|
||||
__atomic_store_n(&a->value, i, __ATOMIC_SEQ_CST);
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
static inline int pa_atomic_load(const pa_atomic_t *a) {
|
||||
__sync_synchronize();
|
||||
return a->value;
|
||||
|
|
@ -60,6 +74,9 @@ static inline void pa_atomic_store(pa_atomic_t *a, int i) {
|
|||
__sync_synchronize();
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
/* Returns the previously set value */
|
||||
static inline int pa_atomic_add(pa_atomic_t *a, int i) {
|
||||
return __sync_fetch_and_add(&a->value, i);
|
||||
|
|
@ -91,6 +108,20 @@ typedef struct pa_atomic_ptr {
|
|||
|
||||
#define PA_ATOMIC_PTR_INIT(v) { .value = (long) (v) }
|
||||
|
||||
#ifdef HAVE_ATOMIC_BUILTINS_MEMORY_MODEL
|
||||
|
||||
/* __atomic based implementation */
|
||||
|
||||
static inline void* pa_atomic_ptr_load(const pa_atomic_ptr_t *a) {
|
||||
return (void*) __atomic_load_n(&a->value, __ATOMIC_SEQ_CST);
|
||||
}
|
||||
|
||||
static inline void pa_atomic_ptr_store(pa_atomic_ptr_t *a, void* p) {
|
||||
__atomic_store_n(&a->value, p, __ATOMIC_SEQ_CST);
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
static inline void* pa_atomic_ptr_load(const pa_atomic_ptr_t *a) {
|
||||
__sync_synchronize();
|
||||
return (void*) a->value;
|
||||
|
|
@ -101,6 +132,8 @@ static inline void pa_atomic_ptr_store(pa_atomic_ptr_t *a, void *p) {
|
|||
__sync_synchronize();
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
static inline bool pa_atomic_ptr_cmpxchg(pa_atomic_ptr_t *a, void *old_p, void* new_p) {
|
||||
return __sync_bool_compare_and_swap(&a->value, (long) old_p, (long) new_p);
|
||||
}
|
||||
|
|
|
|||
135
src/tests/atomic-test.c
Normal file
135
src/tests/atomic-test.c
Normal file
|
|
@ -0,0 +1,135 @@
|
|||
/***
|
||||
This file is part of PulseAudio.
|
||||
|
||||
Copyright 2014 David Henningsson, Canonical Ltd.
|
||||
|
||||
PulseAudio is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU Lesser General Public License as published
|
||||
by the Free Software Foundation; either version 2.1 of the License,
|
||||
or (at your option) any later version.
|
||||
|
||||
PulseAudio is distributed in the hope that it will be useful, but
|
||||
WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public License
|
||||
along with PulseAudio; if not, see <http://www.gnu.org/licenses/>.
|
||||
***/
|
||||
|
||||
/* This test spawns two threads on distinct cpu-cores that pass a value
|
||||
* between each other through shared memory protected by pa_atomic_t.
|
||||
* Thread "left" continuously increments a value and writes its contents to memory.
|
||||
* Thread "right" continuously reads the value and checks whether it was incremented.
|
||||
*
|
||||
* With the pa_atomic_load/pa_atomic_store implementations based on __sync_synchronize,
|
||||
* this will fail after some time (sometimes 2 seconds, sometimes 8 hours) at least
|
||||
* on ARM Cortex-A53 and ARM Cortex-A57 systems.
|
||||
*
|
||||
* On x86_64, it does not.
|
||||
*
|
||||
* The chosen implementation in some way mimics a situation that can also occur
|
||||
* using memfd srbchannel transport.
|
||||
*
|
||||
* NOTE: This is a long-running test, so don't execute in normal test suite.
|
||||
*
|
||||
* */
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include <config.h>
|
||||
#endif
|
||||
|
||||
#include <unistd.h>
|
||||
#include <check.h>
|
||||
|
||||
#include <pulsecore/thread.h>
|
||||
#include <pulse/rtclock.h>
|
||||
#include <pulse/xmalloc.h>
|
||||
#include <pulsecore/semaphore.h>
|
||||
#include <pthread.h>
|
||||
#include <pulsecore/atomic.h>
|
||||
|
||||
#define MEMORY_SIZE (8 * 2 * 1024 * 1024)
|
||||
|
||||
|
||||
typedef struct io_t {
|
||||
pa_atomic_t *flag;
|
||||
char* memory;
|
||||
cpu_set_t cpuset;
|
||||
} io_t;
|
||||
|
||||
static void read_func(void* data) {
|
||||
io_t *io = (io_t *) data;
|
||||
size_t expect = 0;
|
||||
size_t value = 0;
|
||||
pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t), &io->cpuset);
|
||||
while(1) {
|
||||
if(pa_atomic_load(io->flag) == 1) {
|
||||
memcpy(&value, io->memory, sizeof(value));
|
||||
pa_atomic_sub(io->flag, 1);
|
||||
ck_assert_uint_eq(value, expect);
|
||||
++expect;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void write_func(void* data) {
|
||||
io_t *io = (io_t *) data;
|
||||
size_t value = 0;
|
||||
pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t), &io->cpuset);
|
||||
while(1) {
|
||||
if(pa_atomic_load(io->flag) == 0) {
|
||||
memcpy(io->memory, &value, sizeof(value));
|
||||
pa_atomic_add(io->flag, 1);
|
||||
++value;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
START_TEST (atomic_test) {
|
||||
pa_thread *thread1, *thread2;
|
||||
io_t io1, io2;
|
||||
|
||||
char* memory = pa_xmalloc0(MEMORY_SIZE);
|
||||
pa_atomic_t flag = PA_ATOMIC_INIT(0);
|
||||
memset(memory, 0, MEMORY_SIZE);
|
||||
|
||||
/* intentionally misalign memory since srbchannel also does not
|
||||
* always read/write aligned. Might be a red hering. */
|
||||
io1.memory = io2.memory = memory + 1025;
|
||||
io1.flag = io2.flag = &flag;
|
||||
|
||||
CPU_ZERO(&io1.cpuset);
|
||||
CPU_SET(1, &io1.cpuset);
|
||||
thread1 = pa_thread_new("left", &write_func, &io1);
|
||||
|
||||
CPU_ZERO(&io2.cpuset);
|
||||
CPU_SET(3, &io2.cpuset);
|
||||
thread2 = pa_thread_new("right", &read_func, &io2);
|
||||
pa_thread_free(thread1);
|
||||
pa_thread_free(thread2);
|
||||
pa_xfree(memory);
|
||||
}
|
||||
END_TEST
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
int failed = 0;
|
||||
Suite *s;
|
||||
TCase *tc;
|
||||
SRunner *sr;
|
||||
|
||||
if (!getenv("MAKE_CHECK"))
|
||||
pa_log_set_level(PA_LOG_DEBUG);
|
||||
|
||||
s = suite_create("atomic");
|
||||
tc = tcase_create("atomic");
|
||||
tcase_add_test(tc, atomic_test);
|
||||
suite_add_tcase(s, tc);
|
||||
|
||||
sr = srunner_create(s);
|
||||
srunner_run_all(sr, CK_NORMAL);
|
||||
failed = srunner_ntests_failed(sr);
|
||||
srunner_free(sr);
|
||||
|
||||
return (failed == 0) ? EXIT_SUCCESS : EXIT_FAILURE;
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue