atomic: fix load and store for armv7 and higher

The original atomic implementation in pulseaudio based on
libatomic stated that the intent was to use full memory barriers.

According to [1], the load and store implementation based on
gcc builtins matches sequential consistent (i.e. full memory barrier)
load and store ordering only for x86.

I observed random crashes in client applications using memfd srbchannel
transport on an armv8-aarch64 platform (cortex-a57).
In all those crashes the first read on the pstream descriptor
(the size field) was wrong and looked like it contained old data.
I boiled the relevant parts of the srbchannel implementation down to
a simple test case and could observe random test failures.
So I figured that the atomic implementation was broken for armv8
with respect to cross-cpu memory access ordering consistency.

In order to come up with a minimal fix, I used the newer
__atomic_load_n/__atomic_store_n builtins from gcc.

With
aarch64-linux-gnu-gcc (Linaro GCC 7.3-2018.05) 7.3.1 20180425
they compile to
ldar and stlxr on arm64, which is correct according to [1] and [2].

The other atomic operations based on __sync builtins don't need
to be touched since they already are of the full memory barrier
variety.

[1] https://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
[2] https://community.arm.com/developer/ip-products/processors/b/processors-ip-blog/posts/armv8-a-architecture-2016-additions
This commit is contained in:
Thomas Hutschenreuther 2019-05-29 19:20:37 +02:00 committed by Tanu Kaskinen
parent 12bb46a768
commit d4ff4adce2
5 changed files with 198 additions and 1 deletions

View file

@@ -50,6 +50,20 @@ typedef struct pa_atomic {
#define PA_ATOMIC_INIT(v) { .value = (v) }
#ifdef HAVE_ATOMIC_BUILTINS_MEMORY_MODEL
/* __atomic based implementation */
static inline int pa_atomic_load(const pa_atomic_t *a) {
return __atomic_load_n(&a->value, __ATOMIC_SEQ_CST);
}
/* Write the counter with sequentially-consistent ordering.
 *
 * Equivalent to __atomic_store_n(&a->value, i, __ATOMIC_SEQ_CST);
 * the generic __atomic_store form takes the new value by address. */
static inline void pa_atomic_store(pa_atomic_t *a, int i) {
    __atomic_store(&a->value, &i, __ATOMIC_SEQ_CST);
}
#else
static inline int pa_atomic_load(const pa_atomic_t *a) {
__sync_synchronize();
return a->value;
@@ -60,6 +74,9 @@ static inline void pa_atomic_store(pa_atomic_t *a, int i) {
__sync_synchronize();
}
#endif
/* Returns the previously set value */
static inline int pa_atomic_add(pa_atomic_t *a, int i) {
return __sync_fetch_and_add(&a->value, i);
@@ -91,6 +108,20 @@ typedef struct pa_atomic_ptr {
#define PA_ATOMIC_PTR_INIT(v) { .value = (long) (v) }
#ifdef HAVE_ATOMIC_BUILTINS_MEMORY_MODEL
/* __atomic based implementation */
static inline void* pa_atomic_ptr_load(const pa_atomic_ptr_t *a) {
return (void*) __atomic_load_n(&a->value, __ATOMIC_SEQ_CST);
}
/* Store a pointer with sequentially-consistent ordering.
 *
 * The explicit (long) cast matches PA_ATOMIC_PTR_INIT and
 * pa_atomic_ptr_cmpxchg, which both cast pointers to long before
 * touching a->value: the slot holds an integer, and passing `p`
 * straight to __atomic_store_n relies on an implicit pointer-to-
 * integer conversion, which is a constraint violation in standard C
 * (GCC merely warns).  Behavior on GCC is unchanged. */
static inline void pa_atomic_ptr_store(pa_atomic_ptr_t *a, void* p) {
    __atomic_store_n(&a->value, (long) p, __ATOMIC_SEQ_CST);
}
#else
static inline void* pa_atomic_ptr_load(const pa_atomic_ptr_t *a) {
__sync_synchronize();
return (void*) a->value;
@@ -101,6 +132,8 @@ static inline void pa_atomic_ptr_store(pa_atomic_ptr_t *a, void *p) {
__sync_synchronize();
}
#endif
/* Atomically replace the stored pointer with new_p if and only if it
 * currently equals old_p; returns true on success.
 *
 * Left on the __sync builtin: __sync operations are already full
 * memory barriers, so no __atomic conversion was needed here. */
static inline bool pa_atomic_ptr_cmpxchg(pa_atomic_ptr_t *a, void *old_p, void* new_p) {
    long expected = (long) old_p;
    long desired = (long) new_p;
    return __sync_bool_compare_and_swap(&a->value, expected, desired);
}