/*
* Copyright (c) 1991-1994 by Xerox Corporation. All rights reserved.
* Copyright (c) 1996-1999 by Silicon Graphics. All rights reserved.
* Copyright (c) 1999-2003 by Hewlett-Packard Company. All rights reserved.
* Copyright (c) 2008-2017 Ivan Maidanski
*
* THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY EXPRESSED
* OR IMPLIED. ANY USE IS AT YOUR OWN RISK.
*
* Permission is hereby granted to use or copy this program
* for any purpose, provided the above notices are retained on all copies.
* Permission to modify the code and to distribute modified code is granted,
* provided the above notices are retained, and a notice that the code was
* modified is included with the above copyright notice.
*
*/
#if (AO_GNUC_PREREQ(4, 8) || AO_CLANG_PREREQ(3, 5)) \
&& !defined(AO_DISABLE_GCC_ATOMICS)
/* Probably, it could be enabled even for earlier gcc/clang versions. */
# define AO_GCC_ATOMIC_TEST_AND_SET
#endif
#ifdef __native_client__
/* Mask instruction should immediately precede access instruction. */
# define AO_MASK_PTR(reg) " bical " reg ", " reg ", #0xc0000000\n"
# define AO_BR_ALIGN " .align 4\n"
#else
# define AO_MASK_PTR(reg) /* empty */
# define AO_BR_ALIGN /* empty */
#endif
#if defined(__thumb__) && !defined(__thumb2__)
/* Thumb-1 mode does not have the ARM "mcr", "swp" and some load/store */
/* instructions, so we temporarily switch to ARM mode and go back */
/* afterwards (clobbering the "r3" register). */
# define AO_THUMB_GO_ARM \
" adr r3, 4f\n" \
" bx r3\n" \
" .align\n" \
" .arm\n" \
AO_BR_ALIGN \
"4:\n"
# define AO_THUMB_RESTORE_MODE \
" adr r3, 5f + 1\n" \
" bx r3\n" \
" .thumb\n" \
AO_BR_ALIGN \
"5:\n"
# define AO_THUMB_SWITCH_CLOBBERS "r3",
#else
# define AO_THUMB_GO_ARM /* empty */
# define AO_THUMB_RESTORE_MODE /* empty */
# define AO_THUMB_SWITCH_CLOBBERS /* empty */
#endif /* !__thumb__ */
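/*
 * Illustrative sketch (not part of the original header): the three macros
 * above are meant to bracket a hand-written asm body, so that on Thumb-1
 * targets the ARM-only instructions run in ARM mode and "r3" is reported
 * as clobbered, while on all other targets they expand to nothing:
 *
 *   __asm__ __volatile__("@example\n"
 *                        AO_THUMB_GO_ARM
 *                        "       mcr p15, 0, %0, c7, c10, 5\n"
 *                        AO_THUMB_RESTORE_MODE
 *                        : "=&r" (dest)
 *                        :
 *                        : AO_THUMB_SWITCH_CLOBBERS "memory");
 *
 * This mirrors the way AO_nop_full below composes the same macros.
 */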
/* NEC LE-IT: gcc has no easy way to check the ARM architecture version, */
/* but it defines only one (or several) of the __ARM_ARCH_x__ macros. */
#if !defined(__ARM_ARCH_2__) && !defined(__ARM_ARCH_3__) \
&& !defined(__ARM_ARCH_3M__) && !defined(__ARM_ARCH_4__) \
&& !defined(__ARM_ARCH_4T__) \
&& ((!defined(__ARM_ARCH_5__) && !defined(__ARM_ARCH_5E__) \
&& !defined(__ARM_ARCH_5T__) && !defined(__ARM_ARCH_5TE__) \
&& !defined(__ARM_ARCH_5TEJ__) && !defined(__ARM_ARCH_6M__)) \
|| defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) \
|| defined(__ARM_ARCH_8A__))
# define AO_ARM_HAVE_LDREX
# if !defined(__ARM_ARCH_6__) && !defined(__ARM_ARCH_6J__) \
&& !defined(__ARM_ARCH_6T2__)
/* LDREXB/STREXB and LDREXH/STREXH are present in ARMv6K/Z+. */
# define AO_ARM_HAVE_LDREXBH
# endif
# if !defined(__ARM_ARCH_6__) && !defined(__ARM_ARCH_6J__) \
&& !defined(__ARM_ARCH_6T2__) && !defined(__ARM_ARCH_6Z__) \
&& !defined(__ARM_ARCH_6ZT2__)
# if !defined(__ARM_ARCH_6K__) && !defined(__ARM_ARCH_6KZ__) \
&& !defined(__ARM_ARCH_6ZK__)
/* DMB is present in ARMv6M and ARMv7+. */
# define AO_ARM_HAVE_DMB
# endif
# if (!defined(__thumb__) \
|| (defined(__thumb2__) && !defined(__ARM_ARCH_7__) \
&& !defined(__ARM_ARCH_7M__) && !defined(__ARM_ARCH_7EM__))) \
&& (!defined(__clang__) || AO_CLANG_PREREQ(3, 3))
/* LDREXD/STREXD present in ARMv6K/M+ (see gas/config/tc-arm.c). */
/* In the Thumb mode, this works only starting from ARMv7 (except */
/* for the base and 'M' models). Clang3.2 (and earlier) does not */
/* allocate register pairs for LDREXD/STREXD properly (besides, */
/* Clang3.1 does not support "%H<r>" operand specification). */
# define AO_ARM_HAVE_LDREXD
# endif /* !thumb || ARMv7A || ARMv7R+ */
# endif /* ARMv7+ */
#endif /* ARMv6+ */
#if !defined(__ARM_ARCH_2__) && !defined(__ARM_ARCH_6M__) \
&& !defined(__ARM_ARCH_8A__) && !defined(__thumb2__)
# define AO_ARM_HAVE_SWP
/* Note: ARMv6M is excluded due to no ARM mode support. */
/* Also, SWP is obsoleted for ARMv8+. */
#endif /* !__thumb2__ */
#if !defined(AO_UNIPROCESSOR) && defined(AO_ARM_HAVE_DMB) \
&& !defined(AO_PREFER_BUILTIN_ATOMICS)
AO_INLINE void
AO_nop_write(void)
{
/* AO_THUMB_GO_ARM is empty. */
/* This will target the system domain and thus be overly */
/* conservative as the CPUs (even in case of big.LITTLE SoC) will */
/* occupy the inner shareable domain. */
/* The plain variant (dmb st) is theoretically slower, and should */
/* not be needed. That said, with limited experimentation, a CPU */
/* implementation for which it actually matters has not been found */
/* yet, though they should already exist. */
/* Anyway, note that the "st" and "ishst" barriers are actually */
/* quite weak and, as the libatomic_ops documentation states, */
/* usually not what you really want. */
__asm__ __volatile__("dmb ishst" : : : "memory");
}
# define AO_HAVE_nop_write
#endif /* AO_ARM_HAVE_DMB */
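/*
 * Usage sketch (illustrative only; "payload", "ready" and compute() are
 * hypothetical client names, not part of this header): a store-store
 * barrier is the classic publish pattern, ordering the payload store
 * before the flag store:
 *
 *   payload = compute();       // plain stores to the shared data
 *   AO_nop_write();            // earlier stores become visible before later ones
 *   AO_store(&ready, 1);       // publish the flag
 *
 * The consumer still needs a matching acquire/read barrier (e.g.
 * AO_load_acquire(&ready)) before reading "payload".
 */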
#ifndef AO_GCC_ATOMIC_TEST_AND_SET
#ifdef AO_UNIPROCESSOR
/* If only a single processor (core) is used, AO_UNIPROCESSOR could */
/* be defined by the client to avoid an unnecessary memory barrier. */
AO_INLINE void
AO_nop_full(void)
{
AO_compiler_barrier();
}
# define AO_HAVE_nop_full
#elif defined(AO_ARM_HAVE_DMB)
/* ARMv7 is compatible with ARMv6 but has a simpler instruction for */
/* issuing a memory barrier (DMB). Raising it via CP15 should still */
/* work, but is slightly less efficient because it requires the use of */
/* a general-purpose register. */
AO_INLINE void
AO_nop_full(void)
{
/* AO_THUMB_GO_ARM is empty. */
__asm__ __volatile__("dmb" : : : "memory");
}
# define AO_HAVE_nop_full
#elif defined(AO_ARM_HAVE_LDREX)
/* ARMv6 is the first architecture providing support for simple */
/* LL/SC. A data memory barrier must be raised via a CP15 command. */
AO_INLINE void
AO_nop_full(void)
{
unsigned dest = 0;
/* Issue a data memory barrier (keeps ordering of memory */
/* transactions before and after this operation). */
__asm__ __volatile__("@AO_nop_full\n"
AO_THUMB_GO_ARM
" mcr p15,0,%0,c7,c10,5\n"
AO_THUMB_RESTORE_MODE
: "=&r"(dest)
: /* empty */
: AO_THUMB_SWITCH_CLOBBERS "memory");
}
# define AO_HAVE_nop_full
#else
/* AO_nop_full() is emulated using AO_test_and_set_full(). */
#endif /* !AO_UNIPROCESSOR && !AO_ARM_HAVE_LDREX */
#endif /* !AO_GCC_ATOMIC_TEST_AND_SET */
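/*
 * Usage sketch (illustrative only; my_flag/other_flag are hypothetical
 * volatile AO_t variables of the client): a full barrier is what a
 * store-then-load handshake (Dekker/Peterson style) requires; a plain
 * write or read barrier is not sufficient there:
 *
 *   AO_store(&my_flag, 1);     // announce intent
 *   AO_nop_full();             // order the store before the following load
 *   if (AO_load(&other_flag) == 0) {
 *     ...                      // proceed; the other side is not contending
 *   }
 */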
#ifdef AO_ARM_HAVE_LDREX
/* "ARM Architecture Reference Manual" (chapter A3.5.3) says that the */
/* single-copy atomic processor accesses are all byte accesses, all */
/* halfword accesses to halfword-aligned locations, all word accesses */
/* to word-aligned locations. */
/* There is only a single concern related to AO store operations: */
/* a direct write (by STR[B/H] instruction) will not be recognized */
/* by the LL/SC construct on the same CPU (i.e., according to ARM */
/* documentation, e.g., see CortexA8 TRM reference, point 8.5, */
/* atomic "store" (using LDREX/STREX[B/H]) is the only safe way to */
/* set variables also used in LL/SC environment). */
/* This is only a problem if interrupt handlers do not clear the */
/* reservation (by CLREX instruction or a dummy STREX one), as they */
/* almost certainly should (e.g., see restore_user_regs defined in */
/* arch/arm/kernel/entry-header.S of Linux). Nonetheless, there is */
/* a doubt this was properly implemented in some ancient OS releases. */
# ifdef AO_BROKEN_TASKSWITCH_CLREX
# define AO_SKIPATOMIC_store
# define AO_SKIPATOMIC_store_release
# define AO_SKIPATOMIC_char_store
# define AO_SKIPATOMIC_char_store_release
# define AO_SKIPATOMIC_short_store
# define AO_SKIPATOMIC_short_store_release
# define AO_SKIPATOMIC_int_store
# define AO_SKIPATOMIC_int_store_release
# ifndef AO_PREFER_BUILTIN_ATOMICS
AO_INLINE void AO_store(volatile AO_t *addr, AO_t value)
{
int flag;
__asm__ __volatile__("@AO_store\n"
AO_THUMB_GO_ARM
AO_BR_ALIGN
"1: " AO_MASK_PTR("%2")
" ldrex %0, [%2]\n"
AO_MASK_PTR("%2")
" strex %0, %3, [%2]\n"
" teq %0, #0\n"
" bne 1b\n"
AO_THUMB_RESTORE_MODE
: "=&r" (flag), "+m" (*addr)
: "r" (addr), "r" (value)
: AO_THUMB_SWITCH_CLOBBERS "cc");
}
# define AO_HAVE_store
# ifdef AO_ARM_HAVE_LDREXBH
AO_INLINE void AO_char_store(volatile unsigned char *addr,
unsigned char value)
{
int flag;
__asm__ __volatile__("@AO_char_store\n"
AO_THUMB_GO_ARM
AO_BR_ALIGN
"1: " AO_MASK_PTR("%2")
" ldrexb %0, [%2]\n"
AO_MASK_PTR("%2")
" strexb %0, %3, [%2]\n"
" teq %0, #0\n"
" bne 1b\n"
AO_THUMB_RESTORE_MODE
: "=&r" (flag), "+m" (*addr)
: "r" (addr), "r" (value)
: AO_THUMB_SWITCH_CLOBBERS "cc");
}
# define AO_HAVE_char_store
AO_INLINE void AO_short_store(volatile unsigned short *addr,
unsigned short value)
{
int flag;
__asm__ __volatile__("@AO_short_store\n"
AO_THUMB_GO_ARM
AO_BR_ALIGN
"1: " AO_MASK_PTR("%2")
" ldrexh %0, [%2]\n"
AO_MASK_PTR("%2")
" strexh %0, %3, [%2]\n"
" teq %0, #0\n"
" bne 1b\n"
AO_THUMB_RESTORE_MODE
: "=&r" (flag), "+m" (*addr)
: "r" (addr), "r" (value)
: AO_THUMB_SWITCH_CLOBBERS "cc");
}
# define AO_HAVE_short_store
# endif /* AO_ARM_HAVE_LDREXBH */
# endif /* !AO_PREFER_BUILTIN_ATOMICS */
# elif !defined(AO_GCC_ATOMIC_TEST_AND_SET)
# include "../loadstore/atomic_store.h"
/* AO_int_store is defined in ao_t_is_int.h. */
# endif /* !AO_BROKEN_TASKSWITCH_CLREX */
#endif /* AO_ARM_HAVE_LDREX */
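/*
 * Illustrative note (hypothetical client code, same-CPU preemption): the
 * LDREX/STREX based AO_store above matters because, on an OS that fails to
 * clear the exclusive monitor on a context switch, a plain STR done by a
 * preempting thread is not noticed by an interrupted LDREX/STREX loop:
 *
 *   // thread A, preempted between its LDREX and STREX:
 *   (void)AO_fetch_and_add1(&ctr);
 *   // thread B, scheduled in between on the same core:
 *   AO_store(&ctr, 0);         // rather than the plain "ctr = 0"
 *
 * Routing B's write through STREX (which AO_store does when
 * AO_BROKEN_TASKSWITCH_CLREX is defined) clears A's reservation, so A's
 * STREX fails and the increment is retried against the new value.
 */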
#ifndef AO_GCC_ATOMIC_TEST_AND_SET
# include "../test_and_set_t_is_ao_t.h" /* Probably suboptimal */
#ifdef AO_ARM_HAVE_LDREX
/* AO_t/char/short/int load is simple reading. */
/* Unaligned accesses are not guaranteed to be atomic. */
# define AO_ACCESS_CHECK_ALIGNED
# define AO_ACCESS_short_CHECK_ALIGNED
# define AO_ACCESS_int_CHECK_ALIGNED
# include "../all_atomic_only_load.h"
# ifndef AO_HAVE_char_store
# include "../loadstore/char_atomic_store.h"
# include "../loadstore/short_atomic_store.h"
# endif
/* NEC LE-IT: replace the SWAP as recommended by ARM:
"Applies to: ARM11 Cores
Though the SWP instruction will still work with ARM V6 cores, it is
recommended to use the new V6 synchronization instructions. The SWP
instruction produces 'locked' read and write accesses which are atomic,
i.e. another operation cannot be done between these locked accesses which
ties up external bus (AHB, AXI) bandwidth and can increase worst case
interrupt latencies. LDREX, STREX are more flexible, other instructions
can be done between the LDREX and STREX accesses."
*/
#ifndef AO_PREFER_GENERALIZED
#if !defined(AO_FORCE_USE_SWP) || !defined(AO_ARM_HAVE_SWP)
/* But, on the other hand, there could be a considerable performance */
/* degradation in case of a race. E.g., test_atomic.c executing the */
/* test_and_set test on a dual-core ARMv7 processor using LDREX/STREX */
/* showed around 35 times lower performance than the SWP-based variant. */
/* To force use of the SWP instruction, use the -D AO_FORCE_USE_SWP */
/* option (the option is ignored if the SWP instruction is unsupported). */
AO_INLINE AO_TS_VAL_t
AO_test_and_set(volatile AO_TS_t *addr)
{
AO_TS_VAL_t oldval;
int flag;
__asm__ __volatile__("@AO_test_and_set\n"
AO_THUMB_GO_ARM
AO_BR_ALIGN
"1: " AO_MASK_PTR("%3")
" ldrex %0, [%3]\n"
AO_MASK_PTR("%3")
" strex %1, %4, [%3]\n"
" teq %1, #0\n"
" bne 1b\n"
AO_THUMB_RESTORE_MODE
: "=&r"(oldval), "=&r"(flag), "+m"(*addr)
: "r"(addr), "r"(1)
: AO_THUMB_SWITCH_CLOBBERS "cc");
return oldval;
}
# define AO_HAVE_test_and_set
#endif /* !AO_FORCE_USE_SWP */
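/*
 * Usage sketch (illustrative only): AO_test_and_set returns the previous
 * state of the location, so a minimal spin lock can be built on top of it;
 * the _full variant (generalized elsewhere in the library from this
 * primitive plus a barrier) supplies the acquire semantics a lock needs:
 *
 *   static volatile AO_TS_t lock = AO_TS_INITIALIZER;
 *
 *   while (AO_test_and_set_full(&lock) == AO_TS_SET)
 *     ;                        // spin: another thread holds the lock
 *   ...                        // critical section
 *   AO_CLEAR(&lock);           // release the lock
 */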
AO_INLINE AO_t
AO_fetch_and_add(volatile AO_t *p, AO_t incr)
{
AO_t result, tmp;
int flag;
__asm__ __volatile__("@AO_fetch_and_add\n"
AO_THUMB_GO_ARM
AO_BR_ALIGN
"1: " AO_MASK_PTR("%5")
" ldrex %0, [%5]\n" /* get original */
" add %2, %0, %4\n" /* sum up in incr */
AO_MASK_PTR("%5")
" strex %1, %2, [%5]\n" /* store them */
" teq %1, #0\n"
" bne 1b\n"
AO_THUMB_RESTORE_MODE
: "=&r"(result), "=&r"(flag), "=&r"(tmp), "+m"(*p) /* 0..3 */
: "r"(incr), "r"(p) /* 4..5 */
: AO_THUMB_SWITCH_CLOBBERS "cc");
return result;
}
#define AO_HAVE_fetch_and_add
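/*
 * Usage sketch (illustrative only; buffer, BUFFER_SIZE and item are
 * hypothetical): AO_fetch_and_add returns the value observed before the
 * addition, which makes it suitable for handing out unique slots or
 * ticket numbers:
 *
 *   static volatile AO_t next_slot = 0;
 *
 *   AO_t my_slot = AO_fetch_and_add(&next_slot, 1);   // 0, 1, 2, ...
 *   buffer[my_slot % BUFFER_SIZE] = item;
 *
 * Where ordering matters, use the _acquire/_release/_full variants that
 * the library generalizes from this primitive.
 */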
AO_INLINE AO_t
AO_fetch_and_add1(volatile AO_t *p)
{
AO_t result, tmp;
int flag;
__asm__ __volatile__("@AO_fetch_and_add1\n"
AO_THUMB_GO_ARM
AO_BR_ALIGN
"1: " AO_MASK_PTR("%4")
" ldrex %0, [%4]\n" /* get original */
" add %1, %0, #1\n" /* increment */
AO_MASK_PTR("%4")
" strex %2, %1, [%4]\n" /* store them */
" teq %2, #0\n"
" bne 1b\n"
AO_THUMB_RESTORE_MODE
: "=&r"(result), "=&r"(tmp), "=&r"(flag), "+m"(*p)
: "r"(p)
: AO_THUMB_SWITCH_CLOBBERS "cc");
return result;
}
#define AO_HAVE_fetch_and_add1
AO_INLINE AO_t
AO_fetch_and_sub1(volatile AO_t *p)
{
AO_t result, tmp;
int flag;
__asm__ __volatile__("@AO_fetch_and_sub1\n"
AO_THUMB_GO_ARM
AO_BR_ALIGN
"1: " AO_MASK_PTR("%4")
" ldrex %0, [%4]\n" /* get original */
" sub %1, %0, #1\n" /* decrement */
AO_MASK_PTR("%4")
" strex %2, %1, [%4]\n" /* store them */
" teq %2, #0\n"
" bne 1b\n"
AO_THUMB_RESTORE_MODE
: "=&r"(result), "=&r"(tmp), "=&r"(flag), "+m"(*p)
: "r"(p)
: AO_THUMB_SWITCH_CLOBBERS "cc");
return result;
}
#define AO_HAVE_fetch_and_sub1
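/*
 * Usage sketch (illustrative only; "obj" is a hypothetical reference-counted
 * object): the return value of AO_fetch_and_sub1 is the counter value before
 * the decrement, so dropping the last reference is detected by comparing
 * against 1:
 *
 *   if (AO_fetch_and_sub1_full(&obj->refcnt) == 1)
 *     destroy_object(obj);     // hypothetical destructor; last reference gone
 *
 * A production reference counter would typically use a _release decrement
 * plus an acquire barrier before destruction rather than a _full operation.
 */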
AO_INLINE void
AO_and(volatile AO_t *p, AO_t value)
{
AO_t tmp, result;
__asm__ __volatile__("@AO_and\n"
AO_THUMB_GO_ARM
AO_BR_ALIGN
"1: " AO_MASK_PTR("%4")
" ldrex %0, [%4]\n"
" and %1, %0, %3\n"
AO_MASK_PTR("%4")
" strex %0, %1, [%4]\n"
" teq %0, #0\n"
" bne 1b\n"
AO_THUMB_RESTORE_MODE
: "=&r" (tmp), "=&r" (result), "+m" (*p)
: "r" (value), "r" (p)
: AO_THUMB_SWITCH_CLOBBERS "cc");
}
#define AO_HAVE_and
AO_INLINE void
AO_or(volatile AO_t *p, AO_t value)
{
AO_t tmp, result;
__asm__ __volatile__("@AO_or\n"
AO_THUMB_GO_ARM
AO_BR_ALIGN
"1: " AO_MASK_PTR("%4")
" ldrex %0, [%4]\n"
" orr %1, %0, %3\n"
AO_MASK_PTR("%4")
" strex %0, %1, [%4]\n"
" teq %0, #0\n"
" bne 1b\n"
AO_THUMB_RESTORE_MODE
: "=&r" (tmp), "=&r" (result), "+m" (*p)
: "r" (value), "r" (p)
: AO_THUMB_SWITCH_CLOBBERS "cc");
}
#define AO_HAVE_or
AO_INLINE void
AO_xor(volatile AO_t *p, AO_t value)
{
AO_t tmp, result;
__asm__ __volatile__("@AO_xor\n"
AO_THUMB_GO_ARM
AO_BR_ALIGN
"1: " AO_MASK_PTR("%4")
" ldrex %0, [%4]\n"
" eor %1, %0, %3\n"
AO_MASK_PTR("%4")
" strex %0, %1, [%4]\n"
" teq %0, #0\n"
" bne 1b\n"
AO_THUMB_RESTORE_MODE
: "=&r" (tmp), "=&r" (result), "+m" (*p)
: "r" (value), "r" (p)
: AO_THUMB_SWITCH_CLOBBERS "cc");
}
#define AO_HAVE_xor
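/*
 * Usage sketch (illustrative only; the FLAG_* bits are hypothetical): the
 * and/or/xor primitives above are atomic read-modify-write operations with
 * no return value, typically used to maintain a shared word of flag bits:
 *
 *   #define FLAG_DIRTY   ((AO_t)1 << 0)
 *   #define FLAG_CLOSING ((AO_t)1 << 1)
 *   static volatile AO_t flags = 0;
 *
 *   AO_or(&flags, FLAG_DIRTY);            // atomically set a bit
 *   AO_and(&flags, (AO_t)~FLAG_CLOSING);  // atomically clear a bit
 *   if (AO_load(&flags) & FLAG_DIRTY) { ... }
 */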
#endif /* !AO_PREFER_GENERALIZED */
#ifdef AO_ARM_HAVE_LDREXBH
AO_INLINE unsigned char
AO_char_fetch_and_add(volatile unsigned char *p, unsigned char incr)
{
unsigned result, tmp;
int flag;
__asm__ __volatile__("@AO_char_fetch_and_add\n"
AO_THUMB_GO_ARM
AO_BR_ALIGN
"1: " AO_MASK_PTR("%5")
" ldrexb %0, [%5]\n"
" add %2, %0, %4\n"
AO_MASK_PTR("%5")
" strexb %1, %2, [%5]\n"
" teq %1, #0\n"
" bne 1b\n"
AO_THUMB_RESTORE_MODE
: "=&r" (result), "=&r" (flag), "=&r" (tmp), "+m" (*p)
: "r" ((unsigned)incr), "r" (p)
: AO_THUMB_SWITCH_CLOBBERS "cc");
return (unsigned char)result;
}
# define AO_HAVE_char_fetch_and_add
AO_INLINE unsigned short
AO_short_fetch_and_add(volatile unsigned short *p, unsigned short incr)
{
unsigned result, tmp;
int flag;
__asm__ __volatile__("@AO_short_fetch_and_add\n"
AO_THUMB_GO_ARM
AO_BR_ALIGN
"1: " AO_MASK_PTR("%5")
" ldrexh %0, [%5]\n"
" add %2, %0, %4\n"
AO_MASK_PTR("%5")
" strexh %1, %2, [%5]\n"
" teq %1, #0\n"
" bne 1b\n"
AO_THUMB_RESTORE_MODE
: "=&r" (result), "=&r" (flag), "=&r" (tmp), "+m" (*p)
: "r" ((unsigned)incr), "r" (p)
: AO_THUMB_SWITCH_CLOBBERS "cc");
return (unsigned short)result;
}
# define AO_HAVE_short_fetch_and_add
#endif /* AO_ARM_HAVE_LDREXBH */
#ifndef AO_GENERALIZE_ASM_BOOL_CAS
/* Returns nonzero if the comparison succeeded. */
AO_INLINE int
AO_compare_and_swap(volatile AO_t *addr, AO_t old_val, AO_t new_val)
{
AO_t result, tmp;
__asm__ __volatile__("@AO_compare_and_swap\n"
AO_THUMB_GO_ARM
AO_BR_ALIGN
"1: mov %0, #2\n" /* store a flag */
AO_MASK_PTR("%3")
" ldrex %1, [%3]\n" /* get original */
" teq %1, %4\n" /* see if match */
AO_MASK_PTR("%3")
# ifdef __thumb2__
/* TODO: Eliminate the assembler warning that IT blocks containing */
/* wide Thumb instructions are deprecated in ARMv8. */
" it eq\n"
# endif
" strexeq %0, %5, [%3]\n" /* store new one if matched */
" teq %0, #1\n"
" beq 1b\n" /* if update failed, repeat */
AO_THUMB_RESTORE_MODE
: "=&r"(result), "=&r"(tmp), "+m"(*addr)
: "r"(addr), "r"(old_val), "r"(new_val)
: AO_THUMB_SWITCH_CLOBBERS "cc");
return !(result&2); /* if succeeded then return 1 else 0 */
}
# define AO_HAVE_compare_and_swap
#endif /* !AO_GENERALIZE_ASM_BOOL_CAS */
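/*
 * Usage sketch (illustrative only; update_max is a hypothetical helper):
 * the boolean CAS is normally wrapped in a retry loop that recomputes the
 * new value from a freshly loaded old one, e.g. maintaining a shared
 * maximum:
 *
 *   void update_max(volatile AO_t *max, AO_t sample)
 *   {
 *     AO_t cur;
 *     do {
 *       cur = AO_load(max);
 *       if (sample <= cur) return;        // nothing to update
 *     } while (!AO_compare_and_swap(max, cur, sample));
 *   }
 */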
AO_INLINE AO_t
AO_fetch_compare_and_swap(volatile AO_t *addr, AO_t old_val, AO_t new_val)
{
AO_t fetched_val;
int flag;
__asm__ __volatile__("@AO_fetch_compare_and_swap\n"
AO_THUMB_GO_ARM
AO_BR_ALIGN
"1: mov %0, #2\n" /* store a flag */
AO_MASK_PTR("%3")
" ldrex %1, [%3]\n" /* get original */
" teq %1, %4\n" /* see if match */
AO_MASK_PTR("%3")
# ifdef __thumb2__
" it eq\n"
# endif
" strexeq %0, %5, [%3]\n" /* store new one if matched */
" teq %0, #1\n"
" beq 1b\n" /* if update failed, repeat */
AO_THUMB_RESTORE_MODE
: "=&r"(flag), "=&r"(fetched_val), "+m"(*addr)
: "r"(addr), "r"(old_val), "r"(new_val)
: AO_THUMB_SWITCH_CLOBBERS "cc");
return fetched_val;
}
#define AO_HAVE_fetch_compare_and_swap
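/*
 * Usage sketch (illustrative only; struct node and push are hypothetical):
 * the fetch variant returns the value actually seen, which avoids an extra
 * load when retrying, e.g. pushing onto a Treiber-style lock-free stack:
 *
 *   struct node { AO_t next; ... };       // "next" holds a node pointer as AO_t
 *
 *   void push(volatile AO_t *head, struct node *n)
 *   {
 *     AO_t old = AO_load(head);
 *     for (;;) {
 *       n->next = old;
 *       AO_t seen = AO_fetch_compare_and_swap(head, old, (AO_t)n);
 *       if (seen == old) break;           // CAS succeeded
 *       old = seen;                       // retry with the value just seen
 *     }
 *   }
 *
 * A production push would use a _release CAS so the node contents are
 * published before the new head becomes visible.
 */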
#ifdef AO_ARM_HAVE_LDREXD
# include "../standard_ao_double_t.h"
/* "ARM Architecture Reference Manual ARMv7-A/R edition" (chapter */
/* A3.5.3) says that memory accesses caused by LDREXD and STREXD */
/* instructions to doubleword-aligned locations are single-copy */
/* atomic; accesses to 64-bit elements by other instructions might */
/* not be single-copy atomic as they are executed as a sequence of */
/* 32-bit accesses. */
AO_INLINE AO_double_t
AO_double_load(const volatile AO_double_t *addr)
{
AO_double_t result;
/* AO_THUMB_GO_ARM is empty. */
__asm__ __volatile__("@AO_double_load\n"
AO_MASK_PTR("%1")
" ldrexd %0, %H0, [%1]"
: "=&r" (result.AO_whole)
: "r" (addr)
/* : no clobber */);
return result;
}
# define AO_HAVE_double_load
AO_INLINE void
AO_double_store(volatile AO_double_t *addr, AO_double_t new_val)
{
AO_double_t old_val;
int status;
do {
/* AO_THUMB_GO_ARM is empty. */
__asm__ __volatile__("@AO_double_store\n"
AO_MASK_PTR("%3")
" ldrexd %0, %H0, [%3]\n"
AO_MASK_PTR("%3")
" strexd %1, %4, %H4, [%3]"
: "=&r" (old_val.AO_whole), "=&r" (status), "+m" (*addr)
: "r" (addr), "r" (new_val.AO_whole)
: "cc");
} while (AO_EXPECT_FALSE(status));
}
# define AO_HAVE_double_store
AO_INLINE int
AO_double_compare_and_swap(volatile AO_double_t *addr,
AO_double_t old_val, AO_double_t new_val)
{
double_ptr_storage tmp;
int result = 1;
do {
/* AO_THUMB_GO_ARM is empty. */
__asm__ __volatile__("@AO_double_compare_and_swap\n"
AO_MASK_PTR("%1")
" ldrexd %0, %H0, [%1]\n" /* get original to r1 & r2 */
: "=&r"(tmp)
: "r"(addr)
/* : no clobber */);
if (tmp != old_val.AO_whole)
break;
__asm__ __volatile__(
AO_MASK_PTR("%2")
" strexd %0, %3, %H3, [%2]\n" /* store new one if matched */
: "=&r"(result), "+m"(*addr)
: "r" (addr), "r" (new_val.AO_whole)
: "cc");
} while (AO_EXPECT_FALSE(result));
return !result; /* if succeeded then return 1 else 0 */
}
# define AO_HAVE_double_compare_and_swap
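/*
 * Usage sketch (illustrative only; "top" and "new_node" are hypothetical):
 * the double-word CAS is typically used to update a pointer together with a
 * version counter, so that an A-B-A change of the pointer alone is detected:
 *
 *   volatile AO_double_t top;            // {pointer, version} pair
 *   AO_double_t old_v, new_v;
 *
 *   do {
 *     old_v = AO_double_load(&top);
 *     new_v.AO_val1 = (AO_t)new_node;    // pointer half
 *     new_v.AO_val2 = old_v.AO_val2 + 1; // version half
 *   } while (!AO_double_compare_and_swap(&top, old_v, new_v));
 */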
#endif /* AO_ARM_HAVE_LDREXD */
#else
/* pre ARMv6 architectures ... */
/* I found a slide set that, if I read it correctly, claims that */
/* Loads followed by either a Load or Store are ordered, but nothing */
/* else is. */
/* It appears that SWP is the only simple memory barrier. */
#include "../all_aligned_atomic_load_store.h"
/* The code should run correctly on a multi-core ARMv6+ as well. */
#endif /* !AO_ARM_HAVE_LDREX */
#if !defined(AO_HAVE_test_and_set_full) && !defined(AO_HAVE_test_and_set) \
&& defined (AO_ARM_HAVE_SWP) && (!defined(AO_PREFER_GENERALIZED) \
|| !defined(AO_HAVE_fetch_compare_and_swap))
AO_INLINE AO_TS_VAL_t
AO_test_and_set_full(volatile AO_TS_t *addr)
{
AO_TS_VAL_t oldval;
/* SWP on ARM is very similar to XCHG on x86. */
/* The first operand is the result, the second the value */
/* to be stored. Both registers must be different from addr. */
/* Make the address operand an early clobber output so it */
/* doesn't overlap with the other operands. The early clobber */
/* on oldval is necessary to prevent the compiler allocating */
/* them to the same register if they are both unused. */
__asm__ __volatile__("@AO_test_and_set_full\n"
AO_THUMB_GO_ARM
AO_MASK_PTR("%3")
" swp %0, %2, [%3]\n"
/* Ignore GCC "SWP is deprecated for this architecture" */
/* warning here (for ARMv6+). */
AO_THUMB_RESTORE_MODE
: "=&r"(oldval), "=&r"(addr)
: "r"(1), "1"(addr)
: AO_THUMB_SWITCH_CLOBBERS "memory");
return oldval;
}
# define AO_HAVE_test_and_set_full
#endif /* !AO_HAVE_test_and_set[_full] && AO_ARM_HAVE_SWP */
#define AO_T_IS_INT
#else /* AO_GCC_ATOMIC_TEST_AND_SET */
# if defined(__clang__) && !defined(AO_ARM_HAVE_LDREX)
/* As of clang-3.8, it cannot compile __atomic_and/or/xor_fetch */
/* library calls yet for pre ARMv6. */
# define AO_SKIPATOMIC_ANY_and_ANY
# define AO_SKIPATOMIC_ANY_or_ANY
# define AO_SKIPATOMIC_ANY_xor_ANY
# endif
# ifdef AO_ARM_HAVE_LDREXD
# include "../standard_ao_double_t.h"
# endif
# include "generic.h"
#endif /* AO_GCC_ATOMIC_TEST_AND_SET */
#undef AO_ARM_HAVE_DMB
#undef AO_ARM_HAVE_LDREX
#undef AO_ARM_HAVE_LDREXBH
#undef AO_ARM_HAVE_LDREXD
#undef AO_ARM_HAVE_SWP
#undef AO_BR_ALIGN
#undef AO_MASK_PTR
#undef AO_SKIPATOMIC_ANY_and_ANY
#undef AO_SKIPATOMIC_ANY_or_ANY
#undef AO_SKIPATOMIC_ANY_xor_ANY
#undef AO_SKIPATOMIC_char_store
#undef AO_SKIPATOMIC_char_store_release
#undef AO_SKIPATOMIC_int_store
#undef AO_SKIPATOMIC_int_store_release
#undef AO_SKIPATOMIC_short_store
#undef AO_SKIPATOMIC_short_store_release
#undef AO_SKIPATOMIC_store
#undef AO_SKIPATOMIC_store_release
#undef AO_THUMB_GO_ARM
#undef AO_THUMB_RESTORE_MODE
#undef AO_THUMB_SWITCH_CLOBBERS