diff options
| field | value | date |
|---|---|---|
| author | CyrIng <labs@cyring.fr> | 2024-05-04 16:46:30 +0200 |
| committer | CyrIng <labs@cyring.fr> | 2024-05-04 16:46:30 +0200 |
| commit | 25434d9ddfa2c3e1b7c7faa806d3258d50813fed (patch) | |
| tree | afc9551a5a73fdbabc7b52711ac8c2d35a2c8cca | |
| parent | 29c8c1f8f4b23de42ff2bff1d809b8bc09ab8c17 (diff) | |

[AArch64] Providing atomic bit ops with Load/Store Exclusive

| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | aarch64/arm_reg.h | 2 |
| -rw-r--r-- | aarch64/bitasm.h | 194 |
| -rw-r--r-- | aarch64/corefreqk.c | 2 |

3 files changed, 132 insertions, 66 deletions
diff --git a/aarch64/arm_reg.h b/aarch64/arm_reg.h index 868f7c1..34e6125 100644 --- a/aarch64/arm_reg.h +++ b/aarch64/arm_reg.h @@ -390,7 +390,7 @@ typedef union SHA1 : 12-8, SHA2 : 16-12, CRC32 : 20-16, - CAS : 24-20, + Atomic : 24-20, TME : 28-24, RDM : 32-28, SHA3 : 36-32, diff --git a/aarch64/bitasm.h b/aarch64/bitasm.h index 18bc77c..7669ddf 100644 --- a/aarch64/bitasm.h +++ b/aarch64/bitasm.h @@ -53,8 +53,8 @@ typedef unsigned int Bit32; #define InitCC(_val) {[0 ... CORE_WORD_TOP(CORE_COUNT) - 1] = _val} #endif -#define LOCKLESS " " -#define BUS_LOCK "lock " +#define LOCKLESS LOCK_LESS +#define BUS_LOCK FULL_LOCK #define BARRIER(pfx) \ __asm__ volatile \ @@ -205,142 +205,196 @@ ASM_RDTSC_PMCx1(x14, x15, ASM_RDTSC, mem_tsc, __VA_ARGS__) #else /* LEGACY */ +#define _BITSET_PRE_INST_FULL_LOCK \ + "1:" "\n\t" \ + "ldxr x11, [%[addr]]" "\n\t" \ + +#define _BITSET_PRE_INST_LOCK_LESS \ + "ldr x11, [%[addr]]" "\n\t" \ + +#define _BITSET_COMMON_INST \ + "tst x11, x12" "\n\t" \ + "cset w10, ne" "\n\t" \ + "strb w10, %[ret]" "\n\t" \ + "orr x11, x11, x12" "\n\t" \ + +#define _BITSET_POST_INST_FULL_LOCK \ + "stxr w9, x11, [%[addr]]" "\n\t" \ + "cbnz w9, 1b" "\n\t" \ + "dmb ish" + +#define _BITSET_POST_INST_LOCK_LESS \ + "str x11, [%[addr]]" \ + +#define _BITSET_CLOBBERS_FULL_LOCK \ + : "cc", "memory", "%w9", "%w10", "%x11", "%x12" \ + +#define _BITSET_CLOBBERS_LOCK_LESS \ + : "cc", "memory", "%w10", "%x11", "%x12" \ + #define _BITSET_GPR(_lock, _base, _offset) \ ({ \ - const __typeof__(_base) *_adr = &_base; \ volatile unsigned char _ret; \ \ __asm__ volatile \ ( \ "mov x12, #1" "\n\t" \ "lsl x12, x12, %[offset]" "\n\t" \ - "ldr x13, %[base]" "\n\t" \ - "ldr x11, [x13]" "\n\t" \ - "tst x11, x12" "\n\t" \ - "cset w10, ne" "\n\t" \ - "orr x11, x11, x12" "\n\t" \ - "str x11, [x13]" "\n\t" \ - "strb w10, %[ret]" \ + _BITSET_PRE_INST_##_lock \ + _BITSET_COMMON_INST \ + _BITSET_POST_INST_##_lock \ : [ret] "=m" (_ret) \ - : [base] "m" (_adr), \ + : [addr] "r" (&_base), \ 
[offset] "r" (_offset) \ - : "cc", "memory", "%w10", "%x11", "%x12", "%x13" \ + _BITSET_CLOBBERS_##_lock \ ); \ _ret; \ }) #define _BITSET_IMM(_lock, _base, _imm6) \ ({ \ - const __typeof__(_base) *_adr = &_base; \ volatile unsigned char _ret; \ \ __asm__ volatile \ ( \ "mov x12, #1" "\n\t" \ "lsl x12, x12, %[imm6]" "\n\t" \ - "ldr x13, %[base]" "\n\t" \ - "ldr x11, [x13]" "\n\t" \ - "tst x11, x12" "\n\t" \ - "cset w10, ne" "\n\t" \ - "orr x11, x11, x12" "\n\t" \ - "str x11, [x13]" "\n\t" \ - "strb w10, %[ret]" \ + _BITSET_PRE_INST_##_lock \ + _BITSET_COMMON_INST \ + _BITSET_POST_INST_##_lock \ : [ret] "=m" (_ret) \ - : [base] "m" (_adr), \ + : [addr] "r" (&_base), \ [imm6] "i" (_imm6) \ - : "cc", "memory", "%w10", "%x11", "%x12", "%x13" \ + _BITSET_CLOBBERS_##_lock \ ); \ _ret; \ }) +#define _BITCLR_PRE_INST_FULL_LOCK \ + "1:" "\n\t" \ + "ldxr x11, [%[addr]]" "\n\t" \ + +#define _BITCLR_PRE_INST_LOCK_LESS \ + "ldr x11, [%[addr]]" "\n\t" \ + +#define _BITCLR_COMMON_INST \ + "tst x11, x12" "\n\t" \ + "cset w10, ne" "\n\t" \ + "strb w10, %[ret]" "\n\t" \ + "bic x11, x11, x12" "\n\t" \ + +#define _BITCLR_POST_INST_FULL_LOCK \ + "stxr w9, x11, [%[addr]]" "\n\t" \ + "cbnz w9, 1b" "\n\t" \ + "dmb ish" + +#define _BITCLR_POST_INST_LOCK_LESS \ + "str x11, [%[addr]]" \ + +#define _BITCLR_CLOBBERS_FULL_LOCK \ + : "cc", "memory", "%w9", "%w10", "%x11", "%x12" \ + +#define _BITCLR_CLOBBERS_LOCK_LESS \ + : "cc", "memory", "%w10", "%x11", "%x12" \ + #define _BITCLR_GPR(_lock, _base, _offset) \ ({ \ - const __typeof__(_base) *_adr = &_base; \ volatile unsigned char _ret; \ \ __asm__ volatile \ ( \ "mov x12, #1" "\n\t" \ "lsl x12, x12, %[offset]" "\n\t" \ - "ldr x13, %[base]" "\n\t" \ - "ldr x11, [x13]" "\n\t" \ - "tst x11, x12" "\n\t" \ - "cset w10, ne" "\n\t" \ - "bic x11, x11, x12" "\n\t" \ - "str x11, [x13]" "\n\t" \ - "strb w10, %[ret]" \ + _BITCLR_PRE_INST_##_lock \ + _BITCLR_COMMON_INST \ + _BITCLR_POST_INST_##_lock \ : [ret] "=m" (_ret) \ - : [base] "m" (_adr), \ + : 
[addr] "r" (&_base), \ [offset] "r" (_offset) \ - : "cc", "memory", "%w10", "%x11", "%x12", "%x13" \ + _BITCLR_CLOBBERS_##_lock \ ); \ _ret; \ }) #define _BITCLR_IMM(_lock, _base, _imm6) \ ({ \ - const __typeof__(_base) *_adr = &_base; \ volatile unsigned char _ret; \ \ __asm__ volatile \ ( \ "mov x12, #1" "\n\t" \ "lsl x12, x12, %[imm6]" "\n\t" \ - "ldr x13, %[base]" "\n\t" \ - "ldr x11, [x13]" "\n\t" \ - "tst x11, x12" "\n\t" \ - "cset w10, ne" "\n\t" \ - "bic x11, x11, x12" "\n\t" \ - "str x11, [x13]" "\n\t" \ - "strb w10, %[ret]" \ + _BITCLR_PRE_INST_##_lock \ + _BITCLR_COMMON_INST \ + _BITCLR_POST_INST_##_lock \ : [ret] "=m" (_ret) \ - : [base] "m" (_adr), \ + : [addr] "r" (&_base), \ [imm6] "i" (_imm6) \ - : "cc", "memory", "%w10", "%x11", "%x12", "%x13" \ + _BITCLR_CLOBBERS_##_lock \ ); \ _ret; \ }) -#define _BIT_TEST_GPR(_base, _offset) \ +#define _BIT_TEST_PRE_INST_FULL_LOCK \ + "1:" "\n\t" \ + "ldxr x11, [%[addr]]" "\n\t" \ + +#define _BIT_TEST_PRE_INST_LOCK_LESS \ + "ldr x11, [%[addr]]" "\n\t" \ + +#define _BIT_TEST_COMMON_INST \ + "tst x11, x12" "\n\t" \ + "cset w10, ne" "\n\t" \ + "strb w10, %[ret]" "\n\t" \ + +#define _BIT_TEST_POST_INST_FULL_LOCK \ + "stxr w9, x11, [%[addr]]" "\n\t" \ + "cbnz w9, 1b" "\n\t" \ + "dmb ish" + +#define _BIT_TEST_POST_INST_LOCK_LESS \ + "# NOP" + +#define _BIT_TEST_CLOBBERS_FULL_LOCK \ + : "cc", "memory", "%w9", "%w10", "%x11", "%x12" \ + +#define _BIT_TEST_CLOBBERS_LOCK_LESS \ + : "cc", "memory", "%w10", "%x11", "%x12" \ + +#define _BIT_TEST_GPR(_lock, _base, _offset) \ ({ \ - const __typeof__(_base) *_adr = &_base; \ volatile unsigned char _ret; \ \ __asm__ volatile \ ( \ "mov x12, #1" "\n\t" \ "lsl x12, x12, %[offset]" "\n\t" \ - "ldr x13, %[base]" "\n\t" \ - "ldr x11, [x13]" "\n\t" \ - "tst x11, x12" "\n\t" \ - "cset w10, ne" "\n\t" \ - "strb w10, %[ret]" \ + _BIT_TEST_PRE_INST_##_lock \ + _BIT_TEST_COMMON_INST \ + _BIT_TEST_POST_INST_##_lock \ : [ret] "=m" (_ret) \ - : [base] "m" (_adr), \ + : [addr] "r" (&_base), \ 
[offset] "r" (_offset) \ - : "cc", "memory", "%w10", "%x11", "%x12", "%x13" \ + _BIT_TEST_CLOBBERS_##_lock \ ); \ _ret; \ }) -#define _BIT_TEST_IMM(_base, _imm6) \ +#define _BIT_TEST_IMM(_lock, _base, _imm6) \ ({ \ - const __typeof__(_base) *_adr = &_base; \ volatile unsigned char _ret; \ \ __asm__ volatile \ ( \ "mov x12, #1" "\n\t" \ "lsl x12, x12, %[imm6]" "\n\t" \ - "ldr x13, %[base]" "\n\t" \ - "ldr x11, [x13]" "\n\t" \ - "tst x11, x12" "\n\t" \ - "cset w10, ne" "\n\t" \ - "strb w10, %[ret]" \ + _BIT_TEST_PRE_INST_##_lock \ + _BIT_TEST_COMMON_INST \ + _BIT_TEST_POST_INST_##_lock \ : [ret] "=m" (_ret) \ - : [base] "m" (_adr), \ + : [addr] "r" (&_base), \ [imm6] "i" (_imm6) \ - : "cc", "memory", "%w10", "%x11", "%x12", "%x13" \ + _BIT_TEST_CLOBBERS_##_lock \ ); \ _ret; \ }) @@ -409,14 +463,26 @@ ASM_RDTSC_PMCx1(x14, x15, ASM_RDTSC, mem_tsc, __VA_ARGS__) : _BITCLR_GPR(_lock, _base, _offset) \ ) -#define BITVAL_2xPARAM(_base, _offset) \ +#define BITVAL_3xPARAM(_lock, _base, _offset) \ ( \ __builtin_constant_p(_offset) ? \ - _BIT_TEST_IMM(_base, _offset) \ - : _BIT_TEST_GPR(_base, _offset) \ + _BIT_TEST_IMM(_lock, _base, _offset) \ + : _BIT_TEST_GPR(_lock, _base, _offset) \ +) + +#define BITVAL_2xPARAM(_base, _offset) \ +( \ + BITVAL_3xPARAM(LOCKLESS, _base, _offset) \ ) -#define BITVAL(...) BITVAL_2xPARAM( __VA_ARGS__ ) +#define BITVAL_DISPATCH( _1, _2, _3, BITVAL_CURSOR, ... ) \ + BITVAL_CURSOR + +#define BITVAL(...) \ + BITVAL_DISPATCH(__VA_ARGS__, BITVAL_3xPARAM, /*3*/ \ + BITVAL_2xPARAM, /*2*/ \ + NULL) /*1*/ \ + ( __VA_ARGS__ ) #define BITCPL(_src) \ ({ \ diff --git a/aarch64/corefreqk.c b/aarch64/corefreqk.c index e1d006b..46eec31 100644 --- a/aarch64/corefreqk.c +++ b/aarch64/corefreqk.c @@ -659,7 +659,7 @@ static void Query_Features(void *pArg) iArg->Features->CRC32 = 0; break; } - switch (isar0.CAS) { + switch (isar0.Atomic) { case 0b0011: iArg->Features->LSE128 = 1; fallthrough; |