diff options
author | CyrIng <labs@cyring.fr> | 2024-05-09 13:23:51 +0200 |
---|---|---|
committer | CyrIng <labs@cyring.fr> | 2024-05-09 13:23:51 +0200 |
commit | ced129d32daa89367b422cc3039ede4dd741bd12 (patch) | |
tree | 64eee403f5f3c6bfe7017bcc53896429c34fefc6 | |
parent | b8dae58f034ec8561cfe4ca17afa176b4417a2a3 (diff) |
[AArch64] Removed `LEGACY` ops and optimized `CORE_COUNT` macros
-rw-r--r-- | aarch64/bitasm.h | 310 |
1 files changed, 40 insertions, 270 deletions
diff --git a/aarch64/bitasm.h b/aarch64/bitasm.h index 49a7d3d..43b48e4 100644 --- a/aarch64/bitasm.h +++ b/aarch64/bitasm.h @@ -139,72 +139,6 @@ __asm__ volatile \ #define RDTSC_PMCx1(mem_tsc, ...) \ ASM_RDTSC_PMCx1(x14, x15, ASM_RDTSC, mem_tsc, __VA_ARGS__) -#if defined(LEGACY) && LEGACY > 0 - -#define _BITSET_GPR(_lock, _base, _offset) \ -({ \ - const __typeof__(_base) _shl = 1LLU << _offset; \ - const unsigned char _ret = ((_base) & (_shl)) != 0; \ - _base = (_base) | (_shl); \ - _ret; \ -}) - -#define _BITSET_IMM(_lock, _base, _imm6) \ -({ \ - const __typeof__(_base) _shl = 1LLU << _imm6; \ - const unsigned char _ret = ((_base) & (_shl)) != 0; \ - _base = (_base) | (_shl); \ - _ret; \ -}) - -#define _BITCLR_GPR(_lock, _base, _offset) \ -({ \ - const __typeof__(_base) _shl = 1LLU << _offset; \ - const unsigned char _ret = ((_base) & (_shl)) != 0; \ - _base = (_base) & ~(_shl); \ - _ret; \ -}) - -#define _BITCLR_IMM(_lock, _base, _imm6) \ -({ \ - const __typeof__(_base) _shl = 1LLU << _imm6; \ - const unsigned char _ret = ((_base) & (_shl)) != 0; \ - _base = (_base) & ~(_shl); \ - _ret; \ -}) - -#define _BIT_TEST_GPR(_lock, _base, _offset) \ -({ \ - const unsigned char _ret = ((_base) & (1LLU << _offset)) != 0; \ - _ret; \ -}) - -#define _BIT_TEST_IMM(_lock, _base, _imm6) \ -({ \ - const unsigned char _ret = ((_base) & (1LLU << _imm6)) != 0; \ - _ret; \ -}) - -#define _BITWISEAND(_lock, _opl, _opr) \ -({ \ - const Bit64 _dest __attribute__ ((aligned (8)))=(_opl) & (_opr);\ - _dest; \ -}) - -#define _BITWISEOR(_lock, _opl, _opr) \ -({ \ - const Bit64 _dest __attribute__ ((aligned (8)))=(_opl) | (_opr);\ - _dest; \ -}) - -#define _BITWISEXOR(_lock, _opl, _opr) \ -({ \ - const Bit64 _dest __attribute__ ((aligned (8)))=(_opl) ^ (_opr);\ - _dest; \ -}) - -#else /* LEGACY */ - #define _BITSET_PRE_INST_FULL_LOCK \ "1:" "\n\t" \ "ldxr x11, [%[addr]]" "\n\t" @@ -516,8 +450,6 @@ ASM_RDTSC_PMCx1(x14, x15, ASM_RDTSC, mem_tsc, __VA_ARGS__) _dest; \ }) -#endif /* LEGACY */ - #define BITSET(_lock, _base, _offset) \ ( \ __builtin_constant_p(_offset) ? \ @@ -608,47 +540,6 @@ ASM_RDTSC_PMCx1(x14, x15, ASM_RDTSC, mem_tsc, __VA_ARGS__) #define BITSTOR(_lock, _dest, _src) \ _BITSTOR(_lock, _dest, _src) -#define _BITZERO_PRE_INST_FULL_LOCK \ - "1:" "\n\t" \ - "ldxr x11, [%[addr]]" "\n\t" - -#define _BITZERO_PRE_INST_LOCK_LESS \ - "ldr x11, [%[addr]]" "\n\t" - -#define _BITZERO_POST_INST_FULL_LOCK \ - "stxr w9, x11, [%[addr]]" "\n\t" \ - "cbnz w9, 1b" "\n\t" \ - "dmb ish" - -#define _BITZERO_POST_INST_LOCK_LESS \ - "# NOP" "\n\t" - -#define _BITZERO_CLOBBERS_FULL_LOCK \ - : "cc", "memory", "%w9", "%x11" \ - -#define _BITZERO_CLOBBERS_LOCK_LESS \ - : "cc", "memory", "%x11" \ - -#define _BITZERO(_lock, _src) \ -({ \ - volatile unsigned char _ret; \ - \ - __asm__ volatile \ - ( \ - _BITZERO_PRE_INST_##_lock \ - "cmp xzr, x11" "\n\t" \ - "cset %[ret], eq" "\n\t" \ - _BITZERO_POST_INST_##_lock \ - : [ret] "+r" (_ret) \ - : [addr] "r" (&_src) \ - _BITZERO_CLOBBERS_##_lock \ - ); \ - _ret; \ -}) - -#define BITZERO(_lock, _src) \ - _BITZERO(_lock, _src) - #define BITBSF(_base, _index) \ ({ \ register unsigned char _ret; \ @@ -769,7 +660,7 @@ ASM_RDTSC_PMCx1(x14, x15, ASM_RDTSC, mem_tsc, __VA_ARGS__) #endif #if (CORE_COUNT == 64) -#define BITWISEAND_CC(_lock, _opl, _opr) BITWISEAND(_lock, _opl, _opr) +#define BITWISEAND_CC(_lock, _opl, _opr) _BITWISEAND(_lock, _opl, _opr) #else #define BITWISEAND_CC(_lock, _opl, _opr) \ ({ \ @@ -783,194 +674,73 @@ ASM_RDTSC_PMCx1(x14, x15, ASM_RDTSC, mem_tsc, __VA_ARGS__) #endif #if (CORE_COUNT == 64) -#define BITSTOR_CC(_lock, _dest, _src) BITSTOR(_lock, _dest, _src) +#define BITSTOR_CC(_lock, _dest, _src) _BITSTOR(_lock, _dest, _src) #else #define BITSTOR_CC(_lock, _dest, _src) \ ({ \ unsigned int cw = 0; \ do { \ - BITSTOR(_lock, _dest[cw], _src[cw]); \ + _BITSTOR(_lock, _dest[cw], _src[cw]); \ } while (++cw <= CORE_WORD_TOP(CORE_COUNT)); \ }) #endif -#define ASM_CMPXCHG16B( _lock, _ret, _tmp, \ - _val0, _val1, _reg0, _reg1, _off0, _off1 ) \ - "add " #_tmp " , " #_reg0 ", #" #_off0"\n\t" \ - "ldr " #_val0 " , [" #_tmp "]" "\n\t" \ - \ - "add " #_tmp " , " #_reg1 ", #" #_off0"\n\t" \ - "ldr " #_val1 " , [" #_tmp "]" "\n\t" \ - \ - "cmp " #_val0 " , " #_val1 "\n\t" \ - "cset " #_ret " , eq" "\n\t" \ - \ - "add " #_tmp " , " #_reg0 ", #" #_off1"\n\t" \ - "ldr " #_val0 " , [" #_tmp "]" "\n\t" \ - \ - "add " #_tmp " , " #_reg1 ", #" #_off1"\n\t" \ - "ldr " #_val1 " , [" #_tmp "]" "\n\t" \ - \ - "cmp " #_val0 " , " #_val1 "\n\t" \ - "cset " #_tmp " , eq" "\n\t" \ - \ - "and " #_ret " , " #_ret ", " #_tmp "\n\t" +#define _BITCMP_PRE_INST_FULL_LOCK \ + "1:" "\n\t" \ + "ldxr x11, [%[addr]]" "\n\t" -#if defined(LEGACY) && (LEGACY > 0) -FEAT_MSG("LEGACY Level 1: BITCMP_CC() built without asm cmpxchg16b") +#define _BITCMP_PRE_INST_LOCK_LESS \ + "ldr x11, [%[addr]]" "\n\t" -#if (CORE_COUNT == 64) -#error "LEGACY Level 1: Unimplemented BITCMP_CC() and CORE_COUNT(64)" -#else -#define BITCMP_CC(_lock, _opl, _opr) \ -({ \ - unsigned char ret = 1; \ - unsigned int cw = 0; \ - do { \ - volatile unsigned char _ret; \ - \ - __asm__ volatile \ - ( \ - "cmp %[opr] , %[opl]" "\n\t" \ - "cset %[ret] , eq" \ - : [ret] "=r" (_ret) \ - : [opl] "r" (_opl[cw]), \ - [opr] "r" (_opr[cw]) \ - : "cc", "memory" \ - ); \ - ret &= _ret; \ - } while (++cw <= CORE_WORD_TOP(CORE_COUNT)); \ - ret; \ -}) -#endif -/* --- --- --- cmpxchg16b --- --- --- */ -#elif (CORE_COUNT == 64) - -#define BITCMP_CC(_lock, _opl, _opr) \ -({ \ - volatile unsigned char _ret; \ - \ - __asm__ volatile \ - ( \ - "ldr x14 , %[opr]" "\n\t" \ - "ldr x15 , %[opl]" "\n\t" \ - "cmp x14 , x15" "\n\t" \ - "cset %[ret] , eq" \ - : [ret] "=r" (_ret) \ - : [opl] "m" (_opl), \ - [opr] "m" (_opr) \ - : "cc", "memory", "%x14", "%x15" \ - ); \ - _ret; \ -}) - -#elif (CORE_COUNT == 128) - -#define BITCMP_CC(_lock, _opl, _opr) \ -({ \ - volatile unsigned char _ret; \ - \ - __asm__ volatile \ - ( \ - "mov x14 , %[opr]" "\n\t" \ - "mov x15 , %[opl]" "\n\t" \ - ASM_CMPXCHG16B(_lock, x12,x11,x9,x10,x14,x15, 0, 8)"\n\t"\ - "str x12 , %[ret]" \ - : [ ret] "+m" (_ret) \ - : [ opl] "r" (_opl), \ - [ opr] "r" (_opr) \ - : "cc", "memory", \ - "%x9", "%x10", "%x11", "%x12", "%x14", "%x15" \ - ); \ - _ret; \ -}) +#define _BITCMP_POST_INST_FULL_LOCK \ + "stxr w9, x11, [%[addr]]" "\n\t" \ + "cbnz w9, 1b" "\n\t" \ + "dmb ish" -#elif (CORE_COUNT == 256) +#define _BITCMP_POST_INST_LOCK_LESS \ + "# NOP" -#define BITCMP_CC(_lock, _opl, _opr) \ -({ \ - volatile unsigned char _ret; \ - \ - __asm__ volatile \ - ( \ - "mov x14 , %[opr]" "\n\t" \ - "mov x15 , %[opl]" "\n\t" \ - ASM_CMPXCHG16B(_lock, x13,x11,x9,x10,x14,x15,16,24)"\n\t"\ - ASM_CMPXCHG16B(_lock, x12,x11,x9,x10,x14,x15, 0, 8)"\n\t"\ - "and x12 , x12 , x13" "\n\t" \ - "str x12 , %[ret]" \ - : [ ret] "+m" (_ret) \ - : [ opl] "r" (_opl), \ - [ opr] "r" (_opr) \ - : "cc", "memory", \ - "%x9", "%x10", "%x11", "%x12", "%x13", "%x14", "%x15" \ - ); \ - _ret; \ -}) +#define _BITCMP_CLOBBERS_FULL_LOCK \ + : "cc", "memory", "%w9", "%w10", "%x11" -#elif (CORE_COUNT == 512) +#define _BITCMP_CLOBBERS_LOCK_LESS \ + : "cc", "memory", "%w10", "%x11" -#define BITCMP_CC(_lock, _opl, _opr) \ +#define _BITCMP(_lock, _opl, _opr) \ ({ \ volatile unsigned char _ret; \ \ __asm__ volatile \ ( \ - "mov x14 , %[opr]" "\n\t" \ - "mov x15 , %[opl]" "\n\t" \ - ASM_CMPXCHG16B(_lock, x13,x11,x9,x10,x14,x15,48,56)"\n\t"\ - ASM_CMPXCHG16B(_lock, x12,x11,x9,x10,x14,x15,32,40)"\n\t"\ - "and x12 , x12 , x13" "\n\t" \ - ASM_CMPXCHG16B(_lock, x13,x11,x9,x10,x14,x15,16,24)"\n\t"\ - "and x12 , x12 , x13" "\n\t" \ - ASM_CMPXCHG16B(_lock, x13,x11,x9,x10,x14,x15, 0, 8)"\n\t"\ - "and x12 , x12 , x13" "\n\t" \ - "str x12 , %[ret]" \ - : [ ret] "+m" (_ret) \ - : [ opl] "r" (_opl), \ - [ opr] "r" (_opr) \ - : "cc", "memory", \ - "%x9", "%x10", "%x11", "%x12", "%x13", "%x14", "%x15" \ + _BITCMP_PRE_INST_##_lock \ + "cmp x11, %[opr]" "\n\t" \ + "cset w10, eq" "\n\t" \ + "strb w10, %[ret]" "\n\t" \ + _BITCMP_POST_INST_##_lock \ + : [ret] "=m" (_ret) \ + : [addr] "r" (&_opl), \ + [opr] "Lr" (_opr) \ + _BITCMP_CLOBBERS_##_lock \ ); \ _ret; \ }) -#elif (CORE_COUNT == 1024) +#define BITZERO(_lock, _src) \ + _BITCMP(_lock, _src, 0) +#if (CORE_COUNT == 64) +#define BITCMP_CC(_lock, _opl, _opr) _BITCMP(_lock, _opl, _opr) +#else #define BITCMP_CC(_lock, _opl, _opr) \ ({ \ - volatile unsigned char _ret; \ - \ - __asm__ volatile \ - ( \ - "mov x14 , %[opr]" "\n\t" \ - "mov x15 , %[opl]" "\n\t" \ - ASM_CMPXCHG16B(_lock, x13,x11,x9,x10,x14,x15,112,120)"\n\t"\ - ASM_CMPXCHG16B(_lock, x12,x11,x9,x10,x14,x15,96,104) "\n\t"\ - "and x12 , x12 , x13" "\n\t" \ - ASM_CMPXCHG16B(_lock, x13,x11,x9,x10,x14,x15,80,88)"\n\t"\ - "and x12 , x12 , x13" "\n\t" \ - ASM_CMPXCHG16B(_lock, x13,x11,x9,x10,x14,x15,64,72)"\n\t"\ - "and x12 , x12 , x13" "\n\t" \ - ASM_CMPXCHG16B(_lock, x13,x11,x9,x10,x14,x15,48,56)"\n\t"\ - "and x12 , x12 , x13" "\n\t" \ - ASM_CMPXCHG16B(_lock, x13,x11,x9,x10,x14,x15,32,40)"\n\t"\ - "and x12 , x12 , x13" "\n\t" \ - ASM_CMPXCHG16B(_lock, x13,x11,x9,x10,x14,x15,16,24)"\n\t"\ - "and x12 , x12 , x13" "\n\t" \ - ASM_CMPXCHG16B(_lock, x13,x11,x9,x10,x14,x15, 0, 8)"\n\t"\ - "and x12 , x12 , x13" "\n\t" \ - "str x12 , %[ret]" \ - : [ ret] "+m" (_ret) \ - : [ opl] "r" (_opl), \ - [ opr] "r" (_opr) \ - : "cc", "memory", \ - "%x9", "%x10", "%x11", "%x12", "%x13", "%x14", "%x15" \ - ); \ - _ret; \ + unsigned char ret = 1; \ + unsigned int cw = 0; \ + do { \ + ret &= _BITCMP(_lock, _opl[cw], _opr[cw]); \ + } while (++cw <= CORE_WORD_TOP(CORE_COUNT)); \ + ret; \ }) - -#endif /* LEGACY */ +#endif /* Micro-benchmark. Prerequisites: CPU affinity, RDTSC[P] optionnaly RDPMC */ #if defined(UBENCH) && UBENCH == 1 |