- Notifications
You must be signed in to change notification settings - Fork 15.2k
Open
Description
Ideally Clang should figure out that foo_v1() can be optimized like foo_v2(). Code:
#include <cstdint>

struct Entry {
    int _v[32];
    unsigned char _s[32];
};

__always_inline void common_part(Entry* e, int b, int s) {
    int t = (b*3 + s) ^ 0x9e3779b9;
    e->_v[s] = t;
    e->_s[s] = (unsigned char)((b ^ s) & 255);
}

void foo_v1(Entry* e, bool found, unsigned m, int b) {
    int k = found ? (b & 31) : -1;
    if (k == -1)
        common_part(e, b, __builtin_ctz(m));
    else
        common_part(e, b, k);
}

void foo_v2(Entry* e, bool found, unsigned m, int b) {
    int k = found ? (b & 31) : -1;
    common_part(e, b, k == -1 ? __builtin_ctz(m) : k);
}

Assembly (-std=c++23 -O3 -march=znver1):
foo_v1(Entry*, bool, unsigned int, int):
        lea eax, [rcx+rcx*2]
        test sil, sil
        je .L2
        mov esi, ecx
        mov edx, ecx
        and esi, 31
        and edx, 31
        add eax, esi
        xor ecx, esi
        xor eax, -1640531527
        mov DWORD PTR [rdi+rdx*4], eax
        mov BYTE PTR [rdi+128+rdx], cl
        ret
.L2:
        tzcnt esi, edx
        add eax, esi
        xor ecx, esi
        xor eax, -1640531527
        mov DWORD PTR [rdi+rsi*4], eax
        mov BYTE PTR [rdi+128+rsi], cl
        ret
foo_v2(Entry*, bool, unsigned int, int):
        mov eax, ecx
        tzcnt edx, edx
        and eax, 31
        test sil, sil
        cmovne edx, eax
        lea eax, [rcx+rcx*2]
        add eax, edx
        movsx rsi, edx
        xor ecx, edx
        xor eax, -1640531527
        mov DWORD PTR [rdi+rsi*4], eax
        mov BYTE PTR [rdi+128+rsi], cl
        ret

Godbolt: https://godbolt.org/z/bK13denas