Skip to content

Commit 5b76056

Browse files
authored
[X86] narrowBitOpRMW - add tests showing failure to fold to BTC/BTR/BTS RMW patterns (#165758)
The fold currently fails if the store's chain doesn't directly touch the RMW load source (we should be using reachesChainWithoutSideEffects to avoid this). It also fails if the stored value has additional uses (we could update other uses of the whole stored value to reload after the new narrow store).
1 parent e9368a0 commit 5b76056

File tree

1 file changed

+288
-0
lines changed

1 file changed

+288
-0
lines changed

llvm/test/CodeGen/X86/bittest-big-integer.ll

Lines changed: 288 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1676,3 +1676,291 @@ define i1 @test_ne_i4096(ptr %word, i32 %position) nounwind {
16761676
%cmp = icmp ne i4096 %test, 0
16771677
ret i1 %cmp
16781678
}
1679+
1680+
; Special Cases
1681+
1682+
; Multiple uses of the stored value
; Flips bit (%position & 127) of the i128 at %word, stores the result back, and
; returns true when the updated value is non-zero. The stored value %res also
; feeds the icmp, so it has a use besides the store. The expected output
; recorded in the check lines performs the xor and the store across the whole
; i128 rather than using a narrowed bit-complement instruction.
1683+
define i1 @complement_cmpz_i128(ptr %word, i32 %position) nounwind {
1684+
; X86-LABEL: complement_cmpz_i128:
1685+
; X86: # %bb.0:
1686+
; X86-NEXT: pushl %ebp
1687+
; X86-NEXT: movl %esp, %ebp
1688+
; X86-NEXT: pushl %ebx
1689+
; X86-NEXT: pushl %edi
1690+
; X86-NEXT: pushl %esi
1691+
; X86-NEXT: andl $-16, %esp
1692+
; X86-NEXT: subl $64, %esp
1693+
; X86-NEXT: movzbl 12(%ebp), %ecx
1694+
; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
1695+
; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
1696+
; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
1697+
; X86-NEXT: movl $1, {{[0-9]+}}(%esp)
1698+
; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
1699+
; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
1700+
; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
1701+
; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
1702+
; X86-NEXT: movl %ecx, %eax
1703+
; X86-NEXT: shrb $3, %al
1704+
; X86-NEXT: andb $12, %al
1705+
; X86-NEXT: negb %al
1706+
; X86-NEXT: movsbl %al, %esi
1707+
; X86-NEXT: movl 36(%esp,%esi), %eax
1708+
; X86-NEXT: movl 40(%esp,%esi), %edi
1709+
; X86-NEXT: movl %edi, %edx
1710+
; X86-NEXT: shldl %cl, %eax, %edx
1711+
; X86-NEXT: movl 32(%esp,%esi), %ebx
1712+
; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
1713+
; X86-NEXT: movl 44(%esp,%esi), %esi
1714+
; X86-NEXT: shldl %cl, %edi, %esi
1715+
; X86-NEXT: movl %ebx, %edi
1716+
; X86-NEXT: shll %cl, %edi
1717+
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
1718+
; X86-NEXT: shldl %cl, %ebx, %eax
1719+
; X86-NEXT: movl 8(%ebp), %ecx
1720+
; X86-NEXT: xorl 12(%ecx), %esi
1721+
; X86-NEXT: xorl 8(%ecx), %edx
1722+
; X86-NEXT: xorl 4(%ecx), %eax
1723+
; X86-NEXT: xorl (%ecx), %edi
1724+
; X86-NEXT: movl %edx, 8(%ecx)
1725+
; X86-NEXT: movl %esi, 12(%ecx)
1726+
; X86-NEXT: movl %edi, (%ecx)
1727+
; X86-NEXT: movl %eax, 4(%ecx)
1728+
; X86-NEXT: orl %esi, %eax
1729+
; X86-NEXT: orl %edx, %edi
1730+
; X86-NEXT: orl %eax, %edi
1731+
; X86-NEXT: setne %al
1732+
; X86-NEXT: leal -12(%ebp), %esp
1733+
; X86-NEXT: popl %esi
1734+
; X86-NEXT: popl %edi
1735+
; X86-NEXT: popl %ebx
1736+
; X86-NEXT: popl %ebp
1737+
; X86-NEXT: retl
1738+
;
1739+
; SSE-LABEL: complement_cmpz_i128:
1740+
; SSE: # %bb.0:
1741+
; SSE-NEXT: movl %esi, %ecx
1742+
; SSE-NEXT: movl $1, %eax
1743+
; SSE-NEXT: xorl %edx, %edx
1744+
; SSE-NEXT: shldq %cl, %rax, %rdx
1745+
; SSE-NEXT: shlq %cl, %rax
1746+
; SSE-NEXT: xorl %esi, %esi
1747+
; SSE-NEXT: testb $64, %cl
1748+
; SSE-NEXT: cmovneq %rax, %rdx
1749+
; SSE-NEXT: cmovneq %rsi, %rax
1750+
; SSE-NEXT: xorq 8(%rdi), %rdx
1751+
; SSE-NEXT: xorq (%rdi), %rax
1752+
; SSE-NEXT: movq %rax, (%rdi)
1753+
; SSE-NEXT: movq %rdx, 8(%rdi)
1754+
; SSE-NEXT: orq %rdx, %rax
1755+
; SSE-NEXT: setne %al
1756+
; SSE-NEXT: retq
1757+
;
1758+
; AVX2-LABEL: complement_cmpz_i128:
1759+
; AVX2: # %bb.0:
1760+
; AVX2-NEXT: movl %esi, %ecx
1761+
; AVX2-NEXT: movl $1, %eax
1762+
; AVX2-NEXT: xorl %edx, %edx
1763+
; AVX2-NEXT: shldq %cl, %rax, %rdx
1764+
; AVX2-NEXT: xorl %esi, %esi
1765+
; AVX2-NEXT: shlxq %rcx, %rax, %rax
1766+
; AVX2-NEXT: testb $64, %cl
1767+
; AVX2-NEXT: cmovneq %rax, %rdx
1768+
; AVX2-NEXT: cmovneq %rsi, %rax
1769+
; AVX2-NEXT: xorq 8(%rdi), %rdx
1770+
; AVX2-NEXT: xorq (%rdi), %rax
1771+
; AVX2-NEXT: movq %rax, (%rdi)
1772+
; AVX2-NEXT: movq %rdx, 8(%rdi)
1773+
; AVX2-NEXT: orq %rdx, %rax
1774+
; AVX2-NEXT: setne %al
1775+
; AVX2-NEXT: retq
1776+
;
1777+
; AVX512-LABEL: complement_cmpz_i128:
1778+
; AVX512: # %bb.0:
1779+
; AVX512-NEXT: movl %esi, %ecx
1780+
; AVX512-NEXT: xorl %eax, %eax
1781+
; AVX512-NEXT: movl $1, %edx
1782+
; AVX512-NEXT: xorl %esi, %esi
1783+
; AVX512-NEXT: shldq %cl, %rdx, %rsi
1784+
; AVX512-NEXT: shlxq %rcx, %rdx, %rdx
1785+
; AVX512-NEXT: testb $64, %cl
1786+
; AVX512-NEXT: cmovneq %rdx, %rsi
1787+
; AVX512-NEXT: cmovneq %rax, %rdx
1788+
; AVX512-NEXT: xorq 8(%rdi), %rsi
1789+
; AVX512-NEXT: xorq (%rdi), %rdx
1790+
; AVX512-NEXT: movq %rdx, (%rdi)
1791+
; AVX512-NEXT: movq %rsi, 8(%rdi)
1792+
; AVX512-NEXT: orq %rsi, %rdx
1793+
; AVX512-NEXT: setne %al
1794+
; AVX512-NEXT: retq
; Clamp the bit index to 0..127 and widen it to i128.
1795+
%rem = and i32 %position, 127
1796+
%ofs = zext nneg i32 %rem to i128
; Build a mask with only the selected bit set.
1797+
%bit = shl nuw i128 1, %ofs
; Read-modify-write: flip the selected bit and write the whole value back.
1798+
%ld = load i128, ptr %word
1799+
%res = xor i128 %ld, %bit
1800+
store i128 %res, ptr %word
; Second use of the full stored value: report whether it is non-zero.
1801+
%cmp = icmp ne i128 %res, 0
1802+
ret i1 %cmp
1803+
}
1804+
1805+
; Multiple loads in store chain
; Clears bit (%position & 127) of the i128 at %word and returns the i32 loaded
; from %p if that bit was previously clear, otherwise 0. The load from %p sits
; between the load and the store of %word, so the store is preceded by more
; than one load. The expected output recorded in the check lines tests the bit
; with a 32-bit btl but still applies the clearing mask with and-to-memory
; across the whole i128 rather than a narrowed bit-reset instruction.
1806+
define i32 @reset_multiload_i128(ptr %word, i32 %position, ptr %p) nounwind {
1807+
; X86-LABEL: reset_multiload_i128:
1808+
; X86: # %bb.0:
1809+
; X86-NEXT: pushl %ebp
1810+
; X86-NEXT: movl %esp, %ebp
1811+
; X86-NEXT: pushl %ebx
1812+
; X86-NEXT: pushl %edi
1813+
; X86-NEXT: pushl %esi
1814+
; X86-NEXT: andl $-16, %esp
1815+
; X86-NEXT: subl $64, %esp
1816+
; X86-NEXT: movl 12(%ebp), %ecx
1817+
; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
1818+
; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
1819+
; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
1820+
; X86-NEXT: movl $1, {{[0-9]+}}(%esp)
1821+
; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
1822+
; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
1823+
; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
1824+
; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
1825+
; X86-NEXT: movl %ecx, %eax
1826+
; X86-NEXT: shrb $3, %al
1827+
; X86-NEXT: andb $12, %al
1828+
; X86-NEXT: negb %al
1829+
; X86-NEXT: movsbl %al, %edi
1830+
; X86-NEXT: movl 36(%esp,%edi), %edx
1831+
; X86-NEXT: movl 40(%esp,%edi), %ebx
1832+
; X86-NEXT: movl %ebx, %esi
1833+
; X86-NEXT: shldl %cl, %edx, %esi
1834+
; X86-NEXT: movl 32(%esp,%edi), %eax
1835+
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
1836+
; X86-NEXT: movl 44(%esp,%edi), %edi
1837+
; X86-NEXT: shldl %cl, %ebx, %edi
1838+
; X86-NEXT: movl %eax, %ebx
1839+
; X86-NEXT: # kill: def $cl killed $cl killed $ecx
1840+
; X86-NEXT: shll %cl, %ebx
1841+
; X86-NEXT: notl %ebx
1842+
; X86-NEXT: movl 16(%ebp), %eax
1843+
; X86-NEXT: movl (%eax), %eax
1844+
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
1845+
; X86-NEXT: movl 12(%ebp), %eax
1846+
; X86-NEXT: andl $96, %eax
1847+
; X86-NEXT: shrl $3, %eax
1848+
; X86-NEXT: movl 8(%ebp), %ecx
1849+
; X86-NEXT: movl (%ecx,%eax), %eax
1850+
; X86-NEXT: andl %ebx, (%ecx)
1851+
; X86-NEXT: movl 12(%ebp), %ecx
1852+
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
1853+
; X86-NEXT: shldl %cl, %ebx, %edx
1854+
; X86-NEXT: notl %edx
1855+
; X86-NEXT: movl 8(%ebp), %ebx
1856+
; X86-NEXT: andl %edx, 4(%ebx)
1857+
; X86-NEXT: notl %esi
1858+
; X86-NEXT: andl %esi, 8(%ebx)
1859+
; X86-NEXT: notl %edi
1860+
; X86-NEXT: andl %edi, 12(%ebx)
1861+
; X86-NEXT: btl %ecx, %eax
1862+
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
1863+
; X86-NEXT: jae .LBB22_2
1864+
; X86-NEXT: # %bb.1:
1865+
; X86-NEXT: xorl %eax, %eax
1866+
; X86-NEXT: .LBB22_2:
1867+
; X86-NEXT: leal -12(%ebp), %esp
1868+
; X86-NEXT: popl %esi
1869+
; X86-NEXT: popl %edi
1870+
; X86-NEXT: popl %ebx
1871+
; X86-NEXT: popl %ebp
1872+
; X86-NEXT: retl
1873+
;
1874+
; SSE-LABEL: reset_multiload_i128:
1875+
; SSE: # %bb.0:
1876+
; SSE-NEXT: movl %esi, %ecx
1877+
; SSE-NEXT: movl $1, %esi
1878+
; SSE-NEXT: xorl %r8d, %r8d
1879+
; SSE-NEXT: shldq %cl, %rsi, %r8
1880+
; SSE-NEXT: xorl %eax, %eax
1881+
; SSE-NEXT: shlq %cl, %rsi
1882+
; SSE-NEXT: testb $64, %cl
1883+
; SSE-NEXT: cmovneq %rsi, %r8
1884+
; SSE-NEXT: cmovneq %rax, %rsi
1885+
; SSE-NEXT: notq %r8
1886+
; SSE-NEXT: notq %rsi
1887+
; SSE-NEXT: movl %ecx, %r9d
1888+
; SSE-NEXT: andl $96, %r9d
1889+
; SSE-NEXT: shrl $3, %r9d
1890+
; SSE-NEXT: movl (%rdi,%r9), %r9d
1891+
; SSE-NEXT: btl %ecx, %r9d
1892+
; SSE-NEXT: jb .LBB22_2
1893+
; SSE-NEXT: # %bb.1:
1894+
; SSE-NEXT: movl (%rdx), %eax
1895+
; SSE-NEXT: .LBB22_2:
1896+
; SSE-NEXT: andq %r8, 8(%rdi)
1897+
; SSE-NEXT: andq %rsi, (%rdi)
1898+
; SSE-NEXT: # kill: def $eax killed $eax killed $rax
1899+
; SSE-NEXT: retq
1900+
;
1901+
; AVX2-LABEL: reset_multiload_i128:
1902+
; AVX2: # %bb.0:
1903+
; AVX2-NEXT: movl %esi, %ecx
1904+
; AVX2-NEXT: xorl %eax, %eax
1905+
; AVX2-NEXT: movl $1, %r8d
1906+
; AVX2-NEXT: xorl %esi, %esi
1907+
; AVX2-NEXT: shldq %cl, %r8, %rsi
1908+
; AVX2-NEXT: shlxq %rcx, %r8, %r8
1909+
; AVX2-NEXT: testb $64, %cl
1910+
; AVX2-NEXT: cmovneq %r8, %rsi
1911+
; AVX2-NEXT: cmovneq %rax, %r8
1912+
; AVX2-NEXT: notq %rsi
1913+
; AVX2-NEXT: notq %r8
1914+
; AVX2-NEXT: movl %ecx, %r9d
1915+
; AVX2-NEXT: andl $96, %r9d
1916+
; AVX2-NEXT: shrl $3, %r9d
1917+
; AVX2-NEXT: movl (%rdi,%r9), %r9d
1918+
; AVX2-NEXT: btl %ecx, %r9d
1919+
; AVX2-NEXT: jb .LBB22_2
1920+
; AVX2-NEXT: # %bb.1:
1921+
; AVX2-NEXT: movl (%rdx), %eax
1922+
; AVX2-NEXT: .LBB22_2:
1923+
; AVX2-NEXT: andq %rsi, 8(%rdi)
1924+
; AVX2-NEXT: andq %r8, (%rdi)
1925+
; AVX2-NEXT: # kill: def $eax killed $eax killed $rax
1926+
; AVX2-NEXT: retq
1927+
;
1928+
; AVX512-LABEL: reset_multiload_i128:
1929+
; AVX512: # %bb.0:
1930+
; AVX512-NEXT: movl %esi, %ecx
1931+
; AVX512-NEXT: movl $1, %r8d
1932+
; AVX512-NEXT: xorl %esi, %esi
1933+
; AVX512-NEXT: shldq %cl, %r8, %rsi
1934+
; AVX512-NEXT: xorl %eax, %eax
1935+
; AVX512-NEXT: shlxq %rcx, %r8, %r8
1936+
; AVX512-NEXT: testb $64, %cl
1937+
; AVX512-NEXT: cmovneq %r8, %rsi
1938+
; AVX512-NEXT: cmovneq %rax, %r8
1939+
; AVX512-NEXT: notq %rsi
1940+
; AVX512-NEXT: notq %r8
1941+
; AVX512-NEXT: movl %ecx, %r9d
1942+
; AVX512-NEXT: andl $96, %r9d
1943+
; AVX512-NEXT: shrl $3, %r9d
1944+
; AVX512-NEXT: movl (%rdi,%r9), %r9d
1945+
; AVX512-NEXT: btl %ecx, %r9d
1946+
; AVX512-NEXT: jb .LBB22_2
1947+
; AVX512-NEXT: # %bb.1:
1948+
; AVX512-NEXT: movl (%rdx), %eax
1949+
; AVX512-NEXT: .LBB22_2:
1950+
; AVX512-NEXT: andq %rsi, 8(%rdi)
1951+
; AVX512-NEXT: andq %r8, (%rdi)
1952+
; AVX512-NEXT: # kill: def $eax killed $eax killed $rax
1953+
; AVX512-NEXT: retq
; Clamp the bit index to 0..127 and widen it to i128.
1954+
%rem = and i32 %position, 127
1955+
%ofs = zext nneg i32 %rem to i128
; %bit has only the selected bit set; %mask is its complement.
1956+
%bit = shl nuw i128 1, %ofs
1957+
%mask = xor i128 %bit, -1
; Load the big integer, then load the unrelated i32 from %p before the store.
1958+
%ld = load i128, ptr %word
1959+
%sel = load i32, ptr %p
; Extract the old state of the bit and compute the value with it cleared.
1960+
%test = and i128 %ld, %bit
1961+
%res = and i128 %ld, %mask
; %cmp is true when the bit was clear before the update.
1962+
%cmp = icmp eq i128 %test, 0
1963+
store i128 %res, ptr %word
; Return the value loaded from %p if the bit was clear, otherwise 0.
1964+
%ret = select i1 %cmp, i32 %sel, i32 0
1965+
ret i32 %ret
1966+
}

0 commit comments

Comments
 (0)