@@ -1676,3 +1676,291 @@ define i1 @test_ne_i4096(ptr %word, i32 %position) nounwind {
   %cmp = icmp ne i4096 %test, 0
   ret i1 %cmp
 }
+
+; Special Cases
+
+; Multiple uses of the stored value
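+; The xor result feeds both the store and the compare, so the full i128 value
+; is still computed (see CHECK lines) rather than narrowed to a single-word update.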
+define i1 @complement_cmpz_i128(ptr %word, i32 %position) nounwind {
+; X86-LABEL: complement_cmpz_i128:
+; X86: # %bb.0:
+; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
+; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %edi
+; X86-NEXT: pushl %esi
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: subl $64, %esp
+; X86-NEXT: movzbl 12(%ebp), %ecx
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $1, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: shrb $3, %al
+; X86-NEXT: andb $12, %al
+; X86-NEXT: negb %al
+; X86-NEXT: movsbl %al, %esi
+; X86-NEXT: movl 36(%esp,%esi), %eax
+; X86-NEXT: movl 40(%esp,%esi), %edi
+; X86-NEXT: movl %edi, %edx
+; X86-NEXT: shldl %cl, %eax, %edx
+; X86-NEXT: movl 32(%esp,%esi), %ebx
+; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 44(%esp,%esi), %esi
+; X86-NEXT: shldl %cl, %edi, %esi
+; X86-NEXT: movl %ebx, %edi
+; X86-NEXT: shll %cl, %edi
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT: shldl %cl, %ebx, %eax
+; X86-NEXT: movl 8(%ebp), %ecx
+; X86-NEXT: xorl 12(%ecx), %esi
+; X86-NEXT: xorl 8(%ecx), %edx
+; X86-NEXT: xorl 4(%ecx), %eax
+; X86-NEXT: xorl (%ecx), %edi
+; X86-NEXT: movl %edx, 8(%ecx)
+; X86-NEXT: movl %esi, 12(%ecx)
+; X86-NEXT: movl %edi, (%ecx)
+; X86-NEXT: movl %eax, 4(%ecx)
+; X86-NEXT: orl %esi, %eax
+; X86-NEXT: orl %edx, %edi
+; X86-NEXT: orl %eax, %edi
+; X86-NEXT: setne %al
+; X86-NEXT: leal -12(%ebp), %esp
+; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: popl %ebx
+; X86-NEXT: popl %ebp
+; X86-NEXT: retl
+;
+; SSE-LABEL: complement_cmpz_i128:
+; SSE: # %bb.0:
+; SSE-NEXT: movl %esi, %ecx
+; SSE-NEXT: movl $1, %eax
+; SSE-NEXT: xorl %edx, %edx
+; SSE-NEXT: shldq %cl, %rax, %rdx
+; SSE-NEXT: shlq %cl, %rax
+; SSE-NEXT: xorl %esi, %esi
+; SSE-NEXT: testb $64, %cl
+; SSE-NEXT: cmovneq %rax, %rdx
+; SSE-NEXT: cmovneq %rsi, %rax
+; SSE-NEXT: xorq 8(%rdi), %rdx
+; SSE-NEXT: xorq (%rdi), %rax
+; SSE-NEXT: movq %rax, (%rdi)
+; SSE-NEXT: movq %rdx, 8(%rdi)
+; SSE-NEXT: orq %rdx, %rax
+; SSE-NEXT: setne %al
+; SSE-NEXT: retq
+;
+; AVX2-LABEL: complement_cmpz_i128:
+; AVX2: # %bb.0:
+; AVX2-NEXT: movl %esi, %ecx
+; AVX2-NEXT: movl $1, %eax
+; AVX2-NEXT: xorl %edx, %edx
+; AVX2-NEXT: shldq %cl, %rax, %rdx
+; AVX2-NEXT: xorl %esi, %esi
+; AVX2-NEXT: shlxq %rcx, %rax, %rax
+; AVX2-NEXT: testb $64, %cl
+; AVX2-NEXT: cmovneq %rax, %rdx
+; AVX2-NEXT: cmovneq %rsi, %rax
+; AVX2-NEXT: xorq 8(%rdi), %rdx
+; AVX2-NEXT: xorq (%rdi), %rax
+; AVX2-NEXT: movq %rax, (%rdi)
+; AVX2-NEXT: movq %rdx, 8(%rdi)
+; AVX2-NEXT: orq %rdx, %rax
+; AVX2-NEXT: setne %al
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: complement_cmpz_i128:
+; AVX512: # %bb.0:
+; AVX512-NEXT: movl %esi, %ecx
+; AVX512-NEXT: xorl %eax, %eax
+; AVX512-NEXT: movl $1, %edx
+; AVX512-NEXT: xorl %esi, %esi
+; AVX512-NEXT: shldq %cl, %rdx, %rsi
+; AVX512-NEXT: shlxq %rcx, %rdx, %rdx
+; AVX512-NEXT: testb $64, %cl
+; AVX512-NEXT: cmovneq %rdx, %rsi
+; AVX512-NEXT: cmovneq %rax, %rdx
+; AVX512-NEXT: xorq 8(%rdi), %rsi
+; AVX512-NEXT: xorq (%rdi), %rdx
+; AVX512-NEXT: movq %rdx, (%rdi)
+; AVX512-NEXT: movq %rsi, 8(%rdi)
+; AVX512-NEXT: orq %rsi, %rdx
+; AVX512-NEXT: setne %al
+; AVX512-NEXT: retq
+  %rem = and i32 %position, 127
+  %ofs = zext nneg i32 %rem to i128
+  %bit = shl nuw i128 1, %ofs
+  %ld = load i128, ptr %word
+  %res = xor i128 %ld, %bit
+  store i128 %res, ptr %word
+  %cmp = icmp ne i128 %res, 0
+  ret i1 %cmp
+}
+
+; Multiple loads in store chain
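+; The load through %p sits on the same chain as the i128 load/store of %word,
+; so (presumably) the combined bit-test/reset must preserve its ordering.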
+define i32 @reset_multiload_i128(ptr %word, i32 %position, ptr %p) nounwind {
+; X86-LABEL: reset_multiload_i128:
+; X86: # %bb.0:
+; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
+; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %edi
+; X86-NEXT: pushl %esi
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: subl $64, %esp
+; X86-NEXT: movl 12(%ebp), %ecx
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $1, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: shrb $3, %al
+; X86-NEXT: andb $12, %al
+; X86-NEXT: negb %al
+; X86-NEXT: movsbl %al, %edi
+; X86-NEXT: movl 36(%esp,%edi), %edx
+; X86-NEXT: movl 40(%esp,%edi), %ebx
+; X86-NEXT: movl %ebx, %esi
+; X86-NEXT: shldl %cl, %edx, %esi
+; X86-NEXT: movl 32(%esp,%edi), %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 44(%esp,%edi), %edi
+; X86-NEXT: shldl %cl, %ebx, %edi
+; X86-NEXT: movl %eax, %ebx
+; X86-NEXT: # kill: def $cl killed $cl killed $ecx
+; X86-NEXT: shll %cl, %ebx
+; X86-NEXT: notl %ebx
+; X86-NEXT: movl 16(%ebp), %eax
+; X86-NEXT: movl (%eax), %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 12(%ebp), %eax
+; X86-NEXT: andl $96, %eax
+; X86-NEXT: shrl $3, %eax
+; X86-NEXT: movl 8(%ebp), %ecx
+; X86-NEXT: movl (%ecx,%eax), %eax
+; X86-NEXT: andl %ebx, (%ecx)
+; X86-NEXT: movl 12(%ebp), %ecx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT: shldl %cl, %ebx, %edx
+; X86-NEXT: notl %edx
+; X86-NEXT: movl 8(%ebp), %ebx
+; X86-NEXT: andl %edx, 4(%ebx)
+; X86-NEXT: notl %esi
+; X86-NEXT: andl %esi, 8(%ebx)
+; X86-NEXT: notl %edi
+; X86-NEXT: andl %edi, 12(%ebx)
+; X86-NEXT: btl %ecx, %eax
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: jae .LBB22_2
+; X86-NEXT: # %bb.1:
+; X86-NEXT: xorl %eax, %eax
+; X86-NEXT: .LBB22_2:
+; X86-NEXT: leal -12(%ebp), %esp
+; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: popl %ebx
+; X86-NEXT: popl %ebp
+; X86-NEXT: retl
+;
+; SSE-LABEL: reset_multiload_i128:
+; SSE: # %bb.0:
+; SSE-NEXT: movl %esi, %ecx
+; SSE-NEXT: movl $1, %esi
+; SSE-NEXT: xorl %r8d, %r8d
+; SSE-NEXT: shldq %cl, %rsi, %r8
+; SSE-NEXT: xorl %eax, %eax
+; SSE-NEXT: shlq %cl, %rsi
+; SSE-NEXT: testb $64, %cl
+; SSE-NEXT: cmovneq %rsi, %r8
+; SSE-NEXT: cmovneq %rax, %rsi
+; SSE-NEXT: notq %r8
+; SSE-NEXT: notq %rsi
+; SSE-NEXT: movl %ecx, %r9d
+; SSE-NEXT: andl $96, %r9d
+; SSE-NEXT: shrl $3, %r9d
+; SSE-NEXT: movl (%rdi,%r9), %r9d
+; SSE-NEXT: btl %ecx, %r9d
+; SSE-NEXT: jb .LBB22_2
+; SSE-NEXT: # %bb.1:
+; SSE-NEXT: movl (%rdx), %eax
+; SSE-NEXT: .LBB22_2:
+; SSE-NEXT: andq %r8, 8(%rdi)
+; SSE-NEXT: andq %rsi, (%rdi)
+; SSE-NEXT: # kill: def $eax killed $eax killed $rax
+; SSE-NEXT: retq
+;
+; AVX2-LABEL: reset_multiload_i128:
+; AVX2: # %bb.0:
+; AVX2-NEXT: movl %esi, %ecx
+; AVX2-NEXT: xorl %eax, %eax
+; AVX2-NEXT: movl $1, %r8d
+; AVX2-NEXT: xorl %esi, %esi
+; AVX2-NEXT: shldq %cl, %r8, %rsi
+; AVX2-NEXT: shlxq %rcx, %r8, %r8
+; AVX2-NEXT: testb $64, %cl
+; AVX2-NEXT: cmovneq %r8, %rsi
+; AVX2-NEXT: cmovneq %rax, %r8
+; AVX2-NEXT: notq %rsi
+; AVX2-NEXT: notq %r8
+; AVX2-NEXT: movl %ecx, %r9d
+; AVX2-NEXT: andl $96, %r9d
+; AVX2-NEXT: shrl $3, %r9d
+; AVX2-NEXT: movl (%rdi,%r9), %r9d
+; AVX2-NEXT: btl %ecx, %r9d
+; AVX2-NEXT: jb .LBB22_2
+; AVX2-NEXT: # %bb.1:
+; AVX2-NEXT: movl (%rdx), %eax
+; AVX2-NEXT: .LBB22_2:
+; AVX2-NEXT: andq %rsi, 8(%rdi)
+; AVX2-NEXT: andq %r8, (%rdi)
+; AVX2-NEXT: # kill: def $eax killed $eax killed $rax
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: reset_multiload_i128:
+; AVX512: # %bb.0:
+; AVX512-NEXT: movl %esi, %ecx
+; AVX512-NEXT: movl $1, %r8d
+; AVX512-NEXT: xorl %esi, %esi
+; AVX512-NEXT: shldq %cl, %r8, %rsi
+; AVX512-NEXT: xorl %eax, %eax
+; AVX512-NEXT: shlxq %rcx, %r8, %r8
+; AVX512-NEXT: testb $64, %cl
+; AVX512-NEXT: cmovneq %r8, %rsi
+; AVX512-NEXT: cmovneq %rax, %r8
+; AVX512-NEXT: notq %rsi
+; AVX512-NEXT: notq %r8
+; AVX512-NEXT: movl %ecx, %r9d
+; AVX512-NEXT: andl $96, %r9d
+; AVX512-NEXT: shrl $3, %r9d
+; AVX512-NEXT: movl (%rdi,%r9), %r9d
+; AVX512-NEXT: btl %ecx, %r9d
+; AVX512-NEXT: jb .LBB22_2
+; AVX512-NEXT: # %bb.1:
+; AVX512-NEXT: movl (%rdx), %eax
+; AVX512-NEXT: .LBB22_2:
+; AVX512-NEXT: andq %rsi, 8(%rdi)
+; AVX512-NEXT: andq %r8, (%rdi)
+; AVX512-NEXT: # kill: def $eax killed $eax killed $rax
+; AVX512-NEXT: retq
+  %rem = and i32 %position, 127
+  %ofs = zext nneg i32 %rem to i128
+  %bit = shl nuw i128 1, %ofs
+  %mask = xor i128 %bit, -1
+  %ld = load i128, ptr %word
+  %sel = load i32, ptr %p
+  %test = and i128 %ld, %bit
+  %res = and i128 %ld, %mask
+  %cmp = icmp eq i128 %test, 0
+  store i128 %res, ptr %word
+  %ret = select i1 %cmp, i32 %sel, i32 0
+  ret i32 %ret
+}