Skip to content

Commit f1e9273

Browse files
committed
[AMDGPU][gfx1250] Remove SCOPE_SE for scratch stores
1 parent 5c1b025 commit f1e9273

29 files changed

+2665
-2670
lines changed

llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp

Lines changed: 0 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -2655,11 +2655,6 @@ bool SIGfx12CacheControl::finalizeStore(MachineInstr &MI, bool Atomic) const {
26552655
return Changed;
26562656
}
26572657

2658-
// GFX12.5 only: Require SCOPE_SE on stores that may hit the scratch address
2659-
// space.
2660-
if (TII->mayAccessScratchThroughFlat(MI) && Scope == CPol::SCOPE_CU)
2661-
return setScope(MI, CPol::SCOPE_SE);
2662-
26632658
return Changed;
26642659
}
26652660

llvm/test/CodeGen/AMDGPU/GlobalISel/fp64-atomics-gfx90a.ll

Lines changed: 12 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -73,7 +73,7 @@ define amdgpu_ps void @raw_buffer_atomic_add_rtn_f64(<4 x i32> inreg %rsrc, doub
7373
; GFX1250: ; %bb.0: ; %main_body
7474
; GFX1250-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], null offen th:TH_ATOMIC_RETURN
7575
; GFX1250-NEXT: s_wait_loadcnt 0x0
76-
; GFX1250-NEXT: flat_store_b64 v[0:1], v[0:1] scope:SCOPE_SE
76+
; GFX1250-NEXT: flat_store_b64 v[0:1], v[0:1]
7777
; GFX1250-NEXT: s_endpgm
7878
main_body:
7979
%ret = call double @llvm.amdgcn.raw.buffer.atomic.fadd.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0)
@@ -192,7 +192,7 @@ define amdgpu_ps void @raw_ptr_buffer_atomic_add_rtn_f64(ptr addrspace(8) inreg
192192
; GFX1250: ; %bb.0: ; %main_body
193193
; GFX1250-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], null offen th:TH_ATOMIC_RETURN
194194
; GFX1250-NEXT: s_wait_loadcnt 0x0
195-
; GFX1250-NEXT: flat_store_b64 v[0:1], v[0:1] scope:SCOPE_SE
195+
; GFX1250-NEXT: flat_store_b64 v[0:1], v[0:1]
196196
; GFX1250-NEXT: s_endpgm
197197
main_body:
198198
%ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0)
@@ -311,7 +311,7 @@ define amdgpu_ps void @struct_buffer_atomic_add_rtn_f64(<4 x i32> inreg %rsrc, d
311311
; GFX1250: ; %bb.0: ; %main_body
312312
; GFX1250-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], null idxen th:TH_ATOMIC_RETURN
313313
; GFX1250-NEXT: s_wait_loadcnt 0x0
314-
; GFX1250-NEXT: flat_store_b64 v[0:1], v[0:1] scope:SCOPE_SE
314+
; GFX1250-NEXT: flat_store_b64 v[0:1], v[0:1]
315315
; GFX1250-NEXT: s_endpgm
316316
main_body:
317317
%ret = call double @llvm.amdgcn.struct.buffer.atomic.fadd.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0, i32 0)
@@ -429,7 +429,7 @@ define amdgpu_ps void @struct_ptr_buffer_atomic_add_rtn_f64(ptr addrspace(8) inr
429429
; GFX1250: ; %bb.0: ; %main_body
430430
; GFX1250-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], null idxen th:TH_ATOMIC_RETURN
431431
; GFX1250-NEXT: s_wait_loadcnt 0x0
432-
; GFX1250-NEXT: flat_store_b64 v[0:1], v[0:1] scope:SCOPE_SE
432+
; GFX1250-NEXT: flat_store_b64 v[0:1], v[0:1]
433433
; GFX1250-NEXT: s_endpgm
434434
main_body:
435435
%ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0, i32 0)
@@ -547,7 +547,7 @@ define amdgpu_ps void @raw_buffer_atomic_min_rtn_f64(<4 x i32> inreg %rsrc, doub
547547
; GFX1250: ; %bb.0: ; %main_body
548548
; GFX1250-NEXT: buffer_atomic_min_num_f64 v[0:1], v2, s[0:3], null offen th:TH_ATOMIC_RETURN
549549
; GFX1250-NEXT: s_wait_loadcnt 0x0
550-
; GFX1250-NEXT: flat_store_b64 v[0:1], v[0:1] scope:SCOPE_SE
550+
; GFX1250-NEXT: flat_store_b64 v[0:1], v[0:1]
551551
; GFX1250-NEXT: s_endpgm
552552
main_body:
553553
%ret = call double @llvm.amdgcn.raw.buffer.atomic.fmin.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0)
@@ -666,7 +666,7 @@ define amdgpu_ps void @raw_ptr_buffer_atomic_min_rtn_f64(ptr addrspace(8) inreg
666666
; GFX1250: ; %bb.0: ; %main_body
667667
; GFX1250-NEXT: buffer_atomic_min_num_f64 v[0:1], v2, s[0:3], null offen th:TH_ATOMIC_RETURN
668668
; GFX1250-NEXT: s_wait_loadcnt 0x0
669-
; GFX1250-NEXT: flat_store_b64 v[0:1], v[0:1] scope:SCOPE_SE
669+
; GFX1250-NEXT: flat_store_b64 v[0:1], v[0:1]
670670
; GFX1250-NEXT: s_endpgm
671671
main_body:
672672
%ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fmin.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0)
@@ -785,7 +785,7 @@ define amdgpu_ps void @struct_buffer_atomic_min_rtn_f64(<4 x i32> inreg %rsrc, d
785785
; GFX1250: ; %bb.0: ; %main_body
786786
; GFX1250-NEXT: buffer_atomic_min_num_f64 v[0:1], v2, s[0:3], null idxen th:TH_ATOMIC_RETURN
787787
; GFX1250-NEXT: s_wait_loadcnt 0x0
788-
; GFX1250-NEXT: flat_store_b64 v[0:1], v[0:1] scope:SCOPE_SE
788+
; GFX1250-NEXT: flat_store_b64 v[0:1], v[0:1]
789789
; GFX1250-NEXT: s_endpgm
790790
main_body:
791791
%ret = call double @llvm.amdgcn.struct.buffer.atomic.fmin.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0, i32 0)
@@ -903,7 +903,7 @@ define amdgpu_ps void @struct_ptr_buffer_atomic_min_rtn_f64(ptr addrspace(8) inr
903903
; GFX1250: ; %bb.0: ; %main_body
904904
; GFX1250-NEXT: buffer_atomic_min_num_f64 v[0:1], v2, s[0:3], null idxen th:TH_ATOMIC_RETURN
905905
; GFX1250-NEXT: s_wait_loadcnt 0x0
906-
; GFX1250-NEXT: flat_store_b64 v[0:1], v[0:1] scope:SCOPE_SE
906+
; GFX1250-NEXT: flat_store_b64 v[0:1], v[0:1]
907907
; GFX1250-NEXT: s_endpgm
908908
main_body:
909909
%ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fmin.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0, i32 0)
@@ -1021,7 +1021,7 @@ define amdgpu_ps void @raw_buffer_atomic_max_rtn_f64(<4 x i32> inreg %rsrc, doub
10211021
; GFX1250: ; %bb.0: ; %main_body
10221022
; GFX1250-NEXT: buffer_atomic_max_num_f64 v[0:1], v2, s[0:3], null offen th:TH_ATOMIC_RETURN
10231023
; GFX1250-NEXT: s_wait_loadcnt 0x0
1024-
; GFX1250-NEXT: flat_store_b64 v[0:1], v[0:1] scope:SCOPE_SE
1024+
; GFX1250-NEXT: flat_store_b64 v[0:1], v[0:1]
10251025
; GFX1250-NEXT: s_endpgm
10261026
main_body:
10271027
%ret = call double @llvm.amdgcn.raw.buffer.atomic.fmax.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0)
@@ -1140,7 +1140,7 @@ define amdgpu_ps void @raw_ptr_buffer_atomic_max_rtn_f64(ptr addrspace(8) inreg
11401140
; GFX1250: ; %bb.0: ; %main_body
11411141
; GFX1250-NEXT: buffer_atomic_max_num_f64 v[0:1], v2, s[0:3], null offen th:TH_ATOMIC_RETURN
11421142
; GFX1250-NEXT: s_wait_loadcnt 0x0
1143-
; GFX1250-NEXT: flat_store_b64 v[0:1], v[0:1] scope:SCOPE_SE
1143+
; GFX1250-NEXT: flat_store_b64 v[0:1], v[0:1]
11441144
; GFX1250-NEXT: s_endpgm
11451145
main_body:
11461146
%ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fmax.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0)
@@ -1259,7 +1259,7 @@ define amdgpu_ps void @struct_buffer_atomic_max_rtn_f64(<4 x i32> inreg %rsrc, d
12591259
; GFX1250: ; %bb.0: ; %main_body
12601260
; GFX1250-NEXT: buffer_atomic_max_num_f64 v[0:1], v2, s[0:3], null idxen th:TH_ATOMIC_RETURN
12611261
; GFX1250-NEXT: s_wait_loadcnt 0x0
1262-
; GFX1250-NEXT: flat_store_b64 v[0:1], v[0:1] scope:SCOPE_SE
1262+
; GFX1250-NEXT: flat_store_b64 v[0:1], v[0:1]
12631263
; GFX1250-NEXT: s_endpgm
12641264
main_body:
12651265
%ret = call double @llvm.amdgcn.struct.buffer.atomic.fmax.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0, i32 0)
@@ -1377,7 +1377,7 @@ define amdgpu_ps void @struct_ptr_buffer_atomic_max_rtn_f64(ptr addrspace(8) inr
13771377
; GFX1250: ; %bb.0: ; %main_body
13781378
; GFX1250-NEXT: buffer_atomic_max_num_f64 v[0:1], v2, s[0:3], null idxen th:TH_ATOMIC_RETURN
13791379
; GFX1250-NEXT: s_wait_loadcnt 0x0
1380-
; GFX1250-NEXT: flat_store_b64 v[0:1], v[0:1] scope:SCOPE_SE
1380+
; GFX1250-NEXT: flat_store_b64 v[0:1], v[0:1]
13811381
; GFX1250-NEXT: s_endpgm
13821382
main_body:
13831383
%ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fmax.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0, i32 0)

llvm/test/CodeGen/AMDGPU/atomics-system-scope.ll

Lines changed: 14 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -572,7 +572,7 @@ define double @flat_system_atomic_fadd_f64(ptr %ptr, double %val) {
572572
; GFX1250-NEXT: scratch_load_b64 v[4:5], v6, off
573573
; GFX1250-NEXT: s_wait_loadcnt 0x0
574574
; GFX1250-NEXT: v_add_f64_e32 v[0:1], v[4:5], v[2:3]
575-
; GFX1250-NEXT: scratch_store_b64 v6, v[0:1], off scope:SCOPE_SE
575+
; GFX1250-NEXT: scratch_store_b64 v6, v[0:1], off
576576
; GFX1250-NEXT: .LBB34_5: ; %Flow1
577577
; GFX1250-NEXT: s_wait_xcnt 0x0
578578
; GFX1250-NEXT: s_or_b32 exec_lo, exec_lo, s1
@@ -634,7 +634,7 @@ define double @flat_one_as_atomic_fadd_f64(ptr %ptr, double %val) {
634634
; GFX1250-NEXT: scratch_load_b64 v[4:5], v6, off
635635
; GFX1250-NEXT: s_wait_loadcnt 0x0
636636
; GFX1250-NEXT: v_add_f64_e32 v[0:1], v[4:5], v[2:3]
637-
; GFX1250-NEXT: scratch_store_b64 v6, v[0:1], off scope:SCOPE_SE
637+
; GFX1250-NEXT: scratch_store_b64 v6, v[0:1], off
638638
; GFX1250-NEXT: .LBB35_5: ; %Flow1
639639
; GFX1250-NEXT: s_wait_xcnt 0x0
640640
; GFX1250-NEXT: s_or_b32 exec_lo, exec_lo, s1
@@ -714,7 +714,7 @@ define double @flat_system_atomic_fmin_f64(ptr %ptr, double %val) {
714714
; GFX1250-NEXT: s_wait_loadcnt 0x0
715715
; GFX1250-NEXT: v_max_num_f64_e32 v[0:1], v[4:5], v[4:5]
716716
; GFX1250-NEXT: v_min_num_f64_e32 v[0:1], v[0:1], v[2:3]
717-
; GFX1250-NEXT: scratch_store_b64 v6, v[0:1], off scope:SCOPE_SE
717+
; GFX1250-NEXT: scratch_store_b64 v6, v[0:1], off
718718
; GFX1250-NEXT: .LBB38_4: ; %atomicrmw.phi
719719
; GFX1250-NEXT: s_wait_xcnt 0x0
720720
; GFX1250-NEXT: s_or_b32 exec_lo, exec_lo, s0
@@ -758,7 +758,7 @@ define double @flat_one_as_atomic_fmin_f64(ptr %ptr, double %val) {
758758
; GFX1250-NEXT: s_wait_loadcnt 0x0
759759
; GFX1250-NEXT: v_max_num_f64_e32 v[0:1], v[4:5], v[4:5]
760760
; GFX1250-NEXT: v_min_num_f64_e32 v[0:1], v[0:1], v[2:3]
761-
; GFX1250-NEXT: scratch_store_b64 v6, v[0:1], off scope:SCOPE_SE
761+
; GFX1250-NEXT: scratch_store_b64 v6, v[0:1], off
762762
; GFX1250-NEXT: .LBB39_4: ; %atomicrmw.phi
763763
; GFX1250-NEXT: s_wait_xcnt 0x0
764764
; GFX1250-NEXT: s_or_b32 exec_lo, exec_lo, s0
@@ -826,7 +826,7 @@ define double @flat_system_atomic_fmax_f64(ptr %ptr, double %val) {
826826
; GFX1250-NEXT: s_wait_loadcnt 0x0
827827
; GFX1250-NEXT: v_max_num_f64_e32 v[0:1], v[4:5], v[4:5]
828828
; GFX1250-NEXT: v_max_num_f64_e32 v[0:1], v[0:1], v[2:3]
829-
; GFX1250-NEXT: scratch_store_b64 v6, v[0:1], off scope:SCOPE_SE
829+
; GFX1250-NEXT: scratch_store_b64 v6, v[0:1], off
830830
; GFX1250-NEXT: .LBB42_4: ; %atomicrmw.phi
831831
; GFX1250-NEXT: s_wait_xcnt 0x0
832832
; GFX1250-NEXT: s_or_b32 exec_lo, exec_lo, s0
@@ -870,7 +870,7 @@ define double @flat_one_as_atomic_fmax_f64(ptr %ptr, double %val) {
870870
; GFX1250-NEXT: s_wait_loadcnt 0x0
871871
; GFX1250-NEXT: v_max_num_f64_e32 v[0:1], v[4:5], v[4:5]
872872
; GFX1250-NEXT: v_max_num_f64_e32 v[0:1], v[0:1], v[2:3]
873-
; GFX1250-NEXT: scratch_store_b64 v6, v[0:1], off scope:SCOPE_SE
873+
; GFX1250-NEXT: scratch_store_b64 v6, v[0:1], off
874874
; GFX1250-NEXT: .LBB43_4: ; %atomicrmw.phi
875875
; GFX1250-NEXT: s_wait_xcnt 0x0
876876
; GFX1250-NEXT: s_or_b32 exec_lo, exec_lo, s0
@@ -1009,7 +1009,7 @@ define i64 @flat_one_as_atomic_min_i64(ptr %ptr, i64 %val) {
10091009
; GFX1250-NEXT: scratch_load_b64 v[4:5], v6, off
10101010
; GFX1250-NEXT: s_wait_loadcnt 0x0
10111011
; GFX1250-NEXT: v_min_i64 v[0:1], v[4:5], v[2:3]
1012-
; GFX1250-NEXT: scratch_store_b64 v6, v[0:1], off scope:SCOPE_SE
1012+
; GFX1250-NEXT: scratch_store_b64 v6, v[0:1], off
10131013
; GFX1250-NEXT: .LBB52_4: ; %atomicrmw.phi
10141014
; GFX1250-NEXT: s_wait_xcnt 0x0
10151015
; GFX1250-NEXT: s_or_b32 exec_lo, exec_lo, s0
@@ -1052,7 +1052,7 @@ define i64 @flat_system_atomic_min_i64(ptr %ptr, i64 %val) {
10521052
; GFX1250-NEXT: scratch_load_b64 v[4:5], v6, off
10531053
; GFX1250-NEXT: s_wait_loadcnt 0x0
10541054
; GFX1250-NEXT: v_min_i64 v[0:1], v[4:5], v[2:3]
1055-
; GFX1250-NEXT: scratch_store_b64 v6, v[0:1], off scope:SCOPE_SE
1055+
; GFX1250-NEXT: scratch_store_b64 v6, v[0:1], off
10561056
; GFX1250-NEXT: .LBB53_4: ; %atomicrmw.phi
10571057
; GFX1250-NEXT: s_wait_xcnt 0x0
10581058
; GFX1250-NEXT: s_or_b32 exec_lo, exec_lo, s0
@@ -1095,7 +1095,7 @@ define i64 @flat_one_as_atomic_max_i64(ptr %ptr, i64 %val) {
10951095
; GFX1250-NEXT: scratch_load_b64 v[4:5], v6, off
10961096
; GFX1250-NEXT: s_wait_loadcnt 0x0
10971097
; GFX1250-NEXT: v_max_i64 v[0:1], v[4:5], v[2:3]
1098-
; GFX1250-NEXT: scratch_store_b64 v6, v[0:1], off scope:SCOPE_SE
1098+
; GFX1250-NEXT: scratch_store_b64 v6, v[0:1], off
10991099
; GFX1250-NEXT: .LBB54_4: ; %atomicrmw.phi
11001100
; GFX1250-NEXT: s_wait_xcnt 0x0
11011101
; GFX1250-NEXT: s_or_b32 exec_lo, exec_lo, s0
@@ -1138,7 +1138,7 @@ define i64 @flat_system_atomic_max_i64(ptr %ptr, i64 %val) {
11381138
; GFX1250-NEXT: scratch_load_b64 v[4:5], v6, off
11391139
; GFX1250-NEXT: s_wait_loadcnt 0x0
11401140
; GFX1250-NEXT: v_max_i64 v[0:1], v[4:5], v[2:3]
1141-
; GFX1250-NEXT: scratch_store_b64 v6, v[0:1], off scope:SCOPE_SE
1141+
; GFX1250-NEXT: scratch_store_b64 v6, v[0:1], off
11421142
; GFX1250-NEXT: .LBB55_4: ; %atomicrmw.phi
11431143
; GFX1250-NEXT: s_wait_xcnt 0x0
11441144
; GFX1250-NEXT: s_or_b32 exec_lo, exec_lo, s0
@@ -1181,7 +1181,7 @@ define i64 @flat_one_as_atomic_umin_i64(ptr %ptr, i64 %val) {
11811181
; GFX1250-NEXT: scratch_load_b64 v[4:5], v6, off
11821182
; GFX1250-NEXT: s_wait_loadcnt 0x0
11831183
; GFX1250-NEXT: v_min_u64 v[0:1], v[4:5], v[2:3]
1184-
; GFX1250-NEXT: scratch_store_b64 v6, v[0:1], off scope:SCOPE_SE
1184+
; GFX1250-NEXT: scratch_store_b64 v6, v[0:1], off
11851185
; GFX1250-NEXT: .LBB56_4: ; %atomicrmw.phi
11861186
; GFX1250-NEXT: s_wait_xcnt 0x0
11871187
; GFX1250-NEXT: s_or_b32 exec_lo, exec_lo, s0
@@ -1224,7 +1224,7 @@ define i64 @flat_system_atomic_umin_i64(ptr %ptr, i64 %val) {
12241224
; GFX1250-NEXT: scratch_load_b64 v[4:5], v6, off
12251225
; GFX1250-NEXT: s_wait_loadcnt 0x0
12261226
; GFX1250-NEXT: v_min_u64 v[0:1], v[4:5], v[2:3]
1227-
; GFX1250-NEXT: scratch_store_b64 v6, v[0:1], off scope:SCOPE_SE
1227+
; GFX1250-NEXT: scratch_store_b64 v6, v[0:1], off
12281228
; GFX1250-NEXT: .LBB57_4: ; %atomicrmw.phi
12291229
; GFX1250-NEXT: s_wait_xcnt 0x0
12301230
; GFX1250-NEXT: s_or_b32 exec_lo, exec_lo, s0
@@ -1267,7 +1267,7 @@ define i64 @flat_one_as_atomic_umax_i64(ptr %ptr, i64 %val) {
12671267
; GFX1250-NEXT: scratch_load_b64 v[4:5], v6, off
12681268
; GFX1250-NEXT: s_wait_loadcnt 0x0
12691269
; GFX1250-NEXT: v_max_u64 v[0:1], v[4:5], v[2:3]
1270-
; GFX1250-NEXT: scratch_store_b64 v6, v[0:1], off scope:SCOPE_SE
1270+
; GFX1250-NEXT: scratch_store_b64 v6, v[0:1], off
12711271
; GFX1250-NEXT: .LBB58_4: ; %atomicrmw.phi
12721272
; GFX1250-NEXT: s_wait_xcnt 0x0
12731273
; GFX1250-NEXT: s_or_b32 exec_lo, exec_lo, s0
@@ -1310,7 +1310,7 @@ define i64 @flat_system_atomic_umax_i64(ptr %ptr, i64 %val) {
13101310
; GFX1250-NEXT: scratch_load_b64 v[4:5], v6, off
13111311
; GFX1250-NEXT: s_wait_loadcnt 0x0
13121312
; GFX1250-NEXT: v_max_u64 v[0:1], v[4:5], v[2:3]
1313-
; GFX1250-NEXT: scratch_store_b64 v6, v[0:1], off scope:SCOPE_SE
1313+
; GFX1250-NEXT: scratch_store_b64 v6, v[0:1], off
13141314
; GFX1250-NEXT: .LBB59_4: ; %atomicrmw.phi
13151315
; GFX1250-NEXT: s_wait_xcnt 0x0
13161316
; GFX1250-NEXT: s_or_b32 exec_lo, exec_lo, s0

llvm/test/CodeGen/AMDGPU/bf16-conversions.ll

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -344,7 +344,7 @@ define amdgpu_ps void @fptrunc_f32_to_bf16(float %a, ptr %out) {
344344
; GFX1250: ; %bb.0: ; %entry
345345
; GFX1250-NEXT: v_dual_mov_b32 v3, v2 :: v_dual_mov_b32 v2, v1
346346
; GFX1250-NEXT: v_cvt_pk_bf16_f32 v0, v0, s0
347-
; GFX1250-NEXT: flat_store_b16 v[2:3], v0 scope:SCOPE_SE
347+
; GFX1250-NEXT: flat_store_b16 v[2:3], v0
348348
; GFX1250-NEXT: s_endpgm
349349
entry:
350350
%a.cvt = fptrunc float %a to bfloat
@@ -380,7 +380,7 @@ define amdgpu_ps void @fptrunc_f32_to_bf16_abs(float %a, ptr %out) {
380380
; GFX1250: ; %bb.0: ; %entry
381381
; GFX1250-NEXT: v_dual_mov_b32 v3, v2 :: v_dual_mov_b32 v2, v1
382382
; GFX1250-NEXT: v_cvt_pk_bf16_f32 v0, |v0|, s0
383-
; GFX1250-NEXT: flat_store_b16 v[2:3], v0 scope:SCOPE_SE
383+
; GFX1250-NEXT: flat_store_b16 v[2:3], v0
384384
; GFX1250-NEXT: s_endpgm
385385
entry:
386386
%a.abs = call float @llvm.fabs.f32(float %a)
@@ -417,7 +417,7 @@ define amdgpu_ps void @fptrunc_f32_to_bf16_neg(float %a, ptr %out) {
417417
; GFX1250: ; %bb.0: ; %entry
418418
; GFX1250-NEXT: v_dual_mov_b32 v3, v2 :: v_dual_mov_b32 v2, v1
419419
; GFX1250-NEXT: v_cvt_pk_bf16_f32 v0, -v0, s0
420-
; GFX1250-NEXT: flat_store_b16 v[2:3], v0 scope:SCOPE_SE
420+
; GFX1250-NEXT: flat_store_b16 v[2:3], v0
421421
; GFX1250-NEXT: s_endpgm
422422
entry:
423423
%a.neg = fneg float %a
@@ -480,7 +480,7 @@ define amdgpu_ps void @fptrunc_f64_to_bf16(double %a, ptr %out) {
480480
; GFX1250-NEXT: s_or_b32 vcc_lo, vcc_lo, s0
481481
; GFX1250-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc_lo
482482
; GFX1250-NEXT: v_cvt_pk_bf16_f32 v0, v0, s0
483-
; GFX1250-NEXT: flat_store_b16 v[2:3], v0 scope:SCOPE_SE
483+
; GFX1250-NEXT: flat_store_b16 v[2:3], v0
484484
; GFX1250-NEXT: s_endpgm
485485
entry:
486486
%a.cvt = fptrunc double %a to bfloat
@@ -543,7 +543,7 @@ define amdgpu_ps void @fptrunc_f64_to_bf16_neg(double %a, ptr %out) {
543543
; GFX1250-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc_lo
544544
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
545545
; GFX1250-NEXT: v_cvt_pk_bf16_f32 v0, v0, s0
546-
; GFX1250-NEXT: flat_store_b16 v[2:3], v0 scope:SCOPE_SE
546+
; GFX1250-NEXT: flat_store_b16 v[2:3], v0
547547
; GFX1250-NEXT: s_endpgm
548548
entry:
549549
%a.neg = fneg double %a
@@ -607,7 +607,7 @@ define amdgpu_ps void @fptrunc_f64_to_bf16_abs(double %a, ptr %out) {
607607
; GFX1250-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc_lo
608608
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
609609
; GFX1250-NEXT: v_cvt_pk_bf16_f32 v0, v0, s0
610-
; GFX1250-NEXT: flat_store_b16 v[2:3], v0 scope:SCOPE_SE
610+
; GFX1250-NEXT: flat_store_b16 v[2:3], v0
611611
; GFX1250-NEXT: s_endpgm
612612
entry:
613613
%a.abs = call double @llvm.fabs.f64(double %a)

0 commit comments

Comments
 (0)