- Notifications
You must be signed in to change notification settings - Fork 15.1k
[AMDGPU] Merge consecutive wait_alu instruction #128916
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 2 commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| | @@ -164,6 +164,18 @@ inline unsigned getSaSdstBitWidth() { return 1; } | |
| /// \returns SaSdst bit shift | ||
| inline unsigned getSaSdstBitShift() { return 0; } | ||
| | ||
| /// \returns VaSsrc width | ||
| inline unsigned getVaSsrcBitWidth() { return 1; } | ||
| | ||
| /// \returns VaSsrc bit shift | ||
| inline unsigned getVaSsrcBitShift() { return 8; } | ||
| | ||
| /// \returns HoldCnt width | ||
| inline unsigned getHoldCntWidth() { return 1; } | ||
| | ||
| /// \returns HoldCnt bit shift | ||
| inline unsigned getHoldCntBitShift() { return 7; } | ||
| | ||
| } // end anonymous namespace | ||
| | ||
| namespace llvm { | ||
| | @@ -1740,6 +1752,14 @@ unsigned decodeFieldVaVcc(unsigned Encoded) { | |
| return unpackBits(Encoded, getVaVccBitShift(), getVaVccBitWidth()); | ||
| } | ||
| | ||
| unsigned decodeFieldVaSsrc(unsigned Encoded) { | ||
| return unpackBits(Encoded, getVaSsrcBitShift(), getVaSsrcBitWidth()); | ||
| } | ||
| | ||
| unsigned decodeFieldHoldCnt(unsigned Encoded) { | ||
| return unpackBits(Encoded, getHoldCntBitShift(), getHoldCntWidth()); | ||
| } | ||
| | ||
| unsigned encodeFieldVmVsrc(unsigned Encoded, unsigned VmVsrc) { | ||
| return packBits(VmVsrc, Encoded, getVmVsrcBitShift(), getVmVsrcBitWidth()); | ||
| } | ||
| | @@ -1780,6 +1800,22 @@ unsigned encodeFieldVaVcc(unsigned VaVcc) { | |
| return encodeFieldVaVcc(0xffff, VaVcc); | ||
| } | ||
| | ||
| unsigned encodeFieldVaSsrc(unsigned Encoded, unsigned VaSsrc) { | ||
| return packBits(VaSsrc, Encoded, getVaSsrcBitShift(), getVaSsrcBitWidth()); | ||
| } | ||
| | ||
| /// \returns the default 0xffff DepCtr encoding with the VaSsrc field set to | ||
| /// \p VaSsrc. The default matches encodeFieldVaVcc(unsigned); starting from | ||
| /// 0xfff instead would hand back an encoding whose bits 12-15 are already 0. | ||
| unsigned encodeFieldVaSsrc(unsigned VaSsrc) { | ||
| return encodeFieldVaSsrc(0xffff, VaSsrc); | ||
| } | ||
| | ||
| unsigned encodeFieldHoldCnt(unsigned Encoded, unsigned HoldCnt) { | ||
| return packBits(HoldCnt, Encoded, getHoldCntBitShift(), getHoldCntWidth()); | ||
| } | ||
| | ||
| /// \returns the default 0xffff DepCtr encoding with the HoldCnt field set to | ||
| /// \p HoldCnt. The default matches encodeFieldVaVcc(unsigned); starting from | ||
| /// 0xfff instead would hand back an encoding whose bits 12-15 are already 0. | ||
| unsigned encodeFieldHoldCnt(unsigned HoldCnt) { | ||
| return encodeFieldHoldCnt(0xffff, HoldCnt); | ||
| } | ||
| | ||
| } // namespace DepCtr | ||
| | ||
| //===----------------------------------------------------------------------===// | ||
| | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,78 @@ | ||
| # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 | ||
| # RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs -run-pass amdgpu-wait-sgpr-hazards -o - %s | FileCheck %s | ||
| | ||
| | ||
| --- | ||
| name: merge_consecutive_wait_alus | ||
| body: | | ||
| bb.0: | ||
| liveins: $vgpr0 | ||
| | ||
| ; CHECK-LABEL: name: merge_consecutive_wait_alus | ||
| ; CHECK: liveins: $vgpr0 | ||
| ; CHECK-NEXT: {{ $}} | ||
| ; CHECK-NEXT: renamable $sgpr0 = V_CMP_NE_U32_e64 0, $vgpr0, implicit $exec, implicit-def $vcc_lo, implicit-def $vcc_lo | ||
| ; CHECK-NEXT: S_WAITCNT_DEPCTR 61946 | ||
| ; CHECK-NEXT: renamable $vgpr0 = V_CNDMASK_B32_e64 0, -1, 0, killed $vgpr0, killed $sgpr0, implicit $exec, implicit-def $vcc_lo | ||
| renamable $sgpr0 = V_CMP_NE_U32_e64 0, $vgpr0, implicit $exec, implicit-def $vcc_lo, implicit-def $vcc | ||
| S_WAITCNT_DEPCTR 65530 | ||
| renamable $vgpr0 = V_CNDMASK_B32_e64 0, -1, 0, killed $vgpr0, killed $sgpr0, implicit $exec, implicit-def $vcc | ||
| ... | ||
| --- | ||
| name: merge_consecutive_wait_alus_two_bb | ||
| body: | | ||
| ; CHECK-LABEL: name: merge_consecutive_wait_alus_two_bb | ||
| ; CHECK: bb.0: | ||
| ; CHECK-NEXT: successors: %bb.1(0x80000000) | ||
| ; CHECK-NEXT: liveins: $vgpr0 | ||
| ; CHECK-NEXT: {{ $}} | ||
| ; CHECK-NEXT: renamable $sgpr0 = V_CMP_NE_U32_e64 0, $vgpr0, implicit $exec, implicit-def $vcc_lo, implicit-def $vcc_lo | ||
| ; CHECK-NEXT: S_WAITCNT_DEPCTR 65530 | ||
| ; CHECK-NEXT: {{ $}} | ||
| ; CHECK-NEXT: bb.1: | ||
| ; CHECK-NEXT: liveins: $sgpr0 | ||
| ; CHECK-NEXT: {{ $}} | ||
| ; CHECK-NEXT: S_WAITCNT_DEPCTR 61951 | ||
| ; CHECK-NEXT: renamable $vgpr0 = V_CNDMASK_B32_e64 0, -1, 0, killed $vgpr0, killed $sgpr0, implicit $exec, implicit-def $vcc_lo | ||
| bb.0: | ||
| liveins: $vgpr0 | ||
| | ||
| renamable $sgpr0 = V_CMP_NE_U32_e64 0, $vgpr0, implicit $exec, implicit-def $vcc_lo, implicit-def $vcc | ||
| S_WAITCNT_DEPCTR 65530 | ||
| | ||
| Contributor There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Test where it's the start of the block and end of the block Contributor There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Also add a test that skips meta instructions Contributor Author There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Do you mean within the same basic block, or across a predecessor and its successor? | ||
| bb.1: | ||
| liveins: $sgpr0 | ||
| | ||
| renamable $vgpr0 = V_CNDMASK_B32_e64 0, -1, 0, killed $vgpr0, killed $sgpr0, implicit $exec, implicit-def $vcc | ||
| ... | ||
| --- | ||
| name: meta_instructions | ||
| machineFunctionInfo: | ||
| body: | | ||
| bb.0: | ||
| ; CHECK-LABEL: name: meta_instructions | ||
| ; CHECK: renamable $sgpr0 = V_CMP_NE_U32_e64 0, $vgpr0, implicit $exec, implicit-def $vcc_lo, implicit-def $vcc_lo | ||
| ; CHECK-NEXT: S_WAITCNT_DEPCTR 65530 | ||
| ; CHECK-NEXT: SCHED_BARRIER 0 | ||
| ; CHECK-NEXT: S_WAITCNT_DEPCTR 61951 | ||
| ; CHECK-NEXT: renamable $vgpr0 = V_CNDMASK_B32_e64 0, -1, 0, killed $vgpr0, killed $sgpr0, implicit $exec, implicit-def $vcc_lo | ||
| renamable $sgpr0 = V_CMP_NE_U32_e64 0, $vgpr0, implicit $exec, implicit-def $vcc_lo, implicit-def $vcc | ||
| S_WAITCNT_DEPCTR 65530 | ||
| SCHED_BARRIER 0 | ||
| renamable $vgpr0 = V_CNDMASK_B32_e64 0, -1, 0, killed $vgpr0, killed $sgpr0, implicit $exec, implicit-def $vcc | ||
| ... | ||
| --- | ||
| name: debug_instruction | ||
| machineFunctionInfo: | ||
| body: | | ||
| bb.0: | ||
| ; CHECK-LABEL: name: debug_instruction | ||
| ; CHECK: renamable $sgpr0 = V_CMP_NE_U32_e64 0, $vgpr0, implicit $exec, implicit-def $vcc_lo, implicit-def $vcc_lo | ||
| ; CHECK-NEXT: S_WAITCNT_DEPCTR 61946 | ||
| ; CHECK-NEXT: DBG_VALUE $sgpr0 | ||
| ; CHECK-NEXT: renamable $vgpr0 = V_CNDMASK_B32_e64 0, -1, 0, killed $vgpr0, killed $sgpr0, implicit $exec, implicit-def $vcc_lo | ||
| renamable $sgpr0 = V_CMP_NE_U32_e64 0, $vgpr0, implicit $exec, implicit-def $vcc_lo, implicit-def $vcc | ||
| S_WAITCNT_DEPCTR 65530 | ||
| DBG_VALUE $sgpr0 | ||
| renamable $vgpr0 = V_CNDMASK_B32_e64 0, -1, 0, killed $vgpr0, killed $sgpr0, implicit $exec, implicit-def $vcc | ||
| ... | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This will skip the "Update hazards based on defs" code below. Is that really safe?
Also, I think you still need to set `Emitted = true` in this path, since it will be used to determine whether this pass changed anything at all in the MIR.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, the code below should not be skipped.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks for pointing this out!