Skip to content

Commit 5f819f8

Browse files
fix: do not wait with locked csr
Related-To: NEO-16532, HSD-18043732674
Signed-off-by: Maciej Plewka <maciej.plewka@intel.com>
1 parent dfc70bb commit 5f819f8

File tree

4 files changed

+65
-50
lines changed

4 files changed

+65
-50
lines changed

level_zero/core/source/cmdqueue/cmdqueue_hw.h

Lines changed: 5 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -91,6 +91,7 @@ struct CommandQueueHw : public CommandQueueImp {
9191
NEO::ScratchSpaceController *scratchSpaceController = nullptr;
9292
NEO::GraphicsAllocation *globalStatelessAllocation = nullptr;
9393
std::unique_lock<std::mutex> *outerLockForIndirect = nullptr;
94+
std::unique_lock<NEO::CommandStreamReceiver::MutexType> *lockCSR = nullptr;
9495

9596
NEO::PreemptionMode preemptionMode{};
9697
NEO::PreemptionMode statePreemption{};
@@ -219,8 +220,10 @@ struct CommandQueueHw : public CommandQueueImp {
219220
inline void updateTaskCountAndPostSync(bool isDispatchTaskCountPostSyncRequired,
220221
uint32_t numCommandLists,
221222
ze_command_list_handle_t *commandListHandles);
222-
inline ze_result_t waitForCommandQueueCompletionAndCleanHeapContainer();
223-
inline ze_result_t handleSubmissionAndCompletionResults(NEO::SubmissionStatus submitRet, ze_result_t completionRet);
223+
ze_result_t waitForCommandQueueCompletion(CommandListExecutionContext &ctx);
224+
inline ze_result_t handleNonParentImmediateStream(ze_fence_handle_t hFence, CommandListExecutionContext &ctx, uint32_t numCommandLists,
225+
ze_command_list_handle_t *phCommandLists, NEO::LinearStream *streamForDispatch, bool isFenceRequired);
226+
inline ze_result_t handleSubmission(NEO::SubmissionStatus submitRet);
224227
inline size_t estimatePipelineSelectCmdSizeForMultipleCommandLists(NEO::StreamProperties &csrState,
225228
const NEO::StreamProperties &cmdListRequired,
226229
const NEO::StreamProperties &cmdListFinal,

level_zero/core/source/cmdqueue/cmdqueue_hw.inl

Lines changed: 45 additions & 48 deletions
Original file line number | Diff line number | Diff line change
@@ -128,7 +128,7 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandLists(
128128
!this->commandQueueDebugCmdsProgrammed &&
129129
device->getL0Debugger();
130130
ctx.lockScratchController = lockScratchController;
131-
131+
ctx.lockCSR = &lockCSR;
132132
if (this->isCopyOnlyCommandQueue) {
133133
ret = this->executeCommandListsCopyOnly(ctx, numCommandLists, phCommandLists, hFence, parentImmediateCommandlistLinearStream);
134134
} else if (this->heaplessStateInitEnabled) {
@@ -230,18 +230,13 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandListsRegularHeapless(
230230
this->dispatchPatchPreambleEnding(ctx);
231231

232232
if (!ctx.containsParentImmediateStream) {
233-
this->assignCsrTaskCountToFenceIfAvailable(hFence);
234-
this->dispatchTaskCountPostSyncRegular(ctx.isDispatchTaskCountPostSyncRequired, *streamForDispatch);
235-
236-
auto submitResult = this->prepareAndSubmitBatchBuffer(ctx, *streamForDispatch);
237-
this->updateTaskCountAndPostSync(ctx.isDispatchTaskCountPostSyncRequired, numCommandLists, commandListHandles);
238-
239-
this->csr->makeSurfacePackNonResident(this->csr->getResidencyAllocations(), false);
240-
241-
auto completionResult = this->waitForCommandQueueCompletionAndCleanHeapContainer();
242-
retVal = this->handleSubmissionAndCompletionResults(submitResult, completionResult);
243-
this->csr->getResidencyAllocations().clear();
233+
retVal = handleNonParentImmediateStream(hFence, ctx, numCommandLists, commandListHandles, streamForDispatch, false);
234+
if (retVal != ZE_RESULT_SUCCESS) {
235+
return retVal;
236+
}
237+
retVal = this->waitForCommandQueueCompletion(ctx);
244238
}
239+
245240
return retVal;
246241
}
247242

@@ -458,21 +453,17 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandListsRegular(
458453
this->csr->setPreemptionMode(ctx.statePreemption);
459454

460455
if (!ctx.containsParentImmediateStream) {
461-
this->assignCsrTaskCountToFenceIfAvailable(hFence);
462-
this->dispatchTaskCountPostSyncRegular(ctx.isDispatchTaskCountPostSyncRequired, *streamForDispatch);
463-
464-
auto submitResult = this->prepareAndSubmitBatchBuffer(ctx, *streamForDispatch);
465-
this->updateTaskCountAndPostSync(ctx.isDispatchTaskCountPostSyncRequired, numCommandLists, commandListHandles);
466-
467-
this->csr->makeSurfacePackNonResident(this->csr->getResidencyAllocations(), false);
468-
469-
auto completionResult = this->waitForCommandQueueCompletionAndCleanHeapContainer();
470-
retVal = this->handleSubmissionAndCompletionResults(submitResult, completionResult);
471-
this->csr->getResidencyAllocations().clear();
456+
retVal = handleNonParentImmediateStream(hFence, ctx, numCommandLists, commandListHandles, streamForDispatch, false);
472457
}
473458

474459
this->stateChanges.clear();
475460
this->currentStateChangeIndex = 0;
461+
if (retVal != ZE_RESULT_SUCCESS) {
462+
return retVal;
463+
}
464+
if (!ctx.containsParentImmediateStream) {
465+
return this->waitForCommandQueueCompletion(ctx);
466+
}
476467
return retVal;
477468
}
478469

@@ -543,19 +534,12 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandListsCopyOnly(
543534
this->makeCsrTagAllocationResident();
544535

545536
if (!ctx.containsParentImmediateStream) {
546-
this->assignCsrTaskCountToFenceIfAvailable(hFence);
547-
this->dispatchTaskCountPostSyncByMiFlushDw(ctx.isDispatchTaskCountPostSyncRequired, fenceRequired, *streamForDispatch);
548-
549-
auto submitResult = this->prepareAndSubmitBatchBuffer(ctx, *streamForDispatch);
550-
this->updateTaskCountAndPostSync(ctx.isDispatchTaskCountPostSyncRequired, numCommandLists, phCommandLists);
551-
552-
this->csr->makeSurfacePackNonResident(this->csr->getResidencyAllocations(), false);
553-
554-
auto completionResult = this->waitForCommandQueueCompletionAndCleanHeapContainer();
555-
retVal = this->handleSubmissionAndCompletionResults(submitResult, completionResult);
556-
this->csr->getResidencyAllocations().clear();
537+
retVal = handleNonParentImmediateStream(hFence, ctx, numCommandLists, phCommandLists, streamForDispatch, fenceRequired);
538+
if (retVal != ZE_RESULT_SUCCESS) {
539+
return retVal;
540+
}
541+
retVal = this->waitForCommandQueueCompletion(ctx);
557542
}
558-
559543
return retVal;
560544
}
561545

@@ -1689,39 +1673,52 @@ void CommandQueueHw<gfxCoreFamily>::updateTaskCountAndPostSync(bool isDispatchTa
16891673
}
16901674

16911675
template <GFXCORE_FAMILY gfxCoreFamily>
1692-
ze_result_t CommandQueueHw<gfxCoreFamily>::waitForCommandQueueCompletionAndCleanHeapContainer() {
1693-
1676+
ze_result_t CommandQueueHw<gfxCoreFamily>::waitForCommandQueueCompletion(CommandListExecutionContext &ctx) {
16941677
ze_result_t ret = ZE_RESULT_SUCCESS;
1695-
16961678
if (this->isSynchronousMode()) {
1679+
ctx.lockCSR->unlock();
16971680
if (const auto syncRet = this->synchronize(std::numeric_limits<uint64_t>::max()); syncRet == ZE_RESULT_ERROR_DEVICE_LOST) {
16981681
ret = syncRet;
16991682
}
17001683
}
1701-
this->heapContainer.clear();
17021684

17031685
return ret;
17041686
}
17051687

17061688
template <GFXCORE_FAMILY gfxCoreFamily>
1707-
ze_result_t CommandQueueHw<gfxCoreFamily>::handleSubmissionAndCompletionResults(
1708-
NEO::SubmissionStatus submitRet,
1709-
ze_result_t completionRet) {
1689+
ze_result_t CommandQueueHw<gfxCoreFamily>::handleNonParentImmediateStream(ze_fence_handle_t hFence, CommandListExecutionContext &ctx, uint32_t numCommandLists,
1690+
ze_command_list_handle_t *phCommandLists, NEO::LinearStream *streamForDispatch, bool isFenceRequired) {
1691+
this->assignCsrTaskCountToFenceIfAvailable(hFence);
1692+
if (!this->isCopyOnlyCommandQueue) {
1693+
this->dispatchTaskCountPostSyncRegular(ctx.isDispatchTaskCountPostSyncRequired, *streamForDispatch);
1694+
} else {
1695+
this->dispatchTaskCountPostSyncByMiFlushDw(ctx.isDispatchTaskCountPostSyncRequired, isFenceRequired, *streamForDispatch);
1696+
}
1697+
auto submitResult = this->prepareAndSubmitBatchBuffer(ctx, *streamForDispatch);
1698+
this->updateTaskCountAndPostSync(ctx.isDispatchTaskCountPostSyncRequired, numCommandLists, phCommandLists);
17101699

1711-
if ((submitRet != NEO::SubmissionStatus::success) || (completionRet == ZE_RESULT_ERROR_DEVICE_LOST)) {
1700+
this->csr->makeSurfacePackNonResident(this->csr->getResidencyAllocations(), false);
1701+
1702+
auto retVal = this->handleSubmission(submitResult);
1703+
this->csr->getResidencyAllocations().clear();
1704+
this->heapContainer.clear();
1705+
return retVal;
1706+
}
1707+
1708+
template <GFXCORE_FAMILY gfxCoreFamily>
1709+
ze_result_t CommandQueueHw<gfxCoreFamily>::handleSubmission(
1710+
NEO::SubmissionStatus submitRet) {
1711+
if (submitRet != NEO::SubmissionStatus::success) {
17121712
for (auto &gfx : this->csr->getResidencyAllocations()) {
17131713
if (this->csr->peekLatestFlushedTaskCount() == 0) {
17141714
gfx->releaseUsageInOsContext(this->csr->getOsContext().getContextId());
17151715
} else {
17161716
gfx->updateTaskCount(this->csr->peekLatestFlushedTaskCount(), this->csr->getOsContext().getContextId());
17171717
}
17181718
}
1719-
if (completionRet != ZE_RESULT_ERROR_DEVICE_LOST) {
1720-
completionRet = getErrorCodeForSubmissionStatus(submitRet);
1721-
}
1719+
return getErrorCodeForSubmissionStatus(submitRet);
17221720
}
1723-
1724-
return completionRet;
1721+
return ZE_RESULT_SUCCESS;
17251722
}
17261723

17271724
template <GFXCORE_FAMILY gfxCoreFamily>

level_zero/core/test/unit_tests/mocks/mock_cmdqueue.h

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -99,6 +99,7 @@ struct MockCommandQueueHw : public L0::CommandQueueHw<gfxCoreFamily> {
9999
using BaseClass::prepareAndSubmitBatchBuffer;
100100
using BaseClass::printfKernelContainer;
101101
using BaseClass::startingCmdBuffer;
102+
using BaseClass::waitForCommandQueueCompletion;
102103
using L0::CommandQueue::activeSubDevices;
103104
using L0::CommandQueue::cmdListHeapAddressModel;
104105
using L0::CommandQueue::dispatchCmdListBatchBufferAsPrimary;

level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue_2.cpp

Lines changed: 14 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1232,5 +1232,19 @@ HWTEST_F(HostFunctionsCmdPatchTests, givenHostFunctionPatchCommandsWhenPatchComm
12321232
commandQueue->csr = oldCsr;
12331233
}
12341234

1235+
HWTEST_F(CommandQueueSynchronizeTest, givenCmdQueueWhenCallWaitForCommandQueueCompletionThenSynchronizeWithoutLockOnCsr) {
1236+
DebugManagerStateRestore restore;
1237+
auto &csr = neoDevice->getUltCommandStreamReceiver<FamilyType>();
1238+
1239+
ze_command_queue_desc_t desc = {};
1240+
desc.mode = ZE_COMMAND_QUEUE_MODE_SYNCHRONOUS;
1241+
auto commandQueue = std::make_unique<MockCommandQueueHw<FamilyType::gfxCoreFamily>>(device, &csr, &desc);
1242+
1243+
auto ownership = csr.obtainUniqueOwnership();
1244+
auto ctx = typename MockCommandQueueHw<FamilyType::gfxCoreFamily>::CommandListExecutionContext();
1245+
ctx.lockCSR = &ownership;
1246+
commandQueue->waitForCommandQueueCompletion(ctx);
1247+
EXPECT_FALSE(ownership.owns_lock());
1248+
}
12351249
} // namespace ult
12361250
} // namespace L0

0 commit comments

Comments
 (0)