Skip to content

Commit cfa007e

Browse files
feature: Set shared system USM madvise preferred location as prefetch region
Resolves: NEO-16482 Signed-off-by: Falkowski, John <john.falkowski@intel.com>
1 parent 20e4655 commit cfa007e

File tree

8 files changed

+184
-23
lines changed

8 files changed

+184
-23
lines changed

shared/source/debug_settings/debug_variables_base.inl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -452,6 +452,7 @@ DECLARE_DEBUG_VARIABLE(int64_t, ForceGmmSystemMemoryBufferForAllocations, 0, "0:
452452
DECLARE_DEBUG_VARIABLE(int32_t, ForceLowLatencyHint, -1, "Force passing low latency hint during xe_exec_queue creation. -1: default, 0: disabled, 1: enabled");
453453
DECLARE_DEBUG_VARIABLE(int32_t, EmitMemAdvisePriorToCopyForNonUsm, -1, "Enable Memadvise to system memory for copy/fill with shared system input: -1: default, 0: disabled, 1: enabled")
454454
DECLARE_DEBUG_VARIABLE(int32_t, TreatNonUsmForTransfersAsSharedSystem, -1, "-1: default, 0: import non-usm as external host ptr on copy/fill (legacy mode), 1: treat non usm on copy/fill as shared system usm")
455+
DECLARE_DEBUG_VARIABLE(int32_t, OverrideMadviseSharedSystemPrefetchRegion, -1, "-1: default (madvise), 0: system memory, 1: same-tile local memory")
455456

456457
/*DIRECT SUBMISSION FLAGS*/
457458
DECLARE_DEBUG_VARIABLE(int32_t, EnableDirectSubmission, -1, "-1: default (disabled), 0: disable, 1:enable. Enables direct submission of command buffers bypassing KMD")

shared/source/os_interface/linux/drm_memory_manager.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -372,7 +372,7 @@ bool DrmMemoryManager::prefetchSharedSystemAlloc(const void *ptr, const size_t s
372372
auto memoryClassDevice = ioctlHelper->getDrmParamValue(DrmParam::memoryClassDevice);
373373
auto region = static_cast<uint32_t>((memoryClassDevice << 16u) | subDeviceIds[0]);
374374
auto vmId = drm.getVirtualMemoryAddressSpace(subDeviceIds[0]);
375-
return ioctlHelper->setVmPrefetch(reinterpret_cast<uint64_t>(ptr), size, region, vmId);
375+
return ioctlHelper->setVmSharedSystemMemPrefetch(reinterpret_cast<uint64_t>(ptr), size, region, vmId);
376376
}
377377

378378
bool DrmMemoryManager::setMemPrefetch(GraphicsAllocation *gfxAllocation, SubDeviceIdsVec &subDeviceIds, uint32_t rootDeviceIndex) {

shared/source/os_interface/linux/ioctl_helper.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -137,6 +137,7 @@ class IoctlHelper {
137137
virtual AtomicAccessMode getVmSharedSystemAtomicAttribute(uint64_t handle, const size_t size, const uint32_t vmId) { return AtomicAccessMode::none; }
138138
virtual bool setVmBoAdviseForChunking(int32_t handle, uint64_t start, uint64_t length, uint32_t attribute, void *region) = 0;
139139
virtual bool setVmPrefetch(uint64_t start, uint64_t length, uint32_t region, uint32_t vmId) = 0;
140+
// Prefetch request for a shared-system memory range. This base implementation issues no ioctl and simply reports success.
virtual bool setVmSharedSystemMemPrefetch(uint64_t start, uint64_t length, uint32_t region, uint32_t vmId) { return true; }
140141
virtual bool setGemTiling(void *setTiling) = 0;
141142
virtual bool getGemTiling(void *setTiling) = 0;
142143
virtual uint32_t getDirectSubmissionFlag() = 0;

shared/source/os_interface/linux/xe/ioctl_helper_xe.cpp

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -994,6 +994,47 @@ bool IoctlHelperXe::setVmPrefetch(uint64_t start, uint64_t length, uint32_t regi
994994
return true;
995995
}
996996

997+
bool IoctlHelperXe::setVmSharedSystemMemPrefetch(uint64_t start, uint64_t length, uint32_t region, uint32_t vmId) {
998+
xeLog(" -> IoctlHelperXe::%s s=0x%llx l=0x%llx align_s=0x%llx align_l=0x%llx vmid=0x%x\n", __FUNCTION__, start, length, alignDown(start, MemoryConstants::pageSize), alignSizeWholePage(reinterpret_cast<void *>(start), length), vmId);
999+
drm_xe_vm_bind bind = {};
1000+
bind.vm_id = vmId;
1001+
bind.num_binds = 1;
1002+
1003+
bind.bind.range = alignSizeWholePage(reinterpret_cast<void *>(start), length);
1004+
bind.bind.addr = alignDown(start, MemoryConstants::pageSize);
1005+
bind.bind.op = DRM_XE_VM_BIND_OP_PREFETCH;
1006+
1007+
auto pHwInfo = this->drm.getRootDeviceEnvironment().getHardwareInfo();
1008+
if (debugManager.flags.OverrideMadviseSharedSystemPrefetchRegion.get() != -1) {
1009+
constexpr uint32_t subDeviceMaskSize = DeviceBitfield().size();
1010+
constexpr uint32_t subDeviceMaskMax = (1u << subDeviceMaskSize) - 1u;
1011+
uint32_t subDeviceId = region & subDeviceMaskMax;
1012+
DeviceBitfield subDeviceMask = (debugManager.flags.OverrideMadviseSharedSystemPrefetchRegion.get() << subDeviceId);
1013+
MemoryClassInstance regionInstanceClass = this->drm.getMemoryInfo()->getMemoryRegionClassAndInstance(subDeviceMask, *pHwInfo);
1014+
bind.bind.prefetch_mem_region_instance = regionInstanceClass.memoryInstance;
1015+
} else {
1016+
bind.bind.prefetch_mem_region_instance = DRM_XE_CONSULT_MEM_ADVISE_PREF_LOC;
1017+
}
1018+
1019+
int ret = IoctlHelper::ioctl(DrmIoctl::gemVmBind, &bind);
1020+
1021+
xeLog(" vm=%d addr=0x%lx range=0x%lx region=0x%x operation=%d(%s) ret=%d\n",
1022+
bind.vm_id,
1023+
bind.bind.addr,
1024+
bind.bind.range,
1025+
bind.bind.prefetch_mem_region_instance,
1026+
bind.bind.op,
1027+
xeGetBindOperationName(bind.bind.op),
1028+
ret);
1029+
1030+
if (ret != 0) {
1031+
xeLog("error: %s ret=%d\n", xeGetBindOperationName(bind.bind.op), ret);
1032+
return false;
1033+
}
1034+
1035+
return true;
1036+
}
1037+
9971038
uint32_t IoctlHelperXe::getDirectSubmissionFlag() {
9981039
xeLog(" -> IoctlHelperXe::%s\n", __FUNCTION__);
9991040
return 0;

shared/source/os_interface/linux/xe/ioctl_helper_xe.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,7 @@ class IoctlHelperXe : public IoctlHelper {
6464
AtomicAccessMode getVmSharedSystemAtomicAttribute(uint64_t handle, const size_t size, const uint32_t vmId) override;
6565
bool setVmBoAdviseForChunking(int32_t handle, uint64_t start, uint64_t length, uint32_t attribute, void *region) override;
6666
bool setVmPrefetch(uint64_t start, uint64_t length, uint32_t region, uint32_t vmId) override;
67+
bool setVmSharedSystemMemPrefetch(uint64_t start, uint64_t length, uint32_t region, uint32_t vmId) override;
6768
bool setGemTiling(void *setTiling) override;
6869
bool getGemTiling(void *setTiling) override;
6970
uint32_t getDirectSubmissionFlag() override;

shared/test/common/test_files/igdrcl.config

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -650,6 +650,7 @@ EnableDeferBacking = 0
650650
ForceLowLatencyHint = -1
651651
EmitMemAdvisePriorToCopyForNonUsm = -1
652652
TreatNonUsmForTransfersAsSharedSystem = -1
653+
OverrideMadviseSharedSystemPrefetchRegion = -1
653654
SetMaxBVHLevels = -1
654655
GetSipBinaryFromExternalLib = -1
655656
LogUsmReuse = 0

shared/test/unit_test/os_interface/linux/drm_memory_manager_tests.cpp

Lines changed: 12 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -6472,46 +6472,36 @@ HWTEST_TEMPLATED_F(DrmMemoryManagerTest, givenDrmMemoryManagerWhenSetMemPrefetch
64726472
}
64736473

64746474
HWTEST_TEMPLATED_F(DrmMemoryManagerTest, givenPrefetchSharedSystemAllocIsCalledThenReturnTrue) {
6475-
SubDeviceIdsVec subDeviceIds{0};
6476-
class MyMockIoctlHelper : public MockIoctlHelper {
6477-
public:
6478-
using MockIoctlHelper::MockIoctlHelper;
6479-
6480-
bool setVmPrefetch(uint64_t start, uint64_t length, uint32_t region, uint32_t vmId) override {
6481-
return true;
6482-
}
6483-
};
6484-
auto mockIoctlHelper = new MyMockIoctlHelper(*mock);
6485-
6486-
auto &drm = static_cast<DrmMockCustom &>(memoryManager->getDrm(mockRootDeviceIndex));
6487-
drm.ioctlHelper.reset(mockIoctlHelper);
6488-
6489-
auto ptr = malloc(1024);
64906475

6491-
EXPECT_TRUE(memoryManager->prefetchSharedSystemAlloc(ptr, 1024, subDeviceIds, rootDeviceIndex));
6476+
void *ptr = malloc(1024);
64926477

6478+
auto subDeviceIds = NEO::SubDeviceIdsVec{0};
6479+
EXPECT_TRUE(memoryManager->prefetchSharedSystemAlloc(ptr, 1024, subDeviceIds, mockRootDeviceIndex));
64936480
free(ptr);
64946481
}
64956482

64966483
HWTEST_TEMPLATED_F(DrmMemoryManagerTest, givenPrefetchSharedSystemAllocIsCalledThenReturnFalse) {
6497-
SubDeviceIdsVec subDeviceIds{0};
6484+
64986485
class MyMockIoctlHelper : public MockIoctlHelper {
6499-
public:
65006486
using MockIoctlHelper::MockIoctlHelper;
65016487

6502-
bool setVmPrefetch(uint64_t start, uint64_t length, uint32_t region, uint32_t vmId) override {
6488+
public:
6489+
bool setVmSharedSystemMemPrefetch(uint64_t start, uint64_t length, uint32_t region, uint32_t vmId) override {
6490+
setVmSharedSystemMemPrefetchCalled++;
65036491
return false;
65046492
}
6493+
uint32_t setVmSharedSystemMemPrefetchCalled = 0;
65056494
};
65066495
auto mockIoctlHelper = new MyMockIoctlHelper(*mock);
65076496

65086497
auto &drm = static_cast<DrmMockCustom &>(memoryManager->getDrm(mockRootDeviceIndex));
65096498
drm.ioctlHelper.reset(mockIoctlHelper);
65106499

6511-
auto ptr = malloc(1024);
6512-
6513-
EXPECT_TRUE(memoryManager->prefetchSharedSystemAlloc(ptr, 1024, subDeviceIds, rootDeviceIndex));
6500+
void *ptr = malloc(1024);
65146501

6502+
auto subDeviceIds = NEO::SubDeviceIdsVec{0};
6503+
EXPECT_FALSE(memoryManager->prefetchSharedSystemAlloc(ptr, 1024, subDeviceIds, mockRootDeviceIndex));
6504+
EXPECT_EQ(1u, mockIoctlHelper->setVmSharedSystemMemPrefetchCalled);
65156505
free(ptr);
65166506
}
65176507

shared/test/unit_test/os_interface/linux/xe/ioctl_helper_xe_tests.cpp

Lines changed: 126 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3044,6 +3044,132 @@ TEST_F(IoctlHelperXeTest, givenIoctlHelperXeWhenCallingSetVmPrefetchThenVmBindIs
30443044
EXPECT_EQ(drm->vmBindInputs[0].bind.prefetch_mem_region_instance, targetMemoryRegion.memoryInstance);
30453045
}
30463046

3047+
struct DrmMockXePrefetchFail : public DrmMockXe {
3048+
static auto create(RootDeviceEnvironment &rootDeviceEnvironment) {
3049+
auto drm = std::unique_ptr<DrmMockXePrefetchFail>(new DrmMockXePrefetchFail{rootDeviceEnvironment});
3050+
drm->initInstance();
3051+
return drm;
3052+
}
3053+
3054+
int ioctl(DrmIoctl request, void *arg) override {
3055+
if (request == DrmIoctl::gemVmBind) {
3056+
return -1;
3057+
}
3058+
return DrmMockXe::ioctl(request, arg);
3059+
};
3060+
3061+
int gemVmAdviseReturn = 0;
3062+
StackVec<drm_xe_madvise, 4> vmAdviseInputs;
3063+
3064+
protected:
3065+
// Don't call directly, use the create() function
3066+
DrmMockXePrefetchFail(RootDeviceEnvironment &rootDeviceEnvironment) : DrmMockXe(rootDeviceEnvironment) {}
3067+
};
3068+
3069+
// DrmMockXePrefetchFail makes the gemVmBind ioctl return -1, so the helper is
// expected to report failure.
TEST_F(IoctlHelperXeTest, givenIoctlHelperXeWhenCallingSetVmSharedSystemMemPrefetchThenFailureIsReturned) {
    auto executionEnvironment = std::make_unique<MockExecutionEnvironment>();
    auto drm = DrmMockXePrefetchFail::create(*executionEnvironment->rootDeviceEnvironments[0]);
    auto xeIoctlHelper = static_cast<MockIoctlHelperXe *>(drm->getIoctlHelper());

    const uint64_t start = 0x12u;
    const uint64_t length = 0x34u;
    const uint32_t subDeviceId = 0u;
    const uint32_t vmId = 1u;

    // region = memory class in the upper half-word, sub-device id in the low bits.
    const int memoryClassDevice = static_cast<int>(DrmParam::memoryClassDevice);
    const uint32_t region = (memoryClassDevice << 16u) | subDeviceId;

    const bool prefetched = xeIoctlHelper->setVmSharedSystemMemPrefetch(start, length, region, vmId);
    EXPECT_FALSE(prefetched);
}
3085+
3086+
// With OverrideMadviseSharedSystemPrefetchRegion left at its default, the bind
// must carry DRM_XE_CONSULT_MEM_ADVISE_PREF_LOC as the prefetch region instance.
TEST_F(IoctlHelperXeTest, givenIoctlHelperXeWhenCallingSetVmSharedSystemMemPrefetchThenMemRegionInstanceIsMemAdvisePreferredLocation) {
    DebugManagerStateRestore restorer;
    debugManager.flags.EnableLocalMemory.set(1);

    auto executionEnvironment = std::make_unique<MockExecutionEnvironment>();
    auto drm = DrmMockXe::create(*executionEnvironment->rootDeviceEnvironments[0]);
    auto xeIoctlHelper = static_cast<MockIoctlHelperXe *>(drm->getIoctlHelper());
    xeIoctlHelper->initialize();

    uint64_t start = 0x12u;
    uint64_t length = 0x34u;
    uint32_t subDeviceId = 0u;
    uint32_t vmId = 1u;

    int memoryClassDevice = static_cast<int>(DrmParam::memoryClassDevice);
    uint32_t region = (memoryClassDevice << 16u) | subDeviceId;

    EXPECT_TRUE(xeIoctlHelper->setVmSharedSystemMemPrefetch(start, length, region, vmId));
    // Fatal check: the assertions below index vmBindInputs[0].
    ASSERT_EQ(1u, drm->vmBindInputs.size());

    EXPECT_EQ(drm->vmBindInputs[0].vm_id, vmId);
    EXPECT_EQ(drm->vmBindInputs[0].bind.addr, alignDown(start, MemoryConstants::pageSize));
    EXPECT_EQ(drm->vmBindInputs[0].bind.range, alignSizeWholePage(reinterpret_cast<void *>(start), length));
    // The sentinel is compared as the 32-bit value stored in the bind structure.
    EXPECT_EQ(drm->vmBindInputs[0].bind.prefetch_mem_region_instance, static_cast<uint32_t>(DRM_XE_CONSULT_MEM_ADVISE_PREF_LOC));
}
3111+
3112+
// With OverrideMadviseSharedSystemPrefetchRegion set to 1, the bind must target
// the local memory region instance of the sub-device encoded in `region`.
TEST_F(IoctlHelperXeTest, givenIoctlHelperXeWhenCallingSetVmSharedSystemMemPrefetchWithDebugVarThenMemRegionInstanceIsDeviceLocal) {
    DebugManagerStateRestore restorer;
    debugManager.flags.EnableLocalMemory.set(1);
    debugManager.flags.OverrideMadviseSharedSystemPrefetchRegion.set(1);

    auto executionEnvironment = std::make_unique<MockExecutionEnvironment>();
    auto drm = DrmMockXe::create(*executionEnvironment->rootDeviceEnvironments[0]);
    auto xeIoctlHelper = static_cast<MockIoctlHelperXe *>(drm->getIoctlHelper());
    xeIoctlHelper->initialize();

    uint64_t start = 0x12u;
    uint64_t length = 0x34u;
    uint32_t subDeviceId = 0u;
    uint32_t vmId = 1u;

    // The override path resolves the region through drm->memoryInfo, so install one.
    auto memoryInfo = xeIoctlHelper->createMemoryInfo();
    ASSERT_NE(nullptr, memoryInfo);
    MemoryClassInstance targetMemoryRegion = memoryInfo->getLocalMemoryRegions()[subDeviceId].region;
    drm->memoryInfo.reset(memoryInfo.release());
    int memoryClassDevice = static_cast<int>(DrmParam::memoryClassDevice);
    uint32_t region = (memoryClassDevice << 16u) | subDeviceId;

    EXPECT_TRUE(xeIoctlHelper->setVmSharedSystemMemPrefetch(start, length, region, vmId));
    // Fatal check: the assertions below index vmBindInputs[0].
    ASSERT_EQ(1u, drm->vmBindInputs.size());

    EXPECT_EQ(drm->vmBindInputs[0].vm_id, vmId);
    EXPECT_EQ(drm->vmBindInputs[0].bind.addr, alignDown(start, MemoryConstants::pageSize));
    EXPECT_EQ(drm->vmBindInputs[0].bind.range, alignSizeWholePage(reinterpret_cast<void *>(start), length));
    EXPECT_EQ(drm->vmBindInputs[0].bind.prefetch_mem_region_instance, targetMemoryRegion.memoryInstance);
}
3142+
3143+
// With OverrideMadviseSharedSystemPrefetchRegion set to 0, the bind is expected
// to carry region instance 0.
TEST_F(IoctlHelperXeTest, givenIoctlHelperXeWhenCallingSetVmSharedSystemMemPrefetchWithDebugVarThenMemRegionInstanceIsSystem) {
    DebugManagerStateRestore restorer;
    debugManager.flags.EnableLocalMemory.set(1);
    debugManager.flags.OverrideMadviseSharedSystemPrefetchRegion.set(0);

    auto executionEnvironment = std::make_unique<MockExecutionEnvironment>();
    auto drm = DrmMockXe::create(*executionEnvironment->rootDeviceEnvironments[0]);
    auto xeIoctlHelper = static_cast<MockIoctlHelperXe *>(drm->getIoctlHelper());
    xeIoctlHelper->initialize();

    uint64_t start = 0x12u;
    uint64_t length = 0x34u;
    uint32_t subDeviceId = 0u;
    uint32_t vmId = 1u;

    // The override path resolves the region through drm->memoryInfo, so install one.
    auto memoryInfo = xeIoctlHelper->createMemoryInfo();
    ASSERT_NE(nullptr, memoryInfo);
    drm->memoryInfo.reset(memoryInfo.release());
    int memoryClassDevice = static_cast<int>(DrmParam::memoryClassDevice);
    uint32_t region = (memoryClassDevice << 16u) | subDeviceId;

    EXPECT_TRUE(xeIoctlHelper->setVmSharedSystemMemPrefetch(start, length, region, vmId));
    // Fatal check: the assertions below index vmBindInputs[0].
    ASSERT_EQ(1u, drm->vmBindInputs.size());

    EXPECT_EQ(drm->vmBindInputs[0].vm_id, vmId);
    EXPECT_EQ(drm->vmBindInputs[0].bind.addr, alignDown(start, MemoryConstants::pageSize));
    EXPECT_EQ(drm->vmBindInputs[0].bind.range, alignSizeWholePage(reinterpret_cast<void *>(start), length));
    EXPECT_EQ(drm->vmBindInputs[0].bind.prefetch_mem_region_instance, 0u);
}
3172+
30473173
TEST_F(IoctlHelperXeTest, givenIoctlHelperXeWhenCallingSetVmPrefetchOnSecondTileThenVmBindIsCalled) {
30483174
DebugManagerStateRestore restorer;
30493175
debugManager.flags.EnableLocalMemory.set(1);

0 commit comments

Comments
 (0)