Skip to content

Commit 8456b01

Browse files
authored
[SYCL] Fix memory leak for sub-devices (#20370)
Current `device_impl` implementation assumes shared ownership and manages the handle via `urRetain`/`urRelease` in constructor/destructor. Whenever we get temporary device handles via `urDeviceGet`/`urDevicePartition` (which implicitly return device handle with `refcount == 1`) and wrap those handles in `device_impl`, we have to release temporary handles at the end of scope to avoid memory leak. It's done here for devices: https://github.com/intel/llvm/blob/6b29cf120c267118dc8ad737e54e8b844bfb4e06/sycl/source/detail/platform_impl.cpp#L560-L563 Same needs to be done when we create sub-devices. But currently we were missing release calls for temporary handles after creating `device_impl` objects wrapping those handles. This PR fixes that issue by releasing temporary handles at the end of the scope.
1 parent 93a515d commit 8456b01

File tree

2 files changed

+68
-0
lines changed

2 files changed

+68
-0
lines changed

sycl/source/detail/device_impl.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -167,6 +167,12 @@ std::vector<device> device_impl::create_sub_devices(
167167
MPlatform.getOrMakeDeviceImpl(a_ur_device));
168168
res.push_back(sycl_device);
169169
});
170+
// urDevicePartition returns devices with their reference counts
171+
// incremented. Each device_impl wrapper increments the reference count and
172+
// decrements it on destruction (shared ownership). So, we have to decrement
173+
// the reference count once here to release temporary handles.
174+
for (ur_device_handle_t &SubDevice : SubDevices)
175+
Adapter.call<UrApiKind::urDeviceRelease>(SubDevice);
170176
return res;
171177
}
172178

sycl/unittests/context_device/DeviceRefCounter.cpp

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,39 @@ static ur_result_t redefinedDeviceReleaseAfter(void *) {
2929
return UR_RESULT_SUCCESS;
3030
}
3131

32+
ur_result_t redefinedDevicePartitionAfter(void *pParams) {
33+
auto params = *static_cast<ur_device_partition_params_t *>(pParams);
34+
if (*params.pphSubDevices) {
35+
for (size_t I = 0; I < *params.pNumDevices; ++I) {
36+
*params.pphSubDevices[I] = reinterpret_cast<ur_device_handle_t>(1000 + I);
37+
}
38+
}
39+
if (*params.ppNumDevicesRet)
40+
**params.ppNumDevicesRet = *params.pNumDevices;
41+
42+
DevRefCounter += *params.pNumDevices;
43+
return UR_RESULT_SUCCESS;
44+
}
45+
46+
static constexpr size_t NumSubDevices = 2;
47+
48+
ur_result_t redefinedDeviceGetInfoAfter(void *pParams) {
49+
auto params = *static_cast<ur_device_get_info_params_t *>(pParams);
50+
if (*params.ppropName == UR_DEVICE_INFO_SUPPORTED_PARTITIONS) {
51+
if (*params.ppPropValue) {
52+
auto *Result =
53+
reinterpret_cast<ur_device_partition_t *>(*params.ppPropValue);
54+
*Result = UR_DEVICE_PARTITION_EQUALLY;
55+
}
56+
if (*params.ppPropSizeRet)
57+
**params.ppPropSizeRet = sizeof(ur_device_partition_t);
58+
} else if (*params.ppropName == UR_DEVICE_INFO_MAX_COMPUTE_UNITS) {
59+
auto *Result = reinterpret_cast<uint32_t *>(*params.ppPropValue);
60+
*Result = NumSubDevices;
61+
}
62+
return UR_RESULT_SUCCESS;
63+
}
64+
3265
TEST(DevRefCounter, DevRefCounter) {
3366
{
3467
sycl::unittest::UrMock<> Mock;
@@ -52,3 +85,32 @@ TEST(DevRefCounter, DevRefCounter) {
5285
}
5386
EXPECT_EQ(DevRefCounter, 0);
5487
}
88+
89+
TEST(SubDevRefCounter, SubDevRefCounter) {
90+
{
91+
DevRefCounter = 0;
92+
sycl::unittest::UrMock<> Mock;
93+
mock::getCallbacks().set_after_callback("urDeviceGet",
94+
&redefinedDevicesGetAfter);
95+
mock::getCallbacks().set_after_callback("urDeviceRetain",
96+
&redefinedDeviceRetainAfter);
97+
mock::getCallbacks().set_after_callback("urDeviceRelease",
98+
&redefinedDeviceReleaseAfter);
99+
mock::getCallbacks().set_before_callback("urDevicePartition",
100+
&redefinedDevicePartitionAfter);
101+
mock::getCallbacks().set_after_callback("urDeviceGetInfo",
102+
&redefinedDeviceGetInfoAfter);
103+
sycl::platform Plt = sycl::platform();
104+
105+
auto Devs = Plt.get_devices();
106+
if (!Devs.empty()) {
107+
auto Subdevs = Devs[0]
108+
.create_sub_devices<
109+
sycl::info::partition_property::partition_equally>(
110+
NumSubDevices);
111+
}
112+
EXPECT_NE(DevRefCounter, 0);
113+
sycl::detail::GlobalHandler::instance().getPlatformCache().clear();
114+
}
115+
EXPECT_EQ(DevRefCounter, 0);
116+
}

0 commit comments

Comments
 (0)