Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
71 commits
Select commit Hold shift + click to select a range
0c42764
[OFFLOAD] Add plugin with support for Intel Level Zero
adurang Sep 16, 2025
e8c471e
Merge branch 'main' into l0_plugin
adurang Sep 16, 2025
f8956cd
Update offload/CMakeLists.txt
adurang Sep 16, 2025
d3fc4d7
Update offload/plugins-nextgen/level_zero/CMakeLists.txt
adurang Sep 16, 2025
4b38386
Update offload/plugins-nextgen/level_zero/include/L0Plugin.h
adurang Sep 16, 2025
6c1c820
Replace pragma once
adurang Sep 17, 2025
fd91c47
Address review comments
adurang Sep 17, 2025
84665dc
Fix makefile format
adurang Sep 17, 2025
a2217db
change to StringRef in multiple places
adurang Sep 17, 2025
08880a6
remove tokenize
adurang Sep 18, 2025
9a3088c
remove unused code
adurang Sep 18, 2025
24d0645
fix format
adurang Sep 18, 2025
0eb5712
Remove environment variable
adurang Sep 18, 2025
f491f3d
fix getAsInteger conditions
adurang Sep 18, 2025
9bbdae3
Merge branch 'main' into l0_plugin
adurang Sep 18, 2025
fe633f4
Don't use __tgt_device_image
adurang Sep 19, 2025
e36a8fc
fix image checking
adurang Sep 19, 2025
53bce7e
changes from review comments
adurang Sep 21, 2025
69df388
remove more CMake messages
adurang Sep 21, 2025
91fa069
format
adurang Sep 21, 2025
561e4ca
remove unnecessary check
adurang Sep 21, 2025
ad07ee8
Merge branch 'main' into l0_plugin
adurang Sep 23, 2025
5b90ccb
Add level_zero to liboffload platforms
adurang Sep 23, 2025
6b8280d
fix different issues
adurang Sep 23, 2025
f983135
Fix ELF bits from #159623
adurang Sep 23, 2025
b6c393a
format & add more Deviceinfo tags
adurang Sep 23, 2025
b226c70
tidy up Makefile
adurang Sep 23, 2025
af1945f
format & renaming
adurang Sep 23, 2025
3dba3c3
Remove ompt_device_t reference
adurang Sep 24, 2025
ac62231
address reviews
adurang Sep 25, 2025
17c8e31
remove unused argument
adurang Sep 25, 2025
e550bf1
Merge branch 'main' into l0_plugin
adurang Sep 26, 2025
b859179
cleanup
adurang Sep 26, 2025
f1d1aad
[OFFLOAD] Enable level zero testing
adurang Sep 26, 2025
c141f9d
address review
adurang Sep 30, 2025
dd774b6
fix & remove dead code
adurang Sep 30, 2025
7fa9dda
remove support for target ICVs
adurang Sep 30, 2025
96411ce
typo & format
adurang Sep 30, 2025
a4cd814
Merge branch 'main' into l0_plugin
adurang Sep 30, 2025
6c5278f
Adjust interfaces after merge
adurang Sep 30, 2025
44b2f48
push some error checking down
adurang Sep 30, 2025
b4e238f
Update offload/plugins-nextgen/level_zero/include/L0Device.h
adurang Oct 9, 2025
166028d
address review comments
adurang Oct 9, 2025
6629159
format
adurang Oct 9, 2025
c00c288
more format :/
adurang Oct 9, 2025
93d3948
add missed wrapper; remove unused macro
adurang Oct 9, 2025
4a2140e
Remove level_zero from default plugin list
adurang Oct 15, 2025
94335ac
Merge branch 'main' into l0_plugin
adurang Oct 15, 2025
bd43212
remove removed plugin interface
adurang Oct 15, 2025
2be5a5a
Merge branch 'l0_plugin' into l0_tests
adurang Oct 15, 2025
7f9c0c8
update spirv target
adurang Oct 15, 2025
2d7cc66
address some reviews
adurang Oct 20, 2025
91e51ed
address more reviews
adurang Oct 20, 2025
12ab478
address more comments
adurang Oct 20, 2025
3287b1a
change some containers
adurang Oct 21, 2025
69abec0
more container changes
adurang Oct 21, 2025
b7e95ab
move unsafe code out of destructors
adurang Oct 21, 2025
91f1033
Remove legacy error checking Pt 1
adurang Oct 21, 2025
1221006
Remove legacy error checking Pt 2
adurang Oct 21, 2025
45d38a2
Remove legacy error checking Pt 3
adurang Oct 21, 2025
6ec68e7
Remove legacy error checking Pt 4
adurang Oct 21, 2025
ef8d794
Add more error checking
adurang Oct 21, 2025
19e888e
fix format
adurang Oct 21, 2025
a591b0e
Move DeviceRange to PluginInterface and remove internal Device list
adurang Oct 22, 2025
fc61165
refactor MemPool options field
adurang Oct 22, 2025
40f2626
change auto returns
adurang Oct 22, 2025
f04fef7
Remove link copy queues
adurang Oct 22, 2025
4a6b196
debug build fixes
adurang Oct 23, 2025
120e3d4
fixes and cleanup
adurang Oct 27, 2025
40ad342
format :/
adurang Oct 27, 2025
8bb25ad
fixes
adurang Oct 29, 2025
File filter

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 14 additions & 1 deletion offload/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,7 @@ endif()

set(LIBOMPTARGET_ALL_PLUGIN_TARGETS amdgpu cuda host)
set(LIBOMPTARGET_PLUGINS_TO_BUILD "all" CACHE STRING
"Semicolon-separated list of plugins to use: cuda, amdgpu, host or \"all\".")
"Semicolon-separated list of plugins to use: cuda, amdgpu, level_zero, host or \"all\".")

if(LIBOMPTARGET_PLUGINS_TO_BUILD STREQUAL "all")
set(LIBOMPTARGET_PLUGINS_TO_BUILD ${LIBOMPTARGET_ALL_PLUGIN_TARGETS})
Expand All @@ -176,6 +176,18 @@ if(NOT (CMAKE_SYSTEM_PROCESSOR MATCHES "(x86_64)|(ppc64le)|(aarch64)$"
list(REMOVE_ITEM LIBOMPTARGET_PLUGINS_TO_BUILD "cuda")
endif()
endif()
if(NOT (CMAKE_SYSTEM_PROCESSOR MATCHES "(x86_64)|(AMD64)$" AND
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Wouldn't it be better to do this check when we create the list rather than removing it later?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

All the other plugins are treated in the same way right now (e.g., 3 lines above). They are in the "candidate" list and if some condition is not met they're removed.

I don't disagree with your statement about doing it earlier, but we should be consistent, so maybe this is something to do in a follow-up?

CMAKE_SYSTEM_NAME MATCHES "Linux|Windows"))
# The enclosing guard only admits x86_64/AMD64 processors on Linux or Windows,
# so on any other host the plugin is dropped from the build list. The status
# text names exactly the hosts that guard accepts.
if("level_zero" IN_LIST LIBOMPTARGET_PLUGINS_TO_BUILD)
  message(STATUS "Not building Level Zero plugin: it is only supported on "
                 "Linux/Windows x86_64 (AMD64) hosts")
  list(REMOVE_ITEM LIBOMPTARGET_PLUGINS_TO_BUILD "level_zero")
endif()
endif()
# Without the Level Zero dependency the plugin cannot be built; take it out of
# the build list (no message is printed for this case).
if(NOT LIBOMPTARGET_DEP_LEVEL_ZERO_FOUND)
  if("level_zero" IN_LIST LIBOMPTARGET_PLUGINS_TO_BUILD)
    list(REMOVE_ITEM LIBOMPTARGET_PLUGINS_TO_BUILD "level_zero")
  endif()
endif()
message(STATUS "Building the offload library with support for "
"the \"${LIBOMPTARGET_PLUGINS_TO_BUILD}\" plugins")

Expand Down Expand Up @@ -214,6 +226,7 @@ set (LIBOMPTARGET_ALL_TARGETS "${LIBOMPTARGET_ALL_TARGETS} riscv64-unknown-linux
set (LIBOMPTARGET_ALL_TARGETS "${LIBOMPTARGET_ALL_TARGETS} riscv64-unknown-linux-gnu-LTO")
set (LIBOMPTARGET_ALL_TARGETS "${LIBOMPTARGET_ALL_TARGETS} loongarch64-unknown-linux-gnu")
set (LIBOMPTARGET_ALL_TARGETS "${LIBOMPTARGET_ALL_TARGETS} loongarch64-unknown-linux-gnu-LTO")
set (LIBOMPTARGET_ALL_TARGETS "${LIBOMPTARGET_ALL_TARGETS} spirv64-intel")

# Once the plugins for the different targets are validated, they will be added to
# the list of supported targets in the current system.
Expand Down
33 changes: 33 additions & 0 deletions offload/cmake/Modules/LibomptargetGetDependencies.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,16 @@ endif()
find_package(FFI QUIET)
set(LIBOMPTARGET_DEP_LIBFFI_FOUND ${FFI_FOUND})

################################################################################
# Looking for offload-arch...
################################################################################
# Use the offload-arch target when this build knows one; otherwise look for an
# offload-arch executable, additionally searching the LLVM tools directory.
if(NOT TARGET offload-arch)
  find_program(LIBOMPTARGET_OFFLOAD_ARCH NAMES offload-arch
               PATHS ${LLVM_TOOLS_BINARY_DIR})
else()
  get_property(LIBOMPTARGET_OFFLOAD_ARCH TARGET offload-arch PROPERTY LOCATION)
endif()

################################################################################
# Looking for NVIDIA GPUs...
################################################################################
Expand Down Expand Up @@ -89,4 +99,27 @@ if(LIBOMPTARGET_AMDGPU_ARCH)
endif()
endif()

################################################################################
# Looking for Level0
################################################################################
# Level Zero is reported as found as soon as its API header is located; the
# loader library is only looked up in that case.
find_path(LIBOMPTARGET_DEP_LEVEL_ZERO_INCLUDE_DIR NAMES level_zero/ze_api.h)

if(LIBOMPTARGET_DEP_LEVEL_ZERO_INCLUDE_DIR)
  set(LIBOMPTARGET_DEP_LEVEL_ZERO_FOUND TRUE)
  find_library(LIBOMPTARGET_DEP_LEVEL_ZERO_LIBRARY NAMES ze_loader)
else()
  set(LIBOMPTARGET_DEP_LEVEL_ZERO_FOUND FALSE)
endif()

# When offload-arch is available, run it restricted to Intel devices and turn
# its newline-separated output into a CMake list. A non-empty list marks an
# Intel GPU as detected.
if(LIBOMPTARGET_OFFLOAD_ARCH)
  execute_process(COMMAND ${LIBOMPTARGET_OFFLOAD_ARCH} "--only=intel"
                  OUTPUT_VARIABLE LIBOMPTARGET_INTELGPU_ARCH_OUTPUT
                  OUTPUT_STRIP_TRAILING_WHITESPACE)
  string(REPLACE "\n" ";" intelgpu_arch_list "${LIBOMPTARGET_INTELGPU_ARCH_OUTPUT}")
  if(intelgpu_arch_list)
    set(LIBOMPTARGET_INTELGPU_DETECTED_ARCH_LIST "${intelgpu_arch_list}")
    set(LIBOMPTARGET_FOUND_INTELGPU_GPU TRUE)
  endif()
endif()

set(OPENMP_PTHREAD_LIB ${LLVM_PTHREAD_LIB})
7 changes: 1 addition & 6 deletions offload/include/OpenMP/InteropAPI.h
Original file line number Diff line number Diff line change
Expand Up @@ -160,17 +160,12 @@ struct InteropTableEntry {
Interops.push_back(obj);
}

template <class ClearFuncTy> void clear(ClearFuncTy f) {
for (auto &Obj : Interops) {
f(Obj);
}
}

/// vector interface
/// Number of entries in Interops, narrowed to int.
int size() const { return static_cast<int>(Interops.size()); }
/// Iterator to the first entry of Interops.
iterator begin() { return Interops.begin(); }
/// Past-the-end iterator of Interops.
iterator end() { return Interops.end(); }
/// Erase the entry at \p it and return the iterator Interops hands back.
iterator erase(iterator it) { return Interops.erase(it); }
/// Remove every entry from Interops.
void clear() { Interops.clear(); }
};

struct InteropTblTy
Expand Down
174 changes: 172 additions & 2 deletions offload/include/PerThreadTable.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,19 +14,96 @@
#define OFFLOAD_PERTHREADTABLE_H

#include <list>
#include <llvm/Support/Error.h>
#include <memory>
#include <mutex>
#include <type_traits>

/// Hands out one lazily created ObjectType instance per calling thread.
///
/// Every thread's slot is held by a thread_local shared_ptr; a second
/// reference to the same slot is kept in ThreadDataList (guarded by Mtx) so
/// that clear() can visit the entries of all threads.
///
/// ObjectType must be default-constructible.
///
/// NOTE: the thread_local slot is a function-local static of getThreadData().
/// It therefore exists once per thread and per PerThread<ObjectType>
/// specialization, not once per PerThread object: two PerThread objects with
/// the same ObjectType share it.
template <typename ObjectType> struct PerThread {
  /// Storage for one thread; ThEntry stays null until first use.
  struct PerThreadData {
    std::unique_ptr<ObjectType> ThEntry;
  };

  /// Protects ThreadDataList.
  std::mutex Mtx;
  /// One entry per thread that has requested its data since the last clear().
  std::list<std::shared_ptr<PerThreadData>> ThreadDataList;

  // define default constructors, disable copy and move constructors
  PerThread() = default;
  PerThread(const PerThread &) = delete;
  PerThread(PerThread &&) = delete;
  PerThread &operator=(const PerThread &) = delete;
  PerThread &operator=(PerThread &&) = delete;
  ~PerThread() {
    std::lock_guard<std::mutex> Lock(Mtx);
    ThreadDataList.clear();
  }

private:
  /// Return the calling thread's slot, creating and registering it on the
  /// first call made by that thread.
  PerThreadData &getThreadData() {
    static thread_local std::shared_ptr<PerThreadData> ThreadData = nullptr;
    if (!ThreadData) {
      ThreadData = std::make_shared<PerThreadData>();
      // Only the registration touches shared state, so only it is locked.
      std::lock_guard<std::mutex> Lock(Mtx);
      ThreadDataList.push_back(ThreadData);
    }
    return *ThreadData;
  }

protected:
  /// Return the calling thread's object, default-constructing it on demand.
  ObjectType &getThreadEntry() {
    auto &ThData = getThreadData();
    if (!ThData.ThEntry)
      ThData.ThEntry = std::make_unique<ObjectType>();
    return *ThData.ThEntry;
  }

public:
  /// Return the calling thread's object (created on first access).
  ObjectType &get() { return getThreadEntry(); }

  /// Call \p f on every object that has been created, then drop all
  /// registrations.
  ///
  /// The objects themselves are not destroyed here: a thread that already
  /// obtained its slot keeps it alive through its thread_local pointer and is
  /// not registered again, so a later clear() no longer visits that slot.
  template <class F> void clear(F f) {
    std::lock_guard<std::mutex> Lock(Mtx);
    // Bind by reference: copying the shared_ptr on each iteration would only
    // add atomic reference-count traffic while the lock is held.
    for (const auto &ThData : ThreadDataList) {
      if (ThData->ThEntry)
        f(*ThData->ThEntry);
    }
    ThreadDataList.clear();
  }
};

// Using an STL container (such as std::vector) indexed by thread ID has
// too many race-condition issues, so we store each thread's entry in a
// thread_local variable.
// ContainerType is the container type each thread uses to store its objects,
// e.g., std::vector, std::set, etc. ObjectType is the type of the stored
// objects, e.g., omp_interop_val_t *, ...

template <typename ContainerType, typename ObjectType> struct PerThreadTable {
using iterator = typename ContainerType::iterator;

// Detection traits. Each primary template is false; the partial
// specialization is selected (and yields true) only when the probed member
// type or member call is well-formed for T.

/// True when T declares a nested `iterator` type.
template <typename T, typename = void>
struct has_iterator : std::false_type {};
template <typename T>
struct has_iterator<T, std::void_t<typename T::iterator>> : std::true_type {};

/// True when `clear()` can be called on a T with no arguments.
template <typename T, typename = void>
struct has_clear : std::false_type {};
template <typename T>
struct has_clear<T, std::void_t<decltype(std::declval<T>().clear())>>
    : std::true_type {};

/// True when `clearAll(1)` is a valid call on a T; the integer argument is
/// only a probe.
template <typename T, typename = void>
struct has_clearAll : std::false_type {};
template <typename T>
struct has_clearAll<T, std::void_t<decltype(std::declval<T>().clearAll(1))>>
    : std::true_type {};

/// True when T declares a nested `mapped_type`, i.e. stores key/value pairs.
template <typename T, typename = void>
struct is_associative : std::false_type {};
template <typename T>
struct is_associative<T, std::void_t<typename T::mapped_type>>
    : std::true_type {};

struct PerThreadData {
size_t NElements = 0;
std::unique_ptr<ContainerType> ThEntry;
Expand Down Expand Up @@ -71,6 +148,11 @@ template <typename ContainerType, typename ObjectType> struct PerThreadTable {
return ThData.NElements;
}

/// Overwrite the element counter returned by getThreadNElements() with
/// \p Size.
void setNElements(size_t Size) { getThreadNElements() = Size; }

public:
void add(ObjectType obj) {
auto &Entry = getThreadEntry();
Expand Down Expand Up @@ -104,11 +186,99 @@ template <typename ContainerType, typename ObjectType> struct PerThreadTable {
for (auto ThData : ThreadDataList) {
if (!ThData->ThEntry || ThData->NElements == 0)
continue;
ThData->ThEntry->clear(f);
// Hand every stored element to f and empty the container: containers that
// provide clearAll(f) do both themselves; otherwise iterate (passing the
// mapped value for associative containers) and call clear() afterwards.
if constexpr (has_clearAll<ContainerType>::value) {
  ThData->ThEntry->clearAll(f);
} else if constexpr (has_iterator<ContainerType>::value &&
                     has_clear<ContainerType>::value) {
  for (auto &Obj : *ThData->ThEntry) {
    if constexpr (is_associative<ContainerType>::value) {
      f(Obj.second);
    } else {
      f(Obj);
    }
  }
  ThData->ThEntry->clear();
} else {
  // The condition has to depend on ContainerType so that it is evaluated only
  // when this branch is instantiated; it is false exactly in that case. A
  // literal `true` here would never reject anything.
  static_assert(has_clearAll<ContainerType>::value ||
                    (has_iterator<ContainerType>::value &&
                     has_clear<ContainerType>::value),
                "Container type not supported");
}
ThData->NElements = 0;
}
ThreadDataList.clear();
}

/// Apply \p f to every object stored by every registered thread, stopping at
/// the first failure.
///
/// \p f receives each stored object (the mapped value for associative
/// containers). Its result is tested in a boolean context and the first
/// result that tests true is returned. Nothing is removed from the containers
/// or from ThreadDataList.
///
/// \returns the first error produced by \p f, or success if there was none.
template <class F> llvm::Error deinit(F f) {
  std::lock_guard<std::mutex> Lock(Mtx);
  // Bind by reference: copying the shared_ptr on each iteration would only
  // add atomic reference-count traffic while the lock is held.
  for (const auto &ThData : ThreadDataList) {
    // Threads that never created a container, or hold no elements, are
    // skipped.
    if (!ThData->ThEntry || ThData->NElements == 0)
      continue;
    for (auto &Obj : *ThData->ThEntry) {
      if constexpr (is_associative<ContainerType>::value) {
        if (auto Err = f(Obj.second))
          return Err;
      } else {
        if (auto Err = f(Obj))
          return Err;
      }
    }
  }
  return llvm::Error::success();
}
};

/// Maps a container type to the type of the objects it stores: `mapped_type`
/// when the container declares one, `value_type` otherwise.
template <typename ContainerT, typename = void> struct ContainerValueType {
  using type = typename ContainerT::value_type;
};
template <typename ContainerT>
struct ContainerValueType<ContainerT,
                          std::void_t<typename ContainerT::mapped_type>> {
  using type = typename ContainerT::mapped_type;
};

/// PerThreadTable specialised for indexable containers: get(Index) returns the
/// element stored under Index in the calling thread's container.
///
/// reserveSize, when non-zero, is a minimum capacity requested from
/// containers that offer reserve() before they are grown.
template <typename ContainerType, size_t reserveSize = 0>
struct PerThreadContainer
    : public PerThreadTable<ContainerType,
                            typename ContainerValueType<ContainerType>::type> {

  // Helper traits.

  /// Type used to index the container: `key_type` when the container declares
  /// one, `size_type` otherwise.
  template <typename T, typename = void> struct indexType {
    using type = typename T::size_type;
  };
  template <typename T> struct indexType<T, std::void_t<typename T::key_type>> {
    using type = typename T::key_type;
  };

  /// True when `resize(1)` is a valid call on a T.
  template <typename T, typename = void>
  struct has_resize : std::false_type {};
  template <typename T>
  struct has_resize<T, std::void_t<decltype(std::declval<T>().resize(1))>>
      : std::true_type {};

  /// True when `reserve(1)` is a valid call on a T.
  template <typename T, typename = void>
  struct has_reserve : std::false_type {};
  template <typename T>
  struct has_reserve<T, std::void_t<decltype(std::declval<T>().reserve(1))>>
      : std::true_type {};

  using IndexType = typename indexType<ContainerType>::type;
  using ObjectType = typename ContainerValueType<ContainerType>::type;

  /// Return the object stored under \p Index in the current thread's
  /// container.
  ///
  /// Resizable containers are grown to Index + 1 elements when \p Index is
  /// past the end; all containers are then accessed through operator[]. The
  /// thread's element count is refreshed from the container size afterwards.
  ObjectType &get(IndexType Index) {
    auto &Storage = this->getThreadEntry();

    // Growth path, compiled only for containers that can be resized.
    if constexpr (has_resize<ContainerType>::value) {
      const bool PastEnd = Index >= Storage.size();
      if (PastEnd) {
        if constexpr (has_reserve<ContainerType>::value && reserveSize > 0) {
          if (Storage.capacity() < reserveSize)
            Storage.reserve(reserveSize);
        }
        // Make Index a valid position.
        Storage.resize(Index + 1);
      }
    }

    ObjectType &Slot = Storage[Index];
    this->setNElements(Storage.size());
    return Slot;
  }
};

#endif
1 change: 1 addition & 0 deletions offload/include/omptarget.h
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,7 @@ enum TargetAllocTy : int32_t {
TARGET_ALLOC_HOST,
TARGET_ALLOC_SHARED,
TARGET_ALLOC_DEFAULT,
TARGET_ALLOC_LAST = TARGET_ALLOC_DEFAULT
};

inline KernelArgsTy CTorDTorKernelArgs = {
Expand Down
1 change: 1 addition & 0 deletions offload/liboffload/API/Platform.td
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ def ol_platform_backend_t : Enum {
Etor<"UNKNOWN", "The backend is not recognized">,
Etor<"CUDA", "The backend is CUDA">,
Etor<"AMDGPU", "The backend is AMDGPU">,
Etor<"LEVEL_ZERO", "The backend is Level Zero">,
Etor<"HOST", "The backend is the host">,
];
}
Expand Down
2 changes: 2 additions & 0 deletions offload/liboffload/src/OffloadImpl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -263,6 +263,8 @@ constexpr ol_platform_backend_t pluginNameToBackend(StringRef Name) {
return OL_PLATFORM_BACKEND_AMDGPU;
} else if (Name == "cuda") {
return OL_PLATFORM_BACKEND_CUDA;
} else if (Name == "level_zero") {
return OL_PLATFORM_BACKEND_LEVEL_ZERO;
} else {
return OL_PLATFORM_BACKEND_UNKNOWN;
}
Expand Down
16 changes: 16 additions & 0 deletions offload/plugins-nextgen/common/include/DLWrap.h
Original file line number Diff line number Diff line change
Expand Up @@ -282,5 +282,21 @@ template <size_t Requested, size_t Required> constexpr void verboseAssert() {
return dlwrap::SYM_USE##_Trait::get()(x0, x1, x2, x3, x4, x5, x6, x7, x8, \
x9, x10); \
}
// Defines the function SYM_DEF taking twelve parameters, typed as
// T::arg<0>::type through T::arg<11>::type, which forwards all of them to the
// callable returned by dlwrap::SYM_USE##_Trait::get() and returns its result.
#define DLWRAP_INSTANTIATE_12(SYM_DEF, SYM_USE, T) \
T::ReturnType SYM_DEF(typename T::template arg<0>::type x0, \
typename T::template arg<1>::type x1, \
typename T::template arg<2>::type x2, \
typename T::template arg<3>::type x3, \
typename T::template arg<4>::type x4, \
typename T::template arg<5>::type x5, \
typename T::template arg<6>::type x6, \
typename T::template arg<7>::type x7, \
typename T::template arg<8>::type x8, \
typename T::template arg<9>::type x9, \
typename T::template arg<10>::type x10, \
typename T::template arg<11>::type x11) { \
return dlwrap::SYM_USE##_Trait::get()(x0, x1, x2, x3, x4, x5, x6, x7, x8, \
x9, x10, x11); \
}

#endif // OMPTARGET_SHARED_DLWRAP_H
19 changes: 18 additions & 1 deletion offload/plugins-nextgen/common/include/PluginInterface.h
Original file line number Diff line number Diff line change
Expand Up @@ -1266,7 +1266,7 @@ struct GenericPluginTy {
virtual GenericGlobalHandlerTy *createGlobalHandler() = 0;

/// Get the reference to the device with a certain device id.
GenericDeviceTy &getDevice(int32_t DeviceId) {
GenericDeviceTy &getDevice(int32_t DeviceId) const {
assert(isValidDeviceId(DeviceId) && "Invalid device id");
assert(Devices[DeviceId] && "Device is uninitialized");

Expand Down Expand Up @@ -1527,6 +1527,23 @@ struct GenericPluginTy {
/// object and return immediately.
int32_t async_barrier(omp_interop_val_t *Interop);

struct DevicesRangeTy {
using iterator = llvm::SmallVector<GenericDeviceTy *>::iterator;

iterator BeginIt;
iterator EndIt;

DevicesRangeTy(iterator BeginIt, iterator EndIt)
: BeginIt(BeginIt), EndIt(EndIt) {}

auto &begin() { return BeginIt; }
auto &end() { return EndIt; }
};

/// Return a range spanning every slot of Devices.
DevicesRangeTy getDevicesRange() { return {Devices.begin(), Devices.end()}; }

private:
/// Indicates if the platform runtime has been fully initialized.
bool Initialized = false;
Expand Down
Loading
Loading