Skip to content

Commit

Permalink
Add device architecture to DeviceInfo (#596)
Browse files Browse the repository at this point in the history
* change target[1] to sycl_device to get arch properties from Triton side

* Revert "change target[1] to sycl_device to get arch properties from Triton side"

This reverts commit 47015a8.

* Add device architecture to DeviceInfo
  • Loading branch information
quintinwang5 authored Apr 18, 2024
1 parent fcd22de commit e22924d
Show file tree
Hide file tree
Showing 5 changed files with 22 additions and 4 deletions.
1 change: 1 addition & 0 deletions csrc/gpu/aten/core/DeviceInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ struct DeviceInfo {
std::string version;
uint32_t device_id;
uint64_t global_mem_size;
uint64_t device_arch;
uint32_t max_compute_units;
uint32_t gpu_eu_count;
uint32_t gpu_subslice_count;
Expand Down
9 changes: 8 additions & 1 deletion csrc/gpu/runtime/Device.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -257,6 +257,9 @@ static void initDeviceProperty(DeviceId device_id) {
device_prop.single_fp_config = device.get_info<dpcpp_dev_single_fp_config>();
device_prop.double_fp_config = device.get_info<dpcpp_dev_double_fp_config>();
device_prop.global_mem_size = device.get_info<dpcpp_dev_global_mem_size>();
#if (defined(__INTEL_LLVM_COMPILER) && __INTEL_LLVM_COMPILER >= 20240100)
device_prop.device_arch = device.get_info<dpcpp_dev_architecture>();
#endif
device_prop.global_mem_cache_type =
device.get_info<dpcpp_dev_global_mem_cache_type>();
device_prop.global_mem_cache_size =
Expand Down Expand Up @@ -353,7 +356,11 @@ static void initDeviceProperty(DeviceId device_id) {
dev_info.max_num_sub_groups = device_prop.max_num_subgroup;
dev_info.sub_group_sizes = device_prop.subgroup_sizes;
dev_info.support_fp64 = device_prop.support_fp64;

#if (defined(__INTEL_LLVM_COMPILER) && __INTEL_LLVM_COMPILER >= 20240100)
dev_info.device_arch = static_cast<uint64_t>(device_prop.device_arch);
#else
dev_info.device_arch = (uint64_t)0;
#endif
device_info[device_id] = dev_info;
}

Expand Down
4 changes: 4 additions & 0 deletions csrc/gpu/runtime/DeviceProp.h
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,10 @@ struct DeviceProp {
dpcpp_info_t<dpcpp_dev_ext_intel_gpu_eu_simd_width> gpu_eu_simd_width;
dpcpp_info_t<dpcpp_dev_ext_intel_gpu_hw_threads_per_eu> gpu_hw_threads_per_eu;
#endif
#if (defined(__INTEL_LLVM_COMPILER) && __INTEL_LLVM_COMPILER >= 20240100)
dpcpp_info_t<dpcpp_dev_architecture> device_arch;
#endif

bool support_fp64;
bool support_atomic64;
};
Expand Down
5 changes: 4 additions & 1 deletion csrc/gpu/utils/DPCPP.h
Original file line number Diff line number Diff line change
Expand Up @@ -251,6 +251,9 @@ using dpcpp_dev_single_fp_config = sycl::info::device::single_fp_config;
using dpcpp_dev_double_fp_config = sycl::info::device::double_fp_config;
// Returns the size of global device memory in bytes
using dpcpp_dev_global_mem_size = sycl::info::device::global_mem_size;
// Returns the architecture of the device. The descriptor lives in the
// experimental oneAPI extension namespace, so the alias is declared only under
// the same compiler-version guard that protects every place it is queried.
#if (defined(__INTEL_LLVM_COMPILER) && __INTEL_LLVM_COMPILER >= 20240100)
using dpcpp_dev_architecture =
    sycl::ext::oneapi::experimental::info::device::architecture;
#endif
// Returns the type of global memory cache supported.
using dpcpp_dev_global_mem_cache_type =
sycl::info::device::global_mem_cache_type;
Expand Down Expand Up @@ -396,4 +399,4 @@ using dpcpp_atomic_ref_rlx_dev_global_t = sycl::

template <typename T>
using dpcpp_atomic_ref_rlx_wg_local_t =
sycl::atomic_ref<T, dpcpp_mem_odr_rlx, dpcpp_mem_scp_wg, dpcpp_local_space>;
sycl::atomic_ref<T, dpcpp_mem_odr_rlx, dpcpp_mem_scp_wg, dpcpp_local_space>;
7 changes: 5 additions & 2 deletions intel_extension_for_pytorch/csrc/xpu/Module.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -213,7 +213,8 @@ PyObject* THPModule_getCurrentRawStream(
"torch.xpu.Stream.sycl_queue",
nullptr);
else
return PyLong_FromVoidPtr(xpu::dpcpp::getCurrentDPCPPStream(device).queue());
return PyLong_FromVoidPtr(
xpu::dpcpp::getCurrentDPCPPStream(device).queue());
END_HANDLE_TH_ERRORS
}

Expand Down Expand Up @@ -576,6 +577,7 @@ static void register_xpu_device_info(PyObject* module) {
.def_readonly("max_num_sub_groups", &DeviceInfo::max_num_sub_groups)
.def_readonly("sub_group_sizes", &DeviceInfo::sub_group_sizes)
.def_readonly("has_fp64", &DeviceInfo::support_fp64)
.def_readonly("device_arch", &DeviceInfo::device_arch)
.def_property_readonly(
"dev_type", [](const DeviceInfo& info) { return get_dev_type(info); })
.def("__repr__", [](const DeviceInfo& info) {
Expand All @@ -586,7 +588,8 @@ static void register_xpu_device_info(PyObject* module) {
<< info.driver_version << "', has_fp64=" << info.support_fp64
<< ", total_memory=" << info.global_mem_size / (1024 * 1024)
<< "MB, max_compute_units=" << info.max_compute_units
<< ", gpu_eu_count=" << info.gpu_eu_count << ")";
<< ", gpu_eu_count=" << info.gpu_eu_count
<< ", device_arch=" << info.device_arch << ")";
return stream.str();
});
}
Expand Down

0 comments on commit e22924d

Please sign in to comment.