From e22924d77b478e1c4c37d36861e36a6b05b9274c Mon Sep 17 00:00:00 2001 From: "Wang, Quintin" Date: Thu, 18 Apr 2024 12:57:39 +0800 Subject: [PATCH] Add device architecture to DeviceInfo (#596) * change target[1] to sycl_device to get arch properties from Triton side * Revert "change target[1] to sycl_device to get arch properties from Triton side" This reverts commit 47015a84d0ca2f01a661bcd0ac190cbabbaecac3. * Add device architecture to DeviceInfo --- csrc/gpu/aten/core/DeviceInfo.h | 1 + csrc/gpu/runtime/Device.cpp | 9 ++++++++- csrc/gpu/runtime/DeviceProp.h | 4 ++++ csrc/gpu/utils/DPCPP.h | 5 ++++- intel_extension_for_pytorch/csrc/xpu/Module.cpp | 7 +++++-- 5 files changed, 22 insertions(+), 4 deletions(-) diff --git a/csrc/gpu/aten/core/DeviceInfo.h b/csrc/gpu/aten/core/DeviceInfo.h index 5ef9eab2f..a487e215b 100644 --- a/csrc/gpu/aten/core/DeviceInfo.h +++ b/csrc/gpu/aten/core/DeviceInfo.h @@ -26,6 +26,7 @@ struct DeviceInfo { std::string version; uint32_t device_id; uint64_t global_mem_size; + uint64_t device_arch; uint32_t max_compute_units; uint32_t gpu_eu_count; uint32_t gpu_subslice_count; diff --git a/csrc/gpu/runtime/Device.cpp b/csrc/gpu/runtime/Device.cpp index 0a5a7b5f5..5dd4a123e 100644 --- a/csrc/gpu/runtime/Device.cpp +++ b/csrc/gpu/runtime/Device.cpp @@ -257,6 +257,9 @@ static void initDeviceProperty(DeviceId device_id) { device_prop.single_fp_config = device.get_info(); device_prop.double_fp_config = device.get_info(); device_prop.global_mem_size = device.get_info(); +#if (defined(__INTEL_LLVM_COMPILER) && __INTEL_LLVM_COMPILER >= 20240100) + device_prop.device_arch = device.get_info(); +#endif device_prop.global_mem_cache_type = device.get_info(); device_prop.global_mem_cache_size = @@ -353,7 +356,11 @@ static void initDeviceProperty(DeviceId device_id) { dev_info.max_num_sub_groups = device_prop.max_num_subgroup; dev_info.sub_group_sizes = device_prop.subgroup_sizes; dev_info.support_fp64 = device_prop.support_fp64; - +#if (defined(__INTEL_LLVM_COMPILER) && __INTEL_LLVM_COMPILER >= 20240100) + dev_info.device_arch = static_cast(device_prop.device_arch); +#else + dev_info.device_arch = (uint64_t)0; +#endif device_info[device_id] = dev_info; } diff --git a/csrc/gpu/runtime/DeviceProp.h b/csrc/gpu/runtime/DeviceProp.h index 3d6450002..dbd0d07a2 100644 --- a/csrc/gpu/runtime/DeviceProp.h +++ b/csrc/gpu/runtime/DeviceProp.h @@ -137,6 +137,10 @@ struct DeviceProp { dpcpp_info_t gpu_eu_simd_width; dpcpp_info_t gpu_hw_threads_per_eu; #endif +#if (defined(__INTEL_LLVM_COMPILER) && __INTEL_LLVM_COMPILER >= 20240100) + dpcpp_info_t device_arch; +#endif + bool support_fp64; bool support_atomic64; }; diff --git a/csrc/gpu/utils/DPCPP.h b/csrc/gpu/utils/DPCPP.h index ccfb9a1eb..27203cb70 100644 --- a/csrc/gpu/utils/DPCPP.h +++ b/csrc/gpu/utils/DPCPP.h @@ -251,6 +251,9 @@ using dpcpp_dev_single_fp_config = sycl::info::device::single_fp_config; using dpcpp_dev_double_fp_config = sycl::info::device::double_fp_config; // Returns the size of global device memory in bytes using dpcpp_dev_global_mem_size = sycl::info::device::global_mem_size; +// Return the architecture for device. +using dpcpp_dev_architecture = + sycl::ext::oneapi::experimental::info::device::architecture; // Returns the type of global memory cache supported. using dpcpp_dev_global_mem_cache_type = sycl::info::device::global_mem_cache_type; @@ -396,4 +399,4 @@ using dpcpp_atomic_ref_rlx_dev_global_t = sycl:: template using dpcpp_atomic_ref_rlx_wg_local_t = - sycl::atomic_ref; \ No newline at end of file + sycl::atomic_ref; diff --git a/intel_extension_for_pytorch/csrc/xpu/Module.cpp b/intel_extension_for_pytorch/csrc/xpu/Module.cpp index b2fd60bae..8528051c5 100644 --- a/intel_extension_for_pytorch/csrc/xpu/Module.cpp +++ b/intel_extension_for_pytorch/csrc/xpu/Module.cpp @@ -213,7 +213,8 @@ PyObject* THPModule_getCurrentRawStream( "torch.xpu.Stream.sycl_queue", nullptr); else - return PyLong_FromVoidPtr(xpu::dpcpp::getCurrentDPCPPStream(device).queue()); + return PyLong_FromVoidPtr( + xpu::dpcpp::getCurrentDPCPPStream(device).queue()); END_HANDLE_TH_ERRORS } @@ -576,6 +577,7 @@ static void register_xpu_device_info(PyObject* module) { .def_readonly("max_num_sub_groups", &DeviceInfo::max_num_sub_groups) .def_readonly("sub_group_sizes", &DeviceInfo::sub_group_sizes) .def_readonly("has_fp64", &DeviceInfo::support_fp64) + .def_readonly("device_arch", &DeviceInfo::device_arch) .def_property_readonly( "dev_type", [](const DeviceInfo& info) { return get_dev_type(info); }) .def("__repr__", [](const DeviceInfo& info) { @@ -586,7 +588,8 @@ static void register_xpu_device_info(PyObject* module) { << info.driver_version << "', has_fp64=" << info.support_fp64 << ", total_memory=" << info.global_mem_size / (1024 * 1024) << "MB, max_compute_units=" << info.max_compute_units - << ", gpu_eu_count=" << info.gpu_eu_count << ")"; + << ", gpu_eu_count=" << info.gpu_eu_count + << ", device_arch=" << info.device_arch << ")"; return stream.str(); }); }