diff --git a/ray-operator/controllers/ray/common/pod.go b/ray-operator/controllers/ray/common/pod.go index 1994a4f987..46d875a84e 100644 --- a/ray-operator/controllers/ray/common/pod.go +++ b/ray-operator/controllers/ray/common/pod.go @@ -38,11 +38,14 @@ const ( NeuronCoreRayResourceName = "neuron_cores" TPUContainerResourceName = "google.com/tpu" TPURayResourceName = "TPU" + GPUShareContainerResourceName = "aliyun.com/gpu-mem" + GPUShareResourceName = "gpu_share" ) var customAcceleratorToRayResourceMap = map[string]string{ NeuronCoreContainerResourceName: NeuronCoreRayResourceName, TPUContainerResourceName: TPURayResourceName, + GPUShareContainerResourceName: GPUShareResourceName, } // Get the port required to connect to the Ray cluster by worker nodes and drivers diff --git a/ray-operator/controllers/ray/common/pod_test.go b/ray-operator/controllers/ray/common/pod_test.go index 6384b8405f..68d64afa47 100644 --- a/ray-operator/controllers/ray/common/pod_test.go +++ b/ray-operator/controllers/ray/common/pod_test.go @@ -1244,6 +1244,17 @@ func TestGenerateRayStartCommand(t *testing.T) { }, expected: `ray start --resources='{"TPU":4}' `, }, + { + name: "WorkerNode with GPU Share", + nodeType: rayv1.WorkerNode, + rayStartParams: map[string]string{}, + resource: corev1.ResourceRequirements{ + Limits: corev1.ResourceList{ + "aliyun.com/gpu-mem": resource.MustParse("4"), + }, + }, + expected: `ray start --resources='{"gpu_share":4}' `, + }, { name: "HeadNode with Neuron Cores", nodeType: rayv1.HeadNode, @@ -1319,6 +1330,19 @@ func TestGenerateRayStartCommand(t *testing.T) { }, expected: `ray start --head --resources='{"custom_resource":2,"TPU":4}' `, }, + { + name: "HeadNode with existing GPU Share resources", + nodeType: rayv1.HeadNode, + rayStartParams: map[string]string{ + "resources": `'{"custom_resource":2,"gpu_share":4}'`, + }, + resource: corev1.ResourceRequirements{ + Limits: corev1.ResourceList{ + "aliyun.com/gpu-mem": resource.MustParse("8"), + }, + }, + expected: `ray start --head --resources='{"custom_resource":2,"gpu_share":4}' `, + }, { name: "HeadNode with invalid resources string", nodeType: rayv1.HeadNode,