Skip to content

Commit

Permalink
[Fix] Consistent parsing of custom accelerator resources (#2464)
Browse files Browse the repository at this point in the history
  • Loading branch information
mounchin authored Oct 23, 2024
1 parent 1213d15 commit a56b091
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 4 deletions.
19 changes: 16 additions & 3 deletions ray-operator/controllers/ray/common/pod.go
Original file line number Diff line number Diff line change
Expand Up @@ -795,8 +795,12 @@ func addWellKnownAcceleratorResources(rayStartParams map[string]string, resource
// Flag to track if any custom accelerator resources are present/added in rayStartParams resources.
isCustomAcceleratorResourceAdded := isCustomAcceleratorPresentInResources(resourcesMap)

for resourceKey, resourceValue := range resourceLimits {
resourceKeyString := string(resourceKey)
// Create a sorted slice of resource keys
// Needed for consistent looping and adding first found custom accelerator resource to ray start params
sortedResourceKeys := getSortedResourceKeys(resourceLimits)

for _, resourceKeyString := range sortedResourceKeys {
resourceValue := resourceLimits[corev1.ResourceName(resourceKeyString)]

// Scan for resource keys ending with "gpu" like "nvidia.com/gpu"
if _, ok := rayStartParams["num-gpus"]; !ok {
Expand All @@ -809,7 +813,7 @@ func addWellKnownAcceleratorResources(rayStartParams map[string]string, resource
if !isCustomAcceleratorResourceAdded {
if rayResourceName, ok := customAcceleratorToRayResourceMap[resourceKeyString]; ok && !resourceValue.IsZero() {
if _, exists := resourcesMap[rayResourceName]; !exists {
resourcesMap[rayResourceName] = float64(resourceValue.Value())
resourcesMap[rayResourceName] = resourceValue.AsApproximateFloat64()

// Update the resources map in the rayStartParams
updatedResourcesStr, err := json.Marshal(resourcesMap)
Expand Down Expand Up @@ -855,6 +859,15 @@ func getResourcesMap(rayStartParams map[string]string) (map[string]float64, erro
return resources, nil
}

// getSortedResourceKeys returns the keys of resourceLimits converted to plain
// strings and sorted in increasing order with sort.Strings.
//
// Go map iteration order is unspecified, so sorting gives the caller a
// repeatable order to walk the resource names in. The returned slice is always
// non-nil; it is empty when resourceLimits has no entries.
func getSortedResourceKeys(resourceLimits corev1.ResourceList) []string {
	// The final length is known up front, so fill the slice by position.
	names := make([]string, len(resourceLimits))
	next := 0
	for name := range resourceLimits {
		names[next] = string(name)
		next++
	}

	sort.Strings(names)
	return names
}

func convertParamMap(rayStartParams map[string]string) (s string) {
// Order rayStartParams keys for consistent ray start command flags generation
keys := make([]string, 0, len(rayStartParams))
Expand Down
2 changes: 1 addition & 1 deletion ray-operator/controllers/ray/common/pod_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1272,7 +1272,7 @@ func TestGenerateRayStartCommand(t *testing.T) {
NeuronCoreContainerResourceName: NeuronCoreRayResourceName,
"cloud-tpus.google.com/v3": "tpu",
},
expected: `ray start --head --num-gpus=1 --resources='{"tpu":8}' `,
expected: `ray start --head --num-gpus=1 --resources='{"neuron_cores":4}' `,
},
{
name: "HeadNode with existing resources",
Expand Down

0 comments on commit a56b091

Please sign in to comment.