[Feature] Add a Filter plugin to ensure that non-GPU pods are not sch… #788
base: master
@@ -0,0 +1,129 @@

```go
package gpumanagementfilter

import (
	"context"
	"fmt"
	"strings"

	v1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/runtime"
	"k8s.io/kubernetes/pkg/scheduler/framework"
)

const (
	// PluginName is the name of the plugin used in the plugin registry and configurations.
	PluginName = "GPUManagementFilter"

	// preFilterStateKey is the key in CycleState to this plugin's pre-computed data.
	// Using the name of the plugin helps avoid collisions with other plugins.
	preFilterStateKey = "PreFilter" + PluginName

	// devicePluginContainerImage is the NVIDIA-provided Kubernetes device plugin image.
	devicePluginContainerImage = "nvcr.io/nvidia/k8s-device-plugin"

	// ResourceNvidiaGPU is the NVIDIA GPU resource name.
	ResourceNvidiaGPU = "nvidia.com/gpu"
)
```
Review comment on lines +21 to +25:

"I think we should move both of those values so they can be changed via a configuration file. Some clusters may also have more than one GPU resource on the node, so it would be great if we could provide a list of GPU resources."

Reply: "+1, using configuration files is better than hard-coding. Maybe we don't need to limit ourselves to GPU resources; can we extend it to different resources?"
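A minimal sketch of what the reviewers are suggesting, assuming a plugin-args type decoded in `New`; the `GPUManagementFilterArgs` type, its field names, and the `args` field on the plugin struct are hypothetical, not part of this PR:

```go
// Assumed extra imports:
//   metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
//   frameworkruntime "k8s.io/kubernetes/pkg/scheduler/framework/runtime"

// GPUManagementFilterArgs is a hypothetical component-config type;
// the names here are illustrative only.
type GPUManagementFilterArgs struct {
	metav1.TypeMeta `json:",inline"`

	// GPUResourceNames lists the extended resources to treat as GPUs,
	// e.g. "nvidia.com/gpu", "amd.com/gpu".
	GPUResourceNames []string `json:"gpuResourceNames,omitempty"`
	// DevicePluginImages lists image prefixes that identify device plugin pods.
	DevicePluginImages []string `json:"devicePluginImages,omitempty"`
}

// New would then decode the supplied args instead of relying on the
// hard-coded constants.
func New(obj runtime.Object, _ framework.Handle) (framework.Plugin, error) {
	args := &GPUManagementFilterArgs{
		GPUResourceNames:   []string{ResourceNvidiaGPU},
		DevicePluginImages: []string{devicePluginContainerImage},
	}
	if obj != nil {
		if err := frameworkruntime.DecodeInto(obj, args); err != nil {
			return nil, fmt.Errorf("decoding %s args: %w", PluginName, err)
		}
	}
	return &GPUManagementFilter{args: args}, nil
}
```

With defaults matching the current constants, behavior would be unchanged when no args are supplied.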
```go
type GPUManagementFilter struct {
	profileName string
}

var (
	_ framework.FilterPlugin    = &GPUManagementFilter{}
	_ framework.PreFilterPlugin = &GPUManagementFilter{}
)

// Name returns the name of the plugin. It is used in logs, etc.
func (p *GPUManagementFilter) Name() string {
	return PluginName
}

// preFilterState is computed at PreFilter and used in Filter.
type preFilterState struct {
	isPodRequestingGPUs     bool
	isPodNvidiaDevicePlugin bool
}

// Clone implements the StateData interface.
// It does not actually copy the data, as there is no need to do so.
func (s *preFilterState) Clone() framework.StateData {
	return s
}

// PreFilter is invoked at the prefilter extension point.
func (p *GPUManagementFilter) PreFilter(ctx context.Context, cycleState *framework.CycleState, pod *v1.Pod) (*framework.PreFilterResult, *framework.Status) {
	cycleState.Write(preFilterStateKey, updatePreFilterState(pod))
	return nil, nil
}

// PreFilterExtensions returns prefilter extensions, pod add and remove.
func (p *GPUManagementFilter) PreFilterExtensions() framework.PreFilterExtensions {
	return nil
}

func getPreFilterState(cycleState *framework.CycleState) (*preFilterState, error) {
	c, err := cycleState.Read(preFilterStateKey)
	if err != nil {
		return nil, fmt.Errorf("error reading %q from cycleState: %w", preFilterStateKey, err)
	}

	s, ok := c.(*preFilterState)
	if !ok {
		return nil, fmt.Errorf("%+v convert to gpumanagementfilter.preFilterState error", c)
	}
	return s, nil
}

// Filter is invoked at the filter extension point.
func (p *GPUManagementFilter) Filter(ctx context.Context, cycleState *framework.CycleState, pod *v1.Pod, nodeInfo *framework.NodeInfo) *framework.Status {
	s, err := getPreFilterState(cycleState)
	if err != nil {
		return framework.AsStatus(err)
	}

	if !s.isPodRequestingGPUs && isGPUNode(nodeInfo) && !s.isPodNvidiaDevicePlugin {
		return framework.NewStatus(framework.Unschedulable, "gpu node, non-gpu pod")
	}

	return nil
}

// New initializes a new plugin and returns it.
func New(_ runtime.Object, _ framework.Handle) (framework.Plugin, error) {
	return &GPUManagementFilter{}, nil
}

// updatePreFilterState checks whether the given pod requests GPUs (by inspecting
// container resource requests and limits) and whether it is an NVIDIA device plugin
// pod, and records both facts in the PreFilter state.
func updatePreFilterState(pod *v1.Pod) *preFilterState {
	result := &preFilterState{}

	initContainers := pod.Spec.InitContainers
	containers := pod.Spec.Containers

	if containsNvidiaDevicePluginImage(containers) {
		result.isPodNvidiaDevicePlugin = true
	}

	if requestsNvidiaGPU(initContainers) || requestsNvidiaGPU(containers) {
		result.isPodRequestingGPUs = true
	}
	return result
}

// isGPUNode checks whether the given node has the GPU resource by inspecting Allocatable.
func isGPUNode(nodeInfo *framework.NodeInfo) bool {
	_, gpuAllocatable := nodeInfo.Allocatable.ScalarResources[ResourceNvidiaGPU]
	return gpuAllocatable
}

// containsNvidiaDevicePluginImage checks whether any container image in containerList
// is the NVIDIA device plugin. The device plugin image is the invariant across
// different DaemonSet specs (only the version changes); labels or annotations might
// be changed or added by anyone on their pod spec.
func containsNvidiaDevicePluginImage(containerList []v1.Container) bool {
	for _, containerItem := range containerList {
		if strings.Contains(containerItem.Image, devicePluginContainerImage) {
			return true
		}
	}
	return false
}
```
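For context, an out-of-tree plugin like this is typically compiled into a custom scheduler binary and registered through `app.WithPlugin`. A minimal sketch, assuming the plugin package lives at the usual scheduler-plugins path (the import path is illustrative, not taken from this PR):

```go
package main

import (
	"os"

	"k8s.io/kubernetes/cmd/kube-scheduler/app"

	// Assumed import path for the package added in this PR.
	"sigs.k8s.io/scheduler-plugins/pkg/gpumanagementfilter"
)

func main() {
	// Register the plugin factory under its name so the plugin can be
	// enabled in a scheduler profile's preFilter/filter phases.
	command := app.NewSchedulerCommand(
		app.WithPlugin(gpumanagementfilter.PluginName, gpumanagementfilter.New),
	)
	if err := command.Execute(); err != nil {
		os.Exit(1)
	}
}
```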
@@ -0,0 +1,140 @@

```go
package gpumanagementfilter

import (
	"context"
	"testing"

	"github.com/stretchr/testify/assert"
	v1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/resource"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/kubernetes/pkg/scheduler/framework"
)

func TestGPUManagementFilter(t *testing.T) {
	tests := []struct {
		msg        string
		pod        *v1.Pod
		node       *framework.NodeInfo
		wantStatus *framework.Status
	}{
		{
			msg:        "gpu node, gpu pod",
			pod:        makeGPUPod("p1", 2),
			node:       makeGPUNode("n1", "2"),
			wantStatus: nil,
		},
		{
			msg:        "non-gpu node, gpu pod",
			pod:        makeGPUPod("p2", 2),
			node:       makeNonGPUNode("n2"),
			wantStatus: nil,
		},
		{
			msg:        "gpu node, non-gpu pod",
			pod:        makeNonGPUPod("p3"),
			node:       makeGPUNode("n3", "2"),
			wantStatus: framework.NewStatus(framework.Unschedulable, "gpu node, non-gpu pod"),
		},
		{
			msg:        "non-gpu node, non-gpu pod",
			pod:        makeNonGPUPod("p4"),
			node:       makeNonGPUNode("n4"),
			wantStatus: nil,
		},
		{
			msg:        "gpu node, gpu pod (requesting 0 GPUs)",
			pod:        makeGPUPod("p5", 0),
			node:       makeGPUNode("n5", "2"),
			wantStatus: framework.NewStatus(framework.Unschedulable, "gpu node, non-gpu pod"),
		},
		{
			msg:        "non-gpu node, gpu pod (requesting 0 GPUs)",
			pod:        makeGPUPod("p6", 0),
			node:       makeNonGPUNode("n6"),
			wantStatus: nil,
		},
		{
			msg:        "gpu node, device plugin pod",
			pod:        makeNonGPUNvidiaDevicePluginPod("p7"),
			node:       makeGPUNode("n7", "2"),
			wantStatus: nil,
		},
	}
	for _, tt := range tests {
		t.Run(tt.msg, func(t *testing.T) {
			ctx := context.Background()
			cycleState := framework.NewCycleState()
			plugin := &GPUManagementFilter{
				profileName: v1.DefaultSchedulerName,
			}
			if _, status := plugin.PreFilter(ctx, cycleState, tt.pod); !status.IsSuccess() {
				t.Fatal(status.AsError())
			}

			status := plugin.Filter(ctx, cycleState, tt.pod, tt.node)
			assert.Equal(t, tt.wantStatus, status)
		})
	}
}

func makeGPUPod(node string, limits int64) *v1.Pod {
	return &v1.Pod{
		Spec: v1.PodSpec{
			NodeName: node,
			Containers: []v1.Container{{Resources: v1.ResourceRequirements{
				Limits: v1.ResourceList{v1.ResourceName(ResourceNvidiaGPU): *resource.NewQuantity(limits, resource.DecimalSI)},
			}}},
		},
	}
}

func makeGPUNode(node, gpu string, pods ...*v1.Pod) *framework.NodeInfo {
	n := framework.NewNodeInfo(pods...)
	n.SetNode(&v1.Node{
		ObjectMeta: metav1.ObjectMeta{Name: node},
		Status: v1.NodeStatus{
			Allocatable: v1.ResourceList{
				v1.ResourceName(ResourceNvidiaGPU): resource.MustParse(gpu),
			},
		},
	})
	return n
}

func makeNonGPUPod(node string) *v1.Pod {
	return &v1.Pod{
		Spec: v1.PodSpec{
			NodeName:   node,
			Containers: []v1.Container{{Resources: v1.ResourceRequirements{}}},
		},
	}
}

func makeNonGPUNvidiaDevicePluginPod(node string) *v1.Pod {
	return &v1.Pod{
		Spec: v1.PodSpec{
			NodeName: node,
			Containers: []v1.Container{
				{
					Image:     "nvcr.io/nvidia/k8s-device-plugin:v0.11.0",
					Resources: v1.ResourceRequirements{},
				},
			},
		},
	}
}

func makeNonGPUNode(node string, pods ...*v1.Pod) *framework.NodeInfo {
	n := framework.NewNodeInfo(pods...)
	n.SetNode(&v1.Node{
		ObjectMeta: metav1.ObjectMeta{Name: node},
		Status: v1.NodeStatus{
			Allocatable: v1.ResourceList{},
		},
	})
	return n
}
```
@@ -0,0 +1,22 @@

```go
package gpumanagementfilter

import v1 "k8s.io/api/core/v1"

// requestsNvidiaGPU checks whether any container in the list requests an NVIDIA GPU.
func requestsNvidiaGPU(containerList []v1.Container) bool {
	for _, containerItem := range containerList {
		if checkNvidiaGPUResources(containerItem.Resources.Requests) || checkNvidiaGPUResources(containerItem.Resources.Limits) {
			return true
		}
	}
	return false
}

// checkNvidiaGPUResources reports whether the resource list contains a non-zero
// NVIDIA GPU quantity.
func checkNvidiaGPUResources(containerResources v1.ResourceList) bool {
	value, isPresent := containerResources[ResourceNvidiaGPU]
	valueInt, _ := value.AsInt64()
	return isPresent && valueInt > 0
}
```
@@ -0,0 +1,66 @@

```go
package gpumanagementfilter

import (
	"testing"

	"github.com/stretchr/testify/assert"
	v1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/resource"
)

var cpuResourceList = v1.ResourceList{
	"cpu": *resource.NewQuantity(5, resource.DecimalSI),
}

var gpuResourceList = v1.ResourceList{
	ResourceNvidiaGPU: *resource.NewQuantity(5, resource.DecimalSI),
}

var zeroGpuResourceList = v1.ResourceList{
	ResourceNvidiaGPU: *resource.NewQuantity(0, resource.DecimalSI),
}

func TestRequestsNvidiaGPU(t *testing.T) {
	tests := []struct {
		name         string
		resourceList v1.ResourceList
		expected     bool
	}{
		{
			name:         "containers requesting CPUs only",
			resourceList: cpuResourceList,
			expected:     false,
		},
		{
			name:         "containers requesting GPU resource with 0 value",
			resourceList: zeroGpuResourceList,
			expected:     false,
		},
		{
			name:         "containers requesting GPUs",
			resourceList: gpuResourceList,
			expected:     true,
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			assert.Equal(t, tt.expected, requestsNvidiaGPU(getContainerList(tt.resourceList)))
		})
	}
}

// getContainerList returns two containers: one with the given resource limits and
// one CPU-only container, so each case exercises a mixed container list.
func getContainerList(resourceList v1.ResourceList) []v1.Container {
	return []v1.Container{
		{
			Resources: v1.ResourceRequirements{
				Limits: resourceList,
			},
		},
		{
			Resources: v1.ResourceRequirements{
				Limits: cpuResourceList,
			},
		},
	}
}
```
Review comment: Can we have a KEP document?

Reply: Sure, let me write a KEP doc first.