diff --git a/pkg/coscheduling/core/core.go b/pkg/coscheduling/core/core.go index fd9decdca..60d1b584f 100644 --- a/pkg/coscheduling/core/core.go +++ b/pkg/coscheduling/core/core.go @@ -48,12 +48,22 @@ const ( PodGroupNotFound Status = "PodGroup not found" Success Status = "Success" Wait Status = "Wait" + + permitStateKey = "PermitCoscheduling" ) +type PermitState struct { + Activate bool +} + +func (s *PermitState) Clone() framework.StateData { + return &PermitState{Activate: s.Activate} +} + // Manager defines the interfaces for PodGroup management. type Manager interface { PreFilter(context.Context, *corev1.Pod) error - Permit(context.Context, *corev1.Pod) Status + Permit(context.Context, *framework.CycleState, *corev1.Pod) Status GetPodGroup(context.Context, *corev1.Pod) (string, *v1alpha1.PodGroup) GetCreationTimestamp(*corev1.Pod, time.Time) time.Time DeletePermittedPodGroup(string) @@ -108,6 +118,13 @@ func (pgMgr *PodGroupManager) ActivateSiblings(pod *corev1.Pod, state *framework return } + // Only proceed if it's explicitly requested to activate sibling pods. + if c, err := state.Read(permitStateKey); err != nil { + return + } else if s, ok := c.(*PermitState); !ok || !s.Activate { + return + } + pods, err := pgMgr.podLister.Pods(pod.Namespace).List( labels.SelectorFromSet(labels.Set{v1alpha1.PodGroupLabel: pgName}), ) @@ -193,7 +210,7 @@ func (pgMgr *PodGroupManager) PreFilter(ctx context.Context, pod *corev1.Pod) er } // Permit permits a pod to run, if the minMember match, it would send a signal to chan. -func (pgMgr *PodGroupManager) Permit(ctx context.Context, pod *corev1.Pod) Status { +func (pgMgr *PodGroupManager) Permit(ctx context.Context, state *framework.CycleState, pod *corev1.Pod) Status { pgFullName, pg := pgMgr.GetPodGroup(ctx, pod) if pgFullName == "" { return PodGroupNotSpecified @@ -209,6 +226,19 @@ func (pgMgr *PodGroupManager) Permit(ctx context.Context, pod *corev1.Pod) Statu if int32(assigned)+1 >= pg.Spec.MinMember { return Success } + + if assigned == 0 { + // Given we've reached Permit(), it's mean all PreFilter checks (minMember & minResource) + // already pass through, so if assigned == 0, it could be due to: + // - minResource get satisfied + // - new pods added + // In either case, we should and only should use this 0-th pod to trigger activating + // its siblings. + // It'd be in-efficient if we trigger activating siblings unconditionally. + // See https://github.com/kubernetes-sigs/scheduler-plugins/issues/682 + state.Write(permitStateKey, &PermitState{Activate: true}) + } + return Wait } diff --git a/pkg/coscheduling/core/core_test.go b/pkg/coscheduling/core/core_test.go index c57f659c8..1918bae87 100644 --- a/pkg/coscheduling/core/core_test.go +++ b/pkg/coscheduling/core/core_test.go @@ -28,6 +28,7 @@ import ( "k8s.io/client-go/informers" clientsetfake "k8s.io/client-go/kubernetes/fake" clicache "k8s.io/client-go/tools/cache" + "k8s.io/kubernetes/pkg/scheduler/framework" st "k8s.io/kubernetes/pkg/scheduler/testing" "sigs.k8s.io/scheduler-plugins/apis/scheduling/v1alpha1" tu "sigs.k8s.io/scheduler-plugins/test/util" @@ -278,7 +279,7 @@ func TestPermit(t *testing.T) { podInformer.Informer().GetStore().Add(p) } - if got := pgMgr.Permit(ctx, tt.pod); got != tt.want { + if got := pgMgr.Permit(ctx, &framework.CycleState{}, tt.pod); got != tt.want { t.Errorf("Want %v, but got %v", tt.want, got) } }) diff --git a/pkg/coscheduling/coscheduling.go b/pkg/coscheduling/coscheduling.go index 8c61c60d5..bcf37e2fc 100644 --- a/pkg/coscheduling/coscheduling.go +++ b/pkg/coscheduling/coscheduling.go @@ -204,7 +204,7 @@ func (cs *Coscheduling) PreFilterExtensions() framework.PreFilterExtensions { // Permit is the functions invoked by the framework at "Permit" extension point. func (cs *Coscheduling) Permit(ctx context.Context, state *framework.CycleState, pod *v1.Pod, nodeName string) (*framework.Status, time.Duration) { waitTime := *cs.scheduleTimeout - s := cs.pgMgr.Permit(ctx, pod) + s := cs.pgMgr.Permit(ctx, state, pod) var retStatus *framework.Status switch s { case core.PodGroupNotSpecified: