Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adding Pod name for idle email notification #478

Merged
merged 20 commits into from
Oct 17, 2023
Merged
Show file tree
Hide file tree
Changes from 16 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
119 changes: 68 additions & 51 deletions controllers/idler/idler_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -132,8 +132,10 @@ func (r *Reconciler) ensureIdling(logger logr.Logger, idler *toolchainv1alpha1.I
// Already tracking this pod. Check the timeout.
if time.Now().After(trackedPod.StartTime.Add(time.Duration(idler.Spec.TimeoutSeconds) * time.Second)) {
podLogger.Info("Pod running for too long. Killing the pod.", "start_time", trackedPod.StartTime.Format("2006-01-02T15:04:05Z"), "timeout_seconds", idler.Spec.TimeoutSeconds)
var appName string
fbm3307 marked this conversation as resolved.
Show resolved Hide resolved
var appType string
fbm3307 marked this conversation as resolved.
Show resolved Hide resolved
// Check if it belongs to a controller (Deployment, DeploymentConfig, etc) and scale it down to zero.
deletedByController, err := r.scaleControllerToZero(podLogger, pod.ObjectMeta)
appType, appName, deletedByController, err := r.scaleControllerToZero(podLogger, pod.ObjectMeta)
if err != nil {
return err
}
Expand All @@ -144,8 +146,14 @@ func (r *Reconciler) ensureIdling(logger logr.Logger, idler *toolchainv1alpha1.I
}
podLogger.Info("Pod deleted")
}

if appName == "" {
appName = pod.Name
fbm3307 marked this conversation as resolved.
Show resolved Hide resolved
appType = "Pod"
}

// By now either a pod has been deleted or scaled to zero by controller, idler Triggered notification should be sent
if err := r.createNotification(logger, idler); err != nil {
if err := r.createNotification(logger, idler, appName, appType); err != nil {
logger.Error(err, "failed to create Notification")
if err = r.setStatusIdlerNotificationCreationFailed(idler, err.Error()); err != nil {
logger.Error(err, "failed to set status IdlerNotificationCreationFailed")
Expand All @@ -168,7 +176,7 @@ func (r *Reconciler) ensureIdling(logger logr.Logger, idler *toolchainv1alpha1.I
return r.updateStatusPods(idler, newStatusPods)
}

func (r *Reconciler) createNotification(logger logr.Logger, idler *toolchainv1alpha1.Idler) error {
func (r *Reconciler) createNotification(logger logr.Logger, idler *toolchainv1alpha1.Idler, appName string, appType string) error {
logger.Info("Create Notification")
//Get the HostClient
hostCluster, ok := r.GetHostCluster()
Expand Down Expand Up @@ -200,7 +208,10 @@ func (r *Reconciler) createNotification(logger logr.Logger, idler *toolchainv1al

keysAndVals := map[string]string{
"Namespace": idler.Name,
"AppName": appName,
"AppType": appType,
}

for _, userEmail := range userEmails {
_, err := notify.NewNotificationBuilder(hostCluster.Client, hostCluster.OperatorNamespace).
WithName(notificationName).
Expand Down Expand Up @@ -252,7 +263,8 @@ func (r *Reconciler) getUserEmailsFromMURs(logger logr.Logger, hostCluster *clus
// scaleControllerToZero checks if the object has an owner controller (Deployment, ReplicaSet, etc)
// and scales the owner down to zero and returns "true".
// Otherwise returns "false".
func (r *Reconciler) scaleControllerToZero(logger logr.Logger, meta metav1.ObjectMeta) (bool, error) {
// It also returns the parent controller type and name or empty strings if there is no parent controller.
func (r *Reconciler) scaleControllerToZero(logger logr.Logger, meta metav1.ObjectMeta) (string, string, bool, error) {
fbm3307 marked this conversation as resolved.
Show resolved Hide resolved
logger.Info("Scaling controller to zero", "name", meta.Name)
owners := meta.GetOwnerReferences()
for _, owner := range owners {
Expand All @@ -275,17 +287,17 @@ func (r *Reconciler) scaleControllerToZero(logger logr.Logger, meta metav1.Objec
}
}
}
return false, nil
return "", "", false, nil
}

func (r *Reconciler) scaleDeploymentToZero(logger logr.Logger, namespace string, owner metav1.OwnerReference) (bool, error) {
func (r *Reconciler) scaleDeploymentToZero(logger logr.Logger, namespace string, owner metav1.OwnerReference) (string, string, bool, error) {
logger.Info("Scaling deployment to zero", "name", owner.Name)
d := &appsv1.Deployment{}
if err := r.AllNamespacesClient.Get(context.TODO(), types.NamespacedName{Namespace: namespace, Name: owner.Name}, d); err != nil {
if errors.IsNotFound(err) { // Ignore not found errors. Can happen if the parent controller has been deleted. The Garbage Collector should delete the pods shortly.
return true, nil
return owner.Kind, owner.Name, true, nil
}
return false, err
return "", "", false, err
}
zero := int32(0)

Expand All @@ -298,10 +310,10 @@ func (r *Reconciler) scaleDeploymentToZero(logger logr.Logger, namespace string,

if err == nil {
logger.Info("Deployment scaled to zero using scale sub resource", "name", d.Name)
return true, nil
return owner.Kind, owner.Name, true, nil
}

return false, err
return "", "", false, err
} else if errors.IsInternalError(err) { // Internal error indicates that the specReplicasPath is not set on the custom resource - just update the scale resource
scale := autoscalingv1.Scale{
ObjectMeta: ctrl.ObjectMeta{
Expand All @@ -316,22 +328,22 @@ func (r *Reconciler) scaleDeploymentToZero(logger logr.Logger, namespace string,

if err == nil {
logger.Info("Deployment scaled to zero using scale sub resource", "name", d.Name)
return true, nil
return owner.Kind, owner.Name, true, nil
}

return false, err
return "", "", false, err
} else if !errors.IsNotFound(err) {
return false, err
return "", "", false, err
}
}
}

d.Spec.Replicas = &zero
if err := r.AllNamespacesClient.Update(context.TODO(), d); err != nil {
return false, err
return "", "", false, err
}
logger.Info("Deployment scaled to zero", "name", d.Name)
return true, nil
return owner.Kind, owner.Name, true, nil
}

func getSupportedScaleResource(ownerReference metav1.OwnerReference) *schema.GroupVersionResource {
Expand All @@ -346,116 +358,121 @@ func getSupportedScaleResource(ownerReference metav1.OwnerReference) *schema.Gro
return nil
}

func (r *Reconciler) scaleReplicaSetToZero(logger logr.Logger, namespace string, owner metav1.OwnerReference) (bool, error) {
func (r *Reconciler) scaleReplicaSetToZero(logger logr.Logger, namespace string, owner metav1.OwnerReference) (string, string, bool, error) {
logger.Info("Scaling replica set to zero", "name", owner.Name)
rs := &appsv1.ReplicaSet{}
if err := r.AllNamespacesClient.Get(context.TODO(), types.NamespacedName{Namespace: namespace, Name: owner.Name}, rs); err != nil {
if errors.IsNotFound(err) { // Ignore not found errors. Can happen if the parent controller has been deleted. The Garbage Collector should delete the pods shortly.
logger.Info("replica set is not found; ignoring: it might be already deleted")
return true, nil
return owner.Kind, owner.Name, true, nil
}
logger.Error(err, "error deleting rs")
return false, err
return "", "", false, err
}
deletedByController, err := r.scaleControllerToZero(logger, rs.ObjectMeta) // Check if the ReplicaSet has another controller which owns it (i.g. Deployment)

appType, appName, deletedByController, err := r.scaleControllerToZero(logger, rs.ObjectMeta) // Check if the ReplicaSet has another controller which owns it (i.g. Deployment)
if err != nil {
return false, err
return "", "", false, err
}
if !deletedByController {
// There is no controller that owns the ReplicaSet. Scale the ReplicaSet to zero.
zero := int32(0)
rs.Spec.Replicas = &zero
if err := r.AllNamespacesClient.Update(context.TODO(), rs); err != nil {
return false, err
return "", "", false, err
}
logger.Info("ReplicaSet scaled to zero", "name", rs.Name)
appType = owner.Kind
appName = owner.Name
}
return true, nil
return appType, appName, true, nil
}

func (r *Reconciler) deleteDaemonSet(logger logr.Logger, namespace string, owner metav1.OwnerReference) (bool, error) {
func (r *Reconciler) deleteDaemonSet(logger logr.Logger, namespace string, owner metav1.OwnerReference) (string, string, bool, error) {
ds := &appsv1.DaemonSet{}
if err := r.AllNamespacesClient.Get(context.TODO(), types.NamespacedName{Namespace: namespace, Name: owner.Name}, ds); err != nil {
if errors.IsNotFound(err) { // Ignore not found errors. Can happen if the parent controller has been deleted. The Garbage Collector should delete the pods shortly.
return true, nil
return owner.Kind, owner.Name, true, nil
}
return false, err
return "", "", false, err
}
if err := r.AllNamespacesClient.Delete(context.TODO(), ds); err != nil {
if errors.IsNotFound(err) { // Ignore not found errors. Can happen if the parent controller has been deleted. The Garbage Collector should delete the pods shortly.
return true, nil
return owner.Kind, owner.Name, true, nil
}
return false, err
return "", "", false, err
}
logger.Info("DaemonSet deleted", "name", ds.Name)
return true, nil
return owner.Kind, owner.Name, true, nil
}

func (r *Reconciler) scaleStatefulSetToZero(logger logr.Logger, namespace string, owner metav1.OwnerReference) (bool, error) {
func (r *Reconciler) scaleStatefulSetToZero(logger logr.Logger, namespace string, owner metav1.OwnerReference) (string, string, bool, error) {
s := &appsv1.StatefulSet{}
if err := r.AllNamespacesClient.Get(context.TODO(), types.NamespacedName{Namespace: namespace, Name: owner.Name}, s); err != nil {
if errors.IsNotFound(err) { // Ignore not found errors. Can happen if the parent controller has been deleted. The Garbage Collector should delete the pods shortly.
return true, nil
return owner.Kind, owner.Name, true, nil
}
return false, err
return "", "", false, err
}
zero := int32(0)
s.Spec.Replicas = &zero
if err := r.AllNamespacesClient.Update(context.TODO(), s); err != nil {
return false, err
return "", "", false, err
}
logger.Info("StatefulSet scaled to zero", "name", s.Name)
return true, nil
return owner.Kind, owner.Name, true, nil
}

func (r *Reconciler) scaleDeploymentConfigToZero(logger logr.Logger, namespace string, owner metav1.OwnerReference) (bool, error) {
func (r *Reconciler) scaleDeploymentConfigToZero(logger logr.Logger, namespace string, owner metav1.OwnerReference) (string, string, bool, error) {
dc := &openshiftappsv1.DeploymentConfig{}
if err := r.AllNamespacesClient.Get(context.TODO(), types.NamespacedName{Namespace: namespace, Name: owner.Name}, dc); err != nil {
if errors.IsNotFound(err) { // Ignore not found errors. Can happen if the parent controller has been deleted. The Garbage Collector should delete the pods shortly.
return true, nil
return owner.Kind, owner.Name, true, nil
}
return false, err
return "", "", false, err
}
dc.Spec.Replicas = 0
if err := r.AllNamespacesClient.Update(context.TODO(), dc); err != nil {
return false, err
return "", "", false, err
}
logger.Info("DeploymentConfig scaled to zero", "name", dc.Name)
return true, nil
return owner.Kind, owner.Name, true, nil
}

func (r *Reconciler) scaleReplicationControllerToZero(logger logr.Logger, namespace string, owner metav1.OwnerReference) (bool, error) {
func (r *Reconciler) scaleReplicationControllerToZero(logger logr.Logger, namespace string, owner metav1.OwnerReference) (string, string, bool, error) {
rc := &corev1.ReplicationController{}
if err := r.AllNamespacesClient.Get(context.TODO(), types.NamespacedName{Namespace: namespace, Name: owner.Name}, rc); err != nil {
if errors.IsNotFound(err) { // Ignore not found errors. Can happen if the parent controller has been deleted. The Garbage Collector should delete the pods shortly.
return true, nil
return owner.Kind, owner.Name, true, nil
}
return false, err
return "", "", false, err
}
deletedByController, err := r.scaleControllerToZero(logger, rc.ObjectMeta) // Check if the ReplicationController has another controller which owns it (i.g. DeploymentConfig)
appType, appName, deletedByController, err := r.scaleControllerToZero(logger, rc.ObjectMeta) // Check if the ReplicationController has another controller which owns it (i.g. DeploymentConfig)
if err != nil {
return false, err
return "", "", false, err
}
if !deletedByController {
// There is no controller who owns the ReplicationController. Scale the ReplicationController to zero.
zero := int32(0)
rc.Spec.Replicas = &zero
if err := r.AllNamespacesClient.Update(context.TODO(), rc); err != nil {
return false, err
return "", "", false, err
}
logger.Info("ReplicationController scaled to zero", "name", rc.Name)
appType = owner.Kind
appName = owner.Name
}
return true, nil
return appType, appName, true, nil
}

func (r *Reconciler) deleteJob(logger logr.Logger, namespace string, owner metav1.OwnerReference) (bool, error) {
func (r *Reconciler) deleteJob(logger logr.Logger, namespace string, owner metav1.OwnerReference) (string, string, bool, error) {
j := &batchv1.Job{}
if err := r.AllNamespacesClient.Get(context.TODO(), types.NamespacedName{Namespace: namespace, Name: owner.Name}, j); err != nil {
if errors.IsNotFound(err) { // Ignore not found errors. Can happen if the parent controller has been deleted. The Garbage Collector should delete the pods shortly.
logger.Info("job not found")
return true, nil
return owner.Kind, owner.Name, true, nil
}
return false, err
return "", "", false, err
}
// see https://github.com/kubernetes/kubernetes/issues/20902#issuecomment-321484735
// also, this may be needed for the e2e tests if the call to `client.Delete` comes too quickly after creating the job,
Expand All @@ -466,12 +483,12 @@ func (r *Reconciler) deleteJob(logger logr.Logger, namespace string, owner metav
PropagationPolicy: &propagationPolicy,
}); err != nil {
if errors.IsNotFound(err) { // Ignore not found errors. Can happen if the parent controller has been deleted. The Garbage Collector should delete the pods shortly.
return true, nil
return owner.Kind, owner.Name, true, nil
}
return false, err
return "", "", false, err
}
logger.Info("Job deleted", "name", j.Name)
return true, nil
return owner.Kind, owner.Name, true, nil
}

func findPodByName(idler *toolchainv1alpha1.Idler, name string) *toolchainv1alpha1.Pod {
Expand Down
Loading