Skip to content

Commit

Permalink
Merge pull request #186 from leelavg/oom-fix
Browse files Browse the repository at this point in the history
restrict manager resource cache based on namespaces from environment
  • Loading branch information
Madhu-1 authored Jan 7, 2025
2 parents bdd5baa + ada2433 commit 711b490
Show file tree
Hide file tree
Showing 6 changed files with 60 additions and 2 deletions.
7 changes: 7 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ IMAGE_NAME ?= ceph-csi-operator
# Allow customization of the name prefix and/or namespace
NAME_PREFIX ?= ceph-csi-operator-
NAMESPACE ?= $(NAME_PREFIX)system
# A comma-separated list of namespaces for the operator to cache objects from
WATCH_NAMESPACE ?= ""

IMG ?= $(IMAGE_REGISTRY)/$(REGISTRY_NAMESPACE)/$(IMAGE_NAME):$(IMAGE_TAG)

Expand Down Expand Up @@ -47,6 +49,11 @@ patches:
value:
name: CSI_SERVICE_ACCOUNT_PREFIX
value: $(NAME_PREFIX)
- op: add
path: /spec/template/spec/containers/1/env/-
value:
name: WATCH_NAMESPACE
value: $(WATCH_NAMESPACE)
target:
kind: Deployment
name: controller-manager
Expand Down
33 changes: 33 additions & 0 deletions cmd/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,9 @@ package main
import (
"crypto/tls"
"flag"
"fmt"
"os"
"strings"

// Import all Kubernetes client auth plugins (e.g. Azure, GCP, OIDC, etc.)
// to ensure that exec-entrypoint and run can make use of them.
Expand All @@ -29,13 +31,15 @@ import (
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
clientgoscheme "k8s.io/client-go/kubernetes/scheme"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/cache"
"sigs.k8s.io/controller-runtime/pkg/healthz"
"sigs.k8s.io/controller-runtime/pkg/log/zap"
metricsserver "sigs.k8s.io/controller-runtime/pkg/metrics/server"
"sigs.k8s.io/controller-runtime/pkg/webhook"

csiv1alpha1 "github.com/ceph/ceph-csi-operator/api/v1alpha1"
"github.com/ceph/ceph-csi-operator/internal/controller"
"github.com/ceph/ceph-csi-operator/internal/utils"
//+kubebuilder:scaffold:imports
)

Expand Down Expand Up @@ -94,6 +98,23 @@ func main() {
TLSOpts: tlsOpts,
})

defaultNamespaces := map[string]cache.Config{}
operatorNamespace, err := utils.GetOperatorNamespace()
if err != nil {
setupLog.Error(err, "manager requires namespace to be registered for controllers to reconcile")
os.Exit(1)
}
// ensure we always cache items from operator namespace
defaultNamespaces[operatorNamespace] = cache.Config{}

watchNamespace, err := getWatchNamespace()
if err != nil {
setupLog.Error(err, "manager will only watch for resources in the operator deployed namespace")
} else {
for _, namespace := range strings.Split(watchNamespace, ",") {
defaultNamespaces[namespace] = cache.Config{}
}
}
mgr, err := ctrl.NewManager(ctrl.GetConfigOrDie(), ctrl.Options{
Scheme: scheme,
Metrics: metricsserver.Options{
Expand All @@ -116,6 +137,7 @@ func main() {
// if you are doing or is intended to do any operation such as perform cleanups
// after the manager stops then its usage might be unsafe.
// LeaderElectionReleaseOnCancel: true,
Cache: cache.Options{DefaultNamespaces: defaultNamespaces},
})
if err != nil {
setupLog.Error(err, "unable to start manager")
Expand Down Expand Up @@ -160,3 +182,14 @@ func main() {
os.Exit(1)
}
}

// getWatchNamespace returns the Namespace the operator should be watching for changes
func getWatchNamespace() (string, error) {
var watchNamespaceEnvVar = "WATCH_NAMESPACE"

ns := os.Getenv(watchNamespaceEnvVar)
if ns == "" {
return "", fmt.Errorf("%s must be set", watchNamespaceEnvVar)
}
return ns, nil
}
2 changes: 2 additions & 0 deletions deploy/all-in-one/install.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15496,6 +15496,8 @@ spec:
fieldPath: metadata.namespace
- name: CSI_SERVICE_ACCOUNT_PREFIX
value: ceph-csi-operator-
- name: WATCH_NAMESPACE
value: ""
image: quay.io/cephcsi/ceph-csi-operator:latest
livenessProbe:
httpGet:
Expand Down
2 changes: 2 additions & 0 deletions deploy/multifile/operator.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -628,6 +628,8 @@ spec:
fieldPath: metadata.namespace
- name: CSI_SERVICE_ACCOUNT_PREFIX
value: ceph-csi-operator-
- name: WATCH_NAMESPACE
value: ""
image: quay.io/cephcsi/ceph-csi-operator:latest
livenessProbe:
httpGet:
Expand Down
4 changes: 2 additions & 2 deletions internal/controller/defaults.go
Original file line number Diff line number Diff line change
Expand Up @@ -66,8 +66,8 @@ var defaultDeploymentStrategy = appsv1.DeploymentStrategy{
}

var operatorNamespace = utils.Call(func() string {
namespace := os.Getenv("OPERATOR_NAMESPACE")
if namespace == "" {
namespace, err := utils.GetOperatorNamespace()
if err != nil {
panic("Required OPERATOR_NAMESPACE environment variable is either missing or empty")
}
return namespace
Expand Down
14 changes: 14 additions & 0 deletions internal/utils/core.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,17 @@ package utils

import (
"cmp"
"fmt"
"os"
"slices"
"strings"
"sync"
)

// operatorNamespaceEnvVar is the name of the environment variable that holds
// the namespace the operator is running in.
const operatorNamespaceEnvVar = "OPERATOR_NAMESPACE"

// RunConcurrently runs all of the given functions concurrently returning a channel with
// the functions' return values (of type error) then closes the channel when all functions return.
func RunConcurrently(fnList ...func() error) chan error {
Expand Down Expand Up @@ -129,3 +135,11 @@ func DeleteZeroValues[T comparable](slice []T) []T {
return value == zero
})
}

// GetOperatorNamespace returns the value of the environment variable named by
// operatorNamespaceEnvVar. It returns an error when that variable is unset or
// set to the empty string.
func GetOperatorNamespace() (string, error) {
	if namespace := os.Getenv(operatorNamespaceEnvVar); namespace != "" {
		return namespace, nil
	}
	return "", fmt.Errorf("%s must be set", operatorNamespaceEnvVar)
}

0 comments on commit 711b490

Please sign in to comment.