Skip to content

Commit

Permalink
Remove EFA installation scripts; EFA is installed by default on EKS A…
Browse files Browse the repository at this point in the history
…L2 GPU, EKS AL2023 NVIDIA, and EKS AL2023 Neuron AMIs (#8113)

* fix: Remove EFA installation scripts; EFA is installed by default on EKS accelerated AMIs

* chore: Update EFA device plugin instances and container image to match `eks-charts`

* chore: Clean up dangling reference

* fix: Update test for EFA AL2 user data

* docs: Update remaining references
  • Loading branch information
bryantbiggs authored Jan 10, 2025
1 parent d84feef commit 8ffd609
Show file tree
Hide file tree
Showing 13 changed files with 192 additions and 244 deletions.
256 changes: 188 additions & 68 deletions pkg/addons/assets/efa-device-plugin.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -40,135 +40,255 @@ spec:
- key: "node.kubernetes.io/instance-type"
operator: In
values:
- c5n.18xlarge
- c5n.9xlarge
- c5n.metal
- c6a.32xlarge
- c6a.48xlarge
- c6a.metal
- c6gn.16xlarge
- c6i.32xlarge
- c6i.metal
- c6id.32xlarge
- c6id.metal
- dl1.24xlarge
- g4dn.12xlarge
- g4dn.8xlarge
- g4dn.metal
- g5.48xlarge
- g6.8xlarge
- g6.12xlarge
- g6.16xlarge
- g6.24xlarge
- g6.48xlarge
- hpc6a.48xlarge
- hpc7g.16xlarge
- hpc7g.8xlarge
- hpc7g.4xlarge
- i3en.12xlarge
- i3en.24xlarge
- i3en.metal
- i4i.32xlarge
- i4i.metal
- im4gn.16xlarge
- inf1.24xlarge
- m5dn.24xlarge
- m5dn.metal
- m5n.24xlarge
- m5n.metal
- m5zn.12xlarge
- m5zn.metal
- m6a.32xlarge
- m6a.48xlarge
- m6a.metal
- m6i.32xlarge
- m6i.metal
- m6id.32xlarge
- m6id.metal
- p3dn.24xlarge
- p4d.24xlarge
- p4de.24xlarge
- m6idn.32xlarge
- m6idn.metal
- m6in.32xlarge
- m6in.metal
- m7a.48xlarge
- m7a.metal-48xl
- m7g.16xlarge
- m7g.metal
- m7gd.16xlarge
- m7i.48xlarge
- m7i.metal-48xl
- c5n.9xlarge
- c5n.18xlarge
- c5n.metal
- c6a.48xlarge
- c6a.metal
- c6gn.16xlarge
- c6i.32xlarge
- c6i.metal
- c6id.32xlarge
- c6id.metal
- c6in.32xlarge
- c6in.metal
- c7a.48xlarge
- c7a.metal-48xl
- c7g.16xlarge
- c7g.metal
- c7gd.16xlarge
- c7gn.16xlarge
- c7i.48xlarge
- c7i.metal-48xl
- r5dn.24xlarge
- r5dn.metal
- r5n.24xlarge
- r5n.metal
- r6a.48xlarge
- r6a.metal
- r6i.32xlarge
- r6i.metal
- vt1.24xlarge
- r6idn.32xlarge
- r6idn.metal
- r6in.32xlarge
- r6in.metal
- r6id.32xlarge
- r6id.metal
- r7a.48xlarge
- r7a.metal-48xl
- r7g.16xlarge
- r7g.metal
- r7gd.16xlarge
- r7i.48xlarge
- r7i.metal-48xl
- r7iz.32xlarge
- r7iz.metal-32xl
- x2idn.32xlarge
- x2idn.metal
- x2iedn.32xlarge
- x2iedn.metal
- x2iezn.12xlarge
- x2iezn.metal
- matchExpressions:
- key: "node.kubernetes.io/instance-type"
operator: In
values:
- c5n.18xlarge
- c5n.9xlarge
- c5n.metal
- c6a.32xlarge
- c6a.48xlarge
- c6a.metal
- c6gn.16xlarge
- c6i.32xlarge
- c6i.metal
- c6id.32xlarge
- c6id.metal
- i3en.12xlarge
- i3en.24xlarge
- i3en.metal
- i4g.16xlarge
- i4i.32xlarge
- i4i.metal
- im4gn.16xlarge
- dl1.24xlarge
- g4dn.12xlarge
- dl2q.24xlarge
- g4dn.8xlarge
- g4dn.12xlarge
- g4dn.16xlarge
- g4dn.metal
- g5.8xlarge
- g5.12xlarge
- g5.16xlarge
- g5.24xlarge
- g5.48xlarge
- g6.8xlarge
- g6.12xlarge
- g6.16xlarge
- g6.24xlarge
- g6.48xlarge
- g6e.8xlarge
- g6e.12xlarge
- g6e.16xlarge
- g6e.24xlarge
- g6e.48xlarge
- gr6.8xlarge
- inf1.24xlarge
- p3dn.24xlarge
- p4d.24xlarge
- p4de.24xlarge
- p5.48xlarge
- p5e.48xlarge
- p5en.48xlarge
- trn1.32xlarge
- trn1n.32xlarge
- trn2.48xlarge
- vt1.24xlarge
- hpc6a.48xlarge
- hpc7g.16xlarge
- hpc7g.8xlarge
- hpc6id.32xlarge
- hpc7a.12xlarge
- hpc7a.24xlarge
- hpc7a.48xlarge
- hpc7a.96xlarge
- hpc7g.4xlarge
- i3en.12xlarge
- i3en.24xlarge
- i3en.metal
- i4i.32xlarge
- i4i.metal
- im4gn.16xlarge
- inf1.24xlarge
- hpc7g.8xlarge
- hpc7g.16xlarge
- matchExpressions:
- key: "node.kubernetes.io/instance-type"
operator: In
values:
- m5dn.24xlarge
- m5dn.metal
- m5n.24xlarge
- m5n.metal
- m5zn.12xlarge
- m5zn.metal
- m6a.32xlarge
- m6a.48xlarge
- m6a.metal
- m6i.32xlarge
- m6i.metal
- m6id.32xlarge
- m6id.metal
- p3dn.24xlarge
- p4d.24xlarge
- p4de.24xlarge
- m6idn.32xlarge
- m6idn.metal
- m6in.32xlarge
- m6in.metal
- m7a.48xlarge
- m7a.metal-48xl
- m7g.16xlarge
- m7g.metal
- m7gd.16xlarge
- m7i.48xlarge
- m7i.metal-48xl
- c5n.9xlarge
- c5n.18xlarge
- c5n.metal
- c6a.48xlarge
- c6a.metal
- c6gn.16xlarge
- c6i.32xlarge
- c6i.metal
- c6id.32xlarge
- c6id.metal
- c6in.32xlarge
- c6in.metal
- c7a.48xlarge
- c7a.metal-48xl
- c7g.16xlarge
- c7g.metal
- c7gd.16xlarge
- c7gn.16xlarge
- c7i.48xlarge
- c7i.metal-48xl
- r5dn.24xlarge
- r5dn.metal
- r5n.24xlarge
- r5n.metal
- r6a.48xlarge
- r6a.metal
- r6i.32xlarge
- r6i.metal
- vt1.24xlarge
- r6idn.32xlarge
- r6idn.metal
- r6in.32xlarge
- r6in.metal
- r6id.32xlarge
- r6id.metal
- r7a.48xlarge
- r7a.metal-48xl
- r7g.16xlarge
- r7g.metal
- r7gd.16xlarge
- r7i.48xlarge
- r7i.metal-48xl
- r7iz.32xlarge
- r7iz.metal-32xl
- x2idn.32xlarge
- x2idn.metal
- x2iedn.32xlarge
- x2iedn.metal
- x2iezn.12xlarge
- x2iezn.metal
- i3en.12xlarge
- i3en.24xlarge
- i3en.metal
- i4g.16xlarge
- i4i.32xlarge
- i4i.metal
- im4gn.16xlarge
- dl1.24xlarge
- dl2q.24xlarge
- g4dn.8xlarge
- g4dn.12xlarge
- g4dn.16xlarge
- g4dn.metal
- g5.8xlarge
- g5.12xlarge
- g5.16xlarge
- g5.24xlarge
- g5.48xlarge
- g6.8xlarge
- g6.12xlarge
- g6.16xlarge
- g6.24xlarge
- g6.48xlarge
- g6e.8xlarge
- g6e.12xlarge
- g6e.16xlarge
- g6e.24xlarge
- g6e.48xlarge
- gr6.8xlarge
- inf1.24xlarge
- p3dn.24xlarge
- p4d.24xlarge
- p4de.24xlarge
- p5.48xlarge
- p5e.48xlarge
- p5en.48xlarge
- trn1.32xlarge
- trn1n.32xlarge
- trn2.48xlarge
- vt1.24xlarge
- hpc6a.48xlarge
- hpc6id.32xlarge
- hpc7a.12xlarge
- hpc7a.24xlarge
- hpc7a.48xlarge
- hpc7a.96xlarge
- hpc7g.4xlarge
- hpc7g.8xlarge
- hpc7g.16xlarge
hostNetwork: true
containers:
- image: "%s.dkr.ecr.%s.%s/eks/aws-efa-k8s-device-plugin:v0.3.3"
- image: "%s.dkr.ecr.%s.%s/eks/aws-efa-k8s-device-plugin:v0.5.4"
name: aws-efa-k8s-device-plugin
securityContext:
allowPrivilegeEscalation: false
Expand Down
5 changes: 2 additions & 3 deletions pkg/nodebootstrap/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ The call to `UserData` will also dynamically add the following:
The bootstrap wrapper scripts use `jq` and `sed` to write both the user's configuration and eksctl's own configuration into various files,
and then call `/etc/eks/bootstrap.sh`.

For AL2, enabling either SSM or EFA will add `assets/install-ssm.al2.sh` or `assets/efa.al2.sh`.
For AL2, enabling SSM will add `assets/install-ssm.al2.sh`.

### AmazonLinux2023

Expand Down Expand Up @@ -73,7 +73,7 @@ spec:
```

For EKS-managed nodes based on native AMIs, the userdata above is fulfilled automatically by the AWS SSM agent.
For EKS-managed nodes based on native AMIs, the userdata above is fulfilled automatically by the AWS SSM agent.

## Troubleshooting

Expand Down Expand Up @@ -111,6 +111,5 @@ Files:
/var/lib/cloud/scripts/eksctl/bootstrap.al2.sh
/etc/kubernetes/kubelet/kubelet-config.json
/etc/docker/daemon.json
/var/lib/cloud/scripts/eksctl/efa.al2.sh
/var/lib/cloud/scripts/eksctl/install-ssm.sh
```
4 changes: 0 additions & 4 deletions pkg/nodebootstrap/al2.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,10 +29,6 @@ func NewAL2Bootstrapper(clusterConfig *api.ClusterConfig, ng *api.NodeGroup, clu
func (b *AmazonLinux2) UserData() (string, error) {
var scripts []script

if api.IsEnabled(b.ng.EFAEnabled) {
scripts = append(scripts, script{name: "efa.al2.sh", contents: assets.EfaAl2Sh})
}

body, err := linuxConfig(b.clusterConfig, al2BootScript, assets.BootstrapAl2Sh, b.clusterDNS, b.ng, scripts...)
if err != nil {
return "", errors.Wrap(err, "encoding user data")
Expand Down
6 changes: 0 additions & 6 deletions pkg/nodebootstrap/al2023.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,17 +32,11 @@ type AL2023 struct {

func NewManagedAL2023Bootstrapper(cfg *api.ClusterConfig, mng *api.ManagedNodeGroup, clusterDNS string) *AL2023 {
al2023 := newAL2023Bootstrapper(cfg, mng, clusterDNS)
if api.IsEnabled(mng.EFAEnabled) {
al2023.cloudboot = append(al2023.cloudboot, assets.EfaManagedAL2023Boothook)
}
return al2023
}

func NewAL2023Bootstrapper(cfg *api.ClusterConfig, ng *api.NodeGroup, clusterDNS string) *AL2023 {
al2023 := newAL2023Bootstrapper(cfg, ng, clusterDNS)
if api.IsEnabled(ng.EFAEnabled) {
al2023.scripts = append(al2023.scripts, assets.EfaAl2023Sh)
}
return al2023
}

Expand Down
Loading

0 comments on commit 8ffd609

Please sign in to comment.