Skip to content

Commit

Permalink
Add container health check
Browse files Browse the repository at this point in the history
  • Loading branch information
takkyuuplayer committed Apr 10, 2024
1 parent d153e0d commit c92f9ce
Show file tree
Hide file tree
Showing 3 changed files with 198 additions and 15 deletions.
112 changes: 112 additions & 0 deletions fixtures/task-definition.json
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,118 @@
"retries": 0,
"startPeriod": 0
}
},
{
"name": "containerWithoutHealthCheck",
"image": "",
"repositoryCredentials": {
"credentialsParameter": ""
},
"cpu": 0,
"memory": 0,
"memoryReservation": 0,
"links": [
""
],
"portMappings": [
{
"containerPort": 8000,
"hostPort": 80
}
],
"essential": true,
"entryPoint": [
""
],
"command": [
""
],
"environment": [
{
"name": "",
"value": ""
}
],
"mountPoints": [
{
"sourceVolume": "",
"containerPath": "",
"readOnly": true
}
],
"volumesFrom": [
{
"sourceContainer": "",
"readOnly": true
}
],
"linuxParameters": {
"capabilities": {
"add": [
""
],
"drop": [
""
]
},
"devices": [
{
"hostPath": "",
"containerPath": "",
"permissions": [
"mknod"
]
}
],
"initProcessEnabled": true,
"sharedMemorySize": 0,
"tmpfs": [
{
"containerPath": "",
"size": 0,
"mountOptions": [
""
]
}
]
},
"hostname": "",
"user": "",
"workingDirectory": "",
"disableNetworking": true,
"privileged": true,
"readonlyRootFilesystem": true,
"dnsServers": [
""
],
"dnsSearchDomains": [
""
],
"extraHosts": [
{
"hostname": "",
"ipAddress": ""
}
],
"dockerSecurityOptions": [
""
],
"dockerLabels": {
"KeyName": ""
},
"ulimits": [
{
"name": "core",
"softLimit": 0,
"hardLimit": 0
}
],
"logConfiguration": {
"logDriver": "gelf",
"options": {
"KeyName": ""
}
}
}
],
"volumes": [
Expand Down
48 changes: 48 additions & 0 deletions rollout.go
Original file line number Diff line number Diff line change
Expand Up @@ -286,7 +286,14 @@ func (c *cage) StartCanaryTask(ctx context.Context, nextTaskDefinition *ecstypes
}, WaitDuration); err != nil {
return nil, err
}

log.Infof("🥚 waiting until canary task '%s' containers become healthy...", *taskArn)
if err := c.waitUntilContainersBecomeHealthy(ctx, *taskArn, nextTaskDefinition); err != nil {
return nil, err
}

log.Infof("🐣 canary task '%s' is running!️", *taskArn)

var task ecstypes.Task
if o, err := c.ecs.DescribeTasks(ctx, &ecs.DescribeTasksInput{
Cluster: &c.env.Cluster,
Expand All @@ -296,6 +303,7 @@ func (c *cage) StartCanaryTask(ctx context.Context, nextTaskDefinition *ecstypes
} else {
task = o.Tasks[0]
}

if len(service.LoadBalancers) == 0 {
log.Infof("no load balancer is attached to service '%s'. skip registration to target group", *service.ServiceName)
log.Infof("wait %d seconds for ensuring the task goes stable", c.env.CanaryTaskIdleDuration)
Expand Down Expand Up @@ -398,6 +406,46 @@ func (c *cage) StartCanaryTask(ctx context.Context, nextTaskDefinition *ecstypes
}, nil
}

func (c *cage) waitUntilContainersBecomeHealthy(ctx context.Context, taskArn string, nextTaskDefinition *ecstypes.TaskDefinition) error {
containerHasHealthChecks := map[string]struct{}{}
for _, definition := range nextTaskDefinition.ContainerDefinitions {
if definition.HealthCheck != nil {
containerHasHealthChecks[*definition.Name] = struct{}{}
}
}

for count := 0; count < 10; count++ {
<-newTimer(time.Duration(15) * time.Second).C
log.Infof("canary task '%s' waits until %d container(s) become healthy", taskArn, len(containerHasHealthChecks))
if o, err := c.ecs.DescribeTasks(ctx, &ecs.DescribeTasksInput{
Cluster: &c.env.Cluster,
Tasks: []string{taskArn},
}); err != nil {
return err
} else {
task := o.Tasks[0]
if *task.LastStatus != "RUNNING" {
return fmt.Errorf("😫 canary task has stopped: %s", *task.StoppedReason)
}

for _, container := range task.Containers {
if _, ok := containerHasHealthChecks[*container.Name]; !ok {
continue
}
if container.HealthStatus != ecstypes.HealthStatusHealthy {
log.Infof("container '%s' is not healthy: %s", *container.Name, container.HealthStatus)
continue
}
delete(containerHasHealthChecks, *container.Name)
}
if len(containerHasHealthChecks) == 0 {
return nil
}
}
}
return fmt.Errorf("😨 canary task hasn't become to be healthy")
}

func (c *cage) StopCanaryTask(ctx context.Context, input *StartCanaryTaskOutput) error {
if input.registrationSkipped {
log.Info("no load balancer is attached to service. Skip deregisteration.")
Expand Down
53 changes: 38 additions & 15 deletions test/context.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,17 +18,19 @@ import (
)

type MockContext struct {
Services map[string]*types.Service
Tasks map[string]*types.Task
TargetGroups map[string]struct{}
mux sync.Mutex
Services map[string]*types.Service
Tasks map[string]*types.Task
TaskDefinitions map[string]*types.TaskDefinition
TargetGroups map[string]struct{}
mux sync.Mutex
}

func NewMockContext() *MockContext {
return &MockContext{
Services: make(map[string]*types.Service),
Tasks: make(map[string]*types.Task),
TargetGroups: make(map[string]struct{}),
Services: make(map[string]*types.Service),
Tasks: make(map[string]*types.Task),
TaskDefinitions: make(map[string]*types.TaskDefinition),
TargetGroups: make(map[string]struct{}),
}
}

Expand Down Expand Up @@ -187,18 +189,25 @@ func (ctx *MockContext) DeleteService(c context.Context, input *ecs.DeleteServic
}

func (ctx *MockContext) RegisterTaskDefinition(_ context.Context, input *ecs.RegisterTaskDefinitionInput, _ ...func(options *ecs.Options)) (*ecs.RegisterTaskDefinitionOutput, error) {
ctx.mux.Lock()
defer ctx.mux.Unlock()

idstr := uuid.New().String()
ctx.TaskDefinitions[idstr] = &types.TaskDefinition{
TaskDefinitionArn: &idstr,
Family: aws.String("family"),
Revision: 1,
ContainerDefinitions: input.ContainerDefinitions,
}
return &ecs.RegisterTaskDefinitionOutput{
TaskDefinition: &types.TaskDefinition{
TaskDefinitionArn: &idstr,
Family: aws.String("family"),
Revision: 1,
ContainerDefinitions: input.ContainerDefinitions,
},
TaskDefinition: ctx.TaskDefinitions[idstr],
}, nil
}

func (ctx *MockContext) StartTask(_ context.Context, input *ecs.StartTaskInput, _ ...func(options *ecs.Options)) (*ecs.StartTaskOutput, error) {
ctx.mux.Lock()
defer ctx.mux.Unlock()

id := uuid.New()
idstr := id.String()
attachments := []types.Attachment{{
Expand All @@ -213,14 +222,28 @@ func (ctx *MockContext) StartTask(_ context.Context, input *ecs.StartTaskInput,
},
},
}}

containers := make([]types.Container, len(ctx.TaskDefinitions[*input.TaskDefinition].ContainerDefinitions))
for i, v := range ctx.TaskDefinitions[*input.TaskDefinition].ContainerDefinitions {
containers[i] = types.Container{
Name: v.Name,
Image: v.Image,
LastStatus: aws.String("RUNNING"),
}
if v.HealthCheck != nil {
containers[i].HealthStatus = "HEALTHY"
} else {
containers[i].HealthStatus = "UNKNOWN"
}
}

ret := types.Task{
TaskArn: &idstr,
ClusterArn: input.Cluster,
TaskDefinitionArn: input.TaskDefinition,
Group: input.Group,
Containers: containers,
}
ctx.mux.Lock()
defer ctx.mux.Unlock()
ctx.Tasks[idstr] = &ret
s, ok := ctx.Services[*input.Group]
var launchType types.LaunchType
Expand Down

0 comments on commit c92f9ce

Please sign in to comment.