Skip to content

Commit

Permalink
Add e2e test for kubectl ray job submit
Browse files Browse the repository at this point in the history
  • Loading branch information
chiayi committed Dec 6, 2024
1 parent aeba37e commit 2405cbc
Show file tree
Hide file tree
Showing 7 changed files with 243 additions and 0 deletions.
9 changes: 9 additions & 0 deletions .github/workflows/e2e-tests-reusable-workflow.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,9 @@ on:
dir-to-test:
required: true
type: string
ray-version:
required: false
type: string

jobs:
build:
Expand All @@ -32,6 +35,12 @@ jobs:
- name: Setup and start KinD cluster
uses: ./.github/workflows/actions/kind

- name: Install Ray
if: inputs.plugin-test
run: |
python --version
pip install -U "ray[default]==${{ inputs.ray-version }}"
- name: Build CLI and Add to PATH
if: inputs.plugin-test
run: |
Expand Down
1 change: 1 addition & 0 deletions .github/workflows/e2e-tests.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -38,3 +38,4 @@ jobs:
with:
plugin-test: true
dir-to-test: kubectl-plugin
ray-version: 2.40.0
102 changes: 102 additions & 0 deletions kubectl-plugin/test/e2e/kubectl_ray_job_submit_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
package e2e

import (
"os/exec"
"path"
"regexp"

. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
)

// Directory when running test is kuberay/kubectl-plugin/test/e2e/
const (
rayJobFilePath = "./testdata/ray-job.interactive-mode.yaml"
rayJobNoEnvFilePath = "./testdata/ray-job.interactive-mode-no-runtime-env.yaml"
kubectlRayJobWorkingDir = "./testdata/rayjob-submit-working-dir/"
entrypointSampleFileName = "entrypoint-python-sample.py"
runtimeEnvSampleFileName = "runtime-env-sample.yaml"
)

var _ = Describe("Calling ray plugin `job submit` command on Ray Job", Ordered, func() {
It("succeed in submitting RayJob", func() {
cmd := exec.Command("kubectl", "ray", "job", "submit", "-f", rayJobFilePath, "--working-dir", kubectlRayJobWorkingDir, "--", "python", entrypointSampleFileName)
output, err := cmd.CombinedOutput()

Expect(err).NotTo(HaveOccurred())
// Retrieve the Job ID from the output
regexExp := regexp.MustCompile(`'([^']*raysubmit[^']*)'`)
matches := regexExp.FindStringSubmatch(string(output))

Expect(len(matches)).To(BeNumerically(">=", 2))
cmdOutputJobID := matches[1]

// Use kubectl to check status of the rayjob
// Retrieve Job ID
cmd = exec.Command("kubectl", "get", "rayjob", "rayjob-sample", "-o", "jsonpath={.status.jobId}")
output, err = cmd.CombinedOutput()
Expect(err).ToNot(HaveOccurred())

Expect(cmdOutputJobID).To(Equal(string(output)))

// Retrieve Job Status
cmd = exec.Command("kubectl", "get", "rayjob", "rayjob-sample", "-o", "jsonpath={.status.jobStatus}")
output, err = cmd.CombinedOutput()
Expect(err).ToNot(HaveOccurred())

Expect(string(output)).To(Equal("SUCCEEDED"))

// Retrieve Job Deployment Status
cmd = exec.Command("kubectl", "get", "rayjob", "rayjob-sample", "-o", "jsonpath={.status.jobDeploymentStatus}")
output, err = cmd.CombinedOutput()
Expect(err).ToNot(HaveOccurred())

Expect(string(output)).To(Equal("Complete"))

// Cleanup
cmd = exec.Command("kubectl", "delete", "rayjob", "rayjob-sample")
_, err = cmd.CombinedOutput()
Expect(err).ToNot(HaveOccurred())
})

It("succeed in submitting RayJob with runtime environment set with working dir", func() {
runtimeEnvFilePath := path.Join(kubectlRayJobWorkingDir, runtimeEnvSampleFileName)
cmd := exec.Command("kubectl", "ray", "job", "submit", "-f", rayJobNoEnvFilePath, "--runtime-env", runtimeEnvFilePath, "--", "python", entrypointSampleFileName)
output, err := cmd.CombinedOutput()

Expect(err).NotTo(HaveOccurred())
// Retrieve the Job ID from the output
regexExp := regexp.MustCompile(`'([^']*raysubmit[^']*)'`)
matches := regexExp.FindStringSubmatch(string(output))

Expect(len(matches)).To(BeNumerically(">=", 2))
cmdOutputJobID := matches[1]

// Use kubectl to check status of the rayjob
// Retrieve Job ID
cmd = exec.Command("kubectl", "get", "rayjob", "rayjob-sample", "-o", "jsonpath={.status.jobId}")
output, err = cmd.CombinedOutput()
Expect(err).ToNot(HaveOccurred())

Expect(cmdOutputJobID).To(Equal(string(output)))

// Retrieve Job Status
cmd = exec.Command("kubectl", "get", "rayjob", "rayjob-sample", "-o", "jsonpath={.status.jobStatus}")
output, err = cmd.CombinedOutput()
Expect(err).ToNot(HaveOccurred())

Expect(string(output)).To(Equal("SUCCEEDED"))

// Retrieve Job Deployment Status
cmd = exec.Command("kubectl", "get", "rayjob", "rayjob-sample", "-o", "jsonpath={.status.jobDeploymentStatus}")
output, err = cmd.CombinedOutput()
Expect(err).ToNot(HaveOccurred())

Expect(string(output)).To(Equal("Complete"))

// Cleanup
cmd = exec.Command("kubectl", "delete", "rayjob", "rayjob-sample")
_, err = cmd.CombinedOutput()
Expect(err).ToNot(HaveOccurred())
})
})
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
apiVersion: ray.io/v1
kind: RayJob
metadata:
name: rayjob-sample
spec:
submissionMode: 'InteractiveMode'
rayClusterSpec:
rayVersion: '2.39.0'
headGroupSpec:
rayStartParams:
dashboard-host: '0.0.0.0'
template:
spec:
containers:
- name: ray-head
image: rayproject/ray:2.39.0
ports:
- containerPort: 6379
name: gcs-server
- containerPort: 8265
name: dashboard
- containerPort: 10001
name: client
resources:
limits:
cpu: "1"
requests:
cpu: "200m"
workerGroupSpecs:
- replicas: 1
minReplicas: 1
maxReplicas: 5
groupName: small-group
rayStartParams: {}
template:
spec:
containers:
- name: ray-worker
image: rayproject/ray:2.39.0
lifecycle:
preStop:
exec:
command: [ "/bin/sh","-c","ray stop" ]
resources:
limits:
cpu: "1"
requests:
cpu: "200m"
57 changes: 57 additions & 0 deletions kubectl-plugin/test/e2e/testdata/ray-job.interactive-mode.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
apiVersion: ray.io/v1
kind: RayJob
metadata:
name: rayjob-sample
spec:
# The current value is "InteractiveMode", meaning that it will wait for user to submit job and provide the job submission ID
submissionMode: 'InteractiveMode'
runtimeEnvYAML: |
pip:
- emoji==2.14.0
- pyjokes==0.6.0
env_vars:
test_env_var: "first_env_var"
another_env_var: "second_env_var"
rayClusterSpec:
rayVersion: '2.39.0' # should match the Ray version in the image of the containers
headGroupSpec:
rayStartParams:
dashboard-host: '0.0.0.0'
template:
spec:
containers:
- name: ray-head
image: rayproject/ray:2.39.0
ports:
- containerPort: 6379
name: gcs-server
- containerPort: 8265
name: dashboard
- containerPort: 10001
name: client
resources:
limits:
cpu: "1"
requests:
cpu: "200m"
workerGroupSpecs:
- replicas: 1
minReplicas: 1
maxReplicas: 5
groupName: small-group
rayStartParams: {}
template:
spec:
containers:
- name: ray-worker
image: rayproject/ray:2.39.0
lifecycle:
preStop:
exec:
command: [ "/bin/sh","-c","ray stop" ]
resources:
limits:
cpu: "1"
requests:
cpu: "200m"
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
import ray
import os
import emoji
import pyjokes

ray.init()

@ray.remote
def f():
assert emoji.__version__ == "2.14.0"
assert pyjokes.__version__ == "0.6.0"

first_env_var = os.getenv("test_env_var")
second_env_var = os.getenv("another_env_var")

assert first_env_var == "first_env_var"
assert second_env_var == "second_env_var"

ray.get(f.remote())
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
pip:
- emoji==2.14.0
- pyjokes==0.6.0
env_vars:
test_env_var: "first_env_var"
another_env_var: "second_env_var"
working_dir: ./testdata/rayjob-submit-working-dir/

0 comments on commit 2405cbc

Please sign in to comment.