From 7b11a7ca96fdb4a8f62bc90fa71c9c1b60ee57b8 Mon Sep 17 00:00:00 2001 From: Aaron Liang Date: Tue, 5 Nov 2024 17:44:45 -0800 Subject: [PATCH] Add e2e test for kubectl ray job submit --- .../e2e-tests-reusable-workflow.yaml | 15 +++ .github/workflows/e2e-tests.yaml | 1 + .../test/e2e/kubectl_ray_job_submit_test.go | 102 ++++++++++++++++++ ...y-job.interactive-mode-no-runtime-env.yaml | 48 +++++++++ .../testdata/ray-job.interactive-mode.yaml | 57 ++++++++++ .../entrypoint-python-sample.py | 19 ++++ .../runtime-env-sample.yaml | 7 ++ 7 files changed, 249 insertions(+) create mode 100644 kubectl-plugin/test/e2e/kubectl_ray_job_submit_test.go create mode 100644 kubectl-plugin/test/e2e/testdata/ray-job.interactive-mode-no-runtime-env.yaml create mode 100644 kubectl-plugin/test/e2e/testdata/ray-job.interactive-mode.yaml create mode 100644 kubectl-plugin/test/e2e/testdata/rayjob-submit-working-dir/entrypoint-python-sample.py create mode 100644 kubectl-plugin/test/e2e/testdata/rayjob-submit-working-dir/runtime-env-sample.yaml diff --git a/.github/workflows/e2e-tests-reusable-workflow.yaml b/.github/workflows/e2e-tests-reusable-workflow.yaml index 8fbc71907b2..5e67f0f6c79 100644 --- a/.github/workflows/e2e-tests-reusable-workflow.yaml +++ b/.github/workflows/e2e-tests-reusable-workflow.yaml @@ -9,6 +9,9 @@ on: dir-to-test: required: true type: string + ray-version: + required: false + type: string jobs: build: @@ -32,6 +35,18 @@ jobs: - name: Setup and start KinD cluster uses: ./.github/workflows/actions/kind + - name: Set up Python + if: inputs.plugin-test + uses: actions/setup-python@v5 + with: + python-version: '3.9' + + - name: Install Ray + if: inputs.plugin-test + run: | + python --version + pip install -U "ray[default]==${{ inputs.ray-version }}" + - name: Build CLI and Add to PATH if: inputs.plugin-test run: | diff --git a/.github/workflows/e2e-tests.yaml b/.github/workflows/e2e-tests.yaml index 996083ca6e8..d06fd24a05e 100644 --- a/.github/workflows/e2e-tests.yaml +++ b/.github/workflows/e2e-tests.yaml @@ -38,3 +38,4 @@ jobs: with: plugin-test: true dir-to-test: kubectl-plugin + ray-version: 2.40.0 diff --git a/kubectl-plugin/test/e2e/kubectl_ray_job_submit_test.go b/kubectl-plugin/test/e2e/kubectl_ray_job_submit_test.go new file mode 100644 index 00000000000..dc467d4f1c6 --- /dev/null +++ b/kubectl-plugin/test/e2e/kubectl_ray_job_submit_test.go @@ -0,0 +1,102 @@ +package e2e + +import ( + "os/exec" + "path" + "regexp" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" +) + +// Directory when running test is kuberay/kubectl-plugin/test/e2e/ +const ( + rayJobFilePath = "./testdata/ray-job.interactive-mode.yaml" + rayJobNoEnvFilePath = "./testdata/ray-job.interactive-mode-no-runtime-env.yaml" + kubectlRayJobWorkingDir = "./testdata/rayjob-submit-working-dir/" + entrypointSampleFileName = "entrypoint-python-sample.py" + runtimeEnvSampleFileName = "runtime-env-sample.yaml" +) + +var _ = Describe("Calling ray plugin `job submit` command on Ray Job", Ordered, func() { + It("succeed in submitting RayJob", func() { + cmd := exec.Command("kubectl", "ray", "job", "submit", "-f", rayJobFilePath, "--working-dir", kubectlRayJobWorkingDir, "--", "python", entrypointSampleFileName) + output, err := cmd.CombinedOutput() + + Expect(err).NotTo(HaveOccurred()) + // Retrieve the Job ID from the output + regexExp := regexp.MustCompile(`'([^']*raysubmit[^']*)'`) + matches := regexExp.FindStringSubmatch(string(output)) + + Expect(len(matches)).To(BeNumerically(">=", 2)) + cmdOutputJobID := matches[1] + + // Use kubectl to check status of the rayjob + // Retrieve Job ID + cmd = exec.Command("kubectl", "get", "rayjob", "rayjob-sample", "-o", "jsonpath={.status.jobId}") + output, err = cmd.CombinedOutput() + Expect(err).ToNot(HaveOccurred()) + + Expect(cmdOutputJobID).To(Equal(string(output))) + + // Retrieve Job Status + cmd = exec.Command("kubectl", "get", "rayjob", "rayjob-sample", "-o", "jsonpath={.status.jobStatus}") + output, err = cmd.CombinedOutput() + Expect(err).ToNot(HaveOccurred()) + + Expect(string(output)).To(Equal("SUCCEEDED")) + + // Retrieve Job Deployment Status + cmd = exec.Command("kubectl", "get", "rayjob", "rayjob-sample", "-o", "jsonpath={.status.jobDeploymentStatus}") + output, err = cmd.CombinedOutput() + Expect(err).ToNot(HaveOccurred()) + + Expect(string(output)).To(Equal("Complete")) + + // Cleanup + cmd = exec.Command("kubectl", "delete", "rayjob", "rayjob-sample") + _, err = cmd.CombinedOutput() + Expect(err).ToNot(HaveOccurred()) + }) + + It("succeed in submitting RayJob with runtime environment set with working dir", func() { + runtimeEnvFilePath := path.Join(kubectlRayJobWorkingDir, runtimeEnvSampleFileName) + cmd := exec.Command("kubectl", "ray", "job", "submit", "-f", rayJobNoEnvFilePath, "--runtime-env", runtimeEnvFilePath, "--", "python", entrypointSampleFileName) + output, err := cmd.CombinedOutput() + + Expect(err).NotTo(HaveOccurred()) + // Retrieve the Job ID from the output + regexExp := regexp.MustCompile(`'([^']*raysubmit[^']*)'`) + matches := regexExp.FindStringSubmatch(string(output)) + + Expect(len(matches)).To(BeNumerically(">=", 2)) + cmdOutputJobID := matches[1] + + // Use kubectl to check status of the rayjob + // Retrieve Job ID + cmd = exec.Command("kubectl", "get", "rayjob", "rayjob-sample", "-o", "jsonpath={.status.jobId}") + output, err = cmd.CombinedOutput() + Expect(err).ToNot(HaveOccurred()) + + Expect(cmdOutputJobID).To(Equal(string(output))) + + // Retrieve Job Status + cmd = exec.Command("kubectl", "get", "rayjob", "rayjob-sample", "-o", "jsonpath={.status.jobStatus}") + output, err = cmd.CombinedOutput() + Expect(err).ToNot(HaveOccurred()) + + Expect(string(output)).To(Equal("SUCCEEDED")) + + // Retrieve Job Deployment Status + cmd = exec.Command("kubectl", "get", "rayjob", "rayjob-sample", "-o", "jsonpath={.status.jobDeploymentStatus}") + output, err = cmd.CombinedOutput() + Expect(err).ToNot(HaveOccurred()) + + Expect(string(output)).To(Equal("Complete")) + + // Cleanup + cmd = exec.Command("kubectl", "delete", "rayjob", "rayjob-sample") + _, err = cmd.CombinedOutput() + Expect(err).ToNot(HaveOccurred()) + }) +}) diff --git a/kubectl-plugin/test/e2e/testdata/ray-job.interactive-mode-no-runtime-env.yaml b/kubectl-plugin/test/e2e/testdata/ray-job.interactive-mode-no-runtime-env.yaml new file mode 100644 index 00000000000..d883da72a92 --- /dev/null +++ b/kubectl-plugin/test/e2e/testdata/ray-job.interactive-mode-no-runtime-env.yaml @@ -0,0 +1,48 @@ +apiVersion: ray.io/v1 +kind: RayJob +metadata: + name: rayjob-sample +spec: + submissionMode: 'InteractiveMode' + rayClusterSpec: + rayVersion: '2.39.0' + headGroupSpec: + rayStartParams: + dashboard-host: '0.0.0.0' + template: + spec: + containers: + - name: ray-head + image: rayproject/ray:2.39.0 + ports: + - containerPort: 6379 + name: gcs-server + - containerPort: 8265 + name: dashboard + - containerPort: 10001 + name: client + resources: + limits: + cpu: "1" + requests: + cpu: "200m" + workerGroupSpecs: + - replicas: 1 + minReplicas: 1 + maxReplicas: 5 + groupName: small-group + rayStartParams: {} + template: + spec: + containers: + - name: ray-worker + image: rayproject/ray:2.39.0 + lifecycle: + preStop: + exec: + command: [ "/bin/sh","-c","ray stop" ] + resources: + limits: + cpu: "1" + requests: + cpu: "200m" diff --git a/kubectl-plugin/test/e2e/testdata/ray-job.interactive-mode.yaml b/kubectl-plugin/test/e2e/testdata/ray-job.interactive-mode.yaml new file mode 100644 index 00000000000..7808ebad327 --- /dev/null +++ b/kubectl-plugin/test/e2e/testdata/ray-job.interactive-mode.yaml @@ -0,0 +1,57 @@ +apiVersion: ray.io/v1 +kind: RayJob +metadata: + name: rayjob-sample +spec: + # The current value is "InteractiveMode", meaning that it will wait for user to submit job and provide the job submission ID + submissionMode: 'InteractiveMode' + runtimeEnvYAML: | + pip: + - emoji==2.14.0 + - pyjokes==0.6.0 + env_vars: + test_env_var: "first_env_var" + another_env_var: "second_env_var" + + rayClusterSpec: + rayVersion: '2.39.0' # should match the Ray version in the image of the containers + headGroupSpec: + rayStartParams: + dashboard-host: '0.0.0.0' + template: + spec: + containers: + - name: ray-head + image: rayproject/ray:2.39.0 + ports: + - containerPort: 6379 + name: gcs-server + - containerPort: 8265 + name: dashboard + - containerPort: 10001 + name: client + resources: + limits: + cpu: "1" + requests: + cpu: "200m" + workerGroupSpecs: + - replicas: 1 + minReplicas: 1 + maxReplicas: 5 + groupName: small-group + rayStartParams: {} + template: + spec: + containers: + - name: ray-worker + image: rayproject/ray:2.39.0 + lifecycle: + preStop: + exec: + command: [ "/bin/sh","-c","ray stop" ] + resources: + limits: + cpu: "1" + requests: + cpu: "200m" diff --git a/kubectl-plugin/test/e2e/testdata/rayjob-submit-working-dir/entrypoint-python-sample.py b/kubectl-plugin/test/e2e/testdata/rayjob-submit-working-dir/entrypoint-python-sample.py new file mode 100644 index 00000000000..18b6de6bae6 --- /dev/null +++ b/kubectl-plugin/test/e2e/testdata/rayjob-submit-working-dir/entrypoint-python-sample.py @@ -0,0 +1,19 @@ +import ray +import os +import emoji +import pyjokes + +ray.init() + +@ray.remote +def f(): + assert emoji.__version__ == "2.14.0" + assert pyjokes.__version__ == "0.6.0" + + first_env_var = os.getenv("test_env_var") + second_env_var = os.getenv("another_env_var") + + assert first_env_var == "first_env_var" + assert second_env_var == "second_env_var" + +ray.get(f.remote()) diff --git a/kubectl-plugin/test/e2e/testdata/rayjob-submit-working-dir/runtime-env-sample.yaml b/kubectl-plugin/test/e2e/testdata/rayjob-submit-working-dir/runtime-env-sample.yaml new file mode 100644 index 00000000000..651db18d969 --- /dev/null +++ b/kubectl-plugin/test/e2e/testdata/rayjob-submit-working-dir/runtime-env-sample.yaml @@ -0,0 +1,7 @@ +pip: + - emoji==2.14.0 + - pyjokes==0.6.0 +env_vars: + test_env_var: "first_env_var" + another_env_var: "second_env_var" +working_dir: ./testdata/rayjob-submit-working-dir/