Skip to content

Commit

Permalink
Add API v2 compat support to ShimClient (#2156)
Browse files Browse the repository at this point in the history
From the caller's point of view, the ShimClient interface stays the same,
but internally it is now able to work with both API v1 and v2, emulating
API v1 logic via API v2 methods if v2 is supported by the shim.


Part-of: #1780
Co-authored-by: Victor Skvortsov <[email protected]>
  • Loading branch information
un-def and r4victor authored Dec 30, 2024
1 parent dfb7b7d commit c93a4d1
Show file tree
Hide file tree
Showing 8 changed files with 793 additions and 35 deletions.
2 changes: 2 additions & 0 deletions pytest.ini
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,5 @@ addopts =
--allow-hosts=127.0.0.1,localhost
; unix socket for Docker/testcontainers
--allow-unix-socket
markers =
shim_version
1 change: 1 addition & 0 deletions requirements_dev.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ pytest~=7.2
pytest-asyncio>=0.21
pytest-httpbin==2.1.0
pytest-socket>=0.7.0
requests-mock>=1.12.1
openai>=1.53.0,<2.0.0
freezegun>=1.2.0
ruff==0.5.3 # Should match .pre-commit-config.yaml
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -452,7 +452,7 @@ def _process_provisioning_with_shim(
# the previous job container now, making the shim available (state=running->pending)
# for the next try.
logger.warning(
"%s: failed to sumbit, shim is already running a job, stopping it now, retry later",
"%s: failed to submit, shim is already running a job, stopping it now, retry later",
fmt(job_model),
)
shim_client.stop(force=True)
Expand Down
51 changes: 47 additions & 4 deletions src/dstack/_internal/server/schemas/runner.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from base64 import b64decode
from enum import Enum
from typing import Dict, List, Optional, Union

from pydantic import Field, validator
Expand Down Expand Up @@ -94,13 +95,55 @@ class ShimVolumeInfo(CoreModel):
device_name: Optional[str] = None


class TaskConfigBody(CoreModel):
class TaskStatus(str, Enum):
    """Values accepted for a task's ``status`` field.

    Mixing in ``str`` makes every member compare equal to its plain string
    value (for example ``TaskStatus.RUNNING == "running"``), and a member can
    be looked up from that string with ``TaskStatus("running")``.
    """

    PENDING = "pending"
    PREPARING = "preparing"
    PULLING = "pulling"
    CREATING = "creating"
    RUNNING = "running"
    TERMINATED = "terminated"


class TaskInfoResponse(CoreModel):
    """Information about a single task: its id, status and termination details.

    Every field is declared without a default.
    """

    id: str
    status: TaskStatus
    # Plain strings rather than enums.
    # NOTE(review): presumably empty until the task has terminated - confirm.
    termination_reason: str
    termination_message: str


class TaskSubmitRequest(CoreModel):
    """Request body describing a task to be submitted.

    Every field is declared without a default. Collection fields use
    ``typing.List`` to match the other models in this module.
    """

    # Task identity.
    id: str
    name: str

    # Image to run and the registry credentials used with it.
    registry_username: str
    registry_password: str
    image_name: str

    # Container settings.
    container_user: str
    privileged: bool

    # Requested resources.
    # NOTE(review): units of memory/shm_size are not visible here - confirm.
    gpu: int
    cpu: float
    memory: int
    shm_size: int

    # Volumes and mount points.
    volumes: List[ShimVolumeInfo]
    volume_mounts: List[VolumeMountPoint]
    instance_mounts: List[InstanceMountPoint]

    # SSH user and keys for the host, and keys for the container.
    host_ssh_user: str
    host_ssh_keys: List[str]
    container_ssh_keys: List[str]


class TaskTerminateRequest(CoreModel):
    """Request body for terminating a task.

    All three fields are declared without a default.
    """

    termination_reason: str
    termination_message: str
    # NOTE(review): the unit is not visible here (presumably seconds) - confirm.
    timeout: int


class LegacySubmitBody(CoreModel):
username: str
password: str
image_name: str
privileged: bool
container_name: str
container_user: Optional[str]
container_user: str
shm_size: int
public_keys: List[str]
ssh_user: str
Expand All @@ -110,7 +153,7 @@ class TaskConfigBody(CoreModel):
instance_mounts: List[InstanceMountPoint]


class StopBody(CoreModel):
class LegacyStopBody(CoreModel):
    """Body of the legacy stop request.

    NOTE(review): presumably the API v1 payload - confirm against ShimClient.
    """

    # Defaults to False, so the flag may be omitted.
    force: bool = False


Expand All @@ -119,6 +162,6 @@ class JobResult(CoreModel):
reason_message: str


class PullBody(CoreModel):
class LegacyPullResponse(CoreModel):
    """Body of the legacy pull response.

    NOTE(review): presumably the API v1 payload - confirm against ShimClient.
    """

    # Kept as a plain string here rather than an enum.
    state: str
    # Declared Optional, so it may be None.
    result: Optional[JobResult]
Loading

0 comments on commit c93a4d1

Please sign in to comment.