Skip to content

Commit

Permalink
Switch to invoking SkyPilot using dagster_shell and STREAMing logs.
Browse files Browse the repository at this point in the history
  • Loading branch information
mjkanji committed Feb 28, 2024
1 parent b4877cb commit 15c07d0
Show file tree
Hide file tree
Showing 3 changed files with 58 additions and 52 deletions.
58 changes: 6 additions & 52 deletions dagster_skypilot/assets.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
import os
from pathlib import Path

import sky
from dagster import AssetExecutionContext, asset
from dagster_shell import execute_shell_command

from dagster_skypilot.consts import DEPLOYMENT_TYPE

Expand Down Expand Up @@ -37,55 +37,9 @@ def populate_keyfiles():

@asset(group_name="ai")
def skypilot_model(context: AssetExecutionContext) -> None:
# SkyPilot doesn't support reading credentials from environment variables.
# So, we need to populate the required keyfiles.
populate_keyfiles()

# The setup command.
setup = r"""
set -e # Exit if any command failed.
git clone https://github.com/huggingface/transformers/ || true
cd transformers
pip install .
cd examples/pytorch/text-classification
pip install -r requirements.txt
"""

# The command to run. Will be run under the working directory.
run = r"""
set -e # Exit if any command failed.
cd transformers/examples/pytorch/text-classification
python run_glue.py \
--model_name_or_path bert-base-cased \
--dataset_name imdb \
--do_train \
--max_seq_length 128 \
--per_device_train_batch_size 32 \
--learning_rate 2e-5 \
--max_steps 50 \
--output_dir /tmp/imdb/ --overwrite_output_dir \
--fp16
"""

# Mount an external bucket
storage_mounts = {
"/dagster-skypilot-bucket": sky.Storage(
source="s3://dagster-skypilot-bucket", mode=sky.StorageMode.MOUNT
)
}

task = sky.Task(
"huggingface",
workdir=".",
setup=setup,
run=run,
execute_shell_command(
"sky launch -c dnn dnn.yaml --yes -i 5 --down",
output_logging="STREAM",
log=context.log,
cwd=str(Path(__file__).parent.parent),
)

task.set_resources(
sky.Resources(sky.Lambda(), accelerators={"A10": 1})
).set_storage_mounts(storage_mounts)

# sky.launch(task, dryrun=True)
sky.launch(task, cluster_name="dnn", idle_minutes_to_autostop=5, down=True) # type: ignore

return None
51 changes: 51 additions & 0 deletions dnn.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
# dnn.yaml

name: huggingface

resources:
cloud: lambda
accelerators: A10:1

# Optional: upload a working directory to remote ~/sky_workdir.
# Commands in "setup" and "run" will be executed under it.
#
# workdir: .

# Optional: upload local files or mount cloud storage.
# Format:
# /remote/path: /local/path
# /remote/path: {source: s3://bucket, mode: MOUNT}
#
file_mounts:
/dagster-skypilot-bucket:
source: s3://dagster-skypilot-bucket
mode: MOUNT

# setup: |
# set -e # Exit if any command failed.
# git clone https://github.com/huggingface/transformers/ || true
# cd transformers
# pip install .
# cd examples/pytorch/text-classification
# pip install -r requirements.txt

# run: |
# set -e # Exit if any command failed.
# cd transformers/examples/pytorch/text-classification
# python run_glue.py \
# --model_name_or_path bert-base-cased \
# --dataset_name imdb \
# --do_train \
# --max_seq_length 128 \
# --per_device_train_batch_size 32 \
# --learning_rate 2e-5 \
# --max_steps 50 \
# --output_dir /tmp/imdb/ --overwrite_output_dir \
# --fp16

setup: |
set -e
echo "Running setup command."
run: |
set -e
echo "Running run command."
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
"dagster>=1.6.0,<1.7.0",
"dagster-cloud",
"skypilot[aws,azure,gcp]",
"dagster-shell",
],
extras_require={"dev": ["dagster-webserver", "pytest"]},
)

0 comments on commit 15c07d0

Please sign in to comment.