Commit
Update a few examples (#621)
* Update a few examples

* Ruff

* Update download.py

---------

Co-authored-by: Eric Zhang <[email protected]>
erikbern and ekzhang authored Mar 5, 2024
1 parent 495b005 commit 4d0562c
Showing 16 changed files with 59 additions and 37 deletions.
16 changes: 10 additions & 6 deletions 06_gpu_and_ml/diffusers/train_and_serve_diffusers_script.py
@@ -180,12 +180,16 @@
 stub = Stub(name="example-diffusers-app")
 
 MODEL_DIR = Path("/model")
-stub.training_data_volume = Volume.persisted("diffusers-training-data-volume")
-stub.model_volume = Volume.persisted("diffusers-model-volume")
+training_data_volume = Volume.from_name(
+    "diffusers-training-data-volume", create_if_missing=True
+)
+model_volume = Volume.from_name(
+    "diffusers-model-volume", create_if_missing=True
+)
 
 VOLUME_CONFIG = {
-    "/training_data": stub.training_data_volume,
-    "/model": stub.model_volume,
+    "/training_data": training_data_volume,
+    "/model": model_volume,
 }
 
 # ## Set up config
@@ -337,7 +341,7 @@ def launch_training():
     # The trained model artefacts have been output to the volume mounted at `MODEL_DIR`.
    # To persist these artefacts for use in future inference function calls, we 'commit' the changes
     # to the volume.
-    stub.model_volume.commit()
+    model_volume.commit()
 
 
 @stub.local_entrypoint()
@@ -370,7 +374,7 @@ def load_model(self):
         from diffusers import DDIMScheduler, StableDiffusionPipeline
 
         # Reload the modal.Volume to ensure the latest state is accessible.
-        stub.model_volume.reload()
+        model_volume.reload()
 
         # set up a hugging face inference pipeline using our model
         ddim = DDIMScheduler.from_pretrained(MODEL_DIR, subfolder="scheduler")
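The pattern above repeats through the rest of the commit: the older `Volume.persisted(name)` constructor becomes `Volume.from_name(name, create_if_missing=True)`, and volumes move from `stub` attributes to plain module-level variables. A minimal sketch of the before and after, using a hypothetical app and volume name:

from modal import Stub, Volume

stub = Stub("example-app")  # hypothetical app name

# Older API: implicitly created the volume and hung it off the stub.
# stub.data_volume = Volume.persisted("my-data-vol")

# Current API: look up the volume by name, creating it on first use.
data_volume = Volume.from_name("my-data-vol", create_if_missing=True)

# Mount dicts then reference the module-level variable directly.
VOLUME_CONFIG = {"/data": data_volume}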
4 changes: 3 additions & 1 deletion 06_gpu_and_ml/dreambooth/dreambooth_app.py
@@ -153,7 +153,9 @@ def download_models():
 # We'll use one to store the fine-tuned weights we create during training
 # and then load them back in for inference.
 
-volume = Volume.persisted("dreambooth-finetuning-volume")
+volume = Volume.from_name(
+    "dreambooth-finetuning-volume", create_if_missing=True
+)
 MODEL_DIR = "/model"
 
 
2 changes: 1 addition & 1 deletion 06_gpu_and_ml/embeddings/text_embeddings_inference.py
@@ -58,7 +58,7 @@ def download_model():
     spawn_server().terminate()
 
 
-volume = Volume.persisted("tei-hn-data")
+volume = Volume.from_name("tei-hn-data", create_if_missing=True)
 
 stub = Stub("example-tei")
 
2 changes: 1 addition & 1 deletion 06_gpu_and_ml/embeddings/wikipedia/download.py
@@ -7,7 +7,7 @@
 
 
 # We define our Modal Resources that we'll need
-volume = Volume.persisted("embedding-wikipedia")
+volume = Volume.from_name("embedding-wikipedia", create_if_missing=True)
 image = Image.debian_slim(python_version="3.9").pip_install(
     "datasets==2.16.1", "apache_beam==2.53.0"
 )
8 changes: 6 additions & 2 deletions 06_gpu_and_ml/embeddings/wikipedia/main.py
@@ -19,8 +19,12 @@
 
 ## Dataset-Specific Configuration
 DATASET_NAME = "wikipedia"
-DATASET_READ_VOLUME = Volume.persisted("embedding-wikipedia")
-EMBEDDING_CHECKPOINT_VOLUME = Volume.persisted("checkpoint")
+DATASET_READ_VOLUME = Volume.from_name(
+    "embedding-wikipedia", create_if_missing=True
+)
+EMBEDDING_CHECKPOINT_VOLUME = Volume.from_name(
+    "checkpoint", create_if_missing=True
+)
 DATASET_DIR = "/data"
 CHECKPOINT_DIR = "/checkpoint"
 SAVE_TO_DISK = True
2 changes: 1 addition & 1 deletion 06_gpu_and_ml/flan_t5/flan_t5_finetune.py
@@ -38,7 +38,7 @@
 )
 
 stub = Stub(name="example-news-summarizer", image=image)
-output_vol = Volume.persisted("finetune-volume")
+output_vol = Volume.from_name("finetune-volume", create_if_missing=True)
 
 # ### Handling preemption
 #
8 changes: 5 additions & 3 deletions 06_gpu_and_ml/openai_whisper/finetuning/train/__main__.py
@@ -22,8 +22,9 @@
 )
 
 
-persistent_volume = modal.NetworkFileSystem.persisted(
-    app_config.persistent_vol_name
+persistent_volume = modal.Volume.from_name(
+    app_config.persistent_vol_name,
+    create_if_missing=True,
 )
 image = modal.Image.debian_slim().pip_install_from_requirements(
     "requirements.txt"
@@ -39,7 +40,7 @@
 
 @stub.function(
     gpu="A10G",
-    network_file_systems={app_config.model_dir: persistent_volume},
+    volumes={app_config.model_dir: persistent_volume},
     # 12hrs
     timeout=12 * 60 * 60,
     # For occasional connection error to 'cdn-lfs.huggingface.co'
@@ -447,6 +448,7 @@ def compute_metrics(pred):
     trainer.log_metrics("train", metrics)
     trainer.save_metrics("train", metrics)
     trainer.save_state()
+    persistent_volume.commit()
 
     logger.info("13. Running evaluation")
     results = {}  # type: ignore
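This file changes more than the constructor: the `NetworkFileSystem` becomes a `Volume`, the `network_file_systems=` function argument becomes `volumes=`, and a `persistent_volume.commit()` call is added after training, since Volume writes only become visible to other containers once committed. A sketch of that mount-write-commit lifecycle, with hypothetical names and paths:

from pathlib import Path

from modal import Stub, Volume

stub = Stub("example-finetune")  # hypothetical app name
volume = Volume.from_name("finetune-vol", create_if_missing=True)  # hypothetical
MODEL_DIR = "/model"  # hypothetical mount point

@stub.function(volumes={MODEL_DIR: volume}, gpu="A10G", timeout=12 * 60 * 60)
def train():
    # ... training loop writes checkpoints under MODEL_DIR ...
    Path(MODEL_DIR, "checkpoint.bin").write_bytes(b"...")
    # Persist the writes so later containers (e.g. inference) can read them.
    volume.commit()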
4 changes: 3 additions & 1 deletion 06_gpu_and_ml/openai_whisper/pod_transcriber/app/main.py
@@ -23,7 +23,9 @@
 from . import config, podcast, search
 
 logger = config.get_logger(__name__)
-volume = NetworkFileSystem.persisted("dataset-cache-vol")
+volume = NetworkFileSystem.from_name(
+    "dataset-cache-vol", create_if_missing=True
+)
 
 app_image = (
     Image.debian_slim(python_version="3.10")
4 changes: 3 additions & 1 deletion 06_gpu_and_ml/spam-detect/spam_detect/app.py
@@ -20,4 +20,6 @@
 
 stub = modal.Stub(name="example-spam-detect-llm", image=image)
 # Used to store datasets, trained models, model metadata, config.
-volume = modal.Volume.persisted("example-spam-detect-vol")
+volume = modal.Volume.from_name(
+    "example-spam-detect-vol", create_if_missing=True
+)
4 changes: 2 additions & 2 deletions 06_gpu_and_ml/text-to-pokemon/text_to_pokemon/config.py
@@ -1,7 +1,7 @@
 import pathlib
 import time
 
-from modal import Image, NetworkFileSystem, Stub
+from modal import Image, Stub, Volume
 
 CACHE_DIR = "/cache"
 MODEL_CACHE = pathlib.Path("/models")
@@ -189,7 +189,7 @@ def null_safety(images, **kwargs):
     return pipe
 
 
-volume = NetworkFileSystem.persisted("txt-to-pokemon-cache-vol")
+volume = Volume.from_name("txt-to-pokemon-cache-vol", create_if_missing=True)
 image = (
     Image.debian_slim()
     .pip_install(
13 changes: 6 additions & 7 deletions 06_gpu_and_ml/text-to-pokemon/text_to_pokemon/main.py
@@ -61,9 +61,7 @@ def image_to_byte_array(image) -> bytes:
     return buf.getvalue()
 
 
-@stub.cls(
-    gpu="A10G", network_file_systems={config.CACHE_DIR: volume}, keep_warm=1
-)
+@stub.cls(gpu="A10G", volumes={config.CACHE_DIR: volume}, keep_warm=1)
 class Model:
     @enter()
     def load_model(self):
@@ -90,7 +88,7 @@ def normalize_prompt(p: str) -> str:
     return re.sub("[^a-z0-9- ]", "", p.lower())
 
 
-@stub.function(network_file_systems={config.CACHE_DIR: volume})
+@stub.function(volumes={config.CACHE_DIR: volume})
 def diskcached_text_to_pokemon(prompt: str) -> list[bytes]:
     start_time = time.monotonic()
     cached = False
@@ -121,6 +119,7 @@ def diskcached_text_to_pokemon(prompt: str) -> list[bytes]:
         with open(dest_path, "wb") as f:
             f.write(image_bytes)
         print(f"✔️ Saved a Pokémon sample to {dest_path}.")
+    volume.commit()
     total_duration_secs = timedelta(
         seconds=time.monotonic() - start_time
     ).total_seconds()
@@ -152,7 +151,7 @@ def fastapi_app():
 
 @stub.function(
     image=inpaint.cv_image,
-    network_file_systems={config.CACHE_DIR: volume},
+    volumes={config.CACHE_DIR: volume},
     interactive=False,
 )
 def inpaint_new_pokemon_name(card_image: bytes, prompt: str) -> bytes:
@@ -249,7 +248,7 @@ def color_dist(
     return delta_e
 
 
-@stub.function(network_file_systems={config.CACHE_DIR: volume})
+@stub.function(volumes={config.CACHE_DIR: volume})
 def create_composite_card(i: int, sample: bytes, prompt: str) -> bytes:
     """
     Takes a single Pokémon sample and creates a Pokémon card image for it.
@@ -276,7 +275,7 @@ def create_composite_card(i: int, sample: bytes, prompt: str) -> bytes:
     )
 
 
-@stub.function(network_file_systems={config.CACHE_DIR: volume})
+@stub.function(volumes={config.CACHE_DIR: volume})
 def create_pokemon_cards(prompt: str) -> list[dict]:
     norm_prompt = normalize_prompt(prompt)
     print(f"Creating for prompt '{norm_prompt}'")
6 changes: 4 additions & 2 deletions 06_gpu_and_ml/text-to-pokemon/text_to_pokemon/ops.py
@@ -22,7 +22,7 @@
 )
 
 
-@stub.function(network_file_systems={config.CACHE_DIR: volume})
+@stub.function(volumes={config.CACHE_DIR: volume})
 def reset_diskcache(dry_run=True) -> None:
     """
     Delete all Pokémon character samples and cards from disk cache.
@@ -68,6 +68,8 @@ def reset_diskcache(dry_run=True) -> None:
         for d in dirs:
             d.rmdir()
 
+    volume.commit()
+
 
 @stub.function()
 def extract_colors(num=3) -> None:
@@ -96,7 +98,7 @@
 
 @stub.function(
     image=rnn_image,
-    network_file_systems={config.CACHE_DIR: volume},
+    volumes={config.CACHE_DIR: volume},
     timeout=15 * 60,
 )
 def generate_pokemon_names():
13 changes: 7 additions & 6 deletions 06_gpu_and_ml/vision_model_training.py
@@ -30,9 +30,9 @@
 from modal import (
     Image,
     Mount,
-    NetworkFileSystem,
     Secret,
     Stub,
+    Volume,
     asgi_app,
     enter,
     method,
@@ -51,10 +51,10 @@
     "wandb~=0.13.4",
 )
 
-# A persisted network file system will store trained model artefacts across Modal app runs.
+# A persisted volume will store trained model artefacts across Modal app runs.
 # This is crucial as training runs are separate from the Gradio.app we run as a webhook.
 
-volume = NetworkFileSystem.persisted("cifar10-training-vol")
+volume = Volume.from_name("cifar10-training-vol", create_if_missing=True)
 
 FASTAI_HOME = "/fastai_home"
 MODEL_CACHE = pathlib.Path(FASTAI_HOME, "models")
@@ -135,7 +135,7 @@ def download_dataset():
 @stub.function(
     image=image,
     gpu=USE_GPU,
-    network_file_systems={str(MODEL_CACHE): volume},
+    volumes={str(MODEL_CACHE): volume},
     secrets=[Secret.from_name("my-wandb-secret")],
     timeout=2700,  # 45 minutes
 )
@@ -207,6 +207,7 @@ def train():
         WandbCallback
     )  # Added W&B callback is not compatible with inference.
     learn.export(MODEL_EXPORT_PATH)
+    volume.commit()
 
 
 # ## Trained model plumbing
@@ -220,7 +221,7 @@ def train():
 
 @stub.cls(
     image=image,
-    network_file_systems={str(MODEL_CACHE): volume},
+    volumes={str(MODEL_CACHE): volume},
 )
 class ClassifierModel:
     @enter()
@@ -291,7 +292,7 @@ def create_demo_examples() -> List[str]:
 
 @stub.function(
     image=image,
-    network_file_systems={str(MODEL_CACHE): volume},
+    volumes={str(MODEL_CACHE): volume},
     mounts=[Mount.from_local_dir(assets_path, remote_path="/assets")],
 )
 @asgi_app()
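This file shows the read side of the same lifecycle: the web app mounts the volume that `train()` committed to, and a long-lived container can call `reload()` to pick up commits made after it started, as the diffusers example at the top of this commit does. A sketch under the same hypothetical names as the earlier sketch:

@stub.function(volumes={MODEL_DIR: volume})
def predict() -> bytes:
    # Fetch the latest committed state of the volume before reading.
    volume.reload()
    return Path(MODEL_DIR, "checkpoint.bin").read_bytes()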
4 changes: 3 additions & 1 deletion 10_integrations/covid_datasette.py
@@ -42,7 +42,9 @@
 # To separate database creation and maintenance from serving, we'll need the underlying
 # database file to be stored persistently. To achieve this we use a [`Volume`](/docs/guide/volumes).
 
-volume = Volume.persisted("example-covid-datasette-cache-vol")
+volume = Volume.from_name(
+    "example-covid-datasette-cache-vol", create_if_missing=True
+)
 
 VOLUME_DIR = "/cache-vol"
 REPORTS_DIR = pathlib.Path(VOLUME_DIR, "COVID-19")
4 changes: 3 additions & 1 deletion 10_integrations/dbt/dbt_duckdb.py
@@ -66,7 +66,9 @@
     local_path=LOCAL_DBT_PROJECT / "profiles.yml",
     remote_path=Path(PROFILES_PATH, "profiles.yml"),
 )
-dbt_target = modal.NetworkFileSystem.persisted("dbt-target")
+dbt_target = modal.NetworkFileSystem.from_name(
+    "dbt-target", create_if_missing=True
+)
 # Create this secret using the "AWS" template at https://modal.com/secrets/create.
 # Be sure that the AWS user you provide credentials for has permission to
 # create S3 buckets and read/write data from them.
2 changes: 1 addition & 1 deletion 10_integrations/kedro_modal/kedro_modal/modal_functions.py
@@ -60,7 +60,7 @@ def main_stub(project_path, project_name, package_name) -> Stub:
         mounts=[kedro_proj_mount] + package_mounts,
     )
     volume_name = f"kedro.{project_name}.storage"
-    data_volume = NetworkFileSystem.persisted(volume_name)
+    data_volume = NetworkFileSystem.from_name(volume_name, create_if_missing=True)
 
     stub.function(network_file_systems={"/kedro-storage": data_volume})(
         run_kedro
