Commit
Update a few examples (#621)
* Update a few examples

* Ruff

* Update download.py

---------

Co-authored-by: Eric Zhang <[email protected]>
erikbern and ekzhang authored Mar 5, 2024
1 parent 495b005 commit 4d0562c
Showing 16 changed files with 59 additions and 37 deletions.
16 changes: 10 additions & 6 deletions 06_gpu_and_ml/diffusers/train_and_serve_diffusers_script.py
@@ -180,12 +180,16 @@
 stub = Stub(name="example-diffusers-app")
 
 MODEL_DIR = Path("/model")
-stub.training_data_volume = Volume.persisted("diffusers-training-data-volume")
-stub.model_volume = Volume.persisted("diffusers-model-volume")
+training_data_volume = Volume.from_name(
+    "diffusers-training-data-volume", create_if_missing=True
+)
+model_volume = Volume.from_name(
+    "diffusers-model-volume", create_if_missing=True
+)
 
 VOLUME_CONFIG = {
-    "/training_data": stub.training_data_volume,
-    "/model": stub.model_volume,
+    "/training_data": training_data_volume,
+    "/model": model_volume,
 }
 
 # ## Set up config
@@ -337,7 +341,7 @@ def launch_training():
     # The trained model artefacts have been output to the volume mounted at `MODEL_DIR`.
    # To persist these artefacts for use in future inference function calls, we 'commit' the changes
     # to the volume.
-    stub.model_volume.commit()
+    model_volume.commit()
 
 
 @stub.local_entrypoint()
@@ -370,7 +374,7 @@ def load_model(self):
         from diffusers import DDIMScheduler, StableDiffusionPipeline
 
         # Reload the modal.Volume to ensure the latest state is accessible.
-        stub.model_volume.reload()
+        model_volume.reload()
 
         # set up a hugging face inference pipeline using our model
         ddim = DDIMScheduler.from_pretrained(MODEL_DIR, subfolder="scheduler")
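The pattern above repeats through the rest of the commit: the older `Volume.persisted(name)` constructor becomes `Volume.from_name(name, create_if_missing=True)`, and volumes move from `stub` attributes to plain module-level variables. A minimal sketch of the before and after, using a hypothetical app and volume name:

from modal import Stub, Volume

stub = Stub("example-app")  # hypothetical app name

# Older API: implicitly created the volume and hung it off the stub.
# stub.data_volume = Volume.persisted("my-data-vol")

# Current API: look up the volume by name, creating it on first use.
data_volume = Volume.from_name("my-data-vol", create_if_missing=True)

# Mount dicts then reference the module-level variable directly.
VOLUME_CONFIG = {"/data": data_volume}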
4 changes: 3 additions & 1 deletion 06_gpu_and_ml/dreambooth/dreambooth_app.py
@@ -153,7 +153,9 @@ def download_models():
 # We'll use one to store the fine-tuned weights we create during training
 # and then load them back in for inference.
 
-volume = Volume.persisted("dreambooth-finetuning-volume")
+volume = Volume.from_name(
+    "dreambooth-finetuning-volume", create_if_missing=True
+)
 MODEL_DIR = "/model"
 
 
2 changes: 1 addition & 1 deletion 06_gpu_and_ml/embeddings/text_embeddings_inference.py
@@ -58,7 +58,7 @@ def download_model():
     spawn_server().terminate()
 
 
-volume = Volume.persisted("tei-hn-data")
+volume = Volume.from_name("tei-hn-data", create_if_missing=True)
 
 stub = Stub("example-tei")
 
2 changes: 1 addition & 1 deletion 06_gpu_and_ml/embeddings/wikipedia/download.py
@@ -7,7 +7,7 @@
 
 
 # We define our Modal Resources that we'll need
-volume = Volume.persisted("embedding-wikipedia")
+volume = Volume.from_name("embedding-wikipedia", create_if_missing=True)
 image = Image.debian_slim(python_version="3.9").pip_install(
     "datasets==2.16.1", "apache_beam==2.53.0"
 )
8 changes: 6 additions & 2 deletions 06_gpu_and_ml/embeddings/wikipedia/main.py
@@ -19,8 +19,12 @@
 
 ## Dataset-Specific Configuration
 DATASET_NAME = "wikipedia"
-DATASET_READ_VOLUME = Volume.persisted("embedding-wikipedia")
-EMBEDDING_CHECKPOINT_VOLUME = Volume.persisted("checkpoint")
+DATASET_READ_VOLUME = Volume.from_name(
+    "embedding-wikipedia", create_if_missing=True
+)
+EMBEDDING_CHECKPOINT_VOLUME = Volume.from_name(
+    "checkpoint", create_if_missing=True
+)
 DATASET_DIR = "/data"
 CHECKPOINT_DIR = "/checkpoint"
 SAVE_TO_DISK = True
2 changes: 1 addition & 1 deletion 06_gpu_and_ml/flan_t5/flan_t5_finetune.py
@@ -38,7 +38,7 @@
 )
 
 stub = Stub(name="example-news-summarizer", image=image)
-output_vol = Volume.persisted("finetune-volume")
+output_vol = Volume.from_name("finetune-volume", create_if_missing=True)
 
 # ### Handling preemption
 #
8 changes: 5 additions & 3 deletions 06_gpu_and_ml/openai_whisper/finetuning/train/__main__.py
@@ -22,8 +22,9 @@
 )
 
 
-persistent_volume = modal.NetworkFileSystem.persisted(
-    app_config.persistent_vol_name
+persistent_volume = modal.Volume.from_name(
+    app_config.persistent_vol_name,
+    create_if_missing=True,
 )
 image = modal.Image.debian_slim().pip_install_from_requirements(
     "requirements.txt"
@@ -39,7 +40,7 @@
 
 @stub.function(
     gpu="A10G",
-    network_file_systems={app_config.model_dir: persistent_volume},
+    volumes={app_config.model_dir: persistent_volume},
     # 12hrs
     timeout=12 * 60 * 60,
     # For occasional connection error to 'cdn-lfs.huggingface.co'
@@ -447,6 +448,7 @@ def compute_metrics(pred):
     trainer.log_metrics("train", metrics)
     trainer.save_metrics("train", metrics)
     trainer.save_state()
+    persistent_volume.commit()
 
     logger.info("13. Running evaluation")
     results = {}  # type: ignore
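This file changes more than the constructor: the `NetworkFileSystem` becomes a `Volume`, the `network_file_systems=` function argument becomes `volumes=`, and a `persistent_volume.commit()` call is added after training, since Volume writes only become visible to other containers once committed. A sketch of that mount-write-commit lifecycle, with hypothetical names and paths:

from pathlib import Path

from modal import Stub, Volume

stub = Stub("example-finetune")  # hypothetical app name
volume = Volume.from_name("finetune-vol", create_if_missing=True)  # hypothetical
MODEL_DIR = "/model"  # hypothetical mount point

@stub.function(volumes={MODEL_DIR: volume}, gpu="A10G", timeout=12 * 60 * 60)
def train():
    # ... training loop writes checkpoints under MODEL_DIR ...
    Path(MODEL_DIR, "checkpoint.bin").write_bytes(b"...")
    # Persist the writes so later containers (e.g. inference) can read them.
    volume.commit()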
4 changes: 3 additions & 1 deletion 06_gpu_and_ml/openai_whisper/pod_transcriber/app/main.py
@@ -23,7 +23,9 @@
 from . import config, podcast, search
 
 logger = config.get_logger(__name__)
-volume = NetworkFileSystem.persisted("dataset-cache-vol")
+volume = NetworkFileSystem.from_name(
+    "dataset-cache-vol", create_if_missing=True
+)
 
 app_image = (
     Image.debian_slim(python_version="3.10")
4 changes: 3 additions & 1 deletion 06_gpu_and_ml/spam-detect/spam_detect/app.py
@@ -20,4 +20,6 @@
 
 stub = modal.Stub(name="example-spam-detect-llm", image=image)
 # Used to store datasets, trained models, model metadata, config.
-volume = modal.Volume.persisted("example-spam-detect-vol")
+volume = modal.Volume.from_name(
+    "example-spam-detect-vol", create_if_missing=True
+)
4 changes: 2 additions & 2 deletions 06_gpu_and_ml/text-to-pokemon/text_to_pokemon/config.py
@@ -1,7 +1,7 @@
 import pathlib
 import time
 
-from modal import Image, NetworkFileSystem, Stub
+from modal import Image, Stub, Volume
 
 CACHE_DIR = "/cache"
 MODEL_CACHE = pathlib.Path("/models")
@@ -189,7 +189,7 @@ def null_safety(images, **kwargs):
     return pipe
 
 
-volume = NetworkFileSystem.persisted("txt-to-pokemon-cache-vol")
+volume = Volume.from_name("txt-to-pokemon-cache-vol", create_if_missing=True)
 image = (
     Image.debian_slim()
     .pip_install(
13 changes: 6 additions & 7 deletions 06_gpu_and_ml/text-to-pokemon/text_to_pokemon/main.py
@@ -61,9 +61,7 @@ def image_to_byte_array(image) -> bytes:
     return buf.getvalue()
 
 
-@stub.cls(
-    gpu="A10G", network_file_systems={config.CACHE_DIR: volume}, keep_warm=1
-)
+@stub.cls(gpu="A10G", volumes={config.CACHE_DIR: volume}, keep_warm=1)
 class Model:
     @enter()
     def load_model(self):
@@ -90,7 +88,7 @@ def normalize_prompt(p: str) -> str:
     return re.sub("[^a-z0-9- ]", "", p.lower())
 
 
-@stub.function(network_file_systems={config.CACHE_DIR: volume})
+@stub.function(volumes={config.CACHE_DIR: volume})
 def diskcached_text_to_pokemon(prompt: str) -> list[bytes]:
     start_time = time.monotonic()
     cached = False
@@ -121,6 +119,7 @@ def diskcached_text_to_pokemon(prompt: str) -> list[bytes]:
         with open(dest_path, "wb") as f:
             f.write(image_bytes)
         print(f"✔️ Saved a Pokémon sample to {dest_path}.")
+    volume.commit()
     total_duration_secs = timedelta(
         seconds=time.monotonic() - start_time
     ).total_seconds()
@@ -152,7 +151,7 @@ def fastapi_app():
 
 @stub.function(
     image=inpaint.cv_image,
-    network_file_systems={config.CACHE_DIR: volume},
+    volumes={config.CACHE_DIR: volume},
     interactive=False,
 )
 def inpaint_new_pokemon_name(card_image: bytes, prompt: str) -> bytes:
@@ -249,7 +248,7 @@ def color_dist(
     return delta_e
 
 
-@stub.function(network_file_systems={config.CACHE_DIR: volume})
+@stub.function(volumes={config.CACHE_DIR: volume})
 def create_composite_card(i: int, sample: bytes, prompt: str) -> bytes:
     """
     Takes a single Pokémon sample and creates a Pokémon card image for it.
@@ -276,7 +275,7 @@ def create_composite_card(i: int, sample: bytes, prompt: str) -> bytes:
     )
 
 
-@stub.function(network_file_systems={config.CACHE_DIR: volume})
+@stub.function(volumes={config.CACHE_DIR: volume})
 def create_pokemon_cards(prompt: str) -> list[dict]:
     norm_prompt = normalize_prompt(prompt)
     print(f"Creating for prompt '{norm_prompt}'")
6 changes: 4 additions & 2 deletions 06_gpu_and_ml/text-to-pokemon/text_to_pokemon/ops.py
@@ -22,7 +22,7 @@
 )
 
 
-@stub.function(network_file_systems={config.CACHE_DIR: volume})
+@stub.function(volumes={config.CACHE_DIR: volume})
 def reset_diskcache(dry_run=True) -> None:
     """
     Delete all Pokémon character samples and cards from disk cache.
@@ -68,6 +68,8 @@ def reset_diskcache(dry_run=True) -> None:
         for d in dirs:
             d.rmdir()
 
+    volume.commit()
+
 
 @stub.function()
 def extract_colors(num=3) -> None:
@@ -96,7 +98,7 @@
 
 @stub.function(
     image=rnn_image,
-    network_file_systems={config.CACHE_DIR: volume},
+    volumes={config.CACHE_DIR: volume},
     timeout=15 * 60,
 )
 def generate_pokemon_names():
13 changes: 7 additions & 6 deletions 06_gpu_and_ml/vision_model_training.py
@@ -30,9 +30,9 @@
 from modal import (
     Image,
     Mount,
-    NetworkFileSystem,
     Secret,
     Stub,
+    Volume,
     asgi_app,
     enter,
     method,
@@ -51,10 +51,10 @@
     "wandb~=0.13.4",
 )
 
-# A persisted network file system will store trained model artefacts across Modal app runs.
+# A persisted volume will store trained model artefacts across Modal app runs.
 # This is crucial as training runs are separate from the Gradio.app we run as a webhook.
 
-volume = NetworkFileSystem.persisted("cifar10-training-vol")
+volume = Volume.from_name("cifar10-training-vol", create_if_missing=True)
 
 FASTAI_HOME = "/fastai_home"
 MODEL_CACHE = pathlib.Path(FASTAI_HOME, "models")
@@ -135,7 +135,7 @@ def download_dataset():
 @stub.function(
     image=image,
     gpu=USE_GPU,
-    network_file_systems={str(MODEL_CACHE): volume},
+    volumes={str(MODEL_CACHE): volume},
     secrets=[Secret.from_name("my-wandb-secret")],
     timeout=2700,  # 45 minutes
 )
@@ -207,6 +207,7 @@ def train():
         WandbCallback
     )  # Added W&B callback is not compatible with inference.
     learn.export(MODEL_EXPORT_PATH)
+    volume.commit()
 
 
 # ## Trained model plumbing
@@ -220,7 +221,7 @@ def train():
 
 @stub.cls(
     image=image,
-    network_file_systems={str(MODEL_CACHE): volume},
+    volumes={str(MODEL_CACHE): volume},
 )
 class ClassifierModel:
     @enter()
@@ -291,7 +292,7 @@ def create_demo_examples() -> List[str]:
 
 @stub.function(
     image=image,
-    network_file_systems={str(MODEL_CACHE): volume},
+    volumes={str(MODEL_CACHE): volume},
     mounts=[Mount.from_local_dir(assets_path, remote_path="/assets")],
 )
 @asgi_app()
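This file shows the read side of the same lifecycle: the web app mounts the volume that `train()` committed to, and a long-lived container can call `reload()` to pick up commits made after it started, as the diffusers example at the top of this commit does. A sketch under the same hypothetical names as the earlier sketch:

@stub.function(volumes={MODEL_DIR: volume})
def predict() -> bytes:
    # Fetch the latest committed state of the volume before reading.
    volume.reload()
    return Path(MODEL_DIR, "checkpoint.bin").read_bytes()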
4 changes: 3 additions & 1 deletion 10_integrations/covid_datasette.py
@@ -42,7 +42,9 @@
 # To separate database creation and maintenance from serving, we'll need the underlying
 # database file to be stored persistently. To achieve this we use a [`Volume`](/docs/guide/volumes).
 
-volume = Volume.persisted("example-covid-datasette-cache-vol")
+volume = Volume.from_name(
+    "example-covid-datasette-cache-vol", create_if_missing=True
+)
 
 VOLUME_DIR = "/cache-vol"
 REPORTS_DIR = pathlib.Path(VOLUME_DIR, "COVID-19")
4 changes: 3 additions & 1 deletion 10_integrations/dbt/dbt_duckdb.py
@@ -66,7 +66,9 @@
     local_path=LOCAL_DBT_PROJECT / "profiles.yml",
     remote_path=Path(PROFILES_PATH, "profiles.yml"),
 )
-dbt_target = modal.NetworkFileSystem.persisted("dbt-target")
+dbt_target = modal.NetworkFileSystem.from_name(
+    "dbt-target", create_if_missing=True
+)
 # Create this secret using the "AWS" template at https://modal.com/secrets/create.
 # Be sure that the AWS user you provide credentials for has permission to
 # create S3 buckets and read/write data from them.
2 changes: 1 addition & 1 deletion 10_integrations/kedro_modal/kedro_modal/modal_functions.py
@@ -60,7 +60,7 @@ def main_stub(project_path, project_name, package_name) -> Stub:
         mounts=[kedro_proj_mount] + package_mounts,
     )
     volume_name = f"kedro.{project_name}.storage"
-    data_volume = NetworkFileSystem.persisted(volume_name)
+    data_volume = NetworkFileSystem.from_name(volume_name, create_if_missing=True)
 
     stub.function(network_file_systems={"/kedro-storage": data_volume})(
         run_kedro
