diff --git a/03_scaling_out/youtube_face_detection.py b/03_scaling_out/youtube_face_detection.py
index d1fc00433..cc2843c9f 100644
--- a/03_scaling_out/youtube_face_detection.py
+++ b/03_scaling_out/youtube_face_detection.py
@@ -59,7 +59,7 @@
 
 # For temporary storage and sharing of downloaded movie clips, we use a network file system.
 
-stub.sv = modal.NetworkFileSystem.new()
+stub.net_file_system = modal.NetworkFileSystem.new()
 
 # ### Face detection function
 #
@@ -73,7 +73,9 @@
 # and stores the resulting video back to the shared storage.
 
 
-@stub.function(network_file_systems={"/clips": stub.sv}, timeout=600)
+@stub.function(
+    network_file_systems={"/clips": stub.net_file_system}, timeout=600
+)
 def detect_faces(fn, start, stop):
     # Extract the subclip from the video
     clip = moviepy.editor.VideoFileClip(fn).subclip(start, stop)
@@ -106,7 +108,7 @@ def detect_faces(fn, start, stop):
 # 3. Stitch the results back into a new video
 
 
-@stub.function(network_file_systems={"/clips": stub.sv}, retries=1)
+@stub.function(network_file_systems={"/clips": stub.net_file_system}, retries=1)
 def process_video(url):
     print(f"Downloading video from '{url}'")
     yt = pytube.YouTube(url)
diff --git a/06_gpu_and_ml/obj_detection_webcam/webcam.py b/06_gpu_and_ml/obj_detection_webcam/webcam.py
index 22b07330e..df8a0e38f 100644
--- a/06_gpu_and_ml/obj_detection_webcam/webcam.py
+++ b/06_gpu_and_ml/obj_detection_webcam/webcam.py
@@ -38,27 +38,41 @@
 from modal import (
     Image,
     Mount,
-    Secret,
-    NetworkFileSystem,
     Stub,
     method,
     asgi_app,
 )
 
-# We mainly need to install [transformers](https://github.com/huggingface/transformers)
+
+# We need to install [transformers](https://github.com/huggingface/transformers)
 # which is a package Huggingface uses for all their models, but also
 # [Pillow](https://python-pillow.org/) which lets us work with images from Python,
 # and a system font for drawing.
+#
+# This example uses the `facebook/detr-resnet-50` pre-trained model, which is downloaded
+# once at image build time using the `download_model` function and saved into the image.
+# 'Baking' models into the `modal.Image` at build time provides the fastest cold starts.
+
+model_repo_id = "facebook/detr-resnet-50"
+
+
+def download_model():
+    from huggingface_hub import snapshot_download
+
+    snapshot_download(repo_id=model_repo_id, cache_dir="/cache")
+
 
 stub = Stub("example-webcam-object-detection")
 
 image = (
     Image.debian_slim()
     .pip_install(
+        "huggingface-hub==0.16.4",
         "Pillow",
         "timm",
         "transformers",
     )
     .apt_install("fonts-freefont-ttf")
+    .run_function(download_model)
 )
 
@@ -69,8 +83,8 @@
 # * There's a container initialization step in the `__enter__` method, which
 # runs on every container start. This lets us load the model only once per
 # container, so that it's reused for subsequent function calls.
-# * We store the model in a network file system. This lets us download the model only
-# the first time the function is ever called.
+# * Above we stored the model in the container image. This lets us download the model only
+# when the image is (re)built, and not every time the function is called.
 # * We're running it on multiple CPUs for extra performance
 #
 # Note that the function takes an image and returns a new image.
@@ -82,21 +96,19 @@
 
 @stub.cls(
     cpu=4,
-    network_file_systems={"/cache": NetworkFileSystem.new()},
     image=image,
-    secret=Secret.from_dict(
-        {"TORCH_HOME": "/cache", "TRANSFORMERS_CACHE": "/cache"}
-    ),
 )
 class ObjectDetection:
     def __enter__(self):
-        from transformers import DetrFeatureExtractor, DetrForObjectDetection
+        from transformers import DetrImageProcessor, DetrForObjectDetection
 
-        self.feature_extractor = DetrFeatureExtractor.from_pretrained(
-            "facebook/detr-resnet-50"
+        self.feature_extractor = DetrImageProcessor.from_pretrained(
+            model_repo_id,
+            cache_dir="/cache",
         )
         self.model = DetrForObjectDetection.from_pretrained(
-            "facebook/detr-resnet-50"
+            model_repo_id,
+            cache_dir="/cache",
         )
 
     @method()
@@ -113,8 +125,12 @@ def detect(self, img_data_in):
         inputs = self.feature_extractor(image, return_tensors="pt")
         outputs = self.model(**inputs)
         img_size = torch.tensor([tuple(reversed(image.size))])
-        processed_outputs = self.feature_extractor.post_process(
-            outputs, img_size
+        processed_outputs = (
+            self.feature_extractor.post_process_object_detection(
+                outputs=outputs,
+                target_sizes=img_size,
+                threshold=0,
+            )
         )
         output_dict = processed_outputs[0]
 
@@ -131,7 +147,7 @@ def detect(self, img_data_in):
         )
         output_image = Image.new("RGBA", (image.width, image.height))
         output_image_draw = ImageDraw.Draw(output_image)
-        for score, box, label in zip(scores, boxes, labels):
+        for _score, box, label in zip(scores, boxes, labels):
             color = colors[label % len(colors)]
             text = self.model.config.id2label[label]
             box = tuple(map(int, box))
diff --git a/10_integrations/stable_diffusion_slackbot.py b/10_integrations/stable_diffusion_slackbot.py
index 591d5de2e..260c54376 100644
--- a/10_integrations/stable_diffusion_slackbot.py
+++ b/10_integrations/stable_diffusion_slackbot.py
@@ -16,7 +16,7 @@
 import os
 from typing import Optional
 
-from modal import Image, Secret, NetworkFileSystem, Stub, web_endpoint
+from modal import Image, Secret, Stub, web_endpoint
 
 # All Modal programs need a [`Stub`](/docs/reference/modal.Stub) — an object that acts as a recipe for
 # the application. Let's give it a friendly name.
@@ -40,57 +40,68 @@
 #
 # ![create a huggingface token](./huggingface_token.png)
 #
-# ### Model cache
+# ### Model caching
 #
 # The `diffusers` library downloads the weights for a pre-trained model to a local
 # directory, if those weights don't already exist. To decrease start-up time, we want
 # this download to happen just once, even across separate function invocations.
-# To accomplish this, we use a [`NetworkFileSystem`](/docs/guide/shared-volumes), a
-# writable volume that can be attached to Modal functions and persisted across function runs.
+# To accomplish this, we use a simple function that will run at image build time and save the model into
+# the image's filesystem.
 
-volume = NetworkFileSystem.persisted("stable-diff-model-vol")
+CACHE_PATH = "/root/model_cache"
+
+
+def fetch_model(local_files_only: bool = False):
+    from diffusers import StableDiffusionPipeline
+    from torch import float16
+
+    return StableDiffusionPipeline.from_pretrained(
+        "runwayml/stable-diffusion-v1-5",
+        use_auth_token=os.environ["HUGGINGFACE_TOKEN"],
+        variant="fp16",
+        torch_dtype=float16,
+        device_map="auto",
+        cache_dir=CACHE_PATH,  # reads the model saved in the modal.Image's filesystem.
+        local_files_only=local_files_only,
+    )
+
+
+image = (
+    Image.debian_slim()
+    .run_commands(
+        "pip install torch --extra-index-url https://download.pytorch.org/whl/cu117"
+    )
+    .pip_install(
+        "diffusers",
+        "huggingface-hub",
+        "safetensors",
+        "transformers",
+        "scipy",
+        "ftfy",
+        "accelerate",
+    )
+    .run_function(fetch_model, secret=Secret.from_name("huggingface-secret"))
+)
 
 # ### The actual function
 #
-# Now that we have our token and `NetworkFileSystem` set up, we can put everything together.
+# Now that we have our token and `modal.Image` set up, we can put everything together.
 #
 # Let's define a function that takes a text prompt and an optional channel name
 # (so we can post results to Slack if the value is set) and runs stable diffusion.
 # The `@stub.function()` decorator declares all the resources this function will
-# use: we configure it to use a GPU, run on an image that has all the packages we
-# need to run the model, mount the `NetworkFileSystem` to a path of our choice, and
+# use: we configure it to use a GPU, run on an image that has all the packages and files we
+# need to run the model, and
 # also provide it the secret that contains the token we created above.
-#
-# By setting the `cache_dir` argument for the model to the mount path of our
-# `NetworkFileSystem`, we ensure that the model weights are downloaded only once.
-
-CACHE_PATH = "/root/model_cache"
 
 
 @stub.function(
     gpu="A10G",
-    image=(
-        Image.debian_slim()
-        .run_commands(
-            "pip install torch --extra-index-url https://download.pytorch.org/whl/cu117"
-        )
-        .pip_install("diffusers", "transformers", "scipy", "ftfy", "accelerate")
-    ),
-    network_file_systems={CACHE_PATH: volume},
+    image=image,
     secret=Secret.from_name("huggingface-secret"),
 )
 async def run_stable_diffusion(prompt: str, channel_name: Optional[str] = None):
-    from diffusers import StableDiffusionPipeline
-    from torch import float16
-
-    pipe = StableDiffusionPipeline.from_pretrained(
-        "runwayml/stable-diffusion-v1-5",
-        use_auth_token=os.environ["HUGGINGFACE_TOKEN"],
-        revision="fp16",
-        torch_dtype=float16,
-        cache_dir=CACHE_PATH,
-        device_map="auto",
-    )
+    pipe = fetch_model(local_files_only=True)
 
     image = pipe(prompt, num_inference_steps=100).images[0]
diff --git a/misc/news_summarizer.py b/misc/news_summarizer.py
index 7f001afcb..d5436b505 100644
--- a/misc/news_summarizer.py
+++ b/misc/news_summarizer.py
@@ -13,21 +13,41 @@
 
 import modal
 
+stub = modal.Stub(name="example-news-summarizer")
+
 # ## Building Images and Downloading Pre-trained Model
 #
 # We start by defining our images. In Modal, each function can use a different
 # image. This is powerful because you add only the dependencies you need for
 # each function.
 
-stub = modal.Stub(name="example-news-summarizer")
-MODEL_NAME = "google/pegasus-xsum"
-CACHE_DIR = "/cache"
-
 # The first image contains dependencies for running our model. We also download the
-# pre-trained model into the image using the `huggingface` API. This caches the model so that
-# we don't have to download it on every function call.
-stub["deep_learning_image"] = modal.Image.debian_slim().pip_install(
-    "transformers==4.16.2", "torch", "sentencepiece"
+# pre-trained model into the image using the `from_pretrained` method.
+# This caches the model so that we don't have to download it on every function call.
+# The model will be saved at `/cache` when this function is called at image build time;
+# subsequent calls of this function at runtime will then load the model from `/cache`.
+
+
+def fetch_model(local_files_only: bool = False):
+    from transformers import PegasusForConditionalGeneration, PegasusTokenizer
+
+    tokenizer = PegasusTokenizer.from_pretrained(
+        "google/pegasus-xsum",
+        cache_dir="/cache",
+        local_files_only=local_files_only,
+    )
+    model = PegasusForConditionalGeneration.from_pretrained(
+        "google/pegasus-xsum",
+        cache_dir="/cache",
+        local_files_only=local_files_only,
+    )
+    return model, tokenizer
+
+
+stub["deep_learning_image"] = (
+    modal.Image.debian_slim()
+    .pip_install("transformers==4.16.2", "torch", "sentencepiece")
+    .run_function(fetch_model)
 )
 
 # Defining the scraping image is very similar. This image only contains the packages required
@@ -36,19 +56,6 @@
     "requests", "beautifulsoup4", "lxml"
 )
 
-volume = modal.NetworkFileSystem.persisted("pegasus-modal-vol")
-
-# We will also instantiate the model and tokenizer globally so it’s available for all functions that use this image.
-if stub.is_inside(stub["deep_learning_image"]):
-    from transformers import PegasusForConditionalGeneration, PegasusTokenizer
-
-    TOKENIZER = PegasusTokenizer.from_pretrained(
-        MODEL_NAME, cache_dir=CACHE_DIR
-    )
-    MODEL = PegasusForConditionalGeneration.from_pretrained(
-        MODEL_NAME, cache_dir=CACHE_DIR
-    )
-
 if stub.is_inside(stub["scraping_image"]):
     import requests
 
@@ -145,18 +152,21 @@ def scrape_nyc_article(url: str) -> str:
 @stub.function(
     image=stub["deep_learning_image"],
     gpu=False,
-    network_file_systems={CACHE_DIR: volume},
     memory=4096,
 )
 def summarize_article(text: str) -> str:
     print(f"Summarizing text with {len(text)} characters.")
 
+    # `local_files_only` is set to `True` because we expect to read the model
+    # files saved in the image.
+    model, tokenizer = fetch_model(local_files_only=True)
+
     # summarize text
-    batch = TOKENIZER(
+    batch = tokenizer(
         [text], truncation=True, padding="longest", return_tensors="pt"
     ).to("cpu")
-    translated = MODEL.generate(**batch)
-    summary = TOKENIZER.batch_decode(translated, skip_special_tokens=True)[0]
+    translated = model.generate(**batch)
+    summary = tokenizer.batch_decode(translated, skip_special_tokens=True)[0]
     return summary
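
The webcam, Slackbot, and news-summarizer diffs above all follow the same pattern: wrap the model download in a helper function, run that helper at image build time with `Image.run_function` so the weights are baked into the image, and call the same helper again at runtime with `local_files_only=True` so it only loads the baked files. Below is a minimal, self-contained sketch of that pattern, assuming the Stub-era Modal API and the Pegasus model used above; the app name and function names are placeholders rather than code from any of these files.

# Minimal sketch of the build-time model-baking pattern (hypothetical names).
from modal import Image, Stub

stub = Stub("bake-model-sketch")  # placeholder app name

MODEL_ID = "google/pegasus-xsum"


def fetch_model(local_files_only: bool = False):
    # At image build time this downloads the weights into /cache; at runtime,
    # with local_files_only=True, it only loads the files already baked in.
    from transformers import PegasusForConditionalGeneration, PegasusTokenizer

    tokenizer = PegasusTokenizer.from_pretrained(
        MODEL_ID, cache_dir="/cache", local_files_only=local_files_only
    )
    model = PegasusForConditionalGeneration.from_pretrained(
        MODEL_ID, cache_dir="/cache", local_files_only=local_files_only
    )
    return model, tokenizer


image = (
    Image.debian_slim()
    .pip_install("transformers==4.16.2", "torch", "sentencepiece")
    .run_function(fetch_model)  # runs once, while the image is built
)


@stub.function(image=image)
def summarize(text: str) -> str:
    model, tokenizer = fetch_model(local_files_only=True)
    batch = tokenizer([text], truncation=True, padding="longest", return_tensors="pt")
    generated = model.generate(**batch)
    return tokenizer.batch_decode(generated, skip_special_tokens=True)[0]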