From 18bb294cb431862be9217e09facac347802f6798 Mon Sep 17 00:00:00 2001 From: Jonathon Belotti Date: Fri, 28 Jul 2023 22:12:56 +0000 Subject: [PATCH] update: stop using modal.NFS in OCR example --- 09_job_queues/doc_ocr_jobs.py | 35 ++++++++++++++++++++++++----------- 1 file changed, 24 insertions(+), 11 deletions(-) diff --git a/09_job_queues/doc_ocr_jobs.py b/09_job_queues/doc_ocr_jobs.py index dc3f4ca40..fd3559afd 100644 --- a/09_job_queues/doc_ocr_jobs.py +++ b/09_job_queues/doc_ocr_jobs.py @@ -33,28 +33,41 @@ # # `donut` downloads the weights for pre-trained models to a local directory, if those weights don't already exist. # To decrease start-up time, we want this download to happen just once, even across separate function invocations. -# To accomplish this, we use a [`NetworkFileSystem`](/docs/guide/shared-volumes), a writable volume that can be attached -# to Modal functions and persisted across function runs. +# To accomplish this, we use the [`Image.run_function`](docs/reference/modal.Image#run_function) method, which allows +# us to run some code at image build time to save the model weights into the image. -volume = modal.NetworkFileSystem.persisted("doc_ocr_model_vol") CACHE_PATH = "/root/model_cache" +MODEL_NAME = "naver-clova-ix/donut-base-finetuned-cord-v2" + + +def download_model_weights() -> None: + from huggingface_hub import snapshot_download + + snapshot_download(repo_id=MODEL_NAME, cache_dir=CACHE_PATH) + + +image = ( + modal.Image.debian_slim() + .pip_install( + "donut-python==1.0.7", + "huggingface-hub==0.16.4", + "transformers==4.21.3", + "timm==0.5.4", + ) + .run_function(download_model_weights) +) # ## Handler function # # Now let's define our handler function. Using the [@stub.function()](https://modal.com/docs/reference/modal.Stub#function) # decorator, we set up a Modal [Function](/docs/reference/modal.Function) that uses GPUs, -# has a [`NetworkFileSystem`](/docs/guide/shared-volumes) mount, runs on a [custom container image](/docs/guide/custom-container), +# runs on a [custom container image](/docs/guide/custom-container), # and automatically [retries](/docs/guide/retries#function-retries) failures up to 3 times. @stub.function( gpu="any", - image=modal.Image.debian_slim().pip_install( - "donut-python==1.0.7", - "transformers==4.21.3", - "timm==0.5.4", - ), - network_file_systems={CACHE_PATH: volume}, + image=image, retries=3, ) def parse_receipt(image: bytes): @@ -67,7 +80,7 @@ def parse_receipt(image: bytes): # Use donut fine-tuned on an OCR dataset. task_prompt = "" pretrained_model = DonutModel.from_pretrained( - "naver-clova-ix/donut-base-finetuned-cord-v2", + MODEL_NAME, cache_dir=CACHE_PATH, )