From 959865f82493b14a386842ab293e9b62c1a57953 Mon Sep 17 00:00:00 2001 From: Elias Freider Date: Thu, 7 Nov 2024 14:54:42 +0100 Subject: [PATCH 1/4] wip --- modal/image.py | 9 ++++++++- test/image_test.py | 17 +++++++++++++++++ 2 files changed, 25 insertions(+), 1 deletion(-) diff --git a/modal/image.py b/modal/image.py index b5d5edcaf..1533be698 100644 --- a/modal/image.py +++ b/modal/image.py @@ -606,6 +606,13 @@ def build_dockerfile(version: ImageBuilderVersion) -> DockerfileSpec: context_mount=mount, ) + def add_local_file( + self, local_path: Union[str, Path], remote_path: Union[str, PurePosixPath] = "./", *, copy=False + ): + # TODO: handle relative remote_path respecting workdirs! + mount = _Mount.from_local_file(local_path, remote_path) + return self._add_mount_layer_or_copy(mount, copy=copy) + def copy_local_file(self, local_path: Union[str, Path], remote_path: Union[str, Path] = "./") -> "_Image": """Copy a file into the image as a part of building it. @@ -1634,7 +1641,7 @@ def build_dockerfile(version: ImageBuilderVersion) -> DockerfileSpec: dockerfile_function=build_dockerfile, ) - def workdir(self, path: str) -> "_Image": + def workdir(self, path: Union[str, PurePosixPath]) -> "_Image": """Set the working directory for subsequent image build steps and function execution. **Example** diff --git a/test/image_test.py b/test/image_test.py index 2403da17f..76ff58945 100644 --- a/test/image_test.py +++ b/test/image_test.py @@ -624,6 +624,23 @@ def tmp_path_with_content(tmp_path): return tmp_path +def test_image_add_local_file(servicer, client, tmp_path_with_content): + app = App() + + img = ( + Image.from_registry("unknown_image") + .add_local_file(tmp_path_with_content / "data.txt", "/some/place/nice.txt") # absolute + .add_local_file(tmp_path_with_content / "data.txt", "output.txt") # relative target + .add_local_file(tmp_path_with_content / "data.txt") # default target + ) + app.function(image=img)(dummy) + + with app.run(client=client): + assert len(img._mount_layers) == 3 + abs_path, rel_path, default_path = img._mount_layers + # the mounts all need to have absolute paths to be *mountable* as mount layers + + def test_image_copy_local_dir(builder_version, servicer, client, tmp_path_with_content): app = App() app.image = Image.debian_slim().copy_local_dir(tmp_path_with_content, remote_path="/dummy") From 18ca935928ee6f7920f9b705bde04cc8ae224dda Mon Sep 17 00:00:00 2001 From: Elias Freider Date: Thu, 14 Nov 2024 14:38:21 +0100 Subject: [PATCH 2/4] Adds tests and working .attach_local_file and .attach_local_dir --- modal/image.py | 54 ++++++++++++++++++++++++++++++++++--- modal/mount.py | 4 ++- test/image_test.py | 67 +++++++++++++++++++++++++++++++++++++++++----- 3 files changed, 113 insertions(+), 12 deletions(-) diff --git a/modal/image.py b/modal/image.py index 1533be698..5b509078f 100644 --- a/modal/image.py +++ b/modal/image.py @@ -606,19 +606,65 @@ def build_dockerfile(version: ImageBuilderVersion) -> DockerfileSpec: context_mount=mount, ) - def add_local_file( - self, local_path: Union[str, Path], remote_path: Union[str, PurePosixPath] = "./", *, copy=False - ): - # TODO: handle relative remote_path respecting workdirs! + def attach_local_file(self, local_path: Union[str, Path], remote_path: str, *, copy: bool = False) -> "_Image": + """Attach a local file to the image + + By default, the file is attached as a lightweight mount on top of any + container that runs the returned Image. This makes this operation quick + and will not invalidate + rebuild the image when contents of the mount + change. + + In order to run use the file in *subsequent* build steps, you explicitly + need to set `copy=True` on this function to instead copy the data into + the image itself. + + In copy=True mode, this command works similar way to [`COPY`](https://docs.docker.com/engine/reference/builder/#copy) + works in a `Dockerfile`. + """ + if not os.path.isabs(remote_path): + # TODO(elias): implement relative to absolute resolution using image workdir metadata + # + make default remote_path="./" + raise InvalidError("image.attach_local_file() currently only supports absolute remote_path values") + mount = _Mount.from_local_file(local_path, remote_path) return self._add_mount_layer_or_copy(mount, copy=copy) + def attach_local_dir( + self, local_path: Union[str, Path], remote_path: Union[str, Path], *, copy: bool = False + ) -> "_Image": + """Attach a local dir to the image + + By default, the dir is attached as a lightweight mount on top of any + container that runs the returned Image. This makes this operation quick + and will not invalidate + rebuild the image when contents of the mount + change. + + In order to run use the dir in *subsequent* build steps, you explicitly + need to set `copy=True` on this function to instead copy the data into + the image itself. + + In copy=True mode, this command works similar way to [`COPY`](https://docs.docker.com/engine/reference/builder/#copy) + works in a `Dockerfile`. + """ + # TODO(elias): distinguish adding *into* remote_path and *as* remote_path, similar + # to shell `cp -r source dest` vs `cp -r source/ dest` + # NOTE: docker COPY always copies the *contents* of the source directory, and you have + # to specify the destination dir name explicitly + remote_path = PurePosixPath(remote_path) + if not remote_path.is_absolute(): + # TODO(elias): implement relative to absolute resolution using image workdir metadata + # + make default remote_path="./" + raise InvalidError("image.attach_local_dir() currently only supports absolute remote_path values") + mount = _Mount.from_local_dir(local_path, remote_path=remote_path) + return self._add_mount_layer_or_copy(mount, copy=copy) + def copy_local_file(self, local_path: Union[str, Path], remote_path: Union[str, Path] = "./") -> "_Image": """Copy a file into the image as a part of building it. This works in a similar way to [`COPY`](https://docs.docker.com/engine/reference/builder/#copy) works in a `Dockerfile`. """ + # TODO(elias): add pending deprecation - use attach basename = str(Path(local_path).name) mount = _Mount.from_local_file(local_path, remote_path=f"/{basename}") diff --git a/modal/mount.py b/modal/mount.py index 4c62b99b6..305a5b2aa 100644 --- a/modal/mount.py +++ b/modal/mount.py @@ -378,7 +378,9 @@ def from_local_dir( ) def add_local_file( - self, local_path: Union[str, Path], remote_path: Union[str, PurePosixPath, None] = None + self, + local_path: Union[str, Path], + remote_path: Union[str, PurePosixPath, None] = None, ) -> "_Mount": """ Add a local file to the `Mount` object. diff --git a/test/image_test.py b/test/image_test.py index 76ff58945..279e52e5a 100644 --- a/test/image_test.py +++ b/test/image_test.py @@ -624,21 +624,74 @@ def tmp_path_with_content(tmp_path): return tmp_path -def test_image_add_local_file(servicer, client, tmp_path_with_content): +@pytest.mark.parametrize(["copy"], [(True,), (False,)]) +@pytest.mark.parametrize( + ["remote_path", "expected_dest"], + [ + ("/place/nice.txt", "/place/nice.txt"), + # Not supported yet, but soon: + # ("/place/", "/place/data.txt"), # use original basename if destination has a trailing slash + # ("output.txt", "/proj/output.txt") # workdir relative target + # (None, "/proj/data.txt") # default target - basename in current directory + ], +) +def test_image_attach_local_file(servicer, client, tmp_path_with_content, copy, remote_path, expected_dest): + app = App() + + img = ( + Image.from_registry("unknown_image") + .workdir("/proj") + .attach_local_file(tmp_path_with_content / "data.txt", remote_path, copy=copy) + ) + app.function(image=img)(dummy) + + with app.run(client=client): + if copy: + # check that dockerfile commands include COPY . . + layers = get_image_layers(img.object_id, servicer) + assert layers[0].dockerfile_commands == ["FROM base", "COPY . /"] + mount_id = layers[0].context_mount_id + # and then get the relevant context mount to check + if not copy: + assert len(img._mount_layers) == 1 + mount_id = img._mount_layers[0].object_id + + assert set(servicer.mount_contents[mount_id].keys()) == {expected_dest} + + +@pytest.mark.parametrize(["copy"], [(True,), (False,)]) +@pytest.mark.parametrize( + ["remote_path", "expected_dest"], + [ + ("/place", "/place/sub"), # similar to docker copy - not like shell cp which would copy the source dir itself + ("/place/", "/place/sub"), # trailing slash on source makes no difference + # Not supported yet, but soon: + # ("place", "/proj/place/sub") # workdir relative target + # (None, "/proj/sub") # default target - copy into current directory + ], +) +def test_image_attach_local_dir(servicer, client, tmp_path_with_content, copy, remote_path, expected_dest): app = App() img = ( Image.from_registry("unknown_image") - .add_local_file(tmp_path_with_content / "data.txt", "/some/place/nice.txt") # absolute - .add_local_file(tmp_path_with_content / "data.txt", "output.txt") # relative target - .add_local_file(tmp_path_with_content / "data.txt") # default target + .workdir("/proj") + .attach_local_dir(tmp_path_with_content / "data", remote_path, copy=copy) ) app.function(image=img)(dummy) with app.run(client=client): - assert len(img._mount_layers) == 3 - abs_path, rel_path, default_path = img._mount_layers - # the mounts all need to have absolute paths to be *mountable* as mount layers + if copy: + # check that dockerfile commands include COPY . . + layers = get_image_layers(img.object_id, servicer) + assert layers[0].dockerfile_commands == ["FROM base", "COPY . /"] + mount_id = layers[0].context_mount_id + # and then get the relevant context mount to check + if not copy: + assert len(img._mount_layers) == 1 + mount_id = img._mount_layers[0].object_id + + assert set(servicer.mount_contents[mount_id].keys()) == {expected_dest} def test_image_copy_local_dir(builder_version, servicer, client, tmp_path_with_content): From a4e7016eaa5ae3e76b9770eb40ec300811d98512 Mon Sep 17 00:00:00 2001 From: Elias Freider Date: Fri, 15 Nov 2024 13:39:20 +0100 Subject: [PATCH 3/4] allow inferred base name when using trailling slash on destination --- modal/image.py | 3 +++ test/image_test.py | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/modal/image.py b/modal/image.py index 5b509078f..4f2bf70e2 100644 --- a/modal/image.py +++ b/modal/image.py @@ -626,6 +626,9 @@ def attach_local_file(self, local_path: Union[str, Path], remote_path: str, *, c # + make default remote_path="./" raise InvalidError("image.attach_local_file() currently only supports absolute remote_path values") + if remote_path.endswith("/"): + remote_path = remote_path + Path(local_path).name + mount = _Mount.from_local_file(local_path, remote_path) return self._add_mount_layer_or_copy(mount, copy=copy) diff --git a/test/image_test.py b/test/image_test.py index 279e52e5a..81f360e07 100644 --- a/test/image_test.py +++ b/test/image_test.py @@ -630,7 +630,7 @@ def tmp_path_with_content(tmp_path): [ ("/place/nice.txt", "/place/nice.txt"), # Not supported yet, but soon: - # ("/place/", "/place/data.txt"), # use original basename if destination has a trailing slash + ("/place/", "/place/data.txt"), # use original basename if destination has a trailing slash # ("output.txt", "/proj/output.txt") # workdir relative target # (None, "/proj/data.txt") # default target - basename in current directory ], From 11dd4469ec6ca4fc379e84b9400cdc9af2202f42 Mon Sep 17 00:00:00 2001 From: Elias Freider Date: Fri, 15 Nov 2024 13:46:54 +0100 Subject: [PATCH 4/4] Prep tests for relative path testing --- modal/image.py | 2 ++ test/image_test.py | 19 +++++++++++++++---- 2 files changed, 17 insertions(+), 4 deletions(-) diff --git a/modal/image.py b/modal/image.py index 4f2bf70e2..68f939de6 100644 --- a/modal/image.py +++ b/modal/image.py @@ -624,6 +624,8 @@ def attach_local_file(self, local_path: Union[str, Path], remote_path: str, *, c if not os.path.isabs(remote_path): # TODO(elias): implement relative to absolute resolution using image workdir metadata # + make default remote_path="./" + # This requires deferring the Mount creation until after "self" (the base image) has been resolved + # so we know the workdir of the operation. raise InvalidError("image.attach_local_file() currently only supports absolute remote_path values") if remote_path.endswith("/"): diff --git a/test/image_test.py b/test/image_test.py index 81f360e07..2e8f6eeeb 100644 --- a/test/image_test.py +++ b/test/image_test.py @@ -638,10 +638,15 @@ def tmp_path_with_content(tmp_path): def test_image_attach_local_file(servicer, client, tmp_path_with_content, copy, remote_path, expected_dest): app = App() + if remote_path is None: + remote_path_kwargs = {} + else: + remote_path_kwargs = {"remote_path": remote_path} + img = ( Image.from_registry("unknown_image") .workdir("/proj") - .attach_local_file(tmp_path_with_content / "data.txt", remote_path, copy=copy) + .attach_local_file(tmp_path_with_content / "data.txt", **remote_path_kwargs, copy=copy) ) app.function(image=img)(dummy) @@ -663,8 +668,9 @@ def test_image_attach_local_file(servicer, client, tmp_path_with_content, copy, @pytest.mark.parametrize( ["remote_path", "expected_dest"], [ - ("/place", "/place/sub"), # similar to docker copy - not like shell cp which would copy the source dir itself - ("/place/", "/place/sub"), # trailing slash on source makes no difference + ("/place/", "/place/sub"), # copy full dir + ("/place", "/place/sub"), # removing trailing slash on source makes no difference, unlike shell cp + # TODO: add support for relative paths: # Not supported yet, but soon: # ("place", "/proj/place/sub") # workdir relative target # (None, "/proj/sub") # default target - copy into current directory @@ -673,10 +679,15 @@ def test_image_attach_local_file(servicer, client, tmp_path_with_content, copy, def test_image_attach_local_dir(servicer, client, tmp_path_with_content, copy, remote_path, expected_dest): app = App() + if remote_path is None: + remote_path_kwargs = {} + else: + remote_path_kwargs = {"remote_path": remote_path} + img = ( Image.from_registry("unknown_image") .workdir("/proj") - .attach_local_dir(tmp_path_with_content / "data", remote_path, copy=copy) + .attach_local_dir(tmp_path_with_content / "data", **remote_path_kwargs, copy=copy) ) app.function(image=img)(dummy)