From b5750f677e3b857e44502e6d91a3a4a5de245a9a Mon Sep 17 00:00:00 2001 From: Josh Schneier Date: Fri, 10 May 2024 11:18:57 -0400 Subject: [PATCH] [s3] Fix newline handling for text-mode files --- storages/backends/s3.py | 19 ++++++++++--- tests/test_s3.py | 60 ++++++++++++++++++++++++++--------------- 2 files changed, 55 insertions(+), 24 deletions(-) diff --git a/storages/backends/s3.py b/storages/backends/s3.py index d78e2303..9822f2ef 100644 --- a/storages/backends/s3.py +++ b/storages/backends/s3.py @@ -1,3 +1,4 @@ +import io import mimetypes import os import posixpath @@ -123,7 +124,6 @@ def __init__(self, name, mode, storage, buffer_size=None): self._storage = storage self.name = name[len(self._storage.location) :].lstrip("/") self._mode = mode - self._force_mode = (lambda b: b) if "b" in mode else (lambda b: b.decode()) self.obj = storage.bucket.Object(name) if "w" not in mode: # Force early RAII-style exception if object does not exist @@ -184,6 +184,19 @@ def _get_file(self): self._file.seek(0) if self._storage.gzip and self.obj.content_encoding == "gzip": self._file = self._decompress_file(mode=self._mode, file=self._file) + elif "b" not in self._mode: + if hasattr(self._file, "readable"): + # For versions > Python 3.10 compatibility + # See SpooledTemporaryFile changes in 3.11 (https://docs.python.org/3/library/tempfile.html) # noqa: E501 + # Now fully implements the io.BufferedIOBase and io.TextIOBase abstract base classes allowing the file # noqa: E501 + # to be readable in the mode that it was specified (without accessing the underlying _file object). # noqa: E501 + # In this case, we need to wrap the file in a TextIOWrapper to ensure that the file is read as a text file. # noqa: E501 + self._file = io.TextIOWrapper(self._file, encoding="utf-8") + else: + # For versions <= Python 3.10 compatibility + self._file = io.TextIOWrapper( + self._file._file, encoding="utf-8" + ) self._closed = False return self._file @@ -195,12 +208,12 @@ def _set_file(self, value): def read(self, *args, **kwargs): if "r" not in self._mode: raise AttributeError("File was not opened in read mode.") - return self._force_mode(super().read(*args, **kwargs)) + return super().read(*args, **kwargs) def readline(self, *args, **kwargs): if "r" not in self._mode: raise AttributeError("File was not opened in read mode.") - return self._force_mode(super().readline(*args, **kwargs)) + return super().readline(*args, **kwargs) def readlines(self): return list(self) diff --git a/tests/test_s3.py b/tests/test_s3.py index a903ccc7..12c2d009 100644 --- a/tests/test_s3.py +++ b/tests/test_s3.py @@ -317,23 +317,6 @@ def test_storage_open_read_string(self): content_str = file.read() self.assertEqual(content_str, "") - def test_storage_open_readlines(self): - """ - Test readlines with file opened in "r" and "rb" modes - """ - name = "test_open_readlines_string.txt" - with io.BytesIO() as temp_file: - temp_file.write(b"line1\nline2") - with self.storage.open(name, "r") as f1: - f1._file = temp_file - content_lines = f1.readlines() - self.assertEqual(content_lines, ["line1\n", "line2"]) - temp_file.seek(0) - with self.storage.open(name, "rb") as f2: - f2._file = temp_file - content_lines = f2.readlines() - self.assertEqual(content_lines, [b"line1\n", b"line2"]) - def test_storage_open_write(self): """ Test opening a file in write mode @@ -348,7 +331,7 @@ def test_storage_open_write(self): "ACL": "public-read", } - with self.storage.open(name, "w") as file: + with self.storage.open(name, "wb") as file: self.storage.bucket.Object.assert_called_with(name) obj = self.storage.bucket.Object.return_value # Set the name of the mock object @@ -398,7 +381,7 @@ def test_storage_open_no_write(self): "StorageClass": "REDUCED_REDUNDANCY", } - with self.storage.open(name, "w"): + with self.storage.open(name, "wb"): self.storage.bucket.Object.assert_called_with(name) obj = self.storage.bucket.Object.return_value obj.load.side_effect = ClientError( @@ -429,7 +412,7 @@ def test_storage_open_no_overwrite_existing(self): "StorageClass": "REDUCED_REDUNDANCY", } - with self.storage.open(name, "w"): + with self.storage.open(name, "wb"): self.storage.bucket.Object.assert_called_with(name) obj = self.storage.bucket.Object.return_value @@ -451,7 +434,7 @@ def test_storage_write_beyond_buffer_size(self): "StorageClass": "REDUCED_REDUNDANCY", } - with self.storage.open(name, "w") as file: + with self.storage.open(name, "wb") as file: self.storage.bucket.Object.assert_called_with(name) obj = self.storage.bucket.Object.return_value # Set the name of the mock object @@ -1167,6 +1150,41 @@ def test_content_type_not_detectable(self): s3.S3Storage.default_content_type, ) + def test_storage_open_reading_with_newlines(self): + """Test file reading with "r" and "rb" and various newline characters.""" + name = "test_storage_open_read_with_newlines.txt" + with io.BytesIO() as temp_file: + temp_file.write(b"line1\nline2\r\nmore\rtext\n") + self.storage.save(name, temp_file) + file = self.storage.open(name, "r") + content_str = file.read() + file.close() + self.assertEqual(content_str, "line1\nline2\nmore\ntext\n") + + with io.BytesIO() as temp_file: + temp_file.write(b"line1\nline2\r\nmore\rtext\n") + self.storage.save(name, temp_file) + file = self.storage.open(name, "rb") + content_str = file.read() + file.close() + self.assertEqual(content_str, b"line1\nline2\r\nmore\rtext\n") + + with io.BytesIO() as temp_file: + temp_file.write(b"line1\nline2\r\nmore\rtext") + self.storage.save(name, temp_file) + file = self.storage.open(name, "r") + content_lines = file.readlines() + file.close() + self.assertEqual(content_lines, ["line1\n", "line2\n", "more\n", "text"]) + + with io.BytesIO() as temp_file: + temp_file.write(b"line1\nline2\r\nmore\rtext") + self.storage.save(name, temp_file) + file = self.storage.open(name, "rb") + content_lines = file.readlines() + file.close() + self.assertEqual(content_lines, [b"line1\n", b"line2\r\n", b"more\r", b"text"]) + class TestBackwardsNames(TestCase): def test_importing(self):