Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

1398 remove default download zip extension #1400

Draft
wants to merge 11 commits into
base: develop
Choose a base branch
from
15 changes: 9 additions & 6 deletions eodag/plugins/download/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -203,7 +203,7 @@ def _prepare_download(
or tempfile.gettempdir()
)
output_extension = kwargs.get("output_extension", None) or getattr(
self.config, "output_extension", ".zip"
self.config, "output_extension", ""
)

# Strong assumption made here: all products downloaded will be zip files
Expand Down Expand Up @@ -341,11 +341,14 @@ def _finalize(
)
output_extension = kwargs.pop("output_extension", ".zip")

product_path = (
fs_path[: fs_path.index(output_extension)]
if output_extension in fs_path
else fs_path
)
if output_extension:
product_path = (
fs_path[: fs_path.index(output_extension)]
if output_extension in fs_path
else fs_path
)
else:
product_path, _ = os.path.splitext(fs_path)
product_path_exists = os.path.exists(product_path)
if product_path_exists and os.path.isfile(product_path):
logger.info(
Expand Down
171 changes: 79 additions & 92 deletions eodag/plugins/download/http.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,11 @@
from email.message import Message
from itertools import chain
from json import JSONDecodeError
from pathlib import Path
from typing import (
TYPE_CHECKING,
Any,
Callable,
Dict,
Iterator,
List,
Expand Down Expand Up @@ -588,7 +590,7 @@ def download(
output_extension = getattr(self.config, "products", {}).get(
product.product_type, {}
).get("output_extension", None) or getattr(
self.config, "output_extension", ".zip"
self.config, "output_extension", None
)
kwargs["output_extension"] = kwargs.get("output_extension", output_extension)

Expand All @@ -610,7 +612,7 @@ def download(
try:
fs_path = self._download_assets(
product,
fs_path.replace(".zip", ""),
fs_path,
record_filename,
auth,
progress_callback,
Expand All @@ -635,74 +637,54 @@ def download_request(
wait: int,
timeout: int,
**kwargs: Unpack[DownloadConf],
) -> None:
chunks = self._stream_download(product, auth, progress_callback, **kwargs)
) -> os.PathLike:
is_empty = True
result = self._stream_download(product, auth, progress_callback, **kwargs)
if result is not None and fs_path is not None:
filename, chunk_iterator = result

with open(fs_path, "wb") as fhandle:
for chunk in chunks:
is_empty = False
fhandle.write(chunk)
ext = Path(filename).suffix
path = Path(fs_path).with_suffix(ext)

if is_empty:
raise DownloadError(f"product {product.properties['id']} is empty")
with open(path, "wb") as fhandle:
chunks = chunk_iterator()
for chunk in chunks:
is_empty = False
fhandle.write(chunk)
self.stream.close() # Closing response stream

download_request(product, auth, progress_callback, wait, timeout, **kwargs)
if is_empty:
raise DownloadError(f"product {product.properties['id']} is empty")

return path
else:
raise DownloadError(
f"download of product {product.properties['id']} failed"
)

path = download_request(
product, auth, progress_callback, wait, timeout, **kwargs
)

with open(record_filename, "w") as fh:
fh.write(url)
logger.debug("Download recorded in %s", record_filename)

# Check that the downloaded file is really a zip file
if not zipfile.is_zipfile(fs_path) and output_extension == ".zip":
logger.warning(
"Downloaded product is not a Zip File. Please check its file type before using it"
)
new_fs_path = os.path.join(
os.path.dirname(fs_path),
sanitize(product.properties["title"]),
)
if os.path.isfile(fs_path) and not tarfile.is_tarfile(fs_path):
if not os.path.isdir(new_fs_path):
os.makedirs(new_fs_path)
shutil.move(fs_path, new_fs_path)
file_path = os.path.join(new_fs_path, os.path.basename(fs_path))
new_file_path = file_path[: file_path.index(".zip")]
shutil.move(file_path, new_file_path)
# in the case where the outputs extension has not been set
# to ".tar" in the product type nor provider configuration
elif tarfile.is_tarfile(fs_path):
if not new_fs_path.endswith(".tar"):
new_fs_path += ".tar"
shutil.move(fs_path, new_fs_path)
kwargs["output_extension"] = ".tar"
product_path = self._finalize(
new_fs_path,
progress_callback=progress_callback,
**kwargs,
)
product.location = path_to_uri(product_path)
return product_path
else:
# not a file (dir with zip extension)
shutil.move(fs_path, new_fs_path)
product.location = path_to_uri(new_fs_path)
return new_fs_path

if os.path.isfile(fs_path) and not (
zipfile.is_zipfile(fs_path) or tarfile.is_tarfile(fs_path)
if os.path.isfile(path) and not (
zipfile.is_zipfile(path) or tarfile.is_tarfile(path)
):
new_fs_path = os.path.join(
os.path.dirname(fs_path),
os.path.dirname(path),
sanitize(product.properties["title"]),
)
if not os.path.isdir(new_fs_path):
os.makedirs(new_fs_path)
shutil.move(fs_path, new_fs_path)
shutil.move(path, new_fs_path)
product.location = path_to_uri(new_fs_path)
return new_fs_path

product_path = self._finalize(
fs_path,
str(path),
progress_callback=progress_callback,
**kwargs,
)
Expand Down Expand Up @@ -834,9 +816,13 @@ def _stream_download_dict(
else:
pass

chunks = self._stream_download(product, auth, progress_callback, **kwargs)
result = self._stream_download(product, auth, progress_callback, **kwargs)
if result is None:
raise DownloadError(f"download of {product.properties['id']} is empty")
filename, chunk_iterator = result
# start reading chunks to set product.headers
try:
chunks = chunk_iterator()
first_chunk = next(chunks)
except StopIteration:
# product is empty file
Expand Down Expand Up @@ -908,7 +894,7 @@ def _stream_download(
auth: Optional[AuthBase] = None,
progress_callback: Optional[ProgressCallback] = None,
**kwargs: Unpack[DownloadConf],
) -> Iterator[Any]:
) -> tuple[str, Callable[[], Any]] | None:
"""
fetches a zip file containing the assets of a given product as a stream
and returns a generator yielding the chunks of the file
Expand Down Expand Up @@ -969,7 +955,7 @@ def _stream_download(
auth = None

s = requests.Session()
with s.request(
self.stream = s.request(
req_method,
req_url,
stream=True,
Expand All @@ -979,49 +965,50 @@ def _stream_download(
timeout=DEFAULT_STREAM_REQUESTS_TIMEOUT,
verify=ssl_verify,
**req_kwargs,
) as self.stream:
try:
self.stream.raise_for_status()
except requests.exceptions.Timeout as exc:
raise TimeOutError(
exc, timeout=DEFAULT_STREAM_REQUESTS_TIMEOUT
) from exc
except RequestException as e:
self._process_exception(e, product, ordered_message)
else:
# check if product was ordered

if getattr(
self.stream, "status_code", None
) is not None and self.stream.status_code == getattr(
self.config, "order_status", {}
).get(
"ordered", {}
).get(
"http_code"
):
product.properties["storageStatus"] = "ORDERED"
self._process_exception(None, product, ordered_message)
stream_size = self._check_stream_size(product) or None

product.headers = self.stream.headers
filename = self._check_product_filename(product) or None
product.headers[
"content-disposition"
] = f"attachment; filename={filename}"
content_type = product.headers.get("Content-Type")
guessed_content_type = (
guess_file_type(filename) if filename and not content_type else None
)
if guessed_content_type is not None:
product.headers["Content-Type"] = guessed_content_type
)
try:
self.stream.raise_for_status()
except requests.exceptions.Timeout as exc:
raise TimeOutError(exc, timeout=DEFAULT_STREAM_REQUESTS_TIMEOUT) from exc
except RequestException as e:
self._process_exception(e, product, ordered_message)
return None
else:
# check if product was ordered

if getattr(
self.stream, "status_code", None
) is not None and self.stream.status_code == getattr(
self.config, "order_status", {}
).get(
"ordered", {}
).get(
"http_code"
):
product.properties["storageStatus"] = "ORDERED"
self._process_exception(None, product, ordered_message)
stream_size = self._check_stream_size(product) or None

product.headers = self.stream.headers
filename = self._check_product_filename(product)
product.headers["content-disposition"] = f"attachment; filename={filename}"
content_type = product.headers.get("Content-Type")
guessed_content_type = (
guess_file_type(filename) if filename and not content_type else None
)
if guessed_content_type is not None:
product.headers["Content-Type"] = guessed_content_type

progress_callback.reset(total=stream_size)
progress_callback.reset(total=stream_size)

def iteration_wrapper():
for chunk in self.stream.iter_content(chunk_size=64 * 1024):
if chunk:
progress_callback(len(chunk))
yield chunk

return filename, iteration_wrapper

def _stream_download_assets(
self,
product: EOProduct,
Expand Down
2 changes: 1 addition & 1 deletion eodag/types/download_args.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ class DownloadConf(TypedDict, total=False):
"""

output_dir: str
output_extension: str
output_extension: str | None
extract: bool
dl_url_params: Dict[str, str]
delete_archive: bool
Expand Down
17 changes: 13 additions & 4 deletions tests/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
import unittest
from collections import OrderedDict, namedtuple
from io import StringIO
from pathlib import Path
from unittest import mock # PY3

from owslib.etree import etree
Expand Down Expand Up @@ -304,12 +305,12 @@ def _dummy_downloadable_product(
):
self._set_download_simulation()
self.tmp_download_dir = tempfile.TemporaryDirectory()
if output_dir is None:
output_dir = str(Path(self.tmp_download_dir.name).parent)
dl_config = config.PluginConfig.from_mapping(
{
"base_uri": "fake_base_uri" if base_uri is None else base_uri,
"output_dir": self.tmp_download_dir.name
if output_dir is None
else output_dir,
"output_dir": output_dir,
"extract": True if extract is None else extract,
"delete_archive": False if delete_archive is None else delete_archive,
}
Expand All @@ -321,6 +322,8 @@ def _dummy_downloadable_product(
return product

def _clean_product(self, product_path):
    """Remove a downloaded product, then release the temporary download dir.

    :param product_path: Path of the downloaded product. It may be a
        directory, a single file (e.g. a non-extracted archive) or a path
        that does not exist; a missing path is silently ignored.
    """
    try:
        if os.path.isdir(product_path):
            shutil.rmtree(product_path)
        elif os.path.exists(product_path):
            # shutil.rmtree() raises NotADirectoryError on a regular file,
            # so single-file products must be removed with os.remove()
            os.remove(product_path)
    finally:
        # always release the temporary directory, even if removal failed
        self.tmp_download_dir.cleanup()

def _set_download_simulation(self):
Expand All @@ -335,7 +338,10 @@ def __init__(response):
with open(self.local_product_as_archive_path, "rb") as fh:
response.__zip_buffer = io.BytesIO(fh.read())
cl = response.__zip_buffer.getbuffer().nbytes
response.headers = {"content-length": cl}
response.headers = {
"content-length": cl,
"content-disposition": "attachment; filename=foobar.zip",
}
response.url = "http://foo.bar"

def __enter__(response):
Expand All @@ -355,6 +361,9 @@ def iter_content(response, **kwargs):
def raise_for_status(response):
pass

def close(response):
pass

return Response()


Expand Down
Loading