Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add cli and git-based version #2

Merged
merged 7 commits into from
Apr 15, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
exclude: '^$'
fail_fast: false

repos:
- repo: https://github.com/astral-sh/ruff-pre-commit
# Ruff version.
rev: 'v0.3.7'
hooks:
- id: ruff
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.5.0
hooks:
- id: trailing-whitespace
- id: end-of-file-fixer
- id: no-commit-to-branch
3 changes: 2 additions & 1 deletion docs/source/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,12 @@

# -- Project information -----------------------------------------------------
# https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information
from pytroll_watchers.version import version

project = "pytroll-watchers"
copyright = "2024, Martin Raspaud"
author = "Martin Raspaud"
release = "0.1.0"
release = version

# -- General configuration ---------------------------------------------------
# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration
Expand Down
17 changes: 14 additions & 3 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,15 +1,18 @@
[project]
name = "pytroll-watchers"
version = "0.1.0"
dynamic = ["version"]
description = "Utility functions and scripts to watch for new files on local or remote filesystems."
authors = [
{ name = "Martin Raspaud", email = "[email protected]" }
]
dependencies = ["universal-pathlib", "trollsift"]
dependencies = ["universal-pathlib", "trollsift", "pyyaml"]
readme = "README.md"
requires-python = ">= 3.10"
license = {file = "LICENSE.txt"}

[project.scripts]
pytroll-watcher = "pytroll_watchers.main_interface:cli"

[project.urls]
"Documentation" = "https://pytroll-watchers.readthedocs.io/en/latest/"

Expand All @@ -20,7 +23,7 @@ publishing = ["posttroll"]
ssh = ["paramiko"]

[build-system]
requires = ["hatchling"]
requires = ["hatchling", "hatch-vcs"]
build-backend = "hatchling.build"

[tool.rye]
Expand All @@ -33,6 +36,13 @@ allow-direct-references = true
[tool.hatch.build.targets.wheel]
packages = ["src/pytroll_watchers"]

[tool.hatch.version]
source = "vcs"

[tool.hatch.build.hooks.vcs]
version-file = "src/pytroll_watchers/version.py"


[tool.ruff]
line-length = 120

Expand All @@ -43,6 +53,7 @@ select = ["A", "B", "D", "E", "W", "F", "I", "N", "PT", "S", "TID", "C90", "Q",
[tool.ruff.lint.per-file-ignores]
"tests/*" = ["S101"] # assert allowed in tests
"docs/source/conf.py" = ["D100", "A001"] # sphinx misbihaving
"src/pytroll_watchers/version.py" = ["D100", "Q000"] # automatically generated by hatch-vcs

[tool.ruff.lint.pydocstyle]
convention = "google"
Expand Down
41 changes: 0 additions & 41 deletions src/pytroll_watchers/__init__.py
Original file line number Diff line number Diff line change
@@ -1,42 +1 @@
"""Main package file for pytroll watchers."""

from pytroll_watchers.local_watcher import file_generator as local_generator
from pytroll_watchers.local_watcher import file_publisher as local_publisher
from pytroll_watchers.minio_notification_watcher import file_generator as minio_generator
from pytroll_watchers.minio_notification_watcher import file_publisher as minio_publisher


def get_publisher_for_backend(backend):
"""Get the right publisher for the given backend.

For the parameters to pass the returned function, check the individual backend documentation pages.

Example:
>>> file_publisher = get_publisher_for_backend("local")
>>> file_publisher(fs_config, publisher_config, message_config)

"""
if backend == "minio":
return minio_publisher
elif backend == "local":
return local_publisher
else:
raise ValueError(f"Unknown backend {backend}.")

def get_generator_for_backend(backend):
"""Get the right generator for the given backend.

For the parameters to pass the returned function, check the individual backend documentation pages.

Example:
>>> file_generator = get_generator_for_backend("local")
>>> for filename, file_metadata in file_generator("/tmp"):
... # do something with filename and file_metadata

"""
if backend == "minio":
return minio_generator
elif backend == "local":
return local_generator
else:
raise ValueError(f"Unknown backend {backend}.")
79 changes: 79 additions & 0 deletions src/pytroll_watchers/main_interface.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
"""Main interface functions."""

import argparse

import yaml

from pytroll_watchers.local_watcher import file_generator as local_generator
from pytroll_watchers.local_watcher import file_publisher as local_publisher
from pytroll_watchers.minio_notification_watcher import file_generator as minio_generator
from pytroll_watchers.minio_notification_watcher import file_publisher as minio_publisher


def get_publisher_for_backend(backend):
"""Get the right publisher for the given backend.

For the parameters to pass the returned function, check the individual backend documentation pages.

Example:
>>> file_publisher = get_publisher_for_backend("local")
>>> file_publisher(fs_config, publisher_config, message_config)

"""
if backend == "minio":
return minio_publisher
elif backend == "local":
return local_publisher
else:
raise ValueError(f"Unknown backend {backend}.")

def get_generator_for_backend(backend):
"""Get the right generator for the given backend.

For the parameters to pass the returned function, check the individual backend documentation pages.

Example:
>>> file_generator = get_generator_for_backend("local")
>>> for filename, file_metadata in file_generator("/tmp"):
... # do something with filename and file_metadata

"""
if backend == "minio":
return minio_generator
elif backend == "local":
return local_generator
else:
raise ValueError(f"Unknown backend {backend}.")


def publish_from_config(config):
"""Publish files/objects given a config.

Args:
config: a dictionary containing the `backend` string (`local` or `minio`), and `fs_config`, `publisher_config`
and `message_config` dictionaries.
"""
if config["backend"] == "local":
return local_publisher(config["fs_config"], config["publisher_config"], config["message_config"])
elif config["backend"] == "minio":
return minio_publisher(config["fs_config"], config["publisher_config"], config["message_config"])
else:
raise ValueError(f"Unknown backend {config['backend']}")

def cli(args=None):
"""Command-line interface for pytroll-watchers."""
parser = argparse.ArgumentParser(
prog="pytroll-watcher",
description="Watches the appearance of new files/objects on different filesystems.",
epilog="Thanks for using pytroll-watchers!")

parser.add_argument("config", type=str, help="The yaml config file.")

parsed = parser.parse_args(args)

config_file = parsed.config

with open(config_file) as fd:
config_dict = yaml.safe_load(fd.read())

return publish_from_config(config_dict)
22 changes: 11 additions & 11 deletions src/pytroll_watchers/minio_notification_watcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@


def file_publisher(fs_config, publisher_config, message_config):
"""Publish files coming from bucket notifications.
"""Publish objects coming from bucket notifications.

Args:
fs_config: the configuration for the filesystem watching, will be passed as argument to `file_generator`.
Expand All @@ -19,17 +19,17 @@ def file_publisher(fs_config, publisher_config, message_config):


def file_generator(endpoint_url, bucket_name, file_pattern=None, storage_options=None):
"""Generate new files appearing in the watched directory.
"""Generate new objects appearing in the watched bucket.

Args:
endpoint_url: The endpoint_url to use.
bucket_name: The bucket to watch for changes.
file_pattern: The trollsift pattern to use for matching and extracting metadata from the filename.
This must not include the directory.
file_pattern: The trollsift pattern to use for matching and extracting metadata from the object name.
This must not include the prefix.
storage_options: The storage options for the service, for example for specifying a profile to the aws config.

Returns:
A tuple of UPath and file metadata.
A tuple of UPath and metadata.

Examples:
To iterate over new files in `s3:///tmp/`:
Expand All @@ -40,25 +40,25 @@ def file_generator(endpoint_url, bucket_name, file_pattern=None, storage_options
UPath("s3:///tmp/20200428_1000_foo.tif")

"""
file_metadata = {}
object_metadata = {}

if storage_options is None:
storage_options = {}
for record in _record_generator(endpoint_url, bucket_name, storage_options):
for item in record["Records"]:
new_bucket_name = item["s3"]["bucket"]["name"]
new_file_name = item["s3"]["object"]["key"]
object_name = item["s3"]["object"]["key"]
try:
file_metadata = parse_metadata(file_pattern, new_file_name)
object_metadata = parse_metadata(file_pattern, object_name)
except ValueError:
continue

path = UPath(f"s3://{new_bucket_name}/{new_file_name}", **storage_options)
yield path, file_metadata
path = UPath(f"s3://{new_bucket_name}/{object_name}", **storage_options)
yield path, object_metadata


def _record_generator(endpoint_url, bucket_name, profile=None):
"""Generate records for new files in the bucket."""
"""Generate records for new objects in the bucket."""
from minio import Minio
from minio.credentials.providers import AWSConfigProvider

Expand Down
6 changes: 1 addition & 5 deletions src/pytroll_watchers/testing.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
"""Utilities for testing code that uses pytroll watchers."""
"""Pytest fixtures for testing code that uses pytroll watchers."""

from contextlib import contextmanager, nullcontext

Expand Down Expand Up @@ -46,8 +46,4 @@ def fake_listen(*args, **kwargs):
import minio
monkeypatch.setattr(minio.Minio, "listen_bucket_notification", fake_listen)
yield
#fake_minio = mock.Mock(wraps=minio.Minio)
#fake_minio.return_value.listen_bucket_notification.return_value = nullcontext(enter_result=records)
#with mock.patch("minio.Minio", fake_minio):
# yield
return _patched_bucket_listener
86 changes: 85 additions & 1 deletion tests/test_main_interface.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,19 @@
"""Tests for the gathered publisher functions."""

import pytest
from pytroll_watchers import get_generator_for_backend, get_publisher_for_backend
import yaml
from posttroll.testing import patched_publisher
from pytroll_watchers.local_watcher import file_generator as local_generator
from pytroll_watchers.local_watcher import file_publisher as local_publisher
from pytroll_watchers.main_interface import (
cli,
get_generator_for_backend,
get_publisher_for_backend,
publish_from_config,
)
from pytroll_watchers.minio_notification_watcher import file_generator as minio_generator
from pytroll_watchers.minio_notification_watcher import file_publisher as minio_publisher
from pytroll_watchers.testing import patched_bucket_listener, patched_local_events # noqa


def test_getting_right_publisher():
Expand All @@ -25,3 +33,79 @@ def test_getting_right_generator():
assert generator == local_generator
with pytest.raises(ValueError, match="Unknown backend"):
_ = get_generator_for_backend("some_other_backend")


def test_pass_config_to_file_publisher_for_local_backend(tmp_path, patched_local_events): # noqa
"""Test passing a config to create a file publisher from a local fs."""
local_settings = dict(directory=tmp_path)
publisher_settings = dict(nameservers=False, port=1979)
message_settings = dict(subject="/segment/viirs/l1b/", atype="file", data=dict(sensor="viirs"))
config = dict(backend="local",
fs_config=local_settings,
publisher_config=publisher_settings,
message_config=message_settings)
with patched_publisher() as msgs:
filename = tmp_path / "bla"
with patched_local_events([filename]):
publish_from_config(config)
assert len(msgs) == 1
assert str(filename) in msgs[0]


def test_pass_config_to_object_publisher_for_minio_backend(patched_bucket_listener): # noqa
"""Test passing a config to create an objec publisher from minio bucket."""
s3_settings = dict(endpoint_url="someendpoint",
bucket_name="viirs-data",
storage_options=dict())
publisher_settings = dict(nameservers=False, port=1979)
message_settings = dict(subject="/segment/viirs/l1b/", atype="file", data=dict(sensor="viirs"))
config = dict(backend="minio",
fs_config=s3_settings,
publisher_config=publisher_settings,
message_config=message_settings)

records = [{"Records": [{
"eventName": "s3:ObjectCreated:Put",
"s3": {"bucket": {"arn": "arn:aws:s3:::viirs-data",
"name": "viirs-data",
"ownerIdentity": {"principalId": "someuser"}},
"object": {"contentType": "application/x-hdf5",
"key": "sdr/bla.h5",
"size": 22183568,
"userMetadata": {"content-type": "application/x-hdf5"}}}}]}]


with patched_publisher() as msgs:
with patched_bucket_listener(records):
publish_from_config(config)
assert len(msgs) == 1
assert str("sdr/bla.h5") in msgs[0]


def test_pass_config_to_file_publisher_for_spurious_backend():
"""Test that spurious backend fails."""
config = {}
config["backend"] = "some_other_backend"
with pytest.raises(ValueError, match="Unknown backend"):
publish_from_config(config)

def test_cli(tmp_path, patched_local_events): # noqa
"""Test the command-line interface."""
local_settings = dict(directory=str(tmp_path))
publisher_settings = dict(nameservers=False, port=1979)
message_settings = dict(subject="/segment/viirs/l1b/", atype="file", data=dict(sensor="viirs"))
config = dict(backend="local",
fs_config=local_settings,
publisher_config=publisher_settings,
message_config=message_settings)

config_file = tmp_path / "config.yaml"
with open(config_file, "w") as fd:
fd.write(yaml.dump(config))

with patched_publisher() as msgs:
filename = tmp_path / "bla"
with patched_local_events([filename]):
cli([str(config_file)])
assert len(msgs) == 1
assert str(filename) in msgs[0]
Loading