diff --git a/.copier-answers.yml b/.copier-answers.yml new file mode 100644 index 0000000..95f06cf --- /dev/null +++ b/.copier-answers.yml @@ -0,0 +1,17 @@ +# Changes here will be overwritten by Copier +_commit: v1.4.5 +_src_path: gh:lincc-frameworks/python-project-template +author_email: lincc-frameworks-team@lists.lsst.org +author_name: LINCC Frameworks +create_example_module: true +custom_install: true +include_benchmarks: true +include_docs: false +mypy_type_checking: none +package_name: hipscat_cloudtests +preferred_linter: pylint +project_license: BSD +project_name: hipscat_cloudtests +project_organization: astronomy-commons +use_gitlfs: none +use_isort: true diff --git a/.github/workflows/asv-main.yml b/.github/workflows/asv-main.yml new file mode 100644 index 0000000..f9fd700 --- /dev/null +++ b/.github/workflows/asv-main.yml @@ -0,0 +1,101 @@ +# This workflow will run benchmarks with airspeed velocity (asv), +# store the new results in the "benchmarks" branch and publish them +# to a dashboard on GH Pages. + +name: Run ASV benchmarks for main + +on: + push: + branches: [ main ] + +env: + PYTHON_VERSION: "3.10" + WORKING_DIR: ${{ github.workspace }}/benchmarks + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + + setup-python: + runs-on: ubuntu-latest + + steps: + - name: Cache Python ${{ env.PYTHON_VERSION }} + uses: actions/cache@v3 + with: + path: ~/.cache/pip + key: python-${{ env.PYTHON_VERSION }} + + - name: Set up Python ${{ env.PYTHON_VERSION }} + uses: actions/setup-python@v4 + with: + python-version: "${{ env.PYTHON_VERSION }}" + + asv-main: + runs-on: ubuntu-latest + needs: setup-python + + permissions: + contents: write + + defaults: + run: + working-directory: ${{ env.WORKING_DIR }} + + steps: + - name: Checkout main branch of the repository + uses: actions/checkout@v3 + with: + fetch-depth: 0 + + - name: Cache Python ${{ env.PYTHON_VERSION }} + uses: actions/cache@v3 + with: + path: ~/.cache/pip + key: python-${{ env.PYTHON_VERSION }} + + - name: Install dependencies + run: | + sudo apt-get update + python -m pip install --upgrade pip + pip install asv==0.6.1 virtualenv tabulate + + - name: Configure git + run: | + git config user.name "github-actions[bot]" + git config user.email "41898282+github-actions[bot]@users.noreply.github.com" + + - name: Create ASV machine config file + run: asv machine --machine gh-runner --yes + + - name: Fetch previous results from the "benchmarks" branch + run: | + if git ls-remote --exit-code origin benchmarks > /dev/null 2>&1; then + git merge origin/benchmarks \ + --allow-unrelated-histories \ + --no-commit + mv ../_results . + fi + + - name: Run ASV for the main branch + run: asv run ALL --skip-existing --verbose || true + + - name: Submit new results to the "benchmarks" branch + uses: JamesIves/github-pages-deploy-action@v4 + with: + branch: benchmarks + folder: ${{ env.WORKING_DIR }}/_results + target-folder: _results + + - name: Generate dashboard HTML + run: | + asv show + asv publish + + - name: Deploy to Github pages + uses: JamesIves/github-pages-deploy-action@v4 + with: + branch: gh-pages + folder: ${{ env.WORKING_DIR }}/_html \ No newline at end of file diff --git a/.github/workflows/asv-nightly.yml b/.github/workflows/asv-nightly.yml new file mode 100644 index 0000000..fa8012c --- /dev/null +++ b/.github/workflows/asv-nightly.yml @@ -0,0 +1,93 @@ +# This workflow will run daily at 06:45. 
+# It will run benchmarks with airspeed velocity (asv) +# and compare performance with the previous nightly build. + +name: Run benchmarks nightly job + +on: + schedule: + - cron: 45 6 * * * + workflow_dispatch: + +env: + PYTHON_VERSION: "3.10" + WORKING_DIR: ${{ github.workspace }}/benchmarks + NIGHTLY_HASH_FILE: nightly-hash + +jobs: + + asv-nightly: + runs-on: ubuntu-latest + + defaults: + run: + working-directory: ${{ env.WORKING_DIR }} + + steps: + - name: Checkout main branch of the repository + uses: actions/checkout@v3 + with: + fetch-depth: 0 + + - name: Cache Python ${{ env.PYTHON_VERSION }} + uses: actions/cache@v3 + with: + path: ~/.cache/pip + key: python-${{ env.PYTHON_VERSION }} + + - name: Set up Python ${{ env.PYTHON_VERSION }} + uses: actions/setup-python@v4 + with: + python-version: "${{ env.PYTHON_VERSION }}" + + - name: Install dependencies + run: | + sudo apt-get update + python -m pip install --upgrade pip + pip install asv==0.6.1 virtualenv + + - name: Configure git + run: | + git config user.name "github-actions[bot]" + git config user.email "41898282+github-actions[bot]@users.noreply.github.com" + + - name: Create ASV machine config file + run: asv machine --machine gh-runner --yes + + - name: Fetch previous results from the "benchmarks" branch + run: | + if git ls-remote --exit-code origin benchmarks > /dev/null 2>&1; then + git merge origin/benchmarks \ + --allow-unrelated-histories \ + --no-commit + mv ../_results . + fi + + - name: Get nightly dates under comparison + id: nightly-dates + run: | + echo "yesterday=$(date -d yesterday +'%Y-%m-%d')" >> $GITHUB_OUTPUT + echo "today=$(date +'%Y-%m-%d')" >> $GITHUB_OUTPUT + + - name: Use last nightly commit hash from cache + uses: actions/cache@v3 + with: + path: ${{ env.WORKING_DIR }} + key: nightly-results-${{ steps.nightly-dates.outputs.yesterday }} + + - name: Run comparison of main against last nightly build + run: | + HASH_FILE=${{ env.NIGHTLY_HASH_FILE }} + CURRENT_HASH=${{ github.sha }} + if [ -f $HASH_FILE ]; then + PREV_HASH=$(cat $HASH_FILE) + asv continuous $PREV_HASH $CURRENT_HASH --verbose || true + asv compare $PREV_HASH $CURRENT_HASH --sort ratio --verbose + fi + echo $CURRENT_HASH > $HASH_FILE + + - name: Update last nightly hash in cache + uses: actions/cache@v3 + with: + path: ${{ env.WORKING_DIR }} + key: nightly-results-${{ steps.nightly-dates.outputs.today }} \ No newline at end of file diff --git a/.github/workflows/asv-pr.yml b/.github/workflows/asv-pr.yml new file mode 100644 index 0000000..bef2208 --- /dev/null +++ b/.github/workflows/asv-pr.yml @@ -0,0 +1,101 @@ +# This workflow will run benchmarks with airspeed velocity (asv) for pull requests. +# It will compare the performance of the main branch with the performance of the merge +# with the new changes and publish a comment with this assessment. 
+
+name: Run ASV benchmarks for PR
+
+on:
+  pull_request:
+    branches: [ main ]
+
+env:
+  PYTHON_VERSION: "3.10"
+  WORKING_DIR: ${{ github.workspace }}/benchmarks
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true
+
+jobs:
+
+  setup-python:
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Cache Python ${{ env.PYTHON_VERSION }}
+        uses: actions/cache@v3
+        with:
+          path: ~/.cache/pip
+          key: python-${{ env.PYTHON_VERSION }}
+
+      - name: Set up Python ${{ env.PYTHON_VERSION }}
+        uses: actions/setup-python@v4
+        with:
+          python-version: "${{ env.PYTHON_VERSION }}"
+
+  asv-pr:
+    runs-on: ubuntu-latest
+    needs: setup-python
+
+    permissions:
+      actions: read
+      pull-requests: write
+
+    defaults:
+      run:
+        working-directory: ${{ env.WORKING_DIR }}
+
+    steps:
+      - name: Checkout PR branch of the repository
+        uses: actions/checkout@v3
+        with:
+          fetch-depth: 0
+
+      - name: Cache Python ${{ env.PYTHON_VERSION }}
+        uses: actions/cache@v3
+        with:
+          path: ~/.cache/pip
+          key: python-${{ env.PYTHON_VERSION }}
+
+      - name: Install dependencies
+        run: |
+          sudo apt-get update
+          python -m pip install --upgrade pip
+          pip install asv==0.6.1 virtualenv tabulate lf-asv-formatter
+
+      - name: Get current job logs URL
+        uses: Tiryoh/gha-jobid-action@v0
+        id: jobs
+        with:
+          github_token: ${{ secrets.GITHUB_TOKEN }}
+          job_name: ${{ github.job }}
+
+      - name: Create ASV machine config file
+        run: asv machine --machine gh-runner --yes
+
+      - name: Run comparison of PR against main branch
+        run: |
+          git remote add upstream https://github.com/${{ github.repository }}.git
+          git fetch upstream
+          asv continuous upstream/main HEAD --verbose || true
+          asv compare upstream/main HEAD --sort ratio --verbose | tee output
+          python -m lf_asv_formatter --asv_version "$(asv --version)"
+          printf "\n\nClick [here]($STEP_URL) to view all benchmarks." >> output
+        env:
+          STEP_URL: "${{ steps.jobs.outputs.html_url }}#step:8:1"
+
+      - name: Find benchmarks comment
+        uses: peter-evans/find-comment@v2
+        id: find-comment
+        with:
+          issue-number: ${{ github.event.pull_request.number }}
+          comment-author: 'github-actions[bot]'
+          body-includes: view all benchmarks
+
+      - name: Create or update benchmarks comment
+        uses: peter-evans/create-or-update-comment@v3
+        with:
+          comment-id: ${{ steps.find-comment.outputs.comment-id }}
+          issue-number: ${{ github.event.pull_request.number }}
+          body-path: ${{ env.WORKING_DIR }}/output
+          edit-mode: replace
\ No newline at end of file
diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml
new file mode 100644
index 0000000..68169e6
--- /dev/null
+++ b/.github/workflows/linting.yml
@@ -0,0 +1,36 @@
+# This workflow will install Python dependencies, then perform static linting analysis.
+# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions
+
+name: Lint
+
+on:
+  push:
+    branches: [ main ]
+  pull_request:
+    branches: [ main ]
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  build:
+    runs-on: ubuntu-latest
+    steps:
+    - uses: actions/checkout@v3
+    - name: Set up Python
+      uses: actions/setup-python@v4
+      with:
+        python-version: '3.10'
+    - name: Install dependencies
+      run: |
+        sudo apt-get update
+        python -m pip install --upgrade pip
+        pip install .
+ pip install .[dev] + if [ -f requirements.txt ]; then pip install -r requirements.txt; fi + - name: Analyze code with linter + run: | + pylint -rn -sn --recursive=y ./src --rcfile=./src/.pylintrc + pylint -rn -sn --recursive=y ./tests --rcfile=./tests/.pylintrc + pylint -rn -sn --recursive=y ./benchmarks --rcfile=./tests/.pylintrc diff --git a/.github/workflows/pre-commit-ci.yml b/.github/workflows/pre-commit-ci.yml new file mode 100644 index 0000000..8397877 --- /dev/null +++ b/.github/workflows/pre-commit-ci.yml @@ -0,0 +1,34 @@ +# This workflow runs pre-commit hooks on pull requests to enforce coding style. +# To ensure correct configuration, please refer to: +# https://lincc-ppt.readthedocs.io/en/latest/practices/ci_precommit.html + +name: Run pre-commit hooks + +on: + pull_request: + +jobs: + pre-commit-ci: + runs-on: ubuntu-latest + env: + SKIP: "check-lincc-frameworks-template-version,pytest-check,no-commit-to-branch,validate-pyproject,check-added-large-files,sphinx-build" + steps: + - uses: actions/checkout@v3 + with: + fetch-depth: 0 + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: '3.10' + - name: Install dependencies + run: | + sudo apt-get update + python -m pip install --upgrade pip + pip install . + pip install .[dev] + if [ -f requirements.txt ]; then pip install -r requirements.txt; fi + - uses: pre-commit/action@v3.0.0 + with: + extra_args: --from-ref ${{ github.event.pull_request.base.sha }} --to-ref ${{ github.event.pull_request.head.sha }} + - uses: pre-commit-ci/lite-action@v1.0.1 + if: always() \ No newline at end of file diff --git a/.github/workflows/smoke-test.yml b/.github/workflows/smoke-test.yml new file mode 100644 index 0000000..cb1073f --- /dev/null +++ b/.github/workflows/smoke-test.yml @@ -0,0 +1,46 @@ +# This workflow will run daily at 06:45. +# It will install Python dependencies and run tests with a variety of Python versions. +# See documentation for help debugging smoke test issues: +# https://lincc-ppt.readthedocs.io/en/latest/practices/ci_testing.html#version-culprit + +name: Unit test smoke test + +on: + + # Runs this workflow automatically + schedule: + - cron: 45 6 * * * + + # Allows you to run this workflow manually from the Actions tab + workflow_dispatch: + +jobs: + build: + + runs-on: ubuntu-latest + strategy: + matrix: + python-version: ['3.9', '3.10', '3.11'] + + steps: + - uses: actions/checkout@v3 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + - name: Install dependencies + run: | + sudo apt-get update + python -m pip install --upgrade pip + pip install . 
+ pip install .[dev] + if [ -f requirements.txt ]; then pip install -r requirements.txt; fi + - name: List dependencies + run: | + pip list + - name: Run ABFS unit tests with pytest + run: | + python -m pytest tests --cloud abfs + env: + ABFS_LINCCDATA_ACCOUNT_NAME: ${{ secrets.LINCC_ABFS_ACCOUNT_NAME }} + ABFS_LINCCDATA_ACCOUNT_KEY: ${{ secrets.LINCC_ABFS_ACCOUNT_KEY }} diff --git a/.github/workflows/testing-and-coverage.yml b/.github/workflows/testing-and-coverage.yml new file mode 100644 index 0000000..9883178 --- /dev/null +++ b/.github/workflows/testing-and-coverage.yml @@ -0,0 +1,40 @@ +# This workflow will install Python dependencies, run tests with a variety of Python versions +# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions + +name: Unit test + +on: + push: + branches: [ main ] + pull_request: + branches: [ main ] + +jobs: + build: + + runs-on: ubuntu-latest + strategy: + matrix: + python-version: ['3.9', '3.10', '3.11'] + + steps: + - uses: actions/checkout@v3 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + - name: Install dependencies + run: | + sudo apt-get update + python -m pip install --upgrade pip + pip install . + pip install .[dev] + if [ -f requirements.txt ]; then pip install -r requirements.txt; fi + - name: Run ABFS unit tests with pytest + run: | + python -m pytest tests --cloud abfs + env: + ABFS_LINCCDATA_ACCOUNT_NAME: ${{ secrets.LINCC_ABFS_ACCOUNT_NAME }} + ABFS_LINCCDATA_ACCOUNT_KEY: ${{ secrets.LINCC_ABFS_ACCOUNT_KEY }} + + diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..1819b25 --- /dev/null +++ b/.gitignore @@ -0,0 +1,147 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +pip-wheel-metadata/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST +_version.py + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ +_readthedocs/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +.python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# PEP 582; used by e.g. 
github.com/David-OConnor/pyflow
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# vscode
+.vscode/
+
+# dask
+dask-worker-space/
+
+# tmp directory
+tmp/
+
+# Mac OS
+.DS_Store
+
+# Airspeed Velocity performance results
+_results/
+_html/
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
new file mode 100644
index 0000000..53ca2f5
--- /dev/null
+++ b/.pre-commit-config.yaml
@@ -0,0 +1,86 @@
+fail_fast: true
+repos:
+
+  # Compare the local template version to the latest remote template version
+  # This hook should always pass. It will print a message if the local version
+  # is out of date.
+  - repo: https://github.com/lincc-frameworks/pre-commit-hooks
+    rev: v0.1.1
+    hooks:
+      - id: check-lincc-frameworks-template-version
+        name: Check template version
+        description: Compare current template version against latest
+        verbose: true
+
+  # Prevents committing directly to branches named 'main' and 'master'.
+  - repo: https://github.com/pre-commit/pre-commit-hooks
+    rev: v4.4.0
+    hooks:
+      - id: no-commit-to-branch
+        name: Prevent main branch commits
+        description: Prevent the user from committing directly to the primary branch.
+      - id: check-added-large-files
+        name: Check for large files
+        description: Prevent the user from committing very large files.
+        args: ['--maxkb=500']
+
+  # Verify that pyproject.toml is well formed
+  - repo: https://github.com/abravalheri/validate-pyproject
+    rev: v0.12.1
+    hooks:
+      - id: validate-pyproject
+        name: Validate pyproject.toml
+        description: Verify that pyproject.toml adheres to the established schema.
+
+  # Automatically sort the imports used in .py files
+  - repo: https://github.com/pycqa/isort
+    rev: 5.12.0
+    hooks:
+      - id: isort
+        name: Run isort
+        description: Sort and organize imports in .py and .pyi files.
+        types_or: [python, pyi]
+
+  # Analyze the src code style and report code that doesn't adhere.
+  - repo: local
+    hooks:
+      - id: pylint
+        name: pylint (python files in src/)
+        entry: pylint
+        language: system
+        types: [python]
+        files: ^src/
+        args:
+          [
+            "-rn", # Only display messages
+            "-sn", # Don't display the score
+            "--rcfile=src/.pylintrc",
+          ]
+
+  # Analyze the tests code style and report code that doesn't adhere.
+  - repo: local
+    hooks:
+      - id: pylint
+        name: pylint (python files in tests/ and benchmarks/)
+        entry: pylint
+        language: system
+        types: [python]
+        files: ^(tests|benchmarks)/
+        args:
+          [
+            "-rn", # Only display messages
+            "-sn", # Don't display the score
+            "--rcfile=tests/.pylintrc",
+          ]
+
+  # Run unit tests, verify that they pass.
+  # `python -m pytest`
+  - repo: local
+    hooks:
+      - id: pytest-check
+        name: Run unit tests
+        description: Run unit tests with pytest.
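+        # The entry below collects tests quietly first (`pytest --co -qq`) and
+        # only runs the full suite when collection finds at least one test.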
+        entry: bash -c "if python -m pytest --co -qq; then python -m pytest; fi"
+        language: system
+        pass_filenames: false
+        always_run: true
diff --git a/LICENSE b/LICENSE
index 6c97b3a..aee3ea7 100644
--- a/LICENSE
+++ b/LICENSE
@@ -1,6 +1,6 @@
 BSD 3-Clause License
 
-Copyright (c) 2023, Astronomy Data Commons
+Copyright (c) 2023, LINCC Frameworks
 
 Redistribution and use in source and binary forms, with or without
 modification, are permitted provided that the following conditions are met:
diff --git a/README.md b/README.md
index 057fea8..1ea68dc 100644
--- a/README.md
+++ b/README.md
@@ -1,2 +1,105 @@
-# hipscat-cloudtests
-Cloud tests for hipscat and lsdb
+# hipscat_cloudtests
+
+[![Template](https://img.shields.io/badge/Template-LINCC%20Frameworks%20Python%20Project%20Template-brightgreen)](https://lincc-ppt.readthedocs.io/en/latest/)
+
+[![GitHub Workflow Status](https://img.shields.io/github/actions/workflow/status/astronomy-commons/hipscat_cloudtests/smoke-test.yml)](https://github.com/astronomy-commons/hipscat_cloudtests/actions/workflows/smoke-test.yml)
+[![benchmarks](https://img.shields.io/github/actions/workflow/status/astronomy-commons/hipscat_cloudtests/asv-main.yml?label=benchmarks)](https://astronomy-commons.github.io/hipscat_cloudtests/)
+
+Integration tests for cloud read and write through the HiPSCat and LSDB libraries.
+
+## Dev Guide - Getting Started
+
+Before installing any dependencies or writing code, it's a great idea to create a
+virtual environment. LINCC-Frameworks engineers primarily use `conda` to manage virtual
+environments. If you have conda installed locally, you can run the following to
+create and activate a new environment.
+
+```
+>> conda create -n <env_name> python=3.10
+>> conda activate <env_name>
+```
+
+Once you have created a new environment, you can install this project for local
+development using the following commands:
+
+```
+>> pip install -e .'[dev]'
+>> pre-commit install
+>> conda install pandoc
+```
+
+## Performing HiPSCat cloud tests locally
+
+The only cloud platform currently implemented is abfs. In order to run the tests,
+you will need to export the following environment variables in a command line:
+
+```bash
+export ABFS_LINCCDATA_ACCOUNT_NAME=lincc_account_name
+export ABFS_LINCCDATA_ACCOUNT_KEY=lincc_account_key
+```
+
+Then to run the tests:
+
+```bash
+pytest --cloud abfs
+```
+
+### How are we connecting to the cloud resources?
+
+Our entire I/O infrastructure is abstracted through the Python
+[fsspec](https://filesystem-spec.readthedocs.io/en/latest/index.html) library.
+All that needs to be provided is a valid protocol path, along with storage
+options for the cloud interface.
+
+## Adding tests for a new cloud interface protocol
+
+Several steps are required to run the tests against another cloud bucket provider (like s3 or gcs):
+
+1. You will have to create the container/bucket
+2. You will have to edit `tests/conftest.py` in multiple places:
+
+```python
+...
+#...line 38...
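+# The fixtures below branch on the value of the --cloud pytest option,
+# returning the fsspec protocol path and the matching storage options
+# for the selected cloud provider.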
+@pytest.fixture
+def example_cloud_path(cloud):
+    if cloud == "abfs":
+        return "abfs://hipscat/pytests/hipscat"
+
+    # your new addition
+    elif cloud == "new_protocol":
+        return "new_protocol://path/to/pytest/hipscat"
+
+    raise NotImplementedError("Cloud format not implemented for hipscat tests!")
+
+@pytest.fixture
+def example_cloud_storage_options(cloud):
+    if cloud == "abfs":
+        storage_options = {
+            "account_key" : os.environ.get("ABFS_LINCCDATA_ACCOUNT_KEY"),
+            "account_name" : os.environ.get("ABFS_LINCCDATA_ACCOUNT_NAME")
+        }
+        return storage_options
+
+    # your new addition
+    elif cloud == "new_protocol":
+        storage_options = {
+            "valid_storage_option_param1" : os.environ.get("NEW_PROTOCOL_PARAM1"),
+            "valid_storage_option_param2" : os.environ.get("NEW_PROTOCOL_PARAM2"),
+            ...
+        }
+        return storage_options
+
+    return {}
+```
+
+3. Finally, you will need to copy several `/tests/data/` directories into your newly
+   created bucket. This can be accomplished by running the `copy_data_to_fs.py` script.
+4. Before running the tests, you will need to export your `valid_storage_option_param` values into the environment.
+
+
+## Adding tests to the GitHub workflows
+
+1. TODO - enumerate these steps
+1. REPOSITORY secrets
+1. smoke_test.yml
+1. testing-and-coverage
diff --git a/benchmarks/__init__.py b/benchmarks/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/benchmarks/asv.conf.json b/benchmarks/asv.conf.json
new file mode 100644
index 0000000..70f7f55
--- /dev/null
+++ b/benchmarks/asv.conf.json
@@ -0,0 +1,77 @@
+{
+    // The version of the config file format. Do not change, unless
+    // you know what you are doing.
+    "version": 1,
+    // The name of the project being benchmarked.
+    "project": "hipscat_cloudtests",
+    // The project's homepage.
+    "project_url": "https://github.com/astronomy-commons/hipscat_cloudtests",
+    // The URL or local path of the source code repository for the
+    // project being benchmarked.
+    "repo": "..",
+    // List of branches to benchmark. If not provided, defaults to "master"
+    // (for git) or "tip" (for mercurial).
+    "branches": [
+        "HEAD"
+    ],
+    "build_command": [
+        "python -m build --wheel -o {build_cache_dir} {build_dir}"
+    ],
+    // The DVCS being used. If not set, it will be automatically
+    // determined from "repo" by looking at the protocol in the URL
+    // (if remote), or by looking for special directories, such as
+    // ".git" (if local).
+    "dvcs": "git",
+    // The tool to use to create environments. May be "conda",
+    // "virtualenv" or other value depending on the plugins in use.
+    // If missing or the empty string, the tool will be automatically
+    // determined by looking for tools on the PATH environment
+    // variable.
+    "environment_type": "virtualenv",
+    // the base URL to show a commit for the project.
+    "show_commit_url": "https://github.com/astronomy-commons/hipscat_cloudtests/commit",
+    // The Pythons you'd like to test against. If not provided, defaults
+    // to the current version of Python used to run `asv`.
+    "pythons": [
+        "3.10"
+    ],
+    // The matrix of dependencies to test. Each key is the name of a
+    // package (in PyPI) and the values are version numbers. An empty
+    // list indicates to just test against the default (latest)
+    // version.
+    "matrix": {
+        "Cython": [],
+        "build": [],
+        "packaging": []
+    },
+    // The directory (relative to the current directory) that benchmarks are
+    // stored in. If not provided, defaults to "benchmarks".
+    "benchmark_dir": ".",
+    // The directory (relative to the current directory) to cache the Python
+    // environments in. If not provided, defaults to "env".
+    "env_dir": "env",
+    // The directory (relative to the current directory) that raw benchmark
+    // results are stored in. If not provided, defaults to "results".
+    "results_dir": "_results",
+    // The directory (relative to the current directory) that the html tree
+    // should be written to. If not provided, defaults to "html".
+    "html_dir": "_html",
+    // The number of characters to retain in the commit hashes.
+    // "hash_length": 8,
+    // `asv` will cache wheels of the recent builds in each
+    // environment, making them faster to install next time. This is
+    // number of builds to keep, per environment.
+    "build_cache_size": 8
+    // The commits after which the regression search in `asv publish`
+    // should start looking for regressions. Dictionary whose keys are
+    // regexps matching to benchmark names, and values corresponding to
+    // the commit (exclusive) after which to start looking for
+    // regressions. The default is to start from the first commit
+    // with results. If the commit is `null`, regression detection is
+    // skipped for the matching benchmark.
+    //
+    // "regressions_first_commits": {
+    //    "some_benchmark": "352cdf", // Consider regressions only after this commit
+    //    "another_benchmark": null,  // Skip regression detection altogether
+    // }
+}
\ No newline at end of file
diff --git a/benchmarks/benchmarks.py b/benchmarks/benchmarks.py
new file mode 100644
index 0000000..27353f1
--- /dev/null
+++ b/benchmarks/benchmarks.py
@@ -0,0 +1,16 @@
+"""Two sample benchmarks to compute runtime and memory usage.
+
+For more information on writing benchmarks:
+https://asv.readthedocs.io/en/stable/writing_benchmarks.html."""
+
+from hipscat_cloudtests import example_benchmarks
+
+
+def time_computation():
+    """Time computations are prefixed with 'time'."""
+    example_benchmarks.runtime_computation()
+
+
+def mem_list():
+    """Memory computations are prefixed with 'mem' or 'peakmem'."""
+    return example_benchmarks.memory_computation()
diff --git a/copy_data_to_fs.py b/copy_data_to_fs.py
new file mode 100644
index 0000000..6232bdc
--- /dev/null
+++ b/copy_data_to_fs.py
@@ -0,0 +1,88 @@
+import os
+
+from hipscat.io.file_io.file_io import get_fs
+
+
+def copy_tree_fs_to_fs(
+    fs1_source: str,
+    fs2_destination: str,
+    storage_options1: dict = None,
+    storage_options2: dict = None,
+    verbose=False,
+):
+    """Recursively copies a directory from one filesystem to the other.
+
+    Args:
+        fs1_source: location of the source directory to copy
+        fs2_destination: location of the destination directory for fs1 contents to be written to
+        storage_options1: dictionary that contains abstract filesystem1 credentials
+        storage_options2: dictionary that contains abstract filesystem2 credentials
+    """
+
+    source_fs, source_fp = get_fs(fs1_source, storage_options=storage_options1)
+    destination_fs, destination_fp = get_fs(fs2_destination, storage_options=storage_options2)
+    copy_dir(source_fs, source_fp, destination_fs, destination_fp, verbose=verbose)
+
+
+def copy_dir(
+    source_fs,
+    source_fp,
+    destination_fs,
+    destination_fp,
+    verbose=False,
+    chunksize=1024 * 1024,
+):
+    """Recursive method to copy directories and their contents.
+
+    Args:
+        source_fs: fsspec.filesystem for the source directory contents
+        source_fp: source directory to copy content files
+        destination_fs: fsspec.filesystem for the destination directory
+        destination_fp: destination directory for copied contents
+    """
+    destination_folder = os.path.join(destination_fp, source_fp.split("/")[-1])
+    if destination_folder[-1] != "/":
+        destination_folder += "/"
+    if not destination_fs.exists(destination_folder):
+        if verbose:
+            print(f"Creating destination folder: {destination_folder}")
+        destination_fs.makedirs(destination_folder, exist_ok=True)
+
+    dir_contents = source_fs.listdir(source_fp)
+    files = [x for x in dir_contents if x["type"] == "file"]
+
+    for _file in files:
+        destination_fname = os.path.join(destination_folder, _file["name"].split("/")[-1])
+        if verbose:
+            print(f'Copying file {_file["name"]} to {destination_fname}')
+        with source_fs.open(_file["name"], "rb") as source_file:
+            with destination_fs.open(destination_fname, "wb") as destination_file:
+                while True:
+                    chunk = source_file.read(chunksize)
+                    if not chunk:
+                        break
+                    destination_file.write(chunk)
+
+    dirs = [x for x in dir_contents if x["type"] == "directory"]
+    for _dir in dirs:
+        copy_dir(
+            source_fs,
+            _dir["name"],
+            destination_fs,
+            destination_folder,
+            chunksize=chunksize,
+            verbose=verbose,
+        )
+
+
+if __name__ == "__main__":
+
+    source_pw = f"{os.getcwd()}/../tests/data"
+    target_pw = "abfs://hipscat/pytests/lsdb"
+
+    target_so = {
+        "account_name": "linccdata",
+        # Never hard-code credentials; read the account key from the environment.
+        "account_key": os.environ.get("ABFS_LINCCDATA_ACCOUNT_KEY"),
+    }
+    copy_tree_fs_to_fs(source_pw, target_pw, {}, target_so, verbose=True)
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 0000000..f8d2800
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,45 @@
+[project]
+name = "hipscat_cloudtests"
+license = {file = "LICENSE"}
+readme = "README.md"
+authors = [
+    { name = "LINCC Frameworks", email = "lincc-frameworks-team@lists.lsst.org" }
+]
+classifiers = [
+    "Development Status :: 4 - Beta",
+    "License :: OSI Approved :: BSD License",
+    "Intended Audience :: Developers",
+    "Intended Audience :: Science/Research",
+    "Operating System :: OS Independent",
+    "Programming Language :: Python",
+]
+dynamic = ["version"]
+dependencies = [
+    # lsdb, hipscat, and hipscat-import are built from source via requirements.txt
+    "adlfs",
+    "shortuuid",
+]
+
+# On a mac, install optional dependencies with `pip install '.[dev]'` (include the single quotes)
+[project.optional-dependencies]
+dev = [
+    "pytest",
+    "pre-commit", # Used to run checks before finalizing a git commit
+    "pylint", # Used for static linting of files
+    "asv==0.6.1", # Used to compute performance benchmarks
+]
+
+[build-system]
+requires = [
+    "setuptools>=62", # Used to build and package the Python project
+    "setuptools_scm>=6.2", # Gets release version from git.
Makes it available programmatically +] +build-backend = "setuptools.build_meta" + +[tool.black] +line-length = 110 +target-version = ["py38"] + +[tool.isort] +profile = "black" +line_length = 110 \ No newline at end of file diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..b3c81f2 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,3 @@ +git+https://github.com/astronomy-commons/hipscat.git@main +git+https://github.com/astronomy-commons/hipscat-import.git@main +git+https://github.com/astronomy-commons/lsdb.git@main \ No newline at end of file diff --git a/src/.pylintrc b/src/.pylintrc new file mode 100644 index 0000000..3fd6eeb --- /dev/null +++ b/src/.pylintrc @@ -0,0 +1,627 @@ +[MAIN] + +# Analyse import fallback blocks. This can be used to support both Python 2 and +# 3 compatible code, which means that the block might have code that exists +# only in one or another interpreter, leading to false positives when analysed. +analyse-fallback-blocks=no + +# Clear in-memory caches upon conclusion of linting. Useful if running pylint +# in a server-like mode. +clear-cache-post-run=no + +# Load and enable all available extensions. Use --list-extensions to see a list +# all available extensions. +#enable-all-extensions= + +# In error mode, messages with a category besides ERROR or FATAL are +# suppressed, and no reports are done by default. Error mode is compatible with +# disabling specific errors. +#errors-only= + +# Always return a 0 (non-error) status code, even if lint errors are found. +# This is primarily useful in continuous integration scripts. +#exit-zero= + +# A comma-separated list of package or module names from where C extensions may +# be loaded. Extensions are loading into the active Python interpreter and may +# run arbitrary code. +extension-pkg-allow-list= + +# A comma-separated list of package or module names from where C extensions may +# be loaded. Extensions are loading into the active Python interpreter and may +# run arbitrary code. (This is an alternative name to extension-pkg-allow-list +# for backward compatibility.) +extension-pkg-whitelist= + +# Return non-zero exit code if any of these messages/categories are detected, +# even if score is above --fail-under value. Syntax same as enable. Messages +# specified are enabled, while categories only check already-enabled messages. +fail-on= + +# Specify a score threshold under which the program will exit with error. +fail-under=10 + +# Interpret the stdin as a python script, whose filename needs to be passed as +# the module_or_package argument. +#from-stdin= + +# Files or directories to be skipped. They should be base names, not paths. +ignore=CVS + +# Add files or directories matching the regular expressions patterns to the +# ignore-list. The regex matches against paths and can be in Posix or Windows +# format. Because '\\' represents the directory delimiter on Windows systems, +# it can't be used as an escape character. +ignore-paths= + +# Files or directories matching the regular expression patterns are skipped. +# The regex matches against base names, not paths. +ignore-patterns=_version.py + +# List of module names for which member attributes should not be checked +# (useful for modules/projects where namespaces are manipulated during runtime +# and thus existing member attributes cannot be deduced by static analysis). It +# supports qualified module names, as well as Unix pattern matching. 
+ignored-modules= + +# Python code to execute, usually for sys.path manipulation such as +# pygtk.require(). +#init-hook= + +# Use multiple processes to speed up Pylint. Specifying 0 will auto-detect the +# number of processors available to use, and will cap the count on Windows to +# avoid hangs. +jobs=1 + +# Control the amount of potential inferred values when inferring a single +# object. This can help the performance when dealing with large functions or +# complex, nested conditions. +limit-inference-results=100 + +# List of plugins (as comma separated values of python module names) to load, +# usually to register additional checkers. +load-plugins= + +# Pickle collected data for later comparisons. +persistent=yes + +# Minimum Python version to use for version dependent checks. Will default to +# the version used to run pylint. +py-version=3.9 + +# Discover python modules and packages in the file system subtree. +recursive=no + +# When enabled, pylint would attempt to guess common misconfiguration and emit +# user-friendly hints instead of false-positive error messages. +suggestion-mode=yes + +# Allow loading of arbitrary C extensions. Extensions are imported into the +# active Python interpreter and may run arbitrary code. +unsafe-load-any-extension=no + +# In verbose mode, extra non-checker-related info will be displayed. +#verbose= + + +[BASIC] + +# Naming style matching correct argument names. +argument-naming-style=snake_case + +# Regular expression matching correct argument names. Overrides argument- +# naming-style. If left empty, argument names will be checked with the set +# naming style. +#argument-rgx= + +# Naming style matching correct attribute names. +attr-naming-style=snake_case + +# Regular expression matching correct attribute names. Overrides attr-naming- +# style. If left empty, attribute names will be checked with the set naming +# style. +#attr-rgx= + +# Bad variable names which should always be refused, separated by a comma. +bad-names=foo, + bar, + baz, + toto, + tutu, + tata + +# Bad variable names regexes, separated by a comma. If names match any regex, +# they will always be refused +bad-names-rgxs= + +# Naming style matching correct class attribute names. +class-attribute-naming-style=any + +# Regular expression matching correct class attribute names. Overrides class- +# attribute-naming-style. If left empty, class attribute names will be checked +# with the set naming style. +#class-attribute-rgx= + +# Naming style matching correct class constant names. +class-const-naming-style=UPPER_CASE + +# Regular expression matching correct class constant names. Overrides class- +# const-naming-style. If left empty, class constant names will be checked with +# the set naming style. +#class-const-rgx= + +# Naming style matching correct class names. +class-naming-style=PascalCase + +# Regular expression matching correct class names. Overrides class-naming- +# style. If left empty, class names will be checked with the set naming style. +#class-rgx= + +# Naming style matching correct constant names. +const-naming-style=UPPER_CASE + +# Regular expression matching correct constant names. Overrides const-naming- +# style. If left empty, constant names will be checked with the set naming +# style. +#const-rgx= + +# Minimum line length for functions/classes that require docstrings, shorter +# ones are exempt. +docstring-min-length=-1 + +# Naming style matching correct function names. +function-naming-style=snake_case + +# Regular expression matching correct function names. 
Overrides function- +# naming-style. If left empty, function names will be checked with the set +# naming style. +#function-rgx= + +# Good variable names which should always be accepted, separated by a comma. +good-names=i, + j, + k, + ex, + Run, + ra, + _ + +# Good variable names regexes, separated by a comma. If names match any regex, +# they will always be accepted +good-names-rgxs= + +# Include a hint for the correct naming format with invalid-name. +include-naming-hint=no + +# Naming style matching correct inline iteration names. +inlinevar-naming-style=any + +# Regular expression matching correct inline iteration names. Overrides +# inlinevar-naming-style. If left empty, inline iteration names will be checked +# with the set naming style. +#inlinevar-rgx= + +# Naming style matching correct method names. +method-naming-style=snake_case + +# Regular expression matching correct method names. Overrides method-naming- +# style. If left empty, method names will be checked with the set naming style. +#method-rgx= + +# Naming style matching correct module names. +module-naming-style=snake_case + +# Regular expression matching correct module names. Overrides module-naming- +# style. If left empty, module names will be checked with the set naming style. +#module-rgx= + +# Colon-delimited sets of names that determine each other's naming style when +# the name regexes allow several styles. +name-group= + +# Regular expression which should only match function or class names that do +# not require a docstring. +no-docstring-rgx=^_ + +# List of decorators that produce properties, such as abc.abstractproperty. Add +# to this list to register other decorators that produce valid properties. +# These decorators are taken in consideration only for invalid-name. +property-classes=abc.abstractproperty + +# Regular expression matching correct type variable names. If left empty, type +# variable names will be checked with the set naming style. +#typevar-rgx= + +# Naming style matching correct variable names. +variable-naming-style=snake_case + +# Regular expression matching correct variable names. Overrides variable- +# naming-style. If left empty, variable names will be checked with the set +# naming style. +#variable-rgx= + + +[CLASSES] + +# Warn about protected attribute access inside special methods +check-protected-access-in-special-methods=no + +# List of method names used to declare (i.e. assign) instance attributes. +defining-attr-methods=__init__, + __new__, + setUp, + __post_init__ + +# List of member names, which should be excluded from the protected access +# warning. +exclude-protected=_asdict, + _fields, + _replace, + _source, + _make + +# List of valid names for the first argument in a class method. +valid-classmethod-first-arg=cls + +# List of valid names for the first argument in a metaclass class method. +valid-metaclass-classmethod-first-arg=mcs + + +[DESIGN] + +# List of regular expressions of class ancestor names to ignore when counting +# public methods (see R0903) +exclude-too-few-public-methods= + +# List of qualified class names to ignore when counting class parents (see +# R0901) +ignored-parents= + +# Maximum number of arguments for function / method. +max-args=10 + +# Maximum number of attributes for a class (see R0902). +max-attributes=20 + +# Maximum number of boolean expressions in an if statement (see R0916). +max-bool-expr=5 + +# Maximum number of branch for function / method body. +max-branches=20 + +# Maximum number of locals for function / method body. 
+max-locals=20
+
+# Maximum number of parents for a class (see R0901).
+max-parents=7
+
+# Maximum number of public methods for a class (see R0904).
+max-public-methods=20
+
+# Maximum number of return / yield for function / method body.
+max-returns=6
+
+# Maximum number of statements in function / method body.
+max-statements=50
+
+# Minimum number of public methods for a class (see R0903).
+min-public-methods=2
+
+
+[EXCEPTIONS]
+
+# Exceptions that will emit a warning when caught.
+overgeneral-exceptions=builtins.BaseException,builtins.Exception
+
+
+[FORMAT]
+
+# Expected format of line ending, e.g. empty (any line ending), LF or CRLF.
+expected-line-ending-format=
+
+# Regexp for a line that is allowed to be longer than the limit.
+ignore-long-lines=^\s*(# )?<?https?://\S+>?$
+
+# Number of spaces of indent required inside a hanging or continued line.
+indent-after-paren=4
+
+# String used as indentation unit. This is usually "    " (4 spaces) or "\t" (1
+# tab).
+indent-string='    '
+
+# Maximum number of characters on a single line.
+max-line-length=110
+
+# Maximum number of lines in a module.
+max-module-lines=1000
+
+# Allow the body of a class to be on the same line as the declaration if body
+# contains single statement.
+single-line-class-stmt=no
+
+# Allow the body of an if to be on the same line as the test if there is no
+# else.
+single-line-if-stmt=no
+
+
+[IMPORTS]
+
+# List of modules that can be imported at any level, not just the top level
+# one.
+allow-any-import-level=
+
+# Allow explicit reexports by alias from a package __init__.
+allow-reexport-from-package=no
+
+# Allow wildcard imports from modules that define __all__.
+allow-wildcard-with-all=no
+
+# Deprecated modules which should not be used, separated by a comma.
+deprecated-modules=
+
+# Output a graph (.gv or any supported image format) of external dependencies
+# to the given file (report RP0402 must not be disabled).
+ext-import-graph=
+
+# Output a graph (.gv or any supported image format) of all (i.e. internal and
+# external) dependencies to the given file (report RP0402 must not be
+# disabled).
+import-graph=
+
+# Output a graph (.gv or any supported image format) of internal dependencies
+# to the given file (report RP0402 must not be disabled).
+int-import-graph=
+
+# Force import order to recognize a module as part of the standard
+# compatibility libraries.
+known-standard-library=
+
+# Force import order to recognize a module as part of a third party library.
+known-third-party=enchant
+
+# Couples of modules and preferred modules, separated by a comma.
+preferred-modules=
+
+
+[LOGGING]
+
+# The type of string formatting that logging methods do. `old` means using %
+# formatting, `new` is for `{}` formatting.
+logging-format-style=old
+
+# Logging modules to check that the string format arguments are in logging
+# function parameter format.
+logging-modules=logging
+
+
+[MESSAGES CONTROL]
+
+# Only show warnings with the listed confidence levels. Leave empty to show
+# all. Valid levels: HIGH, CONTROL_FLOW, INFERENCE, INFERENCE_FAILURE,
+# UNDEFINED.
+confidence=HIGH,
+           CONTROL_FLOW,
+           INFERENCE,
+           INFERENCE_FAILURE,
+           UNDEFINED
+
+# Disable the message, report, category or checker with the given id(s). You
+# can either give multiple identifiers separated by comma (,) or put this
+# option multiple times (only on the command line, not in the configuration
+# file where it should appear only once). You can also use "--disable=all" to
+# disable everything first and then re-enable specific checks.
For example, if +# you want to run only the similarities checker, you can use "--disable=all +# --enable=similarities". If you want to run only the classes checker, but have +# no Warning level messages displayed, use "--disable=all --enable=classes +# --disable=W". +disable=raw-checker-failed, + bad-inline-option, + locally-disabled, + file-ignored, + suppressed-message, + useless-suppression, + deprecated-pragma, + use-symbolic-message-instead, + missing-module-docstring, + unnecessary-pass, + + +# Enable the message, report, category or checker with the given id(s). You can +# either give multiple identifier separated by comma (,) or put this option +# multiple time (only on the command line, not in the configuration file where +# it should appear only once). See also the "--disable" option for examples. +enable=c-extension-no-member + + +[METHOD_ARGS] + +# List of qualified names (i.e., library.method) which require a timeout +# parameter e.g. 'requests.api.get,requests.api.post' +timeout-methods=requests.api.delete,requests.api.get,requests.api.head,requests.api.options,requests.api.patch,requests.api.post,requests.api.put,requests.api.request + + +[MISCELLANEOUS] + +# List of note tags to take in consideration, separated by a comma. +notes=FIXME, + XXX, + TODO + +# Regular expression of note tags to take in consideration. +notes-rgx= + + +[REFACTORING] + +# Maximum number of nested blocks for function / method body +max-nested-blocks=5 + +# Complete name of functions that never returns. When checking for +# inconsistent-return-statements if a never returning function is called then +# it will be considered as an explicit return statement and no message will be +# printed. +never-returning-functions=sys.exit,argparse.parse_error + + +[REPORTS] + +# Python expression which should return a score less than or equal to 10. You +# have access to the variables 'fatal', 'error', 'warning', 'refactor', +# 'convention', and 'info' which contain the number of messages in each +# category, as well as 'statement' which is the total number of statements +# analyzed. This score is used by the global evaluation report (RP0004). +evaluation=max(0, 0 if fatal else 10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10)) + +# Template used to display messages. This is a python new-style format string +# used to format the message information. See doc for all details. +msg-template= + +# Set the output format. Available formats are text, parseable, colorized, json +# and msvs (visual studio). You can also give a reporter class, e.g. +# mypackage.mymodule.MyReporterClass. +#output-format= + +# Tells whether to display a full report or only the messages. +reports=no + +# Activate the evaluation score. +score=yes + + +[SIMILARITIES] + +# Comments are removed from the similarity computation +ignore-comments=yes + +# Docstrings are removed from the similarity computation +ignore-docstrings=yes + +# Imports are removed from the similarity computation +ignore-imports=yes + +# Signatures are removed from the similarity computation +ignore-signatures=yes + +# Minimum lines number of a similarity. +min-similarity-lines=6 + + +[SPELLING] + +# Limits count of emitted suggestions for spelling mistakes. +max-spelling-suggestions=4 + +# Spelling dictionary name. Available dictionaries: none. To make it work, +# install the 'python-enchant' package. 
+spelling-dict= + +# List of comma separated words that should be considered directives if they +# appear at the beginning of a comment and should not be checked. +spelling-ignore-comment-directives=fmt: on,fmt: off,noqa:,noqa,nosec,isort:skip,mypy: + +# List of comma separated words that should not be checked. +spelling-ignore-words= + +# A path to a file that contains the private dictionary; one word per line. +spelling-private-dict-file= + +# Tells whether to store unknown words to the private dictionary (see the +# --spelling-private-dict-file option) instead of raising a message. +spelling-store-unknown-words=no + + +[STRING] + +# This flag controls whether inconsistent-quotes generates a warning when the +# character used as a quote delimiter is used inconsistently within a module. +check-quote-consistency=no + +# This flag controls whether the implicit-str-concat should generate a warning +# on implicit string concatenation in sequences defined over several lines. +check-str-concat-over-line-jumps=no + + +[TYPECHECK] + +# List of decorators that produce context managers, such as +# contextlib.contextmanager. Add to this list to register other decorators that +# produce valid context managers. +contextmanager-decorators=contextlib.contextmanager + +# List of members which are set dynamically and missed by pylint inference +# system, and so shouldn't trigger E1101 when accessed. Python regular +# expressions are accepted. +generated-members= + +# Tells whether to warn about missing members when the owner of the attribute +# is inferred to be None. +ignore-none=yes + +# This flag controls whether pylint should warn about no-member and similar +# checks whenever an opaque object is returned when inferring. The inference +# can return multiple potential results while evaluating a Python object, but +# some branches might not be evaluated, which results in partial inference. In +# that case, it might be useful to still emit no-member and other checks for +# the rest of the inferred objects. +ignore-on-opaque-inference=yes + +# List of symbolic message names to ignore for Mixin members. +ignored-checks-for-mixins=no-member, + not-async-context-manager, + not-context-manager, + attribute-defined-outside-init + +# List of class names for which member attributes should not be checked (useful +# for classes with dynamically set attributes). This supports the use of +# qualified names. +ignored-classes=optparse.Values,thread._local,_thread._local,argparse.Namespace + +# Show a hint with possible names when a member name was not found. The aspect +# of finding the hint is based on edit distance. +missing-member-hint=yes + +# The minimum edit distance a name should have in order to be considered a +# similar match for a missing member name. +missing-member-hint-distance=1 + +# The total number of similar names that should be taken in consideration when +# showing a hint for a missing member. +missing-member-max-choices=1 + +# Regex pattern to define which classes are considered mixins. +mixin-class-rgx=.*[Mm]ixin + +# List of decorators that change the signature of a decorated function. +signature-mutators= + + +[VARIABLES] + +# List of additional names supposed to be defined in builtins. Remember that +# you should avoid defining new builtins when possible. +additional-builtins= + +# Tells whether unused global variables should be treated as a violation. 
+allow-global-unused-variables=yes
+
+# List of names allowed to shadow builtins
+allowed-redefined-builtins=
+
+# List of strings which can identify a callback function by name. A callback
+# name must start or end with one of those strings.
+callbacks=cb_,
+          _cb
+
+# A regular expression matching the name of dummy variables (i.e. expected to
+# not be used).
+dummy-variables-rgx=_+$|(_[a-zA-Z0-9_]*[a-zA-Z0-9]+?$)|dummy|^ignored_|^unused_
+
+# Argument names that match this expression will be ignored.
+ignored-argument-names=_.*|^ignored_|^unused_
+
+# Tells whether we should check for unused import in __init__ files.
+init-import=no
+
+# List of qualified module names which can have objects that can redefine
+# builtins.
+redefining-builtins-modules=six.moves,past.builtins,future.builtins,builtins,io
diff --git a/src/hipscat_cloudtests/__init__.py b/src/hipscat_cloudtests/__init__.py
new file mode 100644
index 0000000..c8236d7
--- /dev/null
+++ b/src/hipscat_cloudtests/__init__.py
@@ -0,0 +1,5 @@
+from .example_module import greetings, meaning
+from .file_checks import assert_text_file_matches
+from .temp_cloud_directory import TempCloudDirectory
+
+__all__ = ["greetings", "meaning", "assert_text_file_matches", "TempCloudDirectory"]
diff --git a/src/hipscat_cloudtests/example_benchmarks.py b/src/hipscat_cloudtests/example_benchmarks.py
new file mode 100644
index 0000000..5a77b06
--- /dev/null
+++ b/src/hipscat_cloudtests/example_benchmarks.py
@@ -0,0 +1,14 @@
+"""An example module containing simplistic methods under benchmarking."""
+
+import random
+import time
+
+
+def runtime_computation():
+    """Runtime computation consuming between 0 and 5 seconds."""
+    time.sleep(random.uniform(0, 5))
+
+
+def memory_computation():
+    """Memory computation for a random list up to 512 samples."""
+    return [0] * random.randint(0, 512)
diff --git a/src/hipscat_cloudtests/example_module.py b/src/hipscat_cloudtests/example_module.py
new file mode 100644
index 0000000..f76e837
--- /dev/null
+++ b/src/hipscat_cloudtests/example_module.py
@@ -0,0 +1,23 @@
+"""An example module containing simplistic functions."""
+
+
+def greetings() -> str:
+    """A friendly greeting for a future friend.
+
+    Returns
+    -------
+    str
+        A typical greeting from a software engineer.
+    """
+    return "Hello from LINCC-Frameworks!"
+
+
+def meaning() -> int:
+    """The meaning of life, the universe, and everything.
+
+    Returns
+    -------
+    int
+        The meaning of life.
+    """
+    return 42
diff --git a/src/hipscat_cloudtests/file_checks.py b/src/hipscat_cloudtests/file_checks.py
new file mode 100644
index 0000000..7db867e
--- /dev/null
+++ b/src/hipscat_cloudtests/file_checks.py
@@ -0,0 +1,37 @@
+"""Set of convenience methods for testing file contents"""
+
+import re
+
+from hipscat.io.file_io.file_io import load_text_file
+from hipscat.io.file_io.file_pointer import does_file_or_directory_exist
+
+
+def assert_text_file_matches(expected_lines, file_name, storage_options: dict = None):
+    """Convenience method to read a text file and compare the contents, line for line.
+
+    When file contents get even a little bit big, it can be difficult to see
+    the difference between an actual file and the expected contents without
+    increased testing verbosity. This helper compares files line-by-line,
+    using the provided strings or regular expressions.
+
+    Notes:
+        Because we check strings as regular expressions, you may need to escape some
+        contents of `expected_lines`.
+
+    Args:
+        expected_lines (:obj:`string array`): list of strings, formatted as regular expressions
+        file_name (str): fully-specified path of the file to read
+        storage_options (dict): dictionary of filesystem storage options
+    """
+    assert does_file_or_directory_exist(
+        file_name, storage_options=storage_options
+    ), f"file not found [{file_name}]"
+    contents = load_text_file(file_name, storage_options=storage_options)
+
+    assert len(expected_lines) == len(
+        contents
+    ), f"files not the same length ({len(contents)} vs {len(expected_lines)})"
+    for i, expected in enumerate(expected_lines):
+        assert re.match(expected, contents[i]), (
+            f"files do not match at line {i+1} " f"(actual: [{contents[i]}] vs expected: [{expected}])"
+        )
diff --git a/src/hipscat_cloudtests/temp_cloud_directory.py b/src/hipscat_cloudtests/temp_cloud_directory.py
new file mode 100644
index 0000000..044f788
--- /dev/null
+++ b/src/hipscat_cloudtests/temp_cloud_directory.py
@@ -0,0 +1,45 @@
+"""Testing utility class to create a temporary directory that's local
+to some unit test execution."""
+
+import os
+
+import shortuuid
+from hipscat.io.file_io import file_io
+
+
+class TempCloudDirectory:
+    """Simple context manager that creates a unique temporary directory
+    path for a single testing context.
+
+    On exit, we will recursively remove the created directory."""
+
+    def __init__(self, prefix_path, method_name="", storage_options: dict = None):
+        """Create a new context manager.
+
+        This will NOT create the new temp path - that happens when we enter the context.
+
+        Args:
+            prefix_path (str): base path to the cloud resource
+            method_name (str): optional token to indicate the method under test
+            storage_options (dict): dictionary that contains abstract filesystem credentials
+        """
+        self.prefix_path = prefix_path
+        self.method_name = method_name
+        self.storage_options = storage_options
+        self.temp_path = ""
+
+    def __enter__(self):
+        """Create a new temporary path
+
+        Returns:
+            string path that's been created. It will take the form of
+            `<prefix_path>/<method_name><uuid>`
+        """
+        my_uuid = shortuuid.uuid()
+        self.temp_path = os.path.join(self.prefix_path, f"{self.method_name}{my_uuid}")
+        return self.temp_path
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        """Recursively delete the created resources."""
+        if self.temp_path:
+            file_io.remove_directory(self.temp_path, ignore_errors=True, storage_options=self.storage_options)
diff --git a/tests/.pylintrc b/tests/.pylintrc
new file mode 100644
index 0000000..4f41ec3
--- /dev/null
+++ b/tests/.pylintrc
@@ -0,0 +1,628 @@
+[MAIN]
+
+# Analyse import fallback blocks. This can be used to support both Python 2 and
+# 3 compatible code, which means that the block might have code that exists
+# only in one or another interpreter, leading to false positives when analysed.
+analyse-fallback-blocks=no
+
+# Clear in-memory caches upon conclusion of linting. Useful if running pylint
+# in a server-like mode.
+clear-cache-post-run=no
+
+# Load and enable all available extensions. Use --list-extensions to see a list
+# all available extensions.
+#enable-all-extensions=
+
+# In error mode, messages with a category besides ERROR or FATAL are
+# suppressed, and no reports are done by default. Error mode is compatible with
+# disabling specific errors.
+#errors-only=
+
+# Always return a 0 (non-error) status code, even if lint errors are found.
+# This is primarily useful in continuous integration scripts.
+#exit-zero=
+
+# A comma-separated list of package or module names from where C extensions may
+# be loaded. Extensions are loading into the active Python interpreter and may
+# run arbitrary code.
+extension-pkg-allow-list= + +# A comma-separated list of package or module names from where C extensions may +# be loaded. Extensions are loading into the active Python interpreter and may +# run arbitrary code. (This is an alternative name to extension-pkg-allow-list +# for backward compatibility.) +extension-pkg-whitelist= + +# Return non-zero exit code if any of these messages/categories are detected, +# even if score is above --fail-under value. Syntax same as enable. Messages +# specified are enabled, while categories only check already-enabled messages. +fail-on= + +# Specify a score threshold under which the program will exit with error. +fail-under=10 + +# Interpret the stdin as a python script, whose filename needs to be passed as +# the module_or_package argument. +#from-stdin= + +# Files or directories to be skipped. They should be base names, not paths. +ignore=CVS + +# Add files or directories matching the regular expressions patterns to the +# ignore-list. The regex matches against paths and can be in Posix or Windows +# format. Because '\\' represents the directory delimiter on Windows systems, +# it can't be used as an escape character. +ignore-paths= + +# Files or directories matching the regular expression patterns are skipped. +# The regex matches against base names, not paths. +ignore-patterns=_version.py + +# List of module names for which member attributes should not be checked +# (useful for modules/projects where namespaces are manipulated during runtime +# and thus existing member attributes cannot be deduced by static analysis). It +# supports qualified module names, as well as Unix pattern matching. +ignored-modules= + +# Python code to execute, usually for sys.path manipulation such as +# pygtk.require(). +#init-hook= + +# Use multiple processes to speed up Pylint. Specifying 0 will auto-detect the +# number of processors available to use, and will cap the count on Windows to +# avoid hangs. +jobs=1 + +# Control the amount of potential inferred values when inferring a single +# object. This can help the performance when dealing with large functions or +# complex, nested conditions. +limit-inference-results=100 + +# List of plugins (as comma separated values of python module names) to load, +# usually to register additional checkers. +load-plugins= + +# Pickle collected data for later comparisons. +persistent=yes + +# Minimum Python version to use for version dependent checks. Will default to +# the version used to run pylint. +py-version=3.9 + +# Discover python modules and packages in the file system subtree. +recursive=no + +# When enabled, pylint would attempt to guess common misconfiguration and emit +# user-friendly hints instead of false-positive error messages. +suggestion-mode=yes + +# Allow loading of arbitrary C extensions. Extensions are imported into the +# active Python interpreter and may run arbitrary code. +unsafe-load-any-extension=no + +# In verbose mode, extra non-checker-related info will be displayed. +#verbose= + + +[BASIC] + +# Naming style matching correct argument names. +argument-naming-style=snake_case + +# Regular expression matching correct argument names. Overrides argument- +# naming-style. If left empty, argument names will be checked with the set +# naming style. +#argument-rgx= + +# Naming style matching correct attribute names. +attr-naming-style=snake_case + +# Regular expression matching correct attribute names. Overrides attr-naming- +# style. If left empty, attribute names will be checked with the set naming +# style. 
+#attr-rgx= + +# Bad variable names which should always be refused, separated by a comma. +bad-names=foo, + bar, + baz, + toto, + tutu, + tata + +# Bad variable names regexes, separated by a comma. If names match any regex, +# they will always be refused +bad-names-rgxs= + +# Naming style matching correct class attribute names. +class-attribute-naming-style=any + +# Regular expression matching correct class attribute names. Overrides class- +# attribute-naming-style. If left empty, class attribute names will be checked +# with the set naming style. +#class-attribute-rgx= + +# Naming style matching correct class constant names. +class-const-naming-style=UPPER_CASE + +# Regular expression matching correct class constant names. Overrides class- +# const-naming-style. If left empty, class constant names will be checked with +# the set naming style. +#class-const-rgx= + +# Naming style matching correct class names. +class-naming-style=PascalCase + +# Regular expression matching correct class names. Overrides class-naming- +# style. If left empty, class names will be checked with the set naming style. +#class-rgx= + +# Naming style matching correct constant names. +const-naming-style=UPPER_CASE + +# Regular expression matching correct constant names. Overrides const-naming- +# style. If left empty, constant names will be checked with the set naming +# style. +#const-rgx= + +# Minimum line length for functions/classes that require docstrings, shorter +# ones are exempt. +docstring-min-length=-1 + +# Naming style matching correct function names. +function-naming-style=snake_case + +# Regular expression matching correct function names. Overrides function- +# naming-style. If left empty, function names will be checked with the set +# naming style. +#function-rgx= + +# Good variable names which should always be accepted, separated by a comma. +good-names=i, + j, + k, + ex, + Run, + ra, + _ + +# Good variable names regexes, separated by a comma. If names match any regex, +# they will always be accepted +good-names-rgxs= + +# Include a hint for the correct naming format with invalid-name. +include-naming-hint=no + +# Naming style matching correct inline iteration names. +inlinevar-naming-style=any + +# Regular expression matching correct inline iteration names. Overrides +# inlinevar-naming-style. If left empty, inline iteration names will be checked +# with the set naming style. +#inlinevar-rgx= + +# Naming style matching correct method names. +method-naming-style=snake_case + +# Regular expression matching correct method names. Overrides method-naming- +# style. If left empty, method names will be checked with the set naming style. +#method-rgx= + +# Naming style matching correct module names. +module-naming-style=snake_case + +# Regular expression matching correct module names. Overrides module-naming- +# style. If left empty, module names will be checked with the set naming style. +#module-rgx= + +# Colon-delimited sets of names that determine each other's naming style when +# the name regexes allow several styles. +name-group= + +# Regular expression which should only match function or class names that do +# not require a docstring. +no-docstring-rgx=^_ + +# List of decorators that produce properties, such as abc.abstractproperty. Add +# to this list to register other decorators that produce valid properties. +# These decorators are taken in consideration only for invalid-name. +property-classes=abc.abstractproperty + +# Regular expression matching correct type variable names. 
If left empty, type +# variable names will be checked with the set naming style. +#typevar-rgx= + +# Naming style matching correct variable names. +variable-naming-style=snake_case + +# Regular expression matching correct variable names. Overrides variable- +# naming-style. If left empty, variable names will be checked with the set +# naming style. +#variable-rgx= + + +[CLASSES] + +# Warn about protected attribute access inside special methods +check-protected-access-in-special-methods=no + +# List of method names used to declare (i.e. assign) instance attributes. +defining-attr-methods=__init__, + __new__, + setUp, + __post_init__ + +# List of member names, which should be excluded from the protected access +# warning. +exclude-protected=_asdict, + _fields, + _replace, + _source, + _make + +# List of valid names for the first argument in a class method. +valid-classmethod-first-arg=cls + +# List of valid names for the first argument in a metaclass class method. +valid-metaclass-classmethod-first-arg=mcs + + +[DESIGN] + +# List of regular expressions of class ancestor names to ignore when counting +# public methods (see R0903) +exclude-too-few-public-methods= + +# List of qualified class names to ignore when counting class parents (see +# R0901) +ignored-parents= + +# Maximum number of arguments for function / method. +max-args=10 + +# Maximum number of attributes for a class (see R0902). +max-attributes=20 + +# Maximum number of boolean expressions in an if statement (see R0916). +max-bool-expr=5 + +# Maximum number of branch for function / method body. +max-branches=20 + +# Maximum number of locals for function / method body. +max-locals=30 + +# Maximum number of parents for a class (see R0901). +max-parents=7 + +# Maximum number of public methods for a class (see R0904). +max-public-methods=20 + +# Maximum number of return / yield for function / method body. +max-returns=6 + +# Maximum number of statements in function / method body. +max-statements=50 + +# Minimum number of public methods for a class (see R0903). +min-public-methods=2 + + +[EXCEPTIONS] + +# Exceptions that will emit a warning when caught. +overgeneral-exceptions=builtins.BaseException,builtins.Exception + + +[FORMAT] + +# Expected format of line ending, e.g. empty (any line ending), LF or CRLF. +expected-line-ending-format= + +# Regexp for a line that is allowed to be longer than the limit. +ignore-long-lines=^\s*(# )??$ + +# Number of spaces of indent required inside a hanging or continued line. +indent-after-paren=4 + +# String used as indentation unit. This is usually " " (4 spaces) or "\t" (1 +# tab). +indent-string=' ' + +# Maximum number of characters on a single line. +max-line-length=110 + +# Maximum number of lines in a module. +max-module-lines=1000 + +# Allow the body of a class to be on the same line as the declaration if body +# contains single statement. +single-line-class-stmt=no + +# Allow the body of an if to be on the same line as the test if there is no +# else. +single-line-if-stmt=no + + +[IMPORTS] + +# List of modules that can be imported at any level, not just the top level +# one. +allow-any-import-level= + +# Allow explicit reexports by alias from a package __init__. +allow-reexport-from-package=no + +# Allow wildcard imports from modules that define __all__. +allow-wildcard-with-all=no + +# Deprecated modules which should not be used, separated by a comma. 
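+# (Illustrative values only; this template leaves the list empty.)
+# For example: deprecated-modules=optparse,tkinter.tix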
+deprecated-modules= + +# Output a graph (.gv or any supported image format) of external dependencies +# to the given file (report RP0402 must not be disabled). +ext-import-graph= + +# Output a graph (.gv or any supported image format) of all (i.e. internal and +# external) dependencies to the given file (report RP0402 must not be +# disabled). +import-graph= + +# Output a graph (.gv or any supported image format) of internal dependencies +# to the given file (report RP0402 must not be disabled). +int-import-graph= + +# Force import order to recognize a module as part of the standard +# compatibility libraries. +known-standard-library= + +# Force import order to recognize a module as part of a third party library. +known-third-party=enchant + +# Couples of modules and preferred modules, separated by a comma. +preferred-modules= + + +[LOGGING] + +# The type of string formatting that logging methods do. `old` means using % +# formatting, `new` is for `{}` formatting. +logging-format-style=old + +# Logging modules to check that the string format arguments are in logging +# function parameter format. +logging-modules=logging + + +[MESSAGES CONTROL] + +# Only show warnings with the listed confidence levels. Leave empty to show +# all. Valid levels: HIGH, CONTROL_FLOW, INFERENCE, INFERENCE_FAILURE, +# UNDEFINED. +confidence=HIGH, + CONTROL_FLOW, + INFERENCE, + INFERENCE_FAILURE, + UNDEFINED + +# Disable the message, report, category or checker with the given id(s). You +# can either give multiple identifiers separated by comma (,) or put this +# option multiple times (only on the command line, not in the configuration +# file where it should appear only once). You can also use "--disable=all" to +# disable everything first and then re-enable specific checks. For example, if +# you want to run only the similarities checker, you can use "--disable=all +# --enable=similarities". If you want to run only the classes checker, but have +# no Warning level messages displayed, use "--disable=all --enable=classes +# --disable=W". +disable=raw-checker-failed, + bad-inline-option, + locally-disabled, + file-ignored, + suppressed-message, + useless-suppression, + deprecated-pragma, + use-symbolic-message-instead, + missing-function-docstring, + redefined-outer-name, + protected-access, + missing-module-docstring, + +# Enable the message, report, category or checker with the given id(s). You can +# either give multiple identifier separated by comma (,) or put this option +# multiple time (only on the command line, not in the configuration file where +# it should appear only once). See also the "--disable" option for examples. +enable=c-extension-no-member + + +[METHOD_ARGS] + +# List of qualified names (i.e., library.method) which require a timeout +# parameter e.g. 'requests.api.get,requests.api.post' +timeout-methods=requests.api.delete,requests.api.get,requests.api.head,requests.api.options,requests.api.patch,requests.api.post,requests.api.put,requests.api.request + + +[MISCELLANEOUS] + +# List of note tags to take in consideration, separated by a comma. +notes=FIXME, + XXX, + TODO + +# Regular expression of note tags to take in consideration. +notes-rgx= + + +[REFACTORING] + +# Maximum number of nested blocks for function / method body +max-nested-blocks=5 + +# Complete name of functions that never returns. When checking for +# inconsistent-return-statements if a never returning function is called then +# it will be considered as an explicit return statement and no message will be +# printed. 
+never-returning-functions=sys.exit,argparse.parse_error + + +[REPORTS] + +# Python expression which should return a score less than or equal to 10. You +# have access to the variables 'fatal', 'error', 'warning', 'refactor', +# 'convention', and 'info' which contain the number of messages in each +# category, as well as 'statement' which is the total number of statements +# analyzed. This score is used by the global evaluation report (RP0004). +evaluation=max(0, 0 if fatal else 10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10)) + +# Template used to display messages. This is a python new-style format string +# used to format the message information. See doc for all details. +msg-template= + +# Set the output format. Available formats are text, parseable, colorized, json +# and msvs (visual studio). You can also give a reporter class, e.g. +# mypackage.mymodule.MyReporterClass. +#output-format= + +# Tells whether to display a full report or only the messages. +reports=no + +# Activate the evaluation score. +score=yes + + +[SIMILARITIES] + +# Comments are removed from the similarity computation +ignore-comments=yes + +# Docstrings are removed from the similarity computation +ignore-docstrings=yes + +# Imports are removed from the similarity computation +ignore-imports=yes + +# Signatures are removed from the similarity computation +ignore-signatures=yes + +# Minimum lines number of a similarity. +min-similarity-lines=6 + + +[SPELLING] + +# Limits count of emitted suggestions for spelling mistakes. +max-spelling-suggestions=4 + +# Spelling dictionary name. Available dictionaries: none. To make it work, +# install the 'python-enchant' package. +spelling-dict= + +# List of comma separated words that should be considered directives if they +# appear at the beginning of a comment and should not be checked. +spelling-ignore-comment-directives=fmt: on,fmt: off,noqa:,noqa,nosec,isort:skip,mypy: + +# List of comma separated words that should not be checked. +spelling-ignore-words= + +# A path to a file that contains the private dictionary; one word per line. +spelling-private-dict-file= + +# Tells whether to store unknown words to the private dictionary (see the +# --spelling-private-dict-file option) instead of raising a message. +spelling-store-unknown-words=no + + +[STRING] + +# This flag controls whether inconsistent-quotes generates a warning when the +# character used as a quote delimiter is used inconsistently within a module. +check-quote-consistency=no + +# This flag controls whether the implicit-str-concat should generate a warning +# on implicit string concatenation in sequences defined over several lines. +check-str-concat-over-line-jumps=no + + +[TYPECHECK] + +# List of decorators that produce context managers, such as +# contextlib.contextmanager. Add to this list to register other decorators that +# produce valid context managers. +contextmanager-decorators=contextlib.contextmanager + +# List of members which are set dynamically and missed by pylint inference +# system, and so shouldn't trigger E1101 when accessed. Python regular +# expressions are accepted. +generated-members= + +# Tells whether to warn about missing members when the owner of the attribute +# is inferred to be None. +ignore-none=yes + +# This flag controls whether pylint should warn about no-member and similar +# checks whenever an opaque object is returned when inferring. 
The inference +# can return multiple potential results while evaluating a Python object, but +# some branches might not be evaluated, which results in partial inference. In +# that case, it might be useful to still emit no-member and other checks for +# the rest of the inferred objects. +ignore-on-opaque-inference=yes + +# List of symbolic message names to ignore for Mixin members. +ignored-checks-for-mixins=no-member, + not-async-context-manager, + not-context-manager, + attribute-defined-outside-init + +# List of class names for which member attributes should not be checked (useful +# for classes with dynamically set attributes). This supports the use of +# qualified names. +ignored-classes=optparse.Values,thread._local,_thread._local,argparse.Namespace + +# Show a hint with possible names when a member name was not found. The aspect +# of finding the hint is based on edit distance. +missing-member-hint=yes + +# The minimum edit distance a name should have in order to be considered a +# similar match for a missing member name. +missing-member-hint-distance=1 + +# The total number of similar names that should be taken in consideration when +# showing a hint for a missing member. +missing-member-max-choices=1 + +# Regex pattern to define which classes are considered mixins. +mixin-class-rgx=.*[Mm]ixin + +# List of decorators that change the signature of a decorated function. +signature-mutators= + + +[VARIABLES] + +# List of additional names supposed to be defined in builtins. Remember that +# you should avoid defining new builtins when possible. +additional-builtins= + +# Tells whether unused global variables should be treated as a violation. +allow-global-unused-variables=yes + +# List of names allowed to shadow builtins +allowed-redefined-builtins= + +# List of strings which can identify a callback function by name. A callback +# name must start or end with one of those strings. +callbacks=cb_, + _cb + +# A regular expression matching the name of dummy variables (i.e. expected to +# not be used). +dummy-variables-rgx=_+$|(_[a-zA-Z0-9_]*[a-zA-Z0-9]+?$)|dummy|^ignored_|^unused_ + +# Argument names that match this expression will be ignored. +ignored-argument-names=_.*|^ignored_|^unused_ + +# Tells whether we should check for unused import in __init__ files. +init-import=no + +# List of qualified module names which can have objects that can redefine +# builtins. +redefining-builtins-modules=six.moves,past.builtins,future.builtins,builtins,io diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 0000000..40628f7 --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,46 @@ +import os + +import pytest + +DATA_DIR_NAME = "data" + +TEST_DIR = os.path.dirname(__file__) +SMALL_SKY_DIR_NAME = "small_sky" + + +def pytest_addoption(parser): + parser.addoption("--cloud", action="store", default="abfs") + + +def pytest_generate_tests(metafunc): + # This is called for every test. Only get/set command line arguments + # if the argument is specified in the list of test "fixturenames". 
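+    # A sketch of the resulting flow, assuming the default defined above:
+    # running `pytest --cloud abfs` stores "abfs" on
+    # metafunc.config.option.cloud, and any test requesting a "cloud" fixture
+    # is parametrized with that single value.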
+ option_value = metafunc.config.option.cloud + if "cloud" in metafunc.fixturenames and option_value is not None: + metafunc.parametrize("cloud", [option_value]) + + +@pytest.fixture +def example_cloud_path(cloud): + if cloud == "abfs": + return "abfs://hipscat/pytests/" + + raise NotImplementedError("Cloud format not implemented for tests!") + + +@pytest.fixture +def example_cloud_storage_options(cloud): + if cloud == "abfs": + storage_options = { + "account_key": os.environ.get("ABFS_LINCCDATA_ACCOUNT_KEY"), + "account_name": os.environ.get("ABFS_LINCCDATA_ACCOUNT_NAME"), + } + return storage_options + + return {} + + +@pytest.fixture +def small_sky_dir_local(): + cloud_test_path = os.path.dirname(__file__) + return os.path.join(cloud_test_path, "data", SMALL_SKY_DIR_NAME) diff --git a/tests/data/small_sky/Norder=0/Dir=0/Npix=11.parquet b/tests/data/small_sky/Norder=0/Dir=0/Npix=11.parquet new file mode 100644 index 0000000..3f46bcd Binary files /dev/null and b/tests/data/small_sky/Norder=0/Dir=0/Npix=11.parquet differ diff --git a/tests/data/small_sky/catalog_info.json b/tests/data/small_sky/catalog_info.json new file mode 100644 index 0000000..ebfe52e --- /dev/null +++ b/tests/data/small_sky/catalog_info.json @@ -0,0 +1,12 @@ +{ + "catalog_name": "small_sky", + "catalog_type": "source", + "version": "0.0.1", + "generation_date": "2022.12.20", + "epoch": "J2000", + "ra_kw": "ra", + "dec_kw": "dec", + "id_kw": "id", + "total_objects": 131, + "pixel_threshold": 1000000 +} \ No newline at end of file diff --git a/tests/data/small_sky/partition_info.csv b/tests/data/small_sky/partition_info.csv new file mode 100644 index 0000000..ed01572 --- /dev/null +++ b/tests/data/small_sky/partition_info.csv @@ -0,0 +1,2 @@ +Norder,Dir,Npix,num_rows +0,0,11,131 diff --git a/tests/data/small_sky/point_map.fits b/tests/data/small_sky/point_map.fits new file mode 100644 index 0000000..1a5b0a6 Binary files /dev/null and b/tests/data/small_sky/point_map.fits differ diff --git a/tests/data/small_sky_order1/Norder=1/Dir=0/Npix=44.parquet b/tests/data/small_sky_order1/Norder=1/Dir=0/Npix=44.parquet new file mode 100644 index 0000000..30ef36c Binary files /dev/null and b/tests/data/small_sky_order1/Norder=1/Dir=0/Npix=44.parquet differ diff --git a/tests/data/small_sky_order1/Norder=1/Dir=0/Npix=45.parquet b/tests/data/small_sky_order1/Norder=1/Dir=0/Npix=45.parquet new file mode 100644 index 0000000..b0af099 Binary files /dev/null and b/tests/data/small_sky_order1/Norder=1/Dir=0/Npix=45.parquet differ diff --git a/tests/data/small_sky_order1/Norder=1/Dir=0/Npix=46.parquet b/tests/data/small_sky_order1/Norder=1/Dir=0/Npix=46.parquet new file mode 100644 index 0000000..7d4ad21 Binary files /dev/null and b/tests/data/small_sky_order1/Norder=1/Dir=0/Npix=46.parquet differ diff --git a/tests/data/small_sky_order1/Norder=1/Dir=0/Npix=47.parquet b/tests/data/small_sky_order1/Norder=1/Dir=0/Npix=47.parquet new file mode 100644 index 0000000..01f723d Binary files /dev/null and b/tests/data/small_sky_order1/Norder=1/Dir=0/Npix=47.parquet differ diff --git a/tests/data/small_sky_order1/catalog_info.json b/tests/data/small_sky_order1/catalog_info.json new file mode 100644 index 0000000..614fa55 --- /dev/null +++ b/tests/data/small_sky_order1/catalog_info.json @@ -0,0 +1,12 @@ +{ + "catalog_name": "small_sky_order1", + "catalog_type": "source", + "version": "0.0.0", + "generation_date": "2022.12.21", + "epoch": "J2000", + "ra_kw": "ra", + "dec_kw": "dec", + "id_kw": "id", + "total_objects": 131, + "pixel_threshold": 50 +} 
\ No newline at end of file diff --git a/tests/data/small_sky_order1/partition_info.csv b/tests/data/small_sky_order1/partition_info.csv new file mode 100644 index 0000000..d15927f --- /dev/null +++ b/tests/data/small_sky_order1/partition_info.csv @@ -0,0 +1,5 @@ +Norder,Dir,Npix,num_rows +1,0,44,42 +1,0,45,29 +1,0,46,42 +1,0,47,18 diff --git a/tests/data/small_sky_order1/point_map.fits b/tests/data/small_sky_order1/point_map.fits new file mode 100644 index 0000000..1a5b0a6 Binary files /dev/null and b/tests/data/small_sky_order1/point_map.fits differ diff --git a/tests/hipscat/catalog/dataset/test_base_catalog_info_cloud.py b/tests/hipscat/catalog/dataset/test_base_catalog_info_cloud.py new file mode 100644 index 0000000..eb2e028 --- /dev/null +++ b/tests/hipscat/catalog/dataset/test_base_catalog_info_cloud.py @@ -0,0 +1,18 @@ +import dataclasses + +from hipscat.catalog.dataset.base_catalog_info import BaseCatalogInfo +from hipscat.io import file_io + + +def test_read_from_file(base_catalog_info_file_cloud, example_cloud_storage_options): + base_cat_info_fp = file_io.get_file_pointer_from_path(base_catalog_info_file_cloud) + catalog_info = BaseCatalogInfo.read_from_metadata_file( + base_cat_info_fp, storage_options=example_cloud_storage_options + ) + catalog_info_json = file_io.file_io.load_json_file( + base_catalog_info_file_cloud, storage_options=example_cloud_storage_options + ) + + catalog_info_dict = dataclasses.asdict(catalog_info) + for key, value in catalog_info_json.items(): + assert catalog_info_dict[key] == value diff --git a/tests/hipscat/catalog/test_catalog_cloud.py b/tests/hipscat/catalog/test_catalog_cloud.py new file mode 100644 index 0000000..5ba0b39 --- /dev/null +++ b/tests/hipscat/catalog/test_catalog_cloud.py @@ -0,0 +1,49 @@ +"""Tests of catalog functionality""" + +import os + +import pytest +from hipscat.catalog import Catalog, PartitionInfo +from hipscat.io.file_io import file_io +from hipscat.pixel_math import HealpixPixel + +from hipscat_cloudtests import TempCloudDirectory + + +def test_load_catalog_small_sky(small_sky_dir_cloud, example_cloud_storage_options): + """Instantiate a catalog with 1 pixel""" + cat = Catalog.read_from_hipscat(small_sky_dir_cloud, storage_options=example_cloud_storage_options) + + assert cat.catalog_name == "small_sky" + assert len(cat.get_healpix_pixels()) == 1 + + +def test_empty_directory(tmp_dir_cloud, example_cloud_storage_options): + """Test loading empty or incomplete data""" + with TempCloudDirectory(tmp_dir_cloud, "empty", example_cloud_storage_options) as temp_path: + catalog_path = temp_path + + ## Path exists but there's nothing there (which means it doesn't exist!) + with pytest.raises(FileNotFoundError, match="No directory"): + Catalog.read_from_hipscat(catalog_path, storage_options=example_cloud_storage_options) + + ## catalog_info file exists - getting closer + file_name = os.path.join(catalog_path, "catalog_info.json") + file_io.write_string_to_file( + file_name, + string='{"catalog_name":"empty", "catalog_type":"source"}', + storage_options=example_cloud_storage_options, + ) + + with pytest.raises(FileNotFoundError, match="metadata"): + Catalog.read_from_hipscat(catalog_path, storage_options=example_cloud_storage_options) + + ## partition_info file exists - enough to create a catalog + ## Now we create the needed _metadata and everything is right. 
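+        ## HealpixPixel(0, 11) mirrors the single pixel in the small_sky test
+        ## data; write_to_metadata_files produces the _metadata file whose
+        ## absence caused the failure above.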
+ part_info = PartitionInfo.from_healpix([HealpixPixel(0, 11)]) + part_info.write_to_metadata_files( + catalog_path=catalog_path, storage_options=example_cloud_storage_options + ) + + catalog = Catalog.read_from_hipscat(catalog_path, storage_options=example_cloud_storage_options) + assert catalog.catalog_name == "empty" diff --git a/tests/hipscat/conftest.py b/tests/hipscat/conftest.py new file mode 100644 index 0000000..3a81295 --- /dev/null +++ b/tests/hipscat/conftest.py @@ -0,0 +1,46 @@ +import os +import os.path + +import pytest + +ALMANAC_DIR_NAME = "almanac" +SMALL_SKY_DIR_NAME = "small_sky" +SMALL_SKY_ORDER1_DIR_NAME = "small_sky_order1" +SMALL_SKY_TO_SMALL_SKY_ORDER1_DIR_NAME = "small_sky_to_small_sky_order1" + +# pylint: disable=missing-function-docstring, redefined-outer-name + + +@pytest.fixture +def tmp_dir_cloud(example_cloud_path): + return os.path.join(example_cloud_path, "hipscat", "tmp") + + +@pytest.fixture +def test_data_dir_cloud(example_cloud_path): + return os.path.join(example_cloud_path, "hipscat", "data") + + +@pytest.fixture +def almanac_dir_cloud(test_data_dir_cloud): + return os.path.join(test_data_dir_cloud, ALMANAC_DIR_NAME) + + +@pytest.fixture +def small_sky_dir_cloud(test_data_dir_cloud): + return os.path.join(test_data_dir_cloud, SMALL_SKY_DIR_NAME) + + +@pytest.fixture +def small_sky_order1_dir_cloud(test_data_dir_cloud): + return os.path.join(test_data_dir_cloud, SMALL_SKY_ORDER1_DIR_NAME) + + +@pytest.fixture +def base_catalog_info_file_cloud(test_data_dir_cloud) -> str: + return os.path.join(test_data_dir_cloud, "dataset", "catalog_info.json") + + +@pytest.fixture +def catalog_info_file_cloud(catalog_path_cloud) -> str: + return os.path.join(catalog_path_cloud, "catalog_info.json") diff --git a/tests/hipscat/inspection/test_almanac_cloud.py b/tests/hipscat/inspection/test_almanac_cloud.py new file mode 100644 index 0000000..e2c6dca --- /dev/null +++ b/tests/hipscat/inspection/test_almanac_cloud.py @@ -0,0 +1,21 @@ +import os + +from hipscat.inspection.almanac import Almanac + + +def test_default(almanac_dir_cloud, test_data_dir_cloud, example_cloud_storage_options): + """Test loading from a default directory""" + + os.environ["HIPSCAT_ALMANAC_DIR"] = "" + os.environ["HIPSCAT_DEFAULT_DIR"] = test_data_dir_cloud + + alms = Almanac(include_default_dir=True, storage_options=example_cloud_storage_options) + assert len(alms.catalogs()) == 0 + + os.environ["HIPSCAT_ALMANAC_DIR"] = almanac_dir_cloud + alms = Almanac(include_default_dir=True, storage_options=example_cloud_storage_options) + assert len(alms.catalogs()) == 8 + + os.environ.pop("HIPSCAT_ALMANAC_DIR") + alms = Almanac(include_default_dir=True, storage_options=example_cloud_storage_options) + assert len(alms.catalogs()) == 0 diff --git a/tests/hipscat/inspection/test_visualize_catalog_cloud.py b/tests/hipscat/inspection/test_visualize_catalog_cloud.py new file mode 100644 index 0000000..aeee397 --- /dev/null +++ b/tests/hipscat/inspection/test_visualize_catalog_cloud.py @@ -0,0 +1,9 @@ +from hipscat.catalog import Catalog +from hipscat.inspection import plot_pixels, plot_points + + +def test_generate_map_order1(small_sky_dir_cloud, example_cloud_storage_options): + """Basic test that map data can be generated (does not test that a plot is rendered)""" + cat = Catalog.read_from_hipscat(small_sky_dir_cloud, storage_options=example_cloud_storage_options) + plot_pixels(cat, draw_map=False) + plot_points(cat, draw_map=False) diff --git a/tests/hipscat/io/file_io/test_file_io_cloud.py 
b/tests/hipscat/io/file_io/test_file_io_cloud.py new file mode 100644 index 0000000..cc6845b --- /dev/null +++ b/tests/hipscat/io/file_io/test_file_io_cloud.py @@ -0,0 +1,63 @@ +import os + +import numpy as np +import pandas as pd +from hipscat.io.file_io import ( + get_file_pointer_from_path, + load_csv_to_pandas, + load_json_file, + load_parquet_to_pandas, + load_text_file, + write_dataframe_to_csv, + write_string_to_file, +) +from hipscat.io.paths import pixel_catalog_file + +from hipscat_cloudtests import TempCloudDirectory + + +def test_write_string_to_file(tmp_dir_cloud, example_cloud_storage_options): + with TempCloudDirectory(tmp_dir_cloud, "write_string", example_cloud_storage_options) as temp_path: + test_file_path = os.path.join(temp_path, "text_file.txt") + test_file_pointer = get_file_pointer_from_path(test_file_path) + test_string = "this is a test" + write_string_to_file( + test_file_pointer, + test_string, + encoding="utf-8", + storage_options=example_cloud_storage_options, + ) + data = load_text_file(test_file_path, encoding="utf-8", storage_options=example_cloud_storage_options) + assert data[0] == test_string + + +def test_load_json(small_sky_dir_local, small_sky_dir_cloud, example_cloud_storage_options): + catalog_cloud_path = os.path.join(small_sky_dir_cloud, "catalog_info.json") + catalog_info_path = os.path.join(small_sky_dir_local, "catalog_info.json") + catalog_info_pointer = get_file_pointer_from_path(catalog_info_path) + json_dict_cloud = load_json_file(catalog_cloud_path, storage_options=example_cloud_storage_options) + json_dict_local = load_json_file(catalog_info_pointer, encoding="utf-8") + assert json_dict_cloud == json_dict_local + + +def test_load_parquet_to_pandas(small_sky_dir_local, small_sky_dir_cloud, example_cloud_storage_options): + pixel_data_path = pixel_catalog_file(small_sky_dir_local, 0, 11) + pixel_data_path_cloud = pixel_catalog_file(small_sky_dir_cloud, 0, 11) + parquet_df = pd.read_parquet(pixel_data_path) + loaded_df = load_parquet_to_pandas(pixel_data_path_cloud, storage_options=example_cloud_storage_options) + pd.testing.assert_frame_equal(parquet_df, loaded_df) + + +def test_write_df_to_csv(tmp_dir_cloud, example_cloud_storage_options): + with TempCloudDirectory(tmp_dir_cloud, "write_df_to_csv", example_cloud_storage_options) as temp_path: + random_df = pd.DataFrame(np.random.randint(0, 100, size=(100, 4)), columns=list("ABCD")) + test_file_path = os.path.join(temp_path, "test.csv") + test_file_pointer = get_file_pointer_from_path(test_file_path) + write_dataframe_to_csv( + random_df, + test_file_pointer, + index=False, + storage_options=example_cloud_storage_options, + ) + loaded_df = load_csv_to_pandas(test_file_pointer, storage_options=example_cloud_storage_options) + pd.testing.assert_frame_equal(loaded_df, random_df) diff --git a/tests/hipscat/io/file_io/test_file_pointers_cloud.py b/tests/hipscat/io/file_io/test_file_pointers_cloud.py new file mode 100644 index 0000000..ce887aa --- /dev/null +++ b/tests/hipscat/io/file_io/test_file_pointers_cloud.py @@ -0,0 +1,109 @@ +import os + +from hipscat.io.file_io import ( + directory_has_contents, + does_file_or_directory_exist, + find_files_matching_path, + get_directory_contents, + get_file_pointer_from_path, + is_regular_file, +) + + +def test_file_or_dir_exist(small_sky_dir_cloud, example_cloud_storage_options): + small_sky_pointer = get_file_pointer_from_path(small_sky_dir_cloud) + assert does_file_or_directory_exist(small_sky_pointer, storage_options=example_cloud_storage_options) + 
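+    # File pointers work for individual files as well as directories; the
+    # check below resolves the path against the abstract filesystem
+    # configured through storage_options.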
catalog_info_string = os.path.join(small_sky_dir_cloud, "catalog_info.json") + catalog_info_pointer = get_file_pointer_from_path(catalog_info_string) + assert does_file_or_directory_exist(catalog_info_pointer, storage_options=example_cloud_storage_options) + + +def test_file_or_dir_exist_false(small_sky_dir_cloud, example_cloud_storage_options): + small_sky_pointer = get_file_pointer_from_path(small_sky_dir_cloud + "incorrect file") + assert not does_file_or_directory_exist(small_sky_pointer, storage_options=example_cloud_storage_options) + + +def test_is_regular_file(small_sky_dir_cloud, example_cloud_storage_options): + partition_info_file = os.path.join(small_sky_dir_cloud, "catalog_info.json") + assert is_regular_file(partition_info_file, storage_options=example_cloud_storage_options) + + assert not is_regular_file(small_sky_dir_cloud, storage_options=example_cloud_storage_options) + + partition_dir = os.path.join(small_sky_dir_cloud, "Norder=0") + assert not is_regular_file(partition_dir, storage_options=example_cloud_storage_options) + + +def test_find_files_matching_path(small_sky_dir_cloud, example_cloud_storage_options): + ## no_wildcard + assert ( + len( + find_files_matching_path( + small_sky_dir_cloud, + "catalog_info.json", + storage_options=example_cloud_storage_options, + ) + ) + == 1 + ) + + ## wilcard in the name + assert ( + len( + find_files_matching_path( + small_sky_dir_cloud, + "*.json", + storage_options=example_cloud_storage_options, + ) + ) + == 1 + ) + + +def test_find_files_matching_path_directory(small_sky_order1_dir_cloud, example_cloud_storage_options): + assert ( + len( + find_files_matching_path( + small_sky_order1_dir_cloud, + storage_options=example_cloud_storage_options, + ) + ) + == 1 + ) + + ## wildcard in directory - will match all files at indicated depth + assert ( + len( + find_files_matching_path( + small_sky_order1_dir_cloud, + "*", + "*", + "*", + storage_options=example_cloud_storage_options, + ) + ) + == 4 + ) + + +def test_directory_has_contents(small_sky_order1_dir_cloud, example_cloud_storage_options): + assert directory_has_contents(small_sky_order1_dir_cloud, storage_options=example_cloud_storage_options) + + +def test_get_directory_contents(small_sky_order1_dir_cloud, example_cloud_storage_options): + small_sky_contents = get_directory_contents( + small_sky_order1_dir_cloud, + include_protocol=True, + storage_options=example_cloud_storage_options, + ) + + expected = [ + "Norder=1", + "_common_metadata", + "_metadata", + "catalog_info.json", + "point_map.fits", + ] + + expected = [os.path.join(small_sky_order1_dir_cloud, file_name) for file_name in expected] + + assert small_sky_contents == expected diff --git a/tests/hipscat/io/test_write_metadata_cloud.py b/tests/hipscat/io/test_write_metadata_cloud.py new file mode 100644 index 0000000..bcf540a --- /dev/null +++ b/tests/hipscat/io/test_write_metadata_cloud.py @@ -0,0 +1,207 @@ +"""Tests of file IO (reads and writes)""" + +import os + +import hipscat.io.write_metadata as io +import hipscat.pixel_math as hist +import numpy.testing as npt +import pyarrow as pa +import pytest +from hipscat.catalog.catalog_info import CatalogInfo +from hipscat.io import file_io +from hipscat.io.parquet_metadata import write_parquet_metadata + +from hipscat_cloudtests import TempCloudDirectory, assert_text_file_matches + + +@pytest.fixture +def basic_catalog_parquet_metadata(): + return pa.schema( + [ + pa.field("id", pa.int64()), + pa.field("ra", pa.float64()), + pa.field("dec", pa.float64()), + 
pa.field("ra_error", pa.int64()), + pa.field("dec_error", pa.int64()), + pa.field("__index_level_0__", pa.int64()), + ] + ) + + +@pytest.fixture +def catalog_info_data() -> dict: + return { + "catalog_name": "test_name", + "catalog_type": "object", + "total_rows": 10, + "epoch": "J2000", + "ra_column": "ra", + "dec_column": "dec", + } + + +@pytest.fixture +def catalog_info(catalog_info_data) -> CatalogInfo: + return CatalogInfo(**catalog_info_data) + + +def test_write_catalog_info(tmp_dir_cloud, catalog_info, example_cloud_storage_options): + """Test that we accurately write out catalog metadata""" + with TempCloudDirectory(tmp_dir_cloud, "write_catalog_info", example_cloud_storage_options) as temp_path: + catalog_base_dir = temp_path + expected_lines = [ + "{", + ' "catalog_name": "test_name",', + ' "catalog_type": "object",', + ' "total_rows": 10,', + ' "epoch": "J2000",', + ' "ra_column": "ra",', + ' "dec_column": "dec"', + "}", + ] + + io.write_catalog_info( + dataset_info=catalog_info, + catalog_base_dir=catalog_base_dir, + storage_options=example_cloud_storage_options, + ) + metadata_filename = os.path.join(catalog_base_dir, "catalog_info.json") + assert_text_file_matches( + expected_lines, metadata_filename, storage_options=example_cloud_storage_options + ) + + +def test_write_provenance_info(tmp_dir_cloud, catalog_info, example_cloud_storage_options): + """Test that we accurately write out tool-provided generation metadata""" + with TempCloudDirectory( + tmp_dir_cloud, "write_provenance_info", example_cloud_storage_options + ) as temp_path: + catalog_base_dir = temp_path + expected_lines = [ + "{", + ' "catalog_name": "test_name",', + ' "catalog_type": "object",', + ' "total_rows": 10,', + ' "epoch": "J2000",', + ' "ra_column": "ra",', + ' "dec_column": "dec",', + r' "version": ".*",', # version matches digits + r' "generation_date": "[.\d]+",', # date matches date format + ' "tool_args": {', + ' "tool_name": "hipscat-import",', + ' "tool_version": "1.0.0",', + r' "input_file_names": \[', + ' "file1",', + ' "file2",', + ' "file3"', + " ]", + " }", + "}", + ] + + tool_args = { + "tool_name": "hipscat-import", + "tool_version": "1.0.0", + "input_file_names": ["file1", "file2", "file3"], + } + + io.write_provenance_info( + catalog_base_dir=catalog_base_dir, + dataset_info=catalog_info, + tool_args=tool_args, + storage_options=example_cloud_storage_options, + ) + metadata_filename = os.path.join(catalog_base_dir, "provenance_info.json") + assert_text_file_matches( + expected_lines, metadata_filename, storage_options=example_cloud_storage_options + ) + + +def test_write_parquet_metadata( + tmp_dir_cloud, + small_sky_dir_cloud, + basic_catalog_parquet_metadata, + example_cloud_storage_options, +): + """Use existing catalog parquet files and create new metadata files for it""" + with TempCloudDirectory( + tmp_dir_cloud, "write_parquet_metadata", example_cloud_storage_options + ) as temp_path: + catalog_base_dir = temp_path + + write_parquet_metadata( + catalog_path=small_sky_dir_cloud, + storage_options=example_cloud_storage_options, + output_path=catalog_base_dir, + ) + + check_parquet_schema( + os.path.join(catalog_base_dir, "_metadata"), + basic_catalog_parquet_metadata, + storage_options=example_cloud_storage_options, + ) + ## _common_metadata has 0 row groups + check_parquet_schema( + os.path.join(catalog_base_dir, "_common_metadata"), + basic_catalog_parquet_metadata, + 0, + storage_options=example_cloud_storage_options, + ) + + ## Re-write - should still have the same properties. 
+ write_parquet_metadata( + catalog_path=small_sky_dir_cloud, + storage_options=example_cloud_storage_options, + output_path=catalog_base_dir, + ) + check_parquet_schema( + os.path.join(catalog_base_dir, "_metadata"), + basic_catalog_parquet_metadata, + storage_options=example_cloud_storage_options, + ) + ## _common_metadata has 0 row groups + check_parquet_schema( + os.path.join(catalog_base_dir, "_common_metadata"), + basic_catalog_parquet_metadata, + 0, + storage_options=example_cloud_storage_options, + ) + + +def check_parquet_schema(file_name, expected_schema, expected_num_row_groups=1, storage_options: dict = None): + """Check parquet schema against expectations""" + assert file_io.does_file_or_directory_exist(file_name, storage_options=storage_options) + + single_metadata = file_io.read_parquet_metadata(file_name, storage_options=storage_options) + schema = single_metadata.schema.to_arrow_schema() + + assert len(schema) == len( + expected_schema + ), f"object list not the same size ({len(schema)} vs {len(expected_schema)})" + + npt.assert_array_equal(schema.names, expected_schema.names) + + assert schema.equals(expected_schema, check_metadata=False) + + parquet_file = file_io.read_parquet_file(file_pointer=file_name, storage_options=storage_options) + assert parquet_file.metadata.num_row_groups == expected_num_row_groups + + for row_index in range(0, parquet_file.metadata.num_row_groups): + row_md = parquet_file.metadata.row_group(row_index) + for column_index in range(0, row_md.num_columns): + column_metadata = row_md.column(column_index) + assert column_metadata.file_path.endswith(".parquet") + + +def test_read_write_fits_point_map(tmp_dir_cloud, example_cloud_storage_options): + """Check that we write and can read a FITS file for spatial distribution.""" + with TempCloudDirectory(tmp_dir_cloud, "write_fits", example_cloud_storage_options) as temp_path: + initial_histogram = hist.empty_histogram(1) + filled_pixels = [51, 29, 51, 0] + initial_histogram[44:] = filled_pixels[:] + io.write_fits_map(temp_path, initial_histogram, storage_options=example_cloud_storage_options) + + output_file = os.path.join(temp_path, "point_map.fits") + + output = file_io.read_fits_image(output_file, storage_options=example_cloud_storage_options) + npt.assert_array_equal(output, initial_histogram) diff --git a/tests/lsdb/catalog/test_cone_search.py b/tests/lsdb/catalog/test_cone_search.py new file mode 100644 index 0000000..f162c2a --- /dev/null +++ b/tests/lsdb/catalog/test_cone_search.py @@ -0,0 +1,31 @@ +from astropy.coordinates import SkyCoord + + +def test_cone_search_filters_correct_points(small_sky_order1_catalog_cloud): + ra = 0 + dec = -80 + radius = 20 + center_coord = SkyCoord(ra, dec, unit="deg") + cone_search_catalog = small_sky_order1_catalog_cloud.cone_search(ra, dec, radius).compute() + print(len(cone_search_catalog)) + for _, row in small_sky_order1_catalog_cloud.compute().iterrows(): + row_ra = row[small_sky_order1_catalog_cloud.hc_structure.catalog_info.ra_column] + row_dec = row[small_sky_order1_catalog_cloud.hc_structure.catalog_info.dec_column] + sep = SkyCoord(row_ra, row_dec, unit="deg").separation(center_coord) + if sep.degree <= radius: + assert len(cone_search_catalog.loc[cone_search_catalog["id"] == row["id"]]) == 1 + else: + assert len(cone_search_catalog.loc[cone_search_catalog["id"] == row["id"]]) == 0 + + +def test_cone_search_filters_partitions(small_sky_order1_catalog_cloud): + ra = 0 + dec = -80 + radius = 20 + hc_conesearch = 
small_sky_order1_catalog_cloud.hc_structure.filter_by_cone(ra, dec, radius) + consearch_catalog = small_sky_order1_catalog_cloud.cone_search(ra, dec, radius) + assert len(hc_conesearch.get_healpix_pixels()) == len(consearch_catalog.get_healpix_pixels()) + assert len(hc_conesearch.get_healpix_pixels()) == consearch_catalog._ddf.npartitions + print(hc_conesearch.get_healpix_pixels()) + for pixel in hc_conesearch.get_healpix_pixels(): + assert pixel in consearch_catalog._ddf_pixel_map diff --git a/tests/lsdb/catalog/test_crossmatch.py b/tests/lsdb/catalog/test_crossmatch.py new file mode 100644 index 0000000..48d335c --- /dev/null +++ b/tests/lsdb/catalog/test_crossmatch.py @@ -0,0 +1,11 @@ +import pytest + + +def test_kdtree_crossmatch(small_sky_catalog_cloud, small_sky_xmatch_catalog_cloud, xmatch_correct_cloud): + xmatched = small_sky_catalog_cloud.crossmatch(small_sky_xmatch_catalog_cloud).compute() + assert len(xmatched) == len(xmatch_correct_cloud) + for _, correct_row in xmatch_correct_cloud.iterrows(): + assert correct_row["ss_id"] in xmatched["id_small_sky"].values + xmatch_row = xmatched[xmatched["id_small_sky"] == correct_row["ss_id"]] + assert xmatch_row["id_small_sky_xmatch"].values == correct_row["xmatch_id"] + assert xmatch_row["_DIST"].values == pytest.approx(correct_row["dist"]) diff --git a/tests/lsdb/conftest.py b/tests/lsdb/conftest.py new file mode 100644 index 0000000..d0900d0 --- /dev/null +++ b/tests/lsdb/conftest.py @@ -0,0 +1,61 @@ +import os + +import hipscat as hc +import lsdb +import pytest +from hipscat.io.file_io import file_io + +SMALL_SKY_DIR_NAME = "small_sky" +SMALL_SKY_XMATCH_NAME = "small_sky_xmatch" +SMALL_SKY_ORDER1_DIR_NAME = "small_sky_order1" +XMATCH_CORRECT_FILE = "xmatch_correct.csv" +XMATCH_CORRECT_005_FILE = "xmatch_correct_0_005.csv" +XMATCH_MOCK_FILE = "xmatch_mock.csv" + + +@pytest.fixture +def test_data_dir_cloud(example_cloud_path): + return os.path.join(example_cloud_path, "lsdb", "data") + + +@pytest.fixture +def small_sky_dir_cloud(test_data_dir_cloud): + return os.path.join(test_data_dir_cloud, SMALL_SKY_DIR_NAME) + + +@pytest.fixture +def small_sky_xmatch_dir_cloud(test_data_dir_cloud): + return os.path.join(test_data_dir_cloud, SMALL_SKY_XMATCH_NAME) + + +@pytest.fixture +def small_sky_order1_dir_cloud(test_data_dir_cloud): + return os.path.join(test_data_dir_cloud, SMALL_SKY_ORDER1_DIR_NAME) + + +@pytest.fixture +def small_sky_catalog_cloud(small_sky_dir_cloud, example_cloud_storage_options): + return lsdb.read_hipscat(small_sky_dir_cloud, storage_options=example_cloud_storage_options) + + +@pytest.fixture +def small_sky_xmatch_catalog_cloud(small_sky_xmatch_dir_cloud, example_cloud_storage_options): + return lsdb.read_hipscat(small_sky_xmatch_dir_cloud, storage_options=example_cloud_storage_options) + + +@pytest.fixture +def small_sky_order1_hipscat_catalog_cloud(small_sky_order1_dir_cloud, example_cloud_storage_options): + return hc.catalog.Catalog.read_from_hipscat( + small_sky_order1_dir_cloud, storage_options=example_cloud_storage_options + ) + + +@pytest.fixture +def small_sky_order1_catalog_cloud(small_sky_order1_dir_cloud, example_cloud_storage_options): + return lsdb.read_hipscat(small_sky_order1_dir_cloud, storage_options=example_cloud_storage_options) + + +@pytest.fixture +def xmatch_correct_cloud(small_sky_xmatch_dir_cloud, example_cloud_storage_options): + pathway = os.path.join(small_sky_xmatch_dir_cloud, XMATCH_CORRECT_FILE) + return file_io.load_csv_to_pandas(pathway, storage_options=example_cloud_storage_options) diff 
--git a/tests/lsdb/loaders/hipscat/test_read_hipscat.py b/tests/lsdb/loaders/hipscat/test_read_hipscat.py new file mode 100644 index 0000000..1325fb6 --- /dev/null +++ b/tests/lsdb/loaders/hipscat/test_read_hipscat.py @@ -0,0 +1,16 @@ +import lsdb + + +def test_read_hipscat( + small_sky_order1_dir_cloud, + small_sky_order1_hipscat_catalog_cloud, + example_cloud_storage_options, +): + catalog = lsdb.read_hipscat(small_sky_order1_dir_cloud, storage_options=example_cloud_storage_options) + assert isinstance(catalog, lsdb.Catalog) + assert catalog.hc_structure.catalog_base_dir == small_sky_order1_hipscat_catalog_cloud.catalog_base_dir + assert catalog.get_healpix_pixels() == small_sky_order1_hipscat_catalog_cloud.get_healpix_pixels() + + catalog = lsdb.read_hipscat(small_sky_order1_dir_cloud, storage_options=example_cloud_storage_options) + for healpix_pixel in small_sky_order1_hipscat_catalog_cloud.get_healpix_pixels(): + catalog.get_partition(healpix_pixel.order, healpix_pixel.pixel)
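
Taken together, the utilities and fixtures introduced in this change define a
pattern for adding new cloud tests: request the cloud path and credential
fixtures, scope any writes inside a TempCloudDirectory, and verify file
contents with assert_text_file_matches. The sketch below shows that pattern
end to end. It is illustrative only: test_write_round_trip is a hypothetical
test name, and it assumes the tmp_dir_cloud and example_cloud_storage_options
fixtures defined in tests/hipscat/conftest.py and tests/conftest.py.

import os

from hipscat.io.file_io import file_io

from hipscat_cloudtests import TempCloudDirectory, assert_text_file_matches


def test_write_round_trip(tmp_dir_cloud, example_cloud_storage_options):
    """Write a small text file to cloud storage and verify its contents."""
    # The context manager creates <tmp_dir_cloud>/round_trip<uuid> and
    # recursively removes it on exit, even if the assertions fail.
    with TempCloudDirectory(tmp_dir_cloud, "round_trip", example_cloud_storage_options) as temp_path:
        file_name = os.path.join(temp_path, "hello.txt")
        file_io.write_string_to_file(
            file_name,
            string="hello cloud",
            storage_options=example_cloud_storage_options,
        )
        # Expected lines are matched as regular expressions, so escape any
        # regex metacharacters when asserting literal contents.
        assert_text_file_matches(
            ["hello cloud"],
            file_name,
            storage_options=example_cloud_storage_options,
        )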