Skip to content

Commit

Permalink
Merge pull request #33 from astronomy-commons/fix-smoke-tests
Browse files Browse the repository at this point in the history
Address smoke test failures
Authored by:
camposandro authored Jul 31, 2024
2 parents 73ac873 + f67c412 commit 222d1ed
Show file tree
Hide file tree
Showing 4 changed files with 16 additions and 7 deletions.
5 changes: 3 additions & 2 deletions src/hipscat_cloudtests/file_checks.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

import numpy.testing as npt
import pandas as pd
import pyarrow as pa
from hipscat.io.file_io.file_io import load_text_file
from hipscat.io.file_io.file_pointer import does_file_or_directory_exist

Expand Down Expand Up @@ -40,7 +41,7 @@ def assert_text_file_matches(expected_lines, file_name, storage_options: dict =


def assert_parquet_file_ids(
file_name, id_column, expected_ids, resort_ids=True, storage_options: dict = None
file_name, id_column, schema: pa.Schema, expected_ids, resort_ids=True, storage_options: dict = None
):
"""
Convenience method to read a parquet file and compare the object IDs to
Expand All @@ -54,7 +55,7 @@ def assert_parquet_file_ids(
is the same between the read IDs and expected_ids
storage_options (dict): dictionary of filesystem storage options
"""
data_frame = pd.read_parquet(file_name, engine="pyarrow", storage_options=storage_options)
data_frame = pd.read_parquet(file_name, engine="pyarrow", schema=schema, storage_options=storage_options)
assert id_column in data_frame.columns
ids = data_frame[id_column].tolist()
if resort_ids:
Expand Down
9 changes: 7 additions & 2 deletions tests/hipscat/io/file_io/test_file_io_cloud.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,11 +37,16 @@ def test_load_json(small_sky_dir_local, small_sky_dir_cloud, storage_options):
assert json_dict_cloud == json_dict_local


def test_load_parquet_to_pandas(small_sky_dir_local, small_sky_dir_cloud, storage_options):
def test_load_parquet_to_pandas(
small_sky_catalog_cloud, small_sky_dir_local, small_sky_dir_cloud, storage_options
):
pixel_data_path = pixel_catalog_file(small_sky_dir_local, 0, 11)
pixel_data_path_cloud = pixel_catalog_file(small_sky_dir_cloud, 0, 11)
parquet_df = pd.read_parquet(pixel_data_path)
loaded_df = load_parquet_to_pandas(pixel_data_path_cloud, storage_options=storage_options)
catalog_schema = small_sky_catalog_cloud.hc_structure.schema
loaded_df = load_parquet_to_pandas(
pixel_data_path_cloud, schema=catalog_schema, storage_options=storage_options
)
pd.testing.assert_frame_equal(parquet_df, loaded_df)


Expand Down
5 changes: 4 additions & 1 deletion tests/hipscat/io/test_write_metadata_cloud.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,11 @@
import hipscat.pixel_math as hist
import numpy.testing as npt
import pyarrow as pa
import pyarrow.parquet as pq
import pytest
from hipscat.catalog.catalog_info import CatalogInfo
from hipscat.io import file_io
from hipscat.io.file_io.file_pointer import get_fs
from hipscat.io.parquet_metadata import write_parquet_metadata

from hipscat_cloudtests import assert_text_file_matches
Expand Down Expand Up @@ -175,7 +177,8 @@ def check_parquet_schema(file_name, expected_schema, expected_num_row_groups=1,

assert schema.equals(expected_schema, check_metadata=False)

parquet_file = file_io.read_parquet_file(file_pointer=file_name, storage_options=storage_options)
file_system, file_pointer = get_fs(file_name, storage_options=storage_options)
parquet_file = pq.ParquetFile(file_pointer, filesystem=file_system)
assert parquet_file.metadata.num_row_groups == expected_num_row_groups

for row_index in range(0, parquet_file.metadata.num_row_groups):
Expand Down
4 changes: 2 additions & 2 deletions tests/hipscat_import/test_run_catalog_import.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ def test_catalog_import_write_to_cloud(
output_file = os.path.join(args.catalog_path, "Norder=0", "Dir=0", "Npix=11.parquet")

expected_ids = [*range(700, 831)]
assert_parquet_file_ids(output_file, "id", expected_ids, storage_options=storage_options)
assert_parquet_file_ids(output_file, "id", catalog.schema, expected_ids, storage_options=storage_options)


@pytest.mark.dask
Expand Down Expand Up @@ -85,7 +85,7 @@ def test_catalog_import_read_from_cloud(
output_file = os.path.join(args.catalog_path, "Norder=0", "Dir=0", "Npix=11.parquet")

expected_ids = [*range(700, 831)]
assert_parquet_file_ids(output_file, "id", expected_ids)
assert_parquet_file_ids(output_file, "id", catalog.schema, expected_ids)


def test_read_csv_cloud(storage_options, small_sky_parts_dir_cloud):
Expand Down

0 comments on commit 222d1ed

Please sign in to comment.