From 0c500fc8cc02adac2273f97c3f1029b5bad5dceb Mon Sep 17 00:00:00 2001 From: jthielen Date: Tue, 8 Mar 2022 07:00:49 -0700 Subject: [PATCH] Add calculation of essential diagnostic variables to postprocess (#59) Co-authored-by: Anderson Banihirwe Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- tests/test_accessors.py | 11 +++++++++ tests/test_postprocess.py | 52 +++++++++++++++++++++++++++++++++++++++ xwrf/accessors.py | 25 ++++++++++++++++++- xwrf/postprocess.py | 47 +++++++++++++++++++++++++++++++++++ 4 files changed, 134 insertions(+), 1 deletion(-) diff --git a/tests/test_accessors.py b/tests/test_accessors.py index e35c808f..e5ca65cf 100644 --- a/tests/test_accessors.py +++ b/tests/test_accessors.py @@ -48,3 +48,14 @@ def test_postprocess(name, cf_grid_mapping_name): assert ds['XLAT'].shape == ds['XLONG'].shape == (29, 31) assert ds['XLAT_U'].shape == ds['XLONG_U'].shape == (29, 32) assert ds['XLAT_V'].shape == ds['XLONG_V'].shape == (30, 31) + + # Check for diagnostic variable calculation + assert 'potential_temperature' in ds.data_vars + assert 'air_pressure' in ds.data_vars + assert 'geopotential' in ds.data_vars + assert 'geopotential_height' in ds.data_vars + assert 'T' not in ds.data_vars + assert 'P' not in ds.data_vars + assert 'PB' not in ds.data_vars + assert 'PH' not in ds.data_vars + assert 'PHB' not in ds.data_vars diff --git a/tests/test_postprocess.py b/tests/test_postprocess.py index e68d4815..a090ab85 100644 --- a/tests/test_postprocess.py +++ b/tests/test_postprocess.py @@ -110,3 +110,55 @@ def test_assign_coord_to_dim_of_different_name_keyerror(sample_dataset): def test_clean_brackets_from_units(sample_dataset, variable, bracket): ds = sample_dataset.pipe(xwrf.postprocess._clean_brackets_from_units) assert bracket not in ds[variable].attrs['units'] + + +@pytest.mark.parametrize('sample_dataset', ['lambert_conformal'], indirect=True) +def test_calc_base_diagnostics(sample_dataset): + subset = sample_dataset[['T', 'P', 'PB', 'PH', 'PHB']].isel(Time=0).load() + ds_dropped = subset.copy().pipe(xwrf.postprocess._calc_base_diagnostics) + ds_undropped = subset.copy().pipe(xwrf.postprocess._calc_base_diagnostics, drop=False) + + for ds in (ds_dropped, ds_undropped): + # Potential temperature + np.testing.assert_allclose(ds['air_potential_temperature'].max().item(), 508.78415) + np.testing.assert_allclose(ds['air_potential_temperature'].min().item(), 270.265106) + assert ds['air_potential_temperature'].attrs['units'] == 'K' + assert ds['air_potential_temperature'].attrs['standard_name'] == 'air_potential_temperature' + + # Air pressure + np.testing.assert_allclose(ds['air_pressure'].max().item(), 101824.89) + np.testing.assert_allclose(ds['air_pressure'].min().item(), 5269.97) + assert ds['air_pressure'].attrs['units'] == 'Pa' + assert ds['air_pressure'].attrs['standard_name'] == 'air_pressure' + + # Geopotential + np.testing.assert_allclose(ds['geopotential'].max().item(), 196788.02) + np.testing.assert_allclose(ds['geopotential'].min().item(), 0.0) + assert ds['geopotential'].attrs['units'] == 'm**2 s**-2' + assert ds['geopotential'].attrs['standard_name'] == 'geopotential' + + # Geopotential height + np.testing.assert_allclose(ds['geopotential_height'].max().item(), 20059.941) + np.testing.assert_allclose(ds['geopotential_height'].min().item(), 0.0) + assert ds['geopotential_height'].attrs['units'] == 'm' + assert ds['geopotential_height'].attrs['standard_name'] == 'geopotential_height' + + # Check dropped or not + assert 'T' not in ds_dropped + assert 'P' not in ds_dropped + assert 'PB' not in ds_dropped + assert 'PH' not in ds_dropped + assert 'PHB' not in ds_dropped + assert 'T' in ds_undropped + assert 'P' in ds_undropped + assert 'PB' in ds_undropped + assert 'PH' in ds_undropped + assert 'PHB' in ds_undropped + + +@pytest.mark.parametrize( + 'sample_dataset', ['dummy', 'polar_stereographic_1', 'tiny', 'met_em_sample'], indirect=True +) +def test_calc_base_diagnostics_skipping(sample_dataset): + ds = sample_dataset.pipe(xwrf.postprocess._calc_base_diagnostics) + xr.testing.assert_identical(sample_dataset, ds) diff --git a/xwrf/accessors.py b/xwrf/accessors.py index 3686e6d7..052f5b6b 100644 --- a/xwrf/accessors.py +++ b/xwrf/accessors.py @@ -4,6 +4,7 @@ from .postprocess import ( _assign_coord_to_dim_of_different_name, + _calc_base_diagnostics, _collapse_time_dim, _decode_times, _include_projection_coordinates, @@ -31,10 +32,30 @@ class WRFDataArrayAccessor(WRFAccessor): class WRFDatasetAccessor(WRFAccessor): """Adds a number of WRF specific methods to xarray.Dataset objects.""" - def postprocess(self, decode_times=True) -> xr.Dataset: + def postprocess( + self, + decode_times: bool = True, + calculate_diagnostic_variables: bool = True, + drop_diagnostic_variable_components: bool = True, + ) -> xr.Dataset: """ Postprocess the dataset. + Parameters + ---------- + decode_times : bool, optional + Decode the string-like wrfout times to xarray-friendly Pandas types. Defaults to True. + calculate_diagnostic_variables : bool, optional + Calculate four essential diagnostic variables (potential temperature, air pressure, + geopotential, and geopotential height) that are otherwise only present in wrfout files + as split components or dependent upon special adjustments. Defaults to True. If the + underlying fields on which any of these calculated fields depends is missing, that + calculated variable is skipped. These will be eagerly evalulated, unless your data has + been chunked with Dask, in which case these fields will also be Dask arrays. + drop_diagnostic_variable_components : bool, optional + Determine whether to drop the underlying fields used to calculate the diagnostic + variables. Defaults to True. + Returns ------- xarray.Dataset @@ -49,5 +70,7 @@ def postprocess(self, decode_times=True) -> xr.Dataset: ) if decode_times: ds = ds.pipe(_decode_times) + if calculate_diagnostic_variables: + ds = ds.pipe(_calc_base_diagnostics, drop=drop_diagnostic_variable_components) return ds.pipe(_rename_dims) diff --git a/xwrf/postprocess.py b/xwrf/postprocess.py index e95d91da..1df8cce2 100644 --- a/xwrf/postprocess.py +++ b/xwrf/postprocess.py @@ -142,3 +142,50 @@ def _rename_dims(ds: xr.Dataset) -> xr.Dataset: """Rename dims for more consistent semantics.""" rename_dim_map = {k: v for k, v in config.get('rename_dim_map').items() if k in ds.dims} return ds.rename(rename_dim_map) + + +def _calc_base_diagnostics(ds: xr.Dataset, drop: bool = True) -> xr.Dataset: + """Calculate the four basic fields that WRF does not have in physically meaningful form. + + Parameters + ---------- + dataset : xarray.Dataset + Dataset representing WRF data opened via normal backend, with chunking. + drop : bool + Decide whether to drop the components of origin after creating the diagnostic fields from + them. + + Notes + ----- + This operation should be called before destaggering. + """ + # Potential temperature + if 'T' in ds.data_vars: + ds['air_potential_temperature'] = ds['T'] + 300 + ds['air_potential_temperature'].attrs = { + 'units': 'K', + 'standard_name': 'air_potential_temperature', + } + if drop: + del ds['T'] + + # Pressure + if 'P' in ds.data_vars and 'PB' in ds.data_vars: + ds['air_pressure'] = ds['P'] + ds['PB'] + ds['air_pressure'].attrs = { + 'units': ds['P'].attrs.get('units', 'Pa'), + 'standard_name': 'air_pressure', + } + if drop: + del ds['P'], ds['PB'] + + # Geopotential and geopotential height + if 'PH' in ds.data_vars and 'PHB' in ds.data_vars: + ds['geopotential'] = ds['PH'] + ds['PHB'] + ds['geopotential'].attrs = {'units': 'm**2 s**-2', 'standard_name': 'geopotential'} + ds['geopotential_height'] = ds['geopotential'] / 9.81 + ds['geopotential_height'].attrs = {'units': 'm', 'standard_name': 'geopotential_height'} + if drop: + del ds['PH'], ds['PHB'] + + return ds