From 0c500fc8cc02adac2273f97c3f1029b5bad5dceb Mon Sep 17 00:00:00 2001
From: jthielen <jon@thielen.science>
Date: Tue, 8 Mar 2022 07:00:49 -0700
Subject: [PATCH] Add calculation of essential diagnostic variables to
 postprocess (#59)

Co-authored-by: Anderson Banihirwe <axbanihirwe@ualr.edu>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
---
 tests/test_accessors.py   | 11 +++++++++
 tests/test_postprocess.py | 52 +++++++++++++++++++++++++++++++++++++++
 xwrf/accessors.py         | 25 ++++++++++++++++++-
 xwrf/postprocess.py       | 47 +++++++++++++++++++++++++++++++++++
 4 files changed, 134 insertions(+), 1 deletion(-)

diff --git a/tests/test_accessors.py b/tests/test_accessors.py
index e35c808f..e5ca65cf 100644
--- a/tests/test_accessors.py
+++ b/tests/test_accessors.py
@@ -48,3 +48,14 @@ def test_postprocess(name, cf_grid_mapping_name):
     assert ds['XLAT'].shape == ds['XLONG'].shape == (29, 31)
     assert ds['XLAT_U'].shape == ds['XLONG_U'].shape == (29, 32)
     assert ds['XLAT_V'].shape == ds['XLONG_V'].shape == (30, 31)
+
+    # Check for diagnostic variable calculation
+    assert 'potential_temperature' in ds.data_vars
+    assert 'air_pressure' in ds.data_vars
+    assert 'geopotential' in ds.data_vars
+    assert 'geopotential_height' in ds.data_vars
+    assert 'T' not in ds.data_vars
+    assert 'P' not in ds.data_vars
+    assert 'PB' not in ds.data_vars
+    assert 'PH' not in ds.data_vars
+    assert 'PHB' not in ds.data_vars
diff --git a/tests/test_postprocess.py b/tests/test_postprocess.py
index e68d4815..a090ab85 100644
--- a/tests/test_postprocess.py
+++ b/tests/test_postprocess.py
@@ -110,3 +110,55 @@ def test_assign_coord_to_dim_of_different_name_keyerror(sample_dataset):
 def test_clean_brackets_from_units(sample_dataset, variable, bracket):
     ds = sample_dataset.pipe(xwrf.postprocess._clean_brackets_from_units)
     assert bracket not in ds[variable].attrs['units']
+
+
+@pytest.mark.parametrize('sample_dataset', ['lambert_conformal'], indirect=True)
+def test_calc_base_diagnostics(sample_dataset):
+    subset = sample_dataset[['T', 'P', 'PB', 'PH', 'PHB']].isel(Time=0).load()
+    ds_dropped = subset.copy().pipe(xwrf.postprocess._calc_base_diagnostics)
+    ds_undropped = subset.copy().pipe(xwrf.postprocess._calc_base_diagnostics, drop=False)
+
+    for ds in (ds_dropped, ds_undropped):
+        # Potential temperature
+        np.testing.assert_allclose(ds['air_potential_temperature'].max().item(), 508.78415)
+        np.testing.assert_allclose(ds['air_potential_temperature'].min().item(), 270.265106)
+        assert ds['air_potential_temperature'].attrs['units'] == 'K'
+        assert ds['air_potential_temperature'].attrs['standard_name'] == 'air_potential_temperature'
+
+        # Air pressure
+        np.testing.assert_allclose(ds['air_pressure'].max().item(), 101824.89)
+        np.testing.assert_allclose(ds['air_pressure'].min().item(), 5269.97)
+        assert ds['air_pressure'].attrs['units'] == 'Pa'
+        assert ds['air_pressure'].attrs['standard_name'] == 'air_pressure'
+
+        # Geopotential
+        np.testing.assert_allclose(ds['geopotential'].max().item(), 196788.02)
+        np.testing.assert_allclose(ds['geopotential'].min().item(), 0.0)
+        assert ds['geopotential'].attrs['units'] == 'm**2 s**-2'
+        assert ds['geopotential'].attrs['standard_name'] == 'geopotential'
+
+        # Geopotential height
+        np.testing.assert_allclose(ds['geopotential_height'].max().item(), 20059.941)
+        np.testing.assert_allclose(ds['geopotential_height'].min().item(), 0.0)
+        assert ds['geopotential_height'].attrs['units'] == 'm'
+        assert ds['geopotential_height'].attrs['standard_name'] == 'geopotential_height'
+
+    # Check dropped or not
+    assert 'T' not in ds_dropped
+    assert 'P' not in ds_dropped
+    assert 'PB' not in ds_dropped
+    assert 'PH' not in ds_dropped
+    assert 'PHB' not in ds_dropped
+    assert 'T' in ds_undropped
+    assert 'P' in ds_undropped
+    assert 'PB' in ds_undropped
+    assert 'PH' in ds_undropped
+    assert 'PHB' in ds_undropped
+
+
+@pytest.mark.parametrize(
+    'sample_dataset', ['dummy', 'polar_stereographic_1', 'tiny', 'met_em_sample'], indirect=True
+)
+def test_calc_base_diagnostics_skipping(sample_dataset):
+    ds = sample_dataset.pipe(xwrf.postprocess._calc_base_diagnostics)
+    xr.testing.assert_identical(sample_dataset, ds)
diff --git a/xwrf/accessors.py b/xwrf/accessors.py
index 3686e6d7..052f5b6b 100644
--- a/xwrf/accessors.py
+++ b/xwrf/accessors.py
@@ -4,6 +4,7 @@
 
 from .postprocess import (
     _assign_coord_to_dim_of_different_name,
+    _calc_base_diagnostics,
     _collapse_time_dim,
     _decode_times,
     _include_projection_coordinates,
@@ -31,10 +32,30 @@ class WRFDataArrayAccessor(WRFAccessor):
 class WRFDatasetAccessor(WRFAccessor):
     """Adds a number of WRF specific methods to xarray.Dataset objects."""
 
-    def postprocess(self, decode_times=True) -> xr.Dataset:
+    def postprocess(
+        self,
+        decode_times: bool = True,
+        calculate_diagnostic_variables: bool = True,
+        drop_diagnostic_variable_components: bool = True,
+    ) -> xr.Dataset:
         """
         Postprocess the dataset.
 
+        Parameters
+        ----------
+        decode_times : bool, optional
+            Decode the string-like wrfout times to xarray-friendly Pandas types. Defaults to True.
+        calculate_diagnostic_variables : bool, optional
+            Calculate four essential diagnostic variables (potential temperature, air pressure,
+            geopotential, and geopotential height) that are otherwise only present in wrfout files
+            as split components or dependent upon special adjustments. Defaults to True. If the
+            underlying fields on which any of these calculated fields depends is missing, that
+            calculated variable is skipped. These will be eagerly evalulated, unless your data has
+            been chunked with Dask, in which case these fields will also be Dask arrays.
+        drop_diagnostic_variable_components : bool, optional
+            Determine whether to drop the underlying fields used to calculate the diagnostic
+            variables. Defaults to True.
+
         Returns
         -------
         xarray.Dataset
@@ -49,5 +70,7 @@ def postprocess(self, decode_times=True) -> xr.Dataset:
         )
         if decode_times:
             ds = ds.pipe(_decode_times)
+        if calculate_diagnostic_variables:
+            ds = ds.pipe(_calc_base_diagnostics, drop=drop_diagnostic_variable_components)
 
         return ds.pipe(_rename_dims)
diff --git a/xwrf/postprocess.py b/xwrf/postprocess.py
index e95d91da..1df8cce2 100644
--- a/xwrf/postprocess.py
+++ b/xwrf/postprocess.py
@@ -142,3 +142,50 @@ def _rename_dims(ds: xr.Dataset) -> xr.Dataset:
     """Rename dims for more consistent semantics."""
     rename_dim_map = {k: v for k, v in config.get('rename_dim_map').items() if k in ds.dims}
     return ds.rename(rename_dim_map)
+
+
+def _calc_base_diagnostics(ds: xr.Dataset, drop: bool = True) -> xr.Dataset:
+    """Calculate the four basic fields that WRF does not have in physically meaningful form.
+
+    Parameters
+    ----------
+    dataset : xarray.Dataset
+        Dataset representing WRF data opened via normal backend, with chunking.
+    drop : bool
+        Decide whether to drop the components of origin after creating the diagnostic fields from
+        them.
+
+    Notes
+    -----
+    This operation should be called before destaggering.
+    """
+    # Potential temperature
+    if 'T' in ds.data_vars:
+        ds['air_potential_temperature'] = ds['T'] + 300
+        ds['air_potential_temperature'].attrs = {
+            'units': 'K',
+            'standard_name': 'air_potential_temperature',
+        }
+        if drop:
+            del ds['T']
+
+    # Pressure
+    if 'P' in ds.data_vars and 'PB' in ds.data_vars:
+        ds['air_pressure'] = ds['P'] + ds['PB']
+        ds['air_pressure'].attrs = {
+            'units': ds['P'].attrs.get('units', 'Pa'),
+            'standard_name': 'air_pressure',
+        }
+        if drop:
+            del ds['P'], ds['PB']
+
+    # Geopotential and geopotential height
+    if 'PH' in ds.data_vars and 'PHB' in ds.data_vars:
+        ds['geopotential'] = ds['PH'] + ds['PHB']
+        ds['geopotential'].attrs = {'units': 'm**2 s**-2', 'standard_name': 'geopotential'}
+        ds['geopotential_height'] = ds['geopotential'] / 9.81
+        ds['geopotential_height'].attrs = {'units': 'm', 'standard_name': 'geopotential_height'}
+        if drop:
+            del ds['PH'], ds['PHB']
+
+    return ds