From eeda03304f2eb9b8b7260bb5954231fa625a62de Mon Sep 17 00:00:00 2001 From: Patrick Schmitt Date: Thu, 26 Sep 2024 09:04:02 +0200 Subject: [PATCH] add compute_fl_diagnostics_quantiles with tests --- oggm/core/flowline.py | 77 +++++++++++++++++++++++++++++++++++++++ oggm/tasks.py | 1 + oggm/tests/test_models.py | 74 +++++++++++++++++++++++++++++++++++++ 3 files changed, 152 insertions(+) diff --git a/oggm/core/flowline.py b/oggm/core/flowline.py index f84c4d433..1cfd5bfff 100644 --- a/oggm/core/flowline.py +++ b/oggm/core/flowline.py @@ -4592,3 +4592,80 @@ def clean_merged_flowlines(gdir, buffer=None): # Finally write the flowlines gdir.write_pickle(allfls, 'model_flowlines') + + +@entity_task(log) +def compute_fl_diagnostics_quantiles(gdir, + input_filesuffixes, + quantiles=0.5, + output_filesuffix='_median', + ): + """Compute quantile fl_diagnostics (e.g. median flowline of projections) + + This function takes a number of fl_diagnostic files and compute out of them + quantiles. This could be useful for example for calculating a median + flowline for different gcm projections which use the same scenario + (e.g. ssp126). + + Parameters + ---------- + gdir : :py:class:`oggm.GlacierDirectory` + the glacier directory to process + input_filesuffixes : list + a list of fl_diagnostic filesuffixes which should be considered for + computing + quantiles : float or list + The quantiles to compute. Could be a float for a single quantile or a + list of floats for severel quantile calculations. + Default is 0.5 (the median). + output_filesuffix : str + The output_filesuffix of the resulting fl_diagnostics. + Default is '_median'. 
+ """ + + # if quantiles is a list with one element, only use this + if isinstance(quantiles, list): + if len(quantiles) == 1: + quantiles = quantiles[0] + + # get filepath of all fl_diagnostic files + all_fl_diag_paths= [] + for filesuffix in input_filesuffixes: + all_fl_diag_paths.append(gdir.get_filepath('fl_diagnostics', + filesuffix=filesuffix)) + + # assume all have the same number of flowlines, extract the base structure + # and number of fl_ids from the first file + with xr.open_dataset(all_fl_diag_paths[0]) as ds: + ds_base = ds.load() + fl_ids = ds.flowlines.data + + # help function to open all fl_diag files as one with open_mfdataset + def add_filename_coord(ds): + filepath = ds.encoding["source"] + filename = os.path.basename(filepath).split(".")[0] + return ds.expand_dims({'filename': [filename]}) + + # now loop through all flowlines and save back in the same structure + fl_diag_dss = [] + for fl_id in fl_ids: + with xr.open_mfdataset(all_fl_diag_paths, + preprocess=lambda ds: add_filename_coord(ds), + group=f'fl_{fl_id}') as ds_fl: + with warnings.catch_warnings(): + # ignore warnings for NaNs + warnings.simplefilter("ignore", category=RuntimeWarning) + fl_diag_dss.append(ds_fl.load().quantile(quantiles, + dim='filename')) + + # for writing into the nice output structure + output_filepath = gdir.get_filepath('fl_diagnostics', + filesuffix=output_filesuffix) + encode = {} + for v in fl_diag_dss[0]: + encode[v] = {'zlib': True, 'complevel': 5} + + ds_base.to_netcdf(output_filepath, 'w') + for fl_id, ds in zip(fl_ids, fl_diag_dss): + ds.to_netcdf(output_filepath, 'a', group='fl_{}'.format(fl_id), + encoding=encode) diff --git a/oggm/tasks.py b/oggm/tasks.py index 39f931115..a261859ff 100644 --- a/oggm/tasks.py +++ b/oggm/tasks.py @@ -62,6 +62,7 @@ from oggm.core.flowline import run_from_climate_data from oggm.core.flowline import run_constant_climate from oggm.core.flowline import run_with_hydro +from oggm.core.flowline import 
compute_fl_diagnostics_quantiles from oggm.core.dynamic_spinup import run_dynamic_spinup from oggm.core.dynamic_spinup import run_dynamic_melt_f_calibration from oggm.utils import copy_to_basedir diff --git a/oggm/tests/test_models.py b/oggm/tests/test_models.py index 7b444a810..6e6b4fb21 100644 --- a/oggm/tests/test_models.py +++ b/oggm/tests/test_models.py @@ -3198,6 +3198,80 @@ def test_elevation_feedback(self, hef_gdir): plt.legend() plt.show() + @pytest.mark.slow + def test_fl_diag_quantiles(self, hef_gdir): + cfg.PARAMS['store_fl_diagnostics'] = True + + # conduct three runs from which to calculate the quantiles + output_suffixes = ['_run1', '_run2', '_run3'] + for i, output_suffix in enumerate(output_suffixes): + run_random_climate(hef_gdir, y0=1985-i*5, nyears=10, + output_filesuffix=output_suffix, seed=i) + + # only calculate the median + workflow.execute_entity_task(tasks.compute_fl_diagnostics_quantiles, + hef_gdir, + input_filesuffixes=output_suffixes, + quantiles=0.5, + output_filesuffix='_median' + ) + + # calculate 5th and 95th quantiles together + workflow.execute_entity_task(tasks.compute_fl_diagnostics_quantiles, + hef_gdir, + input_filesuffixes=output_suffixes, + quantiles=[0.05, 0.95], + output_filesuffix='_iqr' + ) + + ft = 'fl_diagnostics' + with xr.open_dataset( + hef_gdir.get_filepath(ft, filesuffix=output_suffixes[0])) as ds: + fl_ids = ds.flowlines.data + + for fl_id in fl_ids: + # open data of current flowline + ds_runs = [] + for output_suffix in output_suffixes: + fp = hef_gdir.get_filepath(ft, filesuffix=output_suffix) + with xr.open_dataset(fp, group=f'fl_{fl_id}') as ds: + ds_runs.append(ds.load()) + fp = hef_gdir.get_filepath(ft, filesuffix='_median') + with xr.open_dataset(fp, group=f'fl_{fl_id}') as ds: + ds_median = ds.load() + fp = hef_gdir.get_filepath(ft, filesuffix='_iqr') + with xr.open_dataset(fp, group=f'fl_{fl_id}') as ds: + ds_iqr = ds.load() + + # the median flowline should never be the smallest or largest + # value, 
compared to the values of the runs (as we have three runs). NOTE(review): the `(...).any()` assertions below check that equality to min/max occurs at least at one point — which does not test this claim (and is trivially true at tied points, e.g. t=0 or zero thickness). Confirm whether `assert not (var_median == var_min).all()` was intended.