From eeda03304f2eb9b8b7260bb5954231fa625a62de Mon Sep 17 00:00:00 2001 From: Patrick Schmitt Date: Thu, 26 Sep 2024 09:04:02 +0200 Subject: [PATCH] add compute_fl_diagnostics_quantiles with tests --- oggm/core/flowline.py | 77 +++++++++++++++++++++++++++++++++++++++ oggm/tasks.py | 1 + oggm/tests/test_models.py | 74 +++++++++++++++++++++++++++++++++++++ 3 files changed, 152 insertions(+) diff --git a/oggm/core/flowline.py b/oggm/core/flowline.py index f84c4d433..1cfd5bfff 100644 --- a/oggm/core/flowline.py +++ b/oggm/core/flowline.py @@ -4592,3 +4592,80 @@ def clean_merged_flowlines(gdir, buffer=None): # Finally write the flowlines gdir.write_pickle(allfls, 'model_flowlines') + + +@entity_task(log) +def compute_fl_diagnostics_quantiles(gdir, + input_filesuffixes, + quantiles=0.5, + output_filesuffix='_median', + ): + """Compute quantile fl_diagnostics (e.g. median flowline of projections) + + This function takes a number of fl_diagnostic files and compute out of them + quantiles. This could be useful for example for calculating a median + flowline for different gcm projections which use the same scenario + (e.g. ssp126). + + Parameters + ---------- + gdir : :py:class:`oggm.GlacierDirectory` + the glacier directory to process + input_filesuffixes : list + a list of fl_diagnostic filesuffixes which should be considered for + computing + quantiles : float or list + The quantiles to compute. Could be a float for a single quantile or a + list of floats for severel quantile calculations. + Default is 0.5 (the median). + output_filesuffix : str + The output_filesuffix of the resulting fl_diagnostics. + Default is '_median'. 
+ """ + + # if quantiles is a list with one element, only use this + if isinstance(quantiles, list): + if len(quantiles) == 1: + quantiles = quantiles[0] + + # get filepath of all fl_diagnostic files + all_fl_diag_paths= [] + for filesuffix in input_filesuffixes: + all_fl_diag_paths.append(gdir.get_filepath('fl_diagnostics', + filesuffix=filesuffix)) + + # assume all have the same number of flowlines, extract the base structure + # and number of fl_ids from the first file + with xr.open_dataset(all_fl_diag_paths[0]) as ds: + ds_base = ds.load() + fl_ids = ds.flowlines.data + + # help function to open all fl_diag files as one with open_mfdataset + def add_filename_coord(ds): + filepath = ds.encoding["source"] + filename = os.path.basename(filepath).split(".")[0] + return ds.expand_dims({'filename': [filename]}) + + # now loop through all flowlines and save back in the same structure + fl_diag_dss = [] + for fl_id in fl_ids: + with xr.open_mfdataset(all_fl_diag_paths, + preprocess=lambda ds: add_filename_coord(ds), + group=f'fl_{fl_id}') as ds_fl: + with warnings.catch_warnings(): + # ignore warnings for NaNs + warnings.simplefilter("ignore", category=RuntimeWarning) + fl_diag_dss.append(ds_fl.load().quantile(quantiles, + dim='filename')) + + # for writing into the nice output structure + output_filepath = gdir.get_filepath('fl_diagnostics', + filesuffix=output_filesuffix) + encode = {} + for v in fl_diag_dss[0]: + encode[v] = {'zlib': True, 'complevel': 5} + + ds_base.to_netcdf(output_filepath, 'w') + for fl_id, ds in zip(fl_ids, fl_diag_dss): + ds.to_netcdf(output_filepath, 'a', group='fl_{}'.format(fl_id), + encoding=encode) diff --git a/oggm/tasks.py b/oggm/tasks.py index 39f931115..a261859ff 100644 --- a/oggm/tasks.py +++ b/oggm/tasks.py @@ -62,6 +62,7 @@ from oggm.core.flowline import run_from_climate_data from oggm.core.flowline import run_constant_climate from oggm.core.flowline import run_with_hydro +from oggm.core.flowline import 
compute_fl_diagnostics_quantiles from oggm.core.dynamic_spinup import run_dynamic_spinup from oggm.core.dynamic_spinup import run_dynamic_melt_f_calibration from oggm.utils import copy_to_basedir diff --git a/oggm/tests/test_models.py b/oggm/tests/test_models.py index 7b444a810..6e6b4fb21 100644 --- a/oggm/tests/test_models.py +++ b/oggm/tests/test_models.py @@ -3198,6 +3198,80 @@ def test_elevation_feedback(self, hef_gdir): plt.legend() plt.show() + @pytest.mark.slow + def test_fl_diag_quantiles(self, hef_gdir): + cfg.PARAMS['store_fl_diagnostics'] = True + + # conduct three runs from which to calculate the quantiles + output_suffixes = ['_run1', '_run2', '_run3'] + for i, output_suffix in enumerate(output_suffixes): + run_random_climate(hef_gdir, y0=1985-i*5, nyears=10, + output_filesuffix=output_suffix, seed=i) + + # only calculate the median + workflow.execute_entity_task(tasks.compute_fl_diagnostics_quantiles, + hef_gdir, + input_filesuffixes=output_suffixes, + quantiles=0.5, + output_filesuffix='_median' + ) + + # calculate 5th and 95th quantiles together + workflow.execute_entity_task(tasks.compute_fl_diagnostics_quantiles, + hef_gdir, + input_filesuffixes=output_suffixes, + quantiles=[0.05, 0.95], + output_filesuffix='_iqr' + ) + + ft = 'fl_diagnostics' + with xr.open_dataset( + hef_gdir.get_filepath(ft, filesuffix=output_suffixes[0])) as ds: + fl_ids = ds.flowlines.data + + for fl_id in fl_ids: + # open data of current flowline + ds_runs = [] + for output_suffix in output_suffixes: + fp = hef_gdir.get_filepath(ft, filesuffix=output_suffix) + with xr.open_dataset(fp, group=f'fl_{fl_id}') as ds: + ds_runs.append(ds.load()) + fp = hef_gdir.get_filepath(ft, filesuffix='_median') + with xr.open_dataset(fp, group=f'fl_{fl_id}') as ds: + ds_median = ds.load() + fp = hef_gdir.get_filepath(ft, filesuffix='_iqr') + with xr.open_dataset(fp, group=f'fl_{fl_id}') as ds: + ds_iqr = ds.load() + + # the median flowline should never be the smallest or largest + # value, 
compared to the values of the runs (as we have three runs). NOTE(review): the `(...).any()` assertions below check that equality to min/max occurs at least at one point — which does not test this claim (and is trivially true at tied points, e.g. t=0 or zero thickness). Confirm whether `assert not (var_median == var_min).all()` was intended.