From 353760c711bd3920590ef54c5407c9328ffda8a8 Mon Sep 17 00:00:00 2001 From: Klankers Date: Wed, 4 Sep 2024 19:58:25 -0700 Subject: [PATCH 1/6] Merge functional GP17-ANT vis with current flagging routine --- ctdcal/tools/data_qc.py | 102 +++++++++++++++++++++++++++++++--------- 1 file changed, 80 insertions(+), 22 deletions(-) diff --git a/ctdcal/tools/data_qc.py b/ctdcal/tools/data_qc.py index 3541227e..fe2d7d8c 100644 --- a/ctdcal/tools/data_qc.py +++ b/ctdcal/tools/data_qc.py @@ -21,7 +21,7 @@ from ctdcal import get_ctdcal_config, io from ctdcal.common import load_user_config, validate_file -from ctdcal.fitting.common import get_node, df_node_to_BottleFlags, save_node +from ctdcal.fitting.common import df_node_to_BottleFlags, get_node, save_node cfg = get_ctdcal_config() USERCONFIG = 'ctdcal/cfg.yaml' @@ -48,11 +48,26 @@ btl_data["SSSCC"] = btl_data["STNNBR"].apply(lambda x: f"{x:03d}") + btl_data[ "CASTNO" ].apply(lambda x: f"{x:02d}") + +# Create salinity residuals btl_data["Residual"] = btl_data["SALNTY"] - btl_data["CTDSAL"] btl_data[["CTDPRS", "Residual"]] = btl_data[["CTDPRS", "Residual"]].round(4) btl_data["Comments"] = "" btl_data["New Flag"] = btl_data["SALNTY_FLAG_W"].copy() +# Temperature +btl_data["t_res"] = (btl_data["REFTMP"] - btl_data["CTDTMP"]).round(4) +btl_data["New T Flag"] = btl_data["REFTMP_FLAG_W"].copy() + +# Aaaand oxygen +if "OXYGEN" not in btl_data.columns: + btl_data["OXYGEN"] = np.nan + btl_data["OXYGEN_FLAG_W"] = 9 +btl_data["o_res"] = (btl_data["OXYGEN"] - btl_data["CTDOXY"]).round(4) +btl_data["New O Flag"] = btl_data["OXYGEN_FLAG_W"].copy() + +deltas_d = {"CTDSAL": "Residual", "CTDTMP": "t_res", "CTDOXY": "o_res"} + # update with old handcoded flags if file exists if FLAGFILE.exists(): salt_flags_manual = get_node(FLAGFILE, 'salt') @@ -96,7 +111,9 @@ value=ref_dict[parameter.value], disabled=True, ) + station = Select(title="Station", options=ssscc_list, value=ssscc_list[0]) + # explanation of flags: # https://cchdo.github.io/hdo-assets/documentation/manuals/pdf/90_1/chap4.pdf flag_list = MultiSelect( @@ -159,15 +176,8 @@ src_plot_btl = ColumnDataSource(data=dict(x=[], y=[])) # set up plots -# fig = figure( -# plot_height=800, -# plot_width=400, -# title="{} vs CTDPRS [Station {}]".format(parameter.value, station.value), -# tools="pan,box_zoom,wheel_zoom,box_select,reset", -# y_axis_label="Pressure (dbar)", -# ) fig = figure( - height=800, + height=900, width=400, title="{} vs CTDPRS [Station {}]".format(parameter.value, station.value), tools="pan,box_zoom,wheel_zoom,box_select,reset", @@ -215,8 +225,39 @@ ctd_sal.nonselection_glyph.fill_alpha = 1 # makes CTDSAL *not* change on select upcast_sal.nonselection_glyph.fill_alpha = 1 # makes CTDSAL *not* change on select -# define callback functions +threshes = {"CTDSAL":np.array([0.002, 0.005, 0.010, 0.020]), + "CTDTMP":np.array([0.002, 0.005, 0.010, 0.020]), + "CTDOXY":np.array([0.625, 1.250, 2.500, 5.000])} +# Residuals plot +src_plot_btl_del = ColumnDataSource(data=dict(x=[], y=[])) +fig2 = figure( + height=900, + width=400, + title="{} residual vs CTDPRS [Station {}]".format(parameter.value, station.value), + tools="pan,box_zoom,wheel_zoom,box_select,reset", + # y_axis_label="Pressure (dbar)", + y_range=fig.y_range +) +# thresh = np.array([0.002, 0.005, 0.010, 0.020]) +thresh = threshes[parameter.value] +p_range = np.array([6000, 2000, 1000, 500]) +thresh = np.append(thresh, thresh[-1]) +p_range = np.append(p_range, 0) +btl_sal2 = fig2.asterisk( + "x", + "y", + size=12, + line_width=1.5, + color="#0033CC", + source=src_plot_btl_del, +) +fig2.step(thresh, p_range) +fig2.step(-thresh, p_range) +fig2.select(BoxSelectTool).continuous = False +fig2.y_range.flipped = True # invert y-axis + +# define callback functions def update_selectors(): @@ -227,16 +268,21 @@ def update_selectors(): btl_data["SSSCC"] == station.value ) - ref_param.value = ref_dict[parameter.value] - # update table data current_table = btl_data[table_rows].reset_index() + + # print("Parameter: ", parameter.value, current_table[parameter.value][0]) + ref_param.value = ref_dict[parameter.value] + # print("Reference parameter: ", ref_param.value, current_table[ref_param.value][0]) + src_table.data = { # this causes edit_flag() to execute "SSSCC": current_table["SSSCC"], "SAMPNO": current_table["SAMPNO"], "CTDPRS": current_table["CTDPRS"], - parameter.value: current_table[parameter.value], - ref_param.value: current_table[ref_param.value], + "t_res": current_table["t_res"], + "o_res": current_table["o_res"], + "CTD Param": current_table[parameter.value].round(4), + "Reference": current_table[ref_param.value].round(4), "diff": current_table["Residual"], "flag": current_table["New Flag"], "Comments": current_table["Comments"], @@ -259,11 +305,18 @@ def update_selectors(): "x": btl_data.loc[btl_rows, ref_param.value], "y": btl_data.loc[btl_rows, "CTDPRS"], } + src_plot_btl_del.data = { + "x": btl_data.loc[btl_rows, deltas_d[parameter.value]], + "y": btl_data.loc[btl_rows, "CTDPRS"], + } # update plot labels/axlims fig.title.text = "{} vs CTDPRS [Station {}]".format(parameter.value, station.value) fig.xaxis.axis_label = parameter.value + fig2.title.text = "{} Residual".format(parameter.value) + fig2.xaxis.axis_label = parameter.value + # deselect all datapoints btl_sal.data_source.selected.indices = [] src_table.selected.indices = [] @@ -390,18 +443,22 @@ def selected_from_table(attr, old, new): # build data tables columns = [] -fields = ["SSSCC", "SAMPNO", "CTDPRS", "CTDSAL", "SALNTY", "diff", "flag", "Comments"] +fields = ["SSSCC", "SAMPNO", "CTDPRS", "CTD Param", + "Reference", "t_res", "diff", "o_res", "flag", "Comments"] +ref_dict titles = [ "SSSCC", "Bottle", "CTDPRS", - "CTDSAL", - "SALNTY", - "Residual", + "CTD Param", + "Reference", + "t_res", + "s_res", + "o_res", "Flag", "Comments", ] -widths = [50, 40, 65, 65, 65, 65, 15, 200] +widths = [50, 40, 65, 65, 65, 65, 65, 65, 15, 200] for (field, title, width) in zip(fields, titles, widths): if field == "flag": strfmt_in = {"text_align": "center", "font_style": "bold"} @@ -444,7 +501,8 @@ def selected_from_table(attr, old, new): source=src_table, columns=columns, index_width=20, - width=565 + 50 + 20, # sum of col widths + fudge factor + idx width + # width is originally 565 + 50 + 20 = 635 + width=565 + 150 + 20, # sum of col widths + fudge factor + idx width height=600, editable=True, fit_columns=False, @@ -461,7 +519,7 @@ def selected_from_table(attr, old, new): sortable=False, ) data_table_title = Div(text="""All Station Data:""", width=200, height=15) -data_table_changed_title = Div(text="""Flagged Data:""", width=200, height=15) +data_table_changed_title = Div(text="""Flagged Salinity Data:""", width=200, height=15) controls = column( parameter, @@ -483,7 +541,7 @@ def selected_from_table(attr, old, new): data_table_title, data_table, data_table_changed_title, data_table_changed ) -curdoc().add_root(row(controls, tables, fig)) +curdoc().add_root(row(controls, tables, fig, fig2)) curdoc().title = "CTDO Data Flagging Tool" update_selectors() From c623721c9dd9d0dcf16783f4a0bc4b17077c0c8f Mon Sep 17 00:00:00 2001 From: Klankers Date: Wed, 4 Sep 2024 19:59:08 -0700 Subject: [PATCH 2/6] Black --- ctdcal/tools/data_qc.py | 61 +++++++++++++++++++++++++++++------------ 1 file changed, 44 insertions(+), 17 deletions(-) diff --git a/ctdcal/tools/data_qc.py b/ctdcal/tools/data_qc.py index fe2d7d8c..aa9124d3 100644 --- a/ctdcal/tools/data_qc.py +++ b/ctdcal/tools/data_qc.py @@ -24,9 +24,9 @@ from ctdcal.fitting.common import df_node_to_BottleFlags, get_node, save_node cfg = get_ctdcal_config() -USERCONFIG = 'ctdcal/cfg.yaml' +USERCONFIG = "ctdcal/cfg.yaml" user_cfg = load_user_config(validate_file(USERCONFIG)) -FLAGFILE = Path(user_cfg.datadir, 'flag', user_cfg.bottleflags_man) +FLAGFILE = Path(user_cfg.datadir, "flag", user_cfg.bottleflags_man) # TODO: abstract parts of this to a separate file # TODO: following above, make parts reusable? @@ -70,9 +70,16 @@ # update with old handcoded flags if file exists if FLAGFILE.exists(): - salt_flags_manual = get_node(FLAGFILE, 'salt') + salt_flags_manual = get_node(FLAGFILE, "salt") salt_flags_manual_df = pd.DataFrame.from_dict(salt_flags_manual) - salt_flags_manual_df = salt_flags_manual_df.rename(columns={"value": "New Flag", "cast_id": "SSSCC", "bottle_num": "SAMPNO", "notes": "Comments"}) + salt_flags_manual_df = salt_flags_manual_df.rename( + columns={ + "value": "New Flag", + "cast_id": "SSSCC", + "bottle_num": "SAMPNO", + "notes": "Comments", + } + ) # there's gotta be a better way... but this is good enough for now btl_data = btl_data.merge(salt_flags_manual_df, on=["SSSCC", "SAMPNO"], how="left") @@ -225,9 +232,11 @@ ctd_sal.nonselection_glyph.fill_alpha = 1 # makes CTDSAL *not* change on select upcast_sal.nonselection_glyph.fill_alpha = 1 # makes CTDSAL *not* change on select -threshes = {"CTDSAL":np.array([0.002, 0.005, 0.010, 0.020]), - "CTDTMP":np.array([0.002, 0.005, 0.010, 0.020]), - "CTDOXY":np.array([0.625, 1.250, 2.500, 5.000])} +threshes = { + "CTDSAL": np.array([0.002, 0.005, 0.010, 0.020]), + "CTDTMP": np.array([0.002, 0.005, 0.010, 0.020]), + "CTDOXY": np.array([0.625, 1.250, 2.500, 5.000]), +} # Residuals plot src_plot_btl_del = ColumnDataSource(data=dict(x=[], y=[])) @@ -237,7 +246,7 @@ title="{} residual vs CTDPRS [Station {}]".format(parameter.value, station.value), tools="pan,box_zoom,wheel_zoom,box_select,reset", # y_axis_label="Pressure (dbar)", - y_range=fig.y_range + y_range=fig.y_range, ) # thresh = np.array([0.002, 0.005, 0.010, 0.020]) thresh = threshes[parameter.value] @@ -259,6 +268,7 @@ # define callback functions + def update_selectors(): print("exec update_selectors()") @@ -399,14 +409,19 @@ def save_data(): df_out = pd.DataFrame.from_dict(src_table_changes.data) # minor changes to columns/names/etc. - df_out = df_out.rename(columns={"flag_new": "value", "SSSCC": "cast_id", "SAMPNO": "bottle_num", "Comments": "notes"}).drop( - columns=["flag_old", "diff"] - ) + df_out = df_out.rename( + columns={ + "flag_new": "value", + "SSSCC": "cast_id", + "SAMPNO": "bottle_num", + "Comments": "notes", + } + ).drop(columns=["flag_old", "diff"]) # save it salt = df_node_to_BottleFlags(df_out) flagfile = validate_file(FLAGFILE, create=True) - save_node(flagfile, salt, 'salt', create_new=True) + save_node(flagfile, salt, "salt", create_new=True) def exit_bokeh(): @@ -443,8 +458,18 @@ def selected_from_table(attr, old, new): # build data tables columns = [] -fields = ["SSSCC", "SAMPNO", "CTDPRS", "CTD Param", - "Reference", "t_res", "diff", "o_res", "flag", "Comments"] +fields = [ + "SSSCC", + "SAMPNO", + "CTDPRS", + "CTD Param", + "Reference", + "t_res", + "diff", + "o_res", + "flag", + "Comments", +] ref_dict titles = [ "SSSCC", @@ -459,7 +484,7 @@ def selected_from_table(attr, old, new): "Comments", ] widths = [50, 40, 65, 65, 65, 65, 65, 65, 15, 200] -for (field, title, width) in zip(fields, titles, widths): +for field, title, width in zip(fields, titles, widths): if field == "flag": strfmt_in = {"text_align": "center", "font_style": "bold"} elif field == "Comments": @@ -479,7 +504,7 @@ def selected_from_table(attr, old, new): fields = ["SSSCC", "SAMPNO", "diff", "flag_old", "flag_new", "Comments"] titles = ["SSSCC", "Bottle", "Residual", "Old", "New", "Comments"] widths = [50, 40, 65, 15, 15, 375] -for (field, title, width) in zip(fields, titles, widths): +for field, title, width in zip(fields, titles, widths): if field == "flag_old": strfmt_in = {"text_align": "center", "font_style": "bold"} elif field == "flag_new": @@ -519,7 +544,9 @@ def selected_from_table(attr, old, new): sortable=False, ) data_table_title = Div(text="""All Station Data:""", width=200, height=15) -data_table_changed_title = Div(text="""Flagged Salinity Data:""", width=200, height=15) +data_table_changed_title = Div( + text="""Flagged Salinity Data:""", width=200, height=15 +) controls = column( parameter, From 39989fa5b26f79eadb3a78faeeb47944082517ad Mon Sep 17 00:00:00 2001 From: Klankers Date: Wed, 4 Sep 2024 20:04:47 -0700 Subject: [PATCH 3/6] Restore original flag table name --- ctdcal/tools/data_qc.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ctdcal/tools/data_qc.py b/ctdcal/tools/data_qc.py index aa9124d3..8e6a5f31 100644 --- a/ctdcal/tools/data_qc.py +++ b/ctdcal/tools/data_qc.py @@ -545,7 +545,7 @@ def selected_from_table(attr, old, new): ) data_table_title = Div(text="""All Station Data:""", width=200, height=15) data_table_changed_title = Div( - text="""Flagged Salinity Data:""", width=200, height=15 + text="""Flagged Data:""", width=200, height=15 ) controls = column( From 6ba51e390ede06bb8caccf7fe58a8d7914c50e5c Mon Sep 17 00:00:00 2001 From: Klankers Date: Fri, 6 Sep 2024 11:57:34 -0700 Subject: [PATCH 4/6] Adjust y-range to autoscale when refreshed --- ctdcal/tools/data_qc.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/ctdcal/tools/data_qc.py b/ctdcal/tools/data_qc.py index 8e6a5f31..ef43371b 100644 --- a/ctdcal/tools/data_qc.py +++ b/ctdcal/tools/data_qc.py @@ -12,6 +12,7 @@ DataTable, Div, MultiSelect, + Range1d, Select, StringFormatter, TableColumn, @@ -224,6 +225,7 @@ legend_label="Upcast CTD sample", ) fig.select(BoxSelectTool).continuous = False +# fig.y_range = Range1d(max_y_value, 0) fig.y_range.flipped = True # invert y-axis fig.legend.location = "bottom_right" fig.legend.border_line_width = 3 @@ -235,7 +237,7 @@ threshes = { "CTDSAL": np.array([0.002, 0.005, 0.010, 0.020]), "CTDTMP": np.array([0.002, 0.005, 0.010, 0.020]), - "CTDOXY": np.array([0.625, 1.250, 2.500, 5.000]), + "CTDOXY": np.array([0.625, 1.250, 2.500, 5.000]), # Not advised to follow this - biology happens } # Residuals plot @@ -264,7 +266,8 @@ fig2.step(thresh, p_range) fig2.step(-thresh, p_range) fig2.select(BoxSelectTool).continuous = False -fig2.y_range.flipped = True # invert y-axis +fig2.y_range = fig.y_range +# fig2.y_range.flipped = True # invert y-axis # define callback functions @@ -327,6 +330,11 @@ def update_selectors(): fig2.title.text = "{} Residual".format(parameter.value) fig2.xaxis.axis_label = parameter.value + # Set the y-range from 0 to the maximum value + max_y = ctd_data.loc[ctd_rows, "CTDPRS"].max() + fig.y_range = Range1d(max_y + 0.05 * max_y, 0) + fig2.y_range = fig.y_range + # deselect all datapoints btl_sal.data_source.selected.indices = [] src_table.selected.indices = [] From 7fcc4c72e4ffa1b11aa9f1804da0a0ea3b5c039b Mon Sep 17 00:00:00 2001 From: Klankers Date: Fri, 6 Sep 2024 12:11:57 -0700 Subject: [PATCH 5/6] Reset x-axes when figure is reset --- ctdcal/tools/data_qc.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/ctdcal/tools/data_qc.py b/ctdcal/tools/data_qc.py index ef43371b..2070d7e4 100644 --- a/ctdcal/tools/data_qc.py +++ b/ctdcal/tools/data_qc.py @@ -335,6 +335,15 @@ def update_selectors(): fig.y_range = Range1d(max_y + 0.05 * max_y, 0) fig2.y_range = fig.y_range + # Reset the x-axes when plots refresh + max_x_fig = ctd_data.loc[ctd_rows, parameter.value].max() + min_x_fig = ctd_data.loc[ctd_rows, parameter.value].min() + fig.x_range = Range1d(min_x_fig, max_x_fig) + + max_x_fig2 = max(btl_data.loc[btl_rows, deltas_d[parameter.value]].max(), 0.025) + min_x_fig2 = min(btl_data.loc[btl_rows, deltas_d[parameter.value]].min(), -0.025) + fig2.x_range = Range1d(min_x_fig2, max_x_fig2) + # deselect all datapoints btl_sal.data_source.selected.indices = [] src_table.selected.indices = [] From 022545abb70fea3b54062a994466cae115fa486c Mon Sep 17 00:00:00 2001 From: Klankers Date: Fri, 6 Sep 2024 12:34:28 -0700 Subject: [PATCH 6/6] Fix table entries that go missing with flag/comment updates --- ctdcal/tools/data_qc.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/ctdcal/tools/data_qc.py b/ctdcal/tools/data_qc.py index 2070d7e4..4e8b779e 100644 --- a/ctdcal/tools/data_qc.py +++ b/ctdcal/tools/data_qc.py @@ -393,6 +393,10 @@ def apply_flag(): "diff": current_table["Residual"], "flag": current_table["New Flag"], "Comments": current_table["Comments"], + "CTD Param": current_table[parameter.value].round(4), + "Reference": current_table[ref_param.value].round(4), + "o_res": current_table["o_res"], + "t_res": current_table["t_res"], } @@ -415,6 +419,10 @@ def apply_comment(): "diff": current_table["Residual"], "flag": current_table["New Flag"], "Comments": current_table["Comments"], + "CTD Param": current_table[parameter.value].round(4), + "Reference": current_table[ref_param.value].round(4), + "o_res": current_table["o_res"], + "t_res": current_table["t_res"], }