diff --git a/ctdcal/tools/data_qc.py b/ctdcal/tools/data_qc.py index 3541227e..4e8b779e 100644 --- a/ctdcal/tools/data_qc.py +++ b/ctdcal/tools/data_qc.py @@ -12,6 +12,7 @@ DataTable, Div, MultiSelect, + Range1d, Select, StringFormatter, TableColumn, @@ -21,12 +22,12 @@ from ctdcal import get_ctdcal_config, io from ctdcal.common import load_user_config, validate_file -from ctdcal.fitting.common import get_node, df_node_to_BottleFlags, save_node +from ctdcal.fitting.common import df_node_to_BottleFlags, get_node, save_node cfg = get_ctdcal_config() -USERCONFIG = 'ctdcal/cfg.yaml' +USERCONFIG = "ctdcal/cfg.yaml" user_cfg = load_user_config(validate_file(USERCONFIG)) -FLAGFILE = Path(user_cfg.datadir, 'flag', user_cfg.bottleflags_man) +FLAGFILE = Path(user_cfg.datadir, "flag", user_cfg.bottleflags_man) # TODO: abstract parts of this to a separate file # TODO: following above, make parts reusable? @@ -48,16 +49,38 @@ btl_data["SSSCC"] = btl_data["STNNBR"].apply(lambda x: f"{x:03d}") + btl_data[ "CASTNO" ].apply(lambda x: f"{x:02d}") + +# Create salinity residuals btl_data["Residual"] = btl_data["SALNTY"] - btl_data["CTDSAL"] btl_data[["CTDPRS", "Residual"]] = btl_data[["CTDPRS", "Residual"]].round(4) btl_data["Comments"] = "" btl_data["New Flag"] = btl_data["SALNTY_FLAG_W"].copy() +# Temperature +btl_data["t_res"] = (btl_data["REFTMP"] - btl_data["CTDTMP"]).round(4) +btl_data["New T Flag"] = btl_data["REFTMP_FLAG_W"].copy() + +# Aaaand oxygen +if "OXYGEN" not in btl_data.columns: + btl_data["OXYGEN"] = np.nan + btl_data["OXYGEN_FLAG_W"] = 9 +btl_data["o_res"] = (btl_data["OXYGEN"] - btl_data["CTDOXY"]).round(4) +btl_data["New O Flag"] = btl_data["OXYGEN_FLAG_W"].copy() + +deltas_d = {"CTDSAL": "Residual", "CTDTMP": "t_res", "CTDOXY": "o_res"} + # update with old handcoded flags if file exists if FLAGFILE.exists(): - salt_flags_manual = get_node(FLAGFILE, 'salt') + salt_flags_manual = get_node(FLAGFILE, "salt") salt_flags_manual_df = pd.DataFrame.from_dict(salt_flags_manual) - salt_flags_manual_df = salt_flags_manual_df.rename(columns={"value": "New Flag", "cast_id": "SSSCC", "bottle_num": "SAMPNO", "notes": "Comments"}) + salt_flags_manual_df = salt_flags_manual_df.rename( + columns={ + "value": "New Flag", + "cast_id": "SSSCC", + "bottle_num": "SAMPNO", + "notes": "Comments", + } + ) # there's gotta be a better way... but this is good enough for now btl_data = btl_data.merge(salt_flags_manual_df, on=["SSSCC", "SAMPNO"], how="left") @@ -96,7 +119,9 @@ value=ref_dict[parameter.value], disabled=True, ) + station = Select(title="Station", options=ssscc_list, value=ssscc_list[0]) + # explanation of flags: # https://cchdo.github.io/hdo-assets/documentation/manuals/pdf/90_1/chap4.pdf flag_list = MultiSelect( @@ -159,15 +184,8 @@ src_plot_btl = ColumnDataSource(data=dict(x=[], y=[])) # set up plots -# fig = figure( -# plot_height=800, -# plot_width=400, -# title="{} vs CTDPRS [Station {}]".format(parameter.value, station.value), -# tools="pan,box_zoom,wheel_zoom,box_select,reset", -# y_axis_label="Pressure (dbar)", -# ) fig = figure( - height=800, + height=900, width=400, title="{} vs CTDPRS [Station {}]".format(parameter.value, station.value), tools="pan,box_zoom,wheel_zoom,box_select,reset", @@ -207,6 +225,7 @@ legend_label="Upcast CTD sample", ) fig.select(BoxSelectTool).continuous = False +# fig.y_range = Range1d(max_y_value, 0) fig.y_range.flipped = True # invert y-axis fig.legend.location = "bottom_right" fig.legend.border_line_width = 3 @@ -215,6 +234,41 @@ ctd_sal.nonselection_glyph.fill_alpha = 1 # makes CTDSAL *not* change on select upcast_sal.nonselection_glyph.fill_alpha = 1 # makes CTDSAL *not* change on select +threshes = { + "CTDSAL": np.array([0.002, 0.005, 0.010, 0.020]), + "CTDTMP": np.array([0.002, 0.005, 0.010, 0.020]), + "CTDOXY": np.array([0.625, 1.250, 2.500, 5.000]), # Not advised to follow this - biology happens +} + +# Residuals plot +src_plot_btl_del = ColumnDataSource(data=dict(x=[], y=[])) +fig2 = figure( + height=900, + width=400, + title="{} residual vs CTDPRS [Station {}]".format(parameter.value, station.value), + tools="pan,box_zoom,wheel_zoom,box_select,reset", + # y_axis_label="Pressure (dbar)", + y_range=fig.y_range, +) +# thresh = np.array([0.002, 0.005, 0.010, 0.020]) +thresh = threshes[parameter.value] +p_range = np.array([6000, 2000, 1000, 500]) +thresh = np.append(thresh, thresh[-1]) +p_range = np.append(p_range, 0) +btl_sal2 = fig2.asterisk( + "x", + "y", + size=12, + line_width=1.5, + color="#0033CC", + source=src_plot_btl_del, +) +fig2.step(thresh, p_range) +fig2.step(-thresh, p_range) +fig2.select(BoxSelectTool).continuous = False +fig2.y_range = fig.y_range +# fig2.y_range.flipped = True # invert y-axis + # define callback functions @@ -227,16 +281,21 @@ def update_selectors(): btl_data["SSSCC"] == station.value ) - ref_param.value = ref_dict[parameter.value] - # update table data current_table = btl_data[table_rows].reset_index() + + # print("Parameter: ", parameter.value, current_table[parameter.value][0]) + ref_param.value = ref_dict[parameter.value] + # print("Reference parameter: ", ref_param.value, current_table[ref_param.value][0]) + src_table.data = { # this causes edit_flag() to execute "SSSCC": current_table["SSSCC"], "SAMPNO": current_table["SAMPNO"], "CTDPRS": current_table["CTDPRS"], - parameter.value: current_table[parameter.value], - ref_param.value: current_table[ref_param.value], + "t_res": current_table["t_res"], + "o_res": current_table["o_res"], + "CTD Param": current_table[parameter.value].round(4), + "Reference": current_table[ref_param.value].round(4), "diff": current_table["Residual"], "flag": current_table["New Flag"], "Comments": current_table["Comments"], @@ -259,11 +318,32 @@ def update_selectors(): "x": btl_data.loc[btl_rows, ref_param.value], "y": btl_data.loc[btl_rows, "CTDPRS"], } + src_plot_btl_del.data = { + "x": btl_data.loc[btl_rows, deltas_d[parameter.value]], + "y": btl_data.loc[btl_rows, "CTDPRS"], + } # update plot labels/axlims fig.title.text = "{} vs CTDPRS [Station {}]".format(parameter.value, station.value) fig.xaxis.axis_label = parameter.value + fig2.title.text = "{} Residual".format(parameter.value) + fig2.xaxis.axis_label = parameter.value + + # Set the y-range from 0 to the maximum value + max_y = ctd_data.loc[ctd_rows, "CTDPRS"].max() + fig.y_range = Range1d(max_y + 0.05 * max_y, 0) + fig2.y_range = fig.y_range + + # Reset the x-axes when plots refresh + max_x_fig = ctd_data.loc[ctd_rows, parameter.value].max() + min_x_fig = ctd_data.loc[ctd_rows, parameter.value].min() + fig.x_range = Range1d(min_x_fig, max_x_fig) + + max_x_fig2 = max(btl_data.loc[btl_rows, deltas_d[parameter.value]].max(), 0.025) + min_x_fig2 = min(btl_data.loc[btl_rows, deltas_d[parameter.value]].min(), -0.025) + fig2.x_range = Range1d(min_x_fig2, max_x_fig2) + # deselect all datapoints btl_sal.data_source.selected.indices = [] src_table.selected.indices = [] @@ -313,6 +393,10 @@ def apply_flag(): "diff": current_table["Residual"], "flag": current_table["New Flag"], "Comments": current_table["Comments"], + "CTD Param": current_table[parameter.value].round(4), + "Reference": current_table[ref_param.value].round(4), + "o_res": current_table["o_res"], + "t_res": current_table["t_res"], } @@ -335,6 +419,10 @@ def apply_comment(): "diff": current_table["Residual"], "flag": current_table["New Flag"], "Comments": current_table["Comments"], + "CTD Param": current_table[parameter.value].round(4), + "Reference": current_table[ref_param.value].round(4), + "o_res": current_table["o_res"], + "t_res": current_table["t_res"], } @@ -346,14 +434,19 @@ def save_data(): df_out = pd.DataFrame.from_dict(src_table_changes.data) # minor changes to columns/names/etc. - df_out = df_out.rename(columns={"flag_new": "value", "SSSCC": "cast_id", "SAMPNO": "bottle_num", "Comments": "notes"}).drop( - columns=["flag_old", "diff"] - ) + df_out = df_out.rename( + columns={ + "flag_new": "value", + "SSSCC": "cast_id", + "SAMPNO": "bottle_num", + "Comments": "notes", + } + ).drop(columns=["flag_old", "diff"]) # save it salt = df_node_to_BottleFlags(df_out) flagfile = validate_file(FLAGFILE, create=True) - save_node(flagfile, salt, 'salt', create_new=True) + save_node(flagfile, salt, "salt", create_new=True) def exit_bokeh(): @@ -390,19 +483,33 @@ def selected_from_table(attr, old, new): # build data tables columns = [] -fields = ["SSSCC", "SAMPNO", "CTDPRS", "CTDSAL", "SALNTY", "diff", "flag", "Comments"] +fields = [ + "SSSCC", + "SAMPNO", + "CTDPRS", + "CTD Param", + "Reference", + "t_res", + "diff", + "o_res", + "flag", + "Comments", +] +ref_dict titles = [ "SSSCC", "Bottle", "CTDPRS", - "CTDSAL", - "SALNTY", - "Residual", + "CTD Param", + "Reference", + "t_res", + "s_res", + "o_res", "Flag", "Comments", ] -widths = [50, 40, 65, 65, 65, 65, 15, 200] -for (field, title, width) in zip(fields, titles, widths): +widths = [50, 40, 65, 65, 65, 65, 65, 65, 15, 200] +for field, title, width in zip(fields, titles, widths): if field == "flag": strfmt_in = {"text_align": "center", "font_style": "bold"} elif field == "Comments": @@ -422,7 +529,7 @@ def selected_from_table(attr, old, new): fields = ["SSSCC", "SAMPNO", "diff", "flag_old", "flag_new", "Comments"] titles = ["SSSCC", "Bottle", "Residual", "Old", "New", "Comments"] widths = [50, 40, 65, 15, 15, 375] -for (field, title, width) in zip(fields, titles, widths): +for field, title, width in zip(fields, titles, widths): if field == "flag_old": strfmt_in = {"text_align": "center", "font_style": "bold"} elif field == "flag_new": @@ -444,7 +551,8 @@ def selected_from_table(attr, old, new): source=src_table, columns=columns, index_width=20, - width=565 + 50 + 20, # sum of col widths + fudge factor + idx width + # width is originally 565 + 50 + 20 = 635 + width=565 + 150 + 20, # sum of col widths + fudge factor + idx width height=600, editable=True, fit_columns=False, @@ -461,7 +569,9 @@ def selected_from_table(attr, old, new): sortable=False, ) data_table_title = Div(text="""All Station Data:""", width=200, height=15) -data_table_changed_title = Div(text="""Flagged Data:""", width=200, height=15) +data_table_changed_title = Div( + text="""Flagged Data:""", width=200, height=15 +) controls = column( parameter, @@ -483,7 +593,7 @@ def selected_from_table(attr, old, new): data_table_title, data_table, data_table_changed_title, data_table_changed ) -curdoc().add_root(row(controls, tables, fig)) +curdoc().add_root(row(controls, tables, fig, fig2)) curdoc().title = "CTDO Data Flagging Tool" update_selectors()