Skip to content

Commit

Permalink
Merge pull request #124 from SIO-ODF/2024_qc_fixes
Browse files Browse the repository at this point in the history
2024 qc fixes
  • Loading branch information
Klankers authored Sep 9, 2024
2 parents a829d02 + cc5f582 commit a196c1b
Showing 1 changed file with 141 additions and 31 deletions.
172 changes: 141 additions & 31 deletions ctdcal/tools/data_qc.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
DataTable,
Div,
MultiSelect,
Range1d,
Select,
StringFormatter,
TableColumn,
Expand All @@ -21,12 +22,12 @@

from ctdcal import get_ctdcal_config, io
from ctdcal.common import load_user_config, validate_file
from ctdcal.fitting.common import get_node, df_node_to_BottleFlags, save_node
from ctdcal.fitting.common import df_node_to_BottleFlags, get_node, save_node

cfg = get_ctdcal_config()
USERCONFIG = 'ctdcal/cfg.yaml'
USERCONFIG = "ctdcal/cfg.yaml"
user_cfg = load_user_config(validate_file(USERCONFIG))
FLAGFILE = Path(user_cfg.datadir, 'flag', user_cfg.bottleflags_man)
FLAGFILE = Path(user_cfg.datadir, "flag", user_cfg.bottleflags_man)

# TODO: abstract parts of this to a separate file
# TODO: following above, make parts reusable?
Expand All @@ -48,16 +49,38 @@
btl_data["SSSCC"] = btl_data["STNNBR"].apply(lambda x: f"{x:03d}") + btl_data[
"CASTNO"
].apply(lambda x: f"{x:02d}")

# Create salinity residuals
btl_data["Residual"] = btl_data["SALNTY"] - btl_data["CTDSAL"]
btl_data[["CTDPRS", "Residual"]] = btl_data[["CTDPRS", "Residual"]].round(4)
btl_data["Comments"] = ""
btl_data["New Flag"] = btl_data["SALNTY_FLAG_W"].copy()

# Temperature
btl_data["t_res"] = (btl_data["REFTMP"] - btl_data["CTDTMP"]).round(4)
btl_data["New T Flag"] = btl_data["REFTMP_FLAG_W"].copy()

# Oxygen
if "OXYGEN" not in btl_data.columns:
btl_data["OXYGEN"] = np.nan
btl_data["OXYGEN_FLAG_W"] = 9
btl_data["o_res"] = (btl_data["OXYGEN"] - btl_data["CTDOXY"]).round(4)
btl_data["New O Flag"] = btl_data["OXYGEN_FLAG_W"].copy()

deltas_d = {"CTDSAL": "Residual", "CTDTMP": "t_res", "CTDOXY": "o_res"}

# update with old handcoded flags if file exists
if FLAGFILE.exists():
salt_flags_manual = get_node(FLAGFILE, 'salt')
salt_flags_manual = get_node(FLAGFILE, "salt")
salt_flags_manual_df = pd.DataFrame.from_dict(salt_flags_manual)
salt_flags_manual_df = salt_flags_manual_df.rename(columns={"value": "New Flag", "cast_id": "SSSCC", "bottle_num": "SAMPNO", "notes": "Comments"})
salt_flags_manual_df = salt_flags_manual_df.rename(
columns={
"value": "New Flag",
"cast_id": "SSSCC",
"bottle_num": "SAMPNO",
"notes": "Comments",
}
)

# there's gotta be a better way... but this is good enough for now
btl_data = btl_data.merge(salt_flags_manual_df, on=["SSSCC", "SAMPNO"], how="left")
Expand Down Expand Up @@ -96,7 +119,9 @@
value=ref_dict[parameter.value],
disabled=True,
)

station = Select(title="Station", options=ssscc_list, value=ssscc_list[0])

# explanation of flags:
# https://cchdo.github.io/hdo-assets/documentation/manuals/pdf/90_1/chap4.pdf
flag_list = MultiSelect(
Expand Down Expand Up @@ -159,15 +184,8 @@
src_plot_btl = ColumnDataSource(data=dict(x=[], y=[]))

# set up plots
# fig = figure(
# plot_height=800,
# plot_width=400,
# title="{} vs CTDPRS [Station {}]".format(parameter.value, station.value),
# tools="pan,box_zoom,wheel_zoom,box_select,reset",
# y_axis_label="Pressure (dbar)",
# )
fig = figure(
height=800,
height=900,
width=400,
title="{} vs CTDPRS [Station {}]".format(parameter.value, station.value),
tools="pan,box_zoom,wheel_zoom,box_select,reset",
Expand Down Expand Up @@ -207,6 +225,7 @@
legend_label="Upcast CTD sample",
)
fig.select(BoxSelectTool).continuous = False
# fig.y_range = Range1d(max_y_value, 0)
fig.y_range.flipped = True # invert y-axis
fig.legend.location = "bottom_right"
fig.legend.border_line_width = 3
Expand All @@ -215,6 +234,41 @@
ctd_sal.nonselection_glyph.fill_alpha = 1 # makes CTDSAL *not* change on select
upcast_sal.nonselection_glyph.fill_alpha = 1 # makes CTDSAL *not* change on select

# Residual thresholds per CTD parameter, keyed by the same names offered by the
# `parameter` selector. Each array holds four values that are paired, in order,
# with the four levels in `p_range` below when the step envelopes are drawn.
threshes = {
"CTDSAL": np.array([0.002, 0.005, 0.010, 0.020]),
"CTDTMP": np.array([0.002, 0.005, 0.010, 0.020]),
"CTDOXY": np.array([0.625, 1.250, 2.500, 5.000]), # Not advised to follow this - biology happens
}

# Residuals plot
# Data source for the residual markers; created empty here and expected to be
# filled with x (residual) / y (CTDPRS) columns when the selectors are updated.
src_plot_btl_del = ColumnDataSource(data=dict(x=[], y=[]))
fig2 = figure(
height=900,
width=400,
title="{} residual vs CTDPRS [Station {}]".format(parameter.value, station.value),
tools="pan,box_zoom,wheel_zoom,box_select,reset",
# y_axis_label="Pressure (dbar)",
# Reuse the main figure's y-range object so both plots share one pressure axis.
y_range=fig.y_range,
)
# thresh = np.array([0.002, 0.005, 0.010, 0.020])
# Thresholds for whichever parameter is selected at the moment this code runs.
# NOTE(review): nothing in this block redraws the envelopes when the parameter
# selection changes later -- confirm whether that is handled elsewhere.
thresh = threshes[parameter.value]
# Levels on the y (pressure) axis at which the threshold value changes.
p_range = np.array([6000, 2000, 1000, 500])
# Repeat the last threshold and add a level of 0 so the envelope extends
# all the way to the top of the plot.
thresh = np.append(thresh, thresh[-1])
p_range = np.append(p_range, 0)
# Asterisk markers for the residuals, drawn from the (initially empty) source.
btl_sal2 = fig2.asterisk(
"x",
"y",
size=12,
line_width=1.5,
color="#0033CC",
source=src_plot_btl_del,
)
# Draw the threshold envelope as two step lines mirrored about x = 0.
fig2.step(thresh, p_range)
fig2.step(-thresh, p_range)
# See Bokeh's BoxSelectTool.continuous: with False, the selection is not
# recomputed on every mouse move during the drag.
fig2.select(BoxSelectTool).continuous = False
# Re-assigns the same shared range object already passed to figure() above.
fig2.y_range = fig.y_range
# fig2.y_range.flipped = True  # invert y-axis

# define callback functions


Expand All @@ -227,16 +281,21 @@ def update_selectors():
btl_data["SSSCC"] == station.value
)

ref_param.value = ref_dict[parameter.value]

# update table data
current_table = btl_data[table_rows].reset_index()

# print("Parameter: ", parameter.value, current_table[parameter.value][0])
ref_param.value = ref_dict[parameter.value]
# print("Reference parameter: ", ref_param.value, current_table[ref_param.value][0])

src_table.data = { # this causes edit_flag() to execute
"SSSCC": current_table["SSSCC"],
"SAMPNO": current_table["SAMPNO"],
"CTDPRS": current_table["CTDPRS"],
parameter.value: current_table[parameter.value],
ref_param.value: current_table[ref_param.value],
"t_res": current_table["t_res"],
"o_res": current_table["o_res"],
"CTD Param": current_table[parameter.value].round(4),
"Reference": current_table[ref_param.value].round(4),
"diff": current_table["Residual"],
"flag": current_table["New Flag"],
"Comments": current_table["Comments"],
Expand All @@ -259,11 +318,32 @@ def update_selectors():
"x": btl_data.loc[btl_rows, ref_param.value],
"y": btl_data.loc[btl_rows, "CTDPRS"],
}
src_plot_btl_del.data = {
"x": btl_data.loc[btl_rows, deltas_d[parameter.value]],
"y": btl_data.loc[btl_rows, "CTDPRS"],
}

# update plot labels/axlims
fig.title.text = "{} vs CTDPRS [Station {}]".format(parameter.value, station.value)
fig.xaxis.axis_label = parameter.value

fig2.title.text = "{} Residual".format(parameter.value)
fig2.xaxis.axis_label = parameter.value

# Set the inverted y-range: from the maximum pressure (plus 5% padding) down to 0
max_y = ctd_data.loc[ctd_rows, "CTDPRS"].max()
fig.y_range = Range1d(max_y + 0.05 * max_y, 0)
fig2.y_range = fig.y_range

# Reset the x-axes when plots refresh
max_x_fig = ctd_data.loc[ctd_rows, parameter.value].max()
min_x_fig = ctd_data.loc[ctd_rows, parameter.value].min()
fig.x_range = Range1d(min_x_fig, max_x_fig)

max_x_fig2 = max(btl_data.loc[btl_rows, deltas_d[parameter.value]].max(), 0.025)
min_x_fig2 = min(btl_data.loc[btl_rows, deltas_d[parameter.value]].min(), -0.025)
fig2.x_range = Range1d(min_x_fig2, max_x_fig2)

# deselect all datapoints
btl_sal.data_source.selected.indices = []
src_table.selected.indices = []
Expand Down Expand Up @@ -313,6 +393,10 @@ def apply_flag():
"diff": current_table["Residual"],
"flag": current_table["New Flag"],
"Comments": current_table["Comments"],
"CTD Param": current_table[parameter.value].round(4),
"Reference": current_table[ref_param.value].round(4),
"o_res": current_table["o_res"],
"t_res": current_table["t_res"],
}


Expand All @@ -335,6 +419,10 @@ def apply_comment():
"diff": current_table["Residual"],
"flag": current_table["New Flag"],
"Comments": current_table["Comments"],
"CTD Param": current_table[parameter.value].round(4),
"Reference": current_table[ref_param.value].round(4),
"o_res": current_table["o_res"],
"t_res": current_table["t_res"],
}


Expand All @@ -346,14 +434,19 @@ def save_data():
df_out = pd.DataFrame.from_dict(src_table_changes.data)

# minor changes to columns/names/etc.
df_out = df_out.rename(columns={"flag_new": "value", "SSSCC": "cast_id", "SAMPNO": "bottle_num", "Comments": "notes"}).drop(
columns=["flag_old", "diff"]
)
df_out = df_out.rename(
columns={
"flag_new": "value",
"SSSCC": "cast_id",
"SAMPNO": "bottle_num",
"Comments": "notes",
}
).drop(columns=["flag_old", "diff"])

# save it
salt = df_node_to_BottleFlags(df_out)
flagfile = validate_file(FLAGFILE, create=True)
save_node(flagfile, salt, 'salt', create_new=True)
save_node(flagfile, salt, "salt", create_new=True)


def exit_bokeh():
Expand Down Expand Up @@ -390,19 +483,33 @@ def selected_from_table(attr, old, new):

# build data tables
columns = []
fields = ["SSSCC", "SAMPNO", "CTDPRS", "CTDSAL", "SALNTY", "diff", "flag", "Comments"]
fields = [
"SSSCC",
"SAMPNO",
"CTDPRS",
"CTD Param",
"Reference",
"t_res",
"diff",
"o_res",
"flag",
"Comments",
]
ref_dict
titles = [
"SSSCC",
"Bottle",
"CTDPRS",
"CTDSAL",
"SALNTY",
"Residual",
"CTD Param",
"Reference",
"t_res",
"s_res",
"o_res",
"Flag",
"Comments",
]
widths = [50, 40, 65, 65, 65, 65, 15, 200]
for (field, title, width) in zip(fields, titles, widths):
widths = [50, 40, 65, 65, 65, 65, 65, 65, 15, 200]
for field, title, width in zip(fields, titles, widths):
if field == "flag":
strfmt_in = {"text_align": "center", "font_style": "bold"}
elif field == "Comments":
Expand All @@ -422,7 +529,7 @@ def selected_from_table(attr, old, new):
fields = ["SSSCC", "SAMPNO", "diff", "flag_old", "flag_new", "Comments"]
titles = ["SSSCC", "Bottle", "Residual", "Old", "New", "Comments"]
widths = [50, 40, 65, 15, 15, 375]
for (field, title, width) in zip(fields, titles, widths):
for field, title, width in zip(fields, titles, widths):
if field == "flag_old":
strfmt_in = {"text_align": "center", "font_style": "bold"}
elif field == "flag_new":
Expand All @@ -444,7 +551,8 @@ def selected_from_table(attr, old, new):
source=src_table,
columns=columns,
index_width=20,
width=565 + 50 + 20, # sum of col widths + fudge factor + idx width
# width was originally 565 + 50 + 20 = 635
width=565 + 150 + 20, # sum of col widths + fudge factor + idx width
height=600,
editable=True,
fit_columns=False,
Expand All @@ -461,7 +569,9 @@ def selected_from_table(attr, old, new):
sortable=False,
)
data_table_title = Div(text="""<b>All Station Data:</b>""", width=200, height=15)
data_table_changed_title = Div(text="""<b>Flagged Data:</b>""", width=200, height=15)
data_table_changed_title = Div(
text="""<b>Flagged Data:</b>""", width=200, height=15
)

controls = column(
parameter,
Expand All @@ -483,7 +593,7 @@ def selected_from_table(attr, old, new):
data_table_title, data_table, data_table_changed_title, data_table_changed
)

curdoc().add_root(row(controls, tables, fig))
curdoc().add_root(row(controls, tables, fig, fig2))
curdoc().title = "CTDO Data Flagging Tool"

update_selectors()

0 comments on commit a196c1b

Please sign in to comment.