Add decide_graph_type

This also renames all features to y_FEATURE, so instead of THROUGHPUT the column will be y_THROUGHPUT. That makes it easy to find "outputs"/features without any metadata being attached to the dataset.
tbarbette · Apr 22, 2024 · 02c88c6 · 02c88c6
1 parent 029c5cc
commit 02c88c6
Show file tree

Hide file tree

Showing 3 changed files with 42 additions and 14 deletions.
diff --git a/npf/graph_choice.py b/npf/graph_choice.py
@@ -1,13 +1,13 @@
 from npf import npf
 
 
-def decide_graph_type(grapher, key, vars_all, vars_values, result_type, ndyn, isubplot):
+def decide_graph_type(config, n_values, data_for_key, result_type, ndyn, isubplot):
     graph_type = False
     if ndyn == 0:
-        graph_type = "boxplot" if len(vars_all) == 1 else "simple_bar"
-    elif ndyn == 1 and len(vars_all) > 2 and npf.all_num(vars_values[key]):
+        graph_type = "boxplot" if n_values == 1 else "simple_bar"
+    elif ndyn == 1 and n_values > 2 and npf.all_num(data_for_key):
         graph_type = "line"
-    graph_types = grapher.config("graph_type", [])
+    graph_types = config("graph_type", [])
 
     if len(graph_types) > 0 and (type(graph_types[0]) is tuple or type(graph_types) is tuple):
         if type(graph_types) is tuple:
@@ -35,4 +35,4 @@ def decide_graph_type(grapher, key, vars_all, vars_values, result_type, ndyn, is
               "as a line without dynamic variables")
         graph_type = "simple_bar"
 
-    return graph_type
+    return graph_type if graph_type else "bar"
diff --git a/npf/grapher.py b/npf/grapher.py
@@ -528,6 +528,7 @@ def extract_variable_to_series(self, key, vars_values, all_results, dyns, build,
             series.append((script, nb, newserie))
             self.glob_legend_title = self.var_name(key)
         vars_all = list(new_varsall)
+
         if len(dyns) == 1:
             key = dyns[0]
             do_sort = True
@@ -694,13 +695,14 @@ def results_divide(res,a,b):
                     try:
 
                         labels = [k[1] if type(k) is tuple else k for k,v in x.variables.items()]
-                        x_vars = [[v[1] if type(v) is tuple else v for k,v in x.variables.items()]]
+                        x_vars = [[(v[1] if type(v) is tuple else v) for k,v in x.variables.items()]]
                         x_vars=pd.DataFrame(x_vars,index=[0],columns=labels)
                         x_vars=pd.concat([pd.DataFrame({'build' :build.pretty_name()},index=[0]), pd.DataFrame({'test_index' :i},index=[0]), x_vars],axis=1)
+
                         vals = all_results[x]
                         if not vals:
                             continue
-                        x_data=pd.DataFrame.from_dict(vals,orient='index').transpose() #Use orient='index' to handle lists with different lengths
+                        x_data=pd.DataFrame.from_dict( {"y_"+k: v for k, v in vals.items()},orient='index').transpose() #Use orient='index' to handle lists with different lengths
                         if len(x_data) == 0:
                             continue
                         x_data['run_index']=x_data.index
@@ -914,7 +916,7 @@ def results_divide(res,a,b):
 
         # Export to Jupyter notebook
         if options.notebook is not None:
-            prepare_notebook_export(series, all_results_df, options.notebook)
+            prepare_notebook_export(series, all_results_df, options.notebook, self.config)
 
 
     def graph_group(self, series, vars_values, filename, fileprefix, title):
@@ -1026,6 +1028,9 @@ def lam(x):
 
 
         versions = []
+        """Vars_all is the set of all variable combination that have some value. Taking the iperf case, it will be
+        [ZERO_COPY=0, PARALLEL=1], [ZERO_COPY=0, PARALLEL=2], ... [ZERO_COPY=1, PARALLEL=8],
+        """
         vars_all = OrderedSet()
         for i, (test, build, all_results) in enumerate(series):
             versions.append(build.pretty_name())
@@ -1386,7 +1391,7 @@ def generate_plot_for_graph(self, i, i_subplot, figure, n_cols, n_lines, vars_va
                     horizontal = False
                     default_add_legend = True
 
-                    graph_type = decide_graph_type(self, key, VARS_ALL, vars_values, result_type, NDYN, ISUBPLOT)
+                    graph_type = decide_graph_type(self.config, n_values=len(VARS_ALL), data_for_key=vars_values[key], result_type=result_type, ndyn=NDYN, isubplot=ISUBPLOT)
 
 
                     try:

diff --git a/npf/types/notebook/notebook.py b/npf/types/notebook/notebook.py
@@ -1,3 +1,4 @@
+from typing import List
 import nbformat as nbf
 from nbconvert.preprocessors import ExecutePreprocessor, CellExecutionError
 from jupyter_client.kernelspec import NoSuchKernel
@@ -8,31 +9,53 @@
 import os
 import time
 
+import pandas as pd
+from npf.graph_choice import decide_graph_type
+
 INDENT_DATA = True
 
 
-def prepare_notebook_export(datasets, all_results_df, path):
+def prepare_notebook_export(datasets: List[tuple], all_results_df:pd.DataFrame, path:str, config):
     # SIMTODO: (help) why could there be multiple datasets?
+    # TODO: with npf-compare there might be multiple dataset. Try the netperf vs iperf experiment from the examples
     dataset = datasets[0]
     test, build, runs = dataset
-    var_names = dict(datasets[0][0].config["var_names"])
-
+    var_names = dict(test.config["var_names"])
     x_vars = list(test.variables.dynamics().keys())
-    y_vars = list(list(runs.values())[0].keys())
+
+    y_vars = list(filter(lambda x:x.startswith("y_"),all_results_df.columns))
 
     # variables that get replaced in the template notebook
     variables = {
         "name": test.get_title(),
         "x_vars": x_vars,
         "x_names": get_name(x_vars, var_names),
         "y_vars": y_vars,
-        "y_names": get_name(y_vars, var_names),
+        "y_names": get_name([y[2:] for y in y_vars], var_names),
         "data": dumps(all_results_df.to_dict(orient="records"), indent=4 if INDENT_DATA else None),
         "dir_name": os.path.dirname(path),
         "file_path": ".".join(path.split(".")[:-1]),  # remove extension
         "file_name": ".".join(path.split("/")[-1].split(".")[:-1]),
     }
 
+    key = x_vars[0]
+    # TODO : Select a suitable key when there are multiple values
+
+
+    # TODO : there might be many result types
+    result_type = y_vars[0]
+
+    n_values = len(all_results_df[x_vars].value_counts())
+
+    # graph type
+    graph_type = decide_graph_type(config,
+                                   n_values,
+                                   data_for_key=all_results_df[key].unique(),
+                                   result_type=result_type,
+                                   ndyn=len(x_vars), isubplot=0)
+
+    print("Graph type : ", graph_type)
+
     # read template notebook
     with open("npf/types/notebook/template.ipynb") as f:
         nb = nbf.read(f, as_version=4)