ISA-tools · ptth222 · Mar 16, 2024 · Mar 17, 2024 · Mar 21, 2024 · May 20, 2024
diff --git a/isatools/isatab/dump/write.py b/isatools/isatab/dump/write.py
@@ -16,6 +16,7 @@
 )
 from isatools.isatab.defaults import log
 from isatools.isatab.graph import _all_end_to_end_paths, _longest_path_and_attrs
+from isatools.model.utils import _build_paths_and_indexes
 from isatools.isatab.utils import (
     get_comment_column,
     get_pv_columns,
@@ -260,24 +261,21 @@ def flatten(current_list):
 
             columns = []
 
-            # start_nodes, end_nodes = _get_start_end_nodes(a_graph)
-            paths = _all_end_to_end_paths(
-                a_graph, [x for x in a_graph.nodes()
-                          if isinstance(a_graph.indexes[x], Sample)])
+            paths, indexes = _build_paths_and_indexes(assay_obj.process_sequence)
             if len(paths) == 0:
                 log.info("No paths found, skipping writing assay file")
                 continue
-            if _longest_path_and_attrs(paths, a_graph.indexes) is None:
+            if _longest_path_and_attrs(paths, indexes) is None:
                 raise IOError(
                     "Could not find any valid end-to-end paths in assay graph")
 
             protocol_in_path_count = 0
-            for node_index in _longest_path_and_attrs(paths, a_graph.indexes):
-                node = a_graph.indexes[node_index]
+            output_label_in_path_counts = {}
+            name_label_in_path_counts = {}
+            for node_index in _longest_path_and_attrs(paths, indexes):
+                node = indexes[node_index]
                 if isinstance(node, Sample):
                     olabel = "Sample Name"
-                    # olabel = "Sample Name.{}".format(sample_in_path_count)
-                    # sample_in_path_count += 1
                     columns.append(olabel)
                     columns += flatten(
                         map(lambda x: get_comment_column(olabel, x),
@@ -307,28 +305,22 @@ def flatten(current_list):
                             protocol_type = node.executes_protocol.protocol_type.lower()
 
                         if protocol_type in protocol_types_dict and\
-                            protocol_types_dict[protocol_type][HEADER]:
+                           protocol_types_dict[protocol_type][HEADER]:
                             oname_label = protocol_types_dict[protocol_type][HEADER]
-                        else:
-                            oname_label = None
-
-                        if oname_label is not None:
-                            columns.append(oname_label)
-
-                            if node.executes_protocol.protocol_type.term.lower() in \
-                                    protocol_types_dict["nucleic acid hybridization"][SYNONYMS]:
+                            if oname_label not in name_label_in_path_counts:
+                                name_label_in_path_counts[oname_label] = 0
+                                
+                            new_oname_label = oname_label + "." + str(name_label_in_path_counts[oname_label])
+                            columns.append(new_oname_label)
+                            name_label_in_path_counts[oname_label] += 1
+                                
+                            if protocol_type in protocol_types_dict["nucleic acid hybridization"][SYNONYMS]:
                                 columns.append("Array Design REF")
-
+                        
                     columns += flatten(
                         map(lambda x: get_comment_column(olabel, x),
                             node.comments))
 
-                    for output in [x for x in node.outputs if isinstance(x, DataFile)]:
-                        if output.label not in columns:
-                            columns.append(output.label)
-                        columns += flatten(
-                            map(lambda x: get_comment_column(output.label, x),
-                                output.comments))
                 elif isinstance(node, Material):
                     olabel = node.type
                     columns.append(olabel)
@@ -340,7 +332,16 @@ def flatten(current_list):
                             node.comments))
 
                 elif isinstance(node, DataFile):
-                    pass  # handled in process
+                    output_label = node.label
+                    if output_label not in output_label_in_path_counts:
+                        output_label_in_path_counts[output_label] = 0
+                    new_output_label = output_label + "." + str(output_label_in_path_counts[output_label])
+
+                    columns.append(new_output_label)
+                    output_label_in_path_counts[output_label] += 1
+                    columns += flatten(
+                        map(lambda x: get_comment_column(new_output_label, x),
+                            node.comments))
 
             omap = get_object_column_map(columns, columns)
 
@@ -355,8 +356,10 @@ def pbar(x):
                     df_dict[k].extend([""])
 
                 protocol_in_path_count = 0
+                output_label_in_path_counts = {}
+                name_label_in_path_counts = {}
                 for node_index in path_:
-                    node = a_graph.indexes[node_index]
+                    node = indexes[node_index]
                     if isinstance(node, Process):
                         olabel = "Protocol REF.{}".format(protocol_in_path_count)
                         protocol_in_path_count += 1
@@ -368,18 +371,19 @@ def pbar(x):
                                 protocol_type = node.executes_protocol.protocol_type.lower()
 
                             if protocol_type in protocol_types_dict and\
-                                protocol_types_dict[protocol_type][HEADER]:
+                               protocol_types_dict[protocol_type][HEADER]:
                                 oname_label = protocol_types_dict[protocol_type][HEADER]
-                            else:
-                                oname_label = None
-
-                            if oname_label is not None:
-                                df_dict[oname_label][-1] = node.name
+                                if oname_label not in name_label_in_path_counts:
+                                    name_label_in_path_counts[oname_label] = 0
+
+                                new_oname_label = oname_label + "." + str(name_label_in_path_counts[oname_label])
+                                df_dict[new_oname_label][-1] = node.name
+                                name_label_in_path_counts[oname_label] += 1
+
+                                if protocol_type in protocol_types_dict["nucleic acid hybridization"][SYNONYMS]:
+                                    df_dict["Array Design REF"][-1] = \
+                                        node.array_design_ref
 
-                                if node.executes_protocol.protocol_type.term.lower() in \
-                                        protocol_types_dict["nucleic acid hybridization"][SYNONYMS]:
-                                    df_dict["Array Design REF"][-1] = node.array_design_ref
-
                         if node.date is not None:
                             df_dict[olabel + ".Date"][-1] = node.date
                         if node.performer is not None:
@@ -391,23 +395,8 @@ def pbar(x):
                             colabel = "{0}.Comment[{1}]".format(olabel, co.name)
                             df_dict[colabel][-1] = co.value
 
-                        for output in [x for x in node.outputs if isinstance(x, DataFile)]:
-                            output_by_type = []
-                            delim = ";"
-                            olabel = output.label
-                            if output.label not in columns:
-                                columns.append(output.label)
-                            output_by_type.append(output.filename)
-                            df_dict[olabel][-1] = delim.join(map(str, output_by_type))
-
-                            for co in output.comments:
-                                colabel = "{0}.Comment[{1}]".format(olabel, co.name)
-                                df_dict[colabel][-1] = co.value
-
                     elif isinstance(node, Sample):
                         olabel = "Sample Name"
-                        # olabel = "Sample Name.{}".format(sample_in_path_count)
-                        # sample_in_path_count += 1
                         df_dict[olabel][-1] = node.name
                         for co in node.comments:
                             colabel = "{0}.Comment[{1}]".format(
@@ -434,7 +423,17 @@ def pbar(x):
                             df_dict[colabel][-1] = co.value
 
                     elif isinstance(node, DataFile):
-                        pass  # handled in process
+                        output_label = node.label
+                        if output_label not in output_label_in_path_counts:
+                            output_label_in_path_counts[output_label] = 0
+                        new_output_label = output_label + "." + str(output_label_in_path_counts[output_label])
+                        df_dict[new_output_label][-1] = node.filename
+                        output_label_in_path_counts[output_label] += 1
+
+                        for co in node.comments:
+                            colabel = "{0}.Comment[{1}]".format(
+                                new_output_label, co.name)
+                            df_dict[colabel][-1] = co.value
 
             DF = DataFrame(columns=columns)
             DF = DF.from_dict(data=df_dict)
@@ -482,6 +481,11 @@ def pbar(x):
                     columns[i] = "Protocol REF"
                 elif "." in col:
                     columns[i] = col[:col.rindex(".")]
+                else:
+                    for output_label in output_label_in_path_counts:
+                        if output_label in col:
+                            columns[i] = output_label
+                            break
 
             log.debug("Rendered {} paths".format(len(DF.index)))
             if len(DF.index) > 1:
@@ -521,8 +525,6 @@ def write_value_columns(df_dict, label, x):
                 elif x.unit.term_source.name:
                     df_dict[label + ".Unit.Term Source REF"][-1] = x.unit.term_source.name
 
-            # df_dict[label + ".Unit.Term Source REF"][-1] = \
-            #     x.unit.term_source.name if x.unit.term_source else ""
             df_dict[label + ".Unit.Term Accession Number"][-1] = \
                 x.unit.term_accession
         else:

diff --git a/isatools/isatab/load/ProcessSequenceFactory.py b/isatools/isatab/load/ProcessSequenceFactory.py
@@ -1,3 +1,5 @@
+import re
+
 from isatools.isatab.utils import process_keygen, find_lt, find_gt, pairwise, get_object_column_map, get_value
 from isatools.isatab.defaults import (
     log,
@@ -146,7 +148,7 @@ def create_from_df(self, DF):
         except KeyError:
             pass
 
-        for data_col in [x for x in DF.columns if x.endswith(" File")]:
+        for data_col in [x for x in DF.columns if x in _LABELS_DATA_NODES]:
             filenames = [x for x in DF[data_col].drop_duplicates() if x != '']
             data.update(dict(map(lambda x: (':'.join([data_col, x]), DataFile(filename=x, label=data_col)), filenames)))
 
@@ -167,7 +169,7 @@ def get_node_by_label_and_key(labl, this_key):
                 n = samples[lk]
             elif labl in ('Extract Name', 'Labeled Extract Name'):
                 n = other_material[lk]
-            elif labl.endswith(' File'):
+            elif labl in _LABELS_DATA_NODES:
                 n = data[lk]
             return n
 
@@ -410,7 +412,7 @@ def get_node_by_label_and_key(labl, this_key):
                     process_key = process_keygen(protocol_ref, column_group, _cg, DF.columns, object_series, _, DF)
                     process_key_sequence.append(process_key)
 
-                if object_label.endswith(' File'):
+                if object_label in _LABELS_DATA_NODES:
                     data_node = None
                     try:
                         data_node = get_node_by_label_and_key(object_label, str(object_series[object_label]))

diff --git a/isatools/isatab/utils.py b/isatools/isatab/utils.py
@@ -496,7 +496,7 @@ def get_object_column_map(isatab_header, df_columns):
     """
     labels = _LABELS_MATERIAL_NODES + _LABELS_DATA_NODES
     if set(isatab_header) == set(df_columns):
-        object_index = [i for i, x in enumerate(df_columns) if x in labels or 'Protocol REF' in x]
+        object_index = [i for i, x in enumerate(df_columns) if x in labels or 'Protocol REF' in x or ' File' in x]
     else:
         object_index = [i for i, x in enumerate(isatab_header) if x in labels + ['Protocol REF']]