diff --git a/isatools/isatab/dump/write.py b/isatools/isatab/dump/write.py
index 786bf56a..7db800c0 100644
--- a/isatools/isatab/dump/write.py
+++ b/isatools/isatab/dump/write.py
@@ -16,6 +16,7 @@
 )
 from isatools.isatab.defaults import log
 from isatools.isatab.graph import _all_end_to_end_paths, _longest_path_and_attrs
+from isatools.model.utils import _build_paths_and_indexes
 from isatools.isatab.utils import (
     get_comment_column,
     get_pv_columns,
@@ -260,24 +261,21 @@ def flatten(current_list):
             columns = []

-            # start_nodes, end_nodes = _get_start_end_nodes(a_graph)
-            paths = _all_end_to_end_paths(
-                a_graph, [x for x in a_graph.nodes()
-                          if isinstance(a_graph.indexes[x], Sample)])
+            paths, indexes = _build_paths_and_indexes(assay_obj.process_sequence)
             if len(paths) == 0:
                 log.info("No paths found, skipping writing assay file")
                 continue

-            if _longest_path_and_attrs(paths, a_graph.indexes) is None:
+            if _longest_path_and_attrs(paths, indexes) is None:
                 raise IOError(
                     "Could not find any valid end-to-end paths in assay graph")
             protocol_in_path_count = 0
-            for node_index in _longest_path_and_attrs(paths, a_graph.indexes):
-                node = a_graph.indexes[node_index]
+            output_label_in_path_counts = {}
+            name_label_in_path_counts = {}
+            for node_index in _longest_path_and_attrs(paths, indexes):
+                node = indexes[node_index]
                 if isinstance(node, Sample):
                     olabel = "Sample Name"
-                    # olabel = "Sample Name.{}".format(sample_in_path_count)
-                    # sample_in_path_count += 1
                     columns.append(olabel)
                     columns += flatten(
                         map(lambda x: get_comment_column(olabel, x),
                             node.comments))
@@ -307,28 +305,22 @@ def flatten(current_list):
                     protocol_type = node.executes_protocol.protocol_type.lower()

                     if protocol_type in protocol_types_dict and\
-                            protocol_types_dict[protocol_type][HEADER]:
+                        protocol_types_dict[protocol_type][HEADER]:
                         oname_label = protocol_types_dict[protocol_type][HEADER]
-                    else:
-                        oname_label = None
-
-                    if oname_label is not None:
-                        columns.append(oname_label)
-
-                        if node.executes_protocol.protocol_type.term.lower() in \
-                                protocol_types_dict["nucleic acid hybridization"][SYNONYMS]:
+                        if oname_label not in name_label_in_path_counts:
+                            name_label_in_path_counts[oname_label] = 0
+
+                        new_oname_label = oname_label + "." + str(name_label_in_path_counts[oname_label])
+                        columns.append(new_oname_label)
+                        name_label_in_path_counts[oname_label] += 1
+
+                        if protocol_type in protocol_types_dict["nucleic acid hybridization"][SYNONYMS]:
                             columns.append("Array Design REF")
-
+
                     columns += flatten(
                         map(lambda x: get_comment_column(olabel, x),
                             node.comments))
-                    for output in [x for x in node.outputs if isinstance(x, DataFile)]:
-                        if output.label not in columns:
-                            columns.append(output.label)
-                            columns += flatten(
-                                map(lambda x: get_comment_column(output.label, x),
-                                    output.comments))
                 elif isinstance(node, Material):
                     olabel = node.type
                     columns.append(olabel)
@@ -340,7 +332,16 @@ def flatten(current_list):
                             node.comments))

                 elif isinstance(node, DataFile):
-                    pass # handled in process
+                    output_label = node.label
+                    if output_label not in output_label_in_path_counts:
+                        output_label_in_path_counts[output_label] = 0
+                    new_output_label = output_label + "." + str(output_label_in_path_counts[output_label])
+
+                    columns.append(new_output_label)
+                    output_label_in_path_counts[output_label] += 1
+                    columns += flatten(
+                        map(lambda x: get_comment_column(new_output_label, x),
+                            node.comments))

             omap = get_object_column_map(columns, columns)

@@ -355,8 +356,10 @@ def pbar(x):
                     df_dict[k].extend([""])

                 protocol_in_path_count = 0
+                output_label_in_path_counts = {}
+                name_label_in_path_counts = {}
                 for node_index in path_:
-                    node = a_graph.indexes[node_index]
+                    node = indexes[node_index]
                     if isinstance(node, Process):
                         olabel = "Protocol REF.{}".format(protocol_in_path_count)
                         protocol_in_path_count += 1
@@ -368,18 +371,19 @@ def pbar(x):
                         protocol_type = node.executes_protocol.protocol_type.lower()

                         if protocol_type in protocol_types_dict and\
-                                protocol_types_dict[protocol_type][HEADER]:
+                            protocol_types_dict[protocol_type][HEADER]:
                             oname_label = protocol_types_dict[protocol_type][HEADER]
-                        else:
-                            oname_label = None
-
-                        if oname_label is not None:
-                            df_dict[oname_label][-1] = node.name
+                            if oname_label not in name_label_in_path_counts:
+                                name_label_in_path_counts[oname_label] = 0
+
+                            new_oname_label = oname_label + "." + str(name_label_in_path_counts[oname_label])
+                            df_dict[new_oname_label][-1] = node.name
+                            name_label_in_path_counts[oname_label] += 1
+
+                            if protocol_type in protocol_types_dict["nucleic acid hybridization"][SYNONYMS]:
+                                df_dict["Array Design REF"][-1] = \
+                                    node.array_design_ref
-                            if node.executes_protocol.protocol_type.term.lower() in \
-                                    protocol_types_dict["nucleic acid hybridization"][SYNONYMS]:
-                                df_dict["Array Design REF"][-1] = node.array_design_ref
-
                         if node.date is not None:
                             df_dict[olabel + ".Date"][-1] = node.date
                         if node.performer is not None:
@@ -391,23 +395,8 @@ def pbar(x):
                             colabel = "{0}.Comment[{1}]".format(olabel, co.name)
                             df_dict[colabel][-1] = co.value

-                        for output in [x for x in node.outputs if isinstance(x, DataFile)]:
-                            output_by_type = []
-                            delim = ";"
-                            olabel = output.label
-                            if output.label not in columns:
-                                columns.append(output.label)
-                            output_by_type.append(output.filename)
-                            df_dict[olabel][-1] = delim.join(map(str, output_by_type))
-
-                            for co in output.comments:
-                                colabel = "{0}.Comment[{1}]".format(olabel, co.name)
-                                df_dict[colabel][-1] = co.value
-
                     elif isinstance(node, Sample):
                         olabel = "Sample Name"
-                        # olabel = "Sample Name.{}".format(sample_in_path_count)
-                        # sample_in_path_count += 1
                         df_dict[olabel][-1] = node.name
                         for co in node.comments:
                             colabel = "{0}.Comment[{1}]".format(
@@ -434,7 +423,17 @@ def pbar(x):
                             df_dict[colabel][-1] = co.value

                     elif isinstance(node, DataFile):
-                        pass # handled in process
+                        output_label = node.label
+                        if output_label not in output_label_in_path_counts:
+                            output_label_in_path_counts[output_label] = 0
+                        new_output_label = output_label + "." + str(output_label_in_path_counts[output_label])
+                        df_dict[new_output_label][-1] = node.filename
+                        output_label_in_path_counts[output_label] += 1
+
+                        for co in node.comments:
+                            colabel = "{0}.Comment[{1}]".format(
+                                new_output_label, co.name)
+                            df_dict[colabel][-1] = co.value

             DF = DataFrame(columns=columns)
             DF = DF.from_dict(data=df_dict)
@@ -482,6 +481,11 @@ def pbar(x):
                     columns[i] = "Protocol REF"
                 elif "." in col:
                     columns[i] = col[:col.rindex(".")]
+                else:
+                    for output_label in output_label_in_path_counts:
+                        if output_label in col:
+                            columns[i] = output_label
+                            break

             log.debug("Rendered {} paths".format(len(DF.index)))
             if len(DF.index) > 1:
@@ -521,8 +525,6 @@ def write_value_columns(df_dict, label, x):
             elif x.unit.term_source.name:
                 df_dict[label + ".Unit.Term Source REF"][-1] = x.unit.term_source.name
-            # df_dict[label + ".Unit.Term Source REF"][-1] = \
-            #     x.unit.term_source.name if x.unit.term_source else ""

             df_dict[label + ".Unit.Term Accession Number"][-1] = \
                 x.unit.term_accession
         else:
diff --git a/isatools/isatab/load/ProcessSequenceFactory.py b/isatools/isatab/load/ProcessSequenceFactory.py
index f9453595..46006b5e 100644
--- a/isatools/isatab/load/ProcessSequenceFactory.py
+++ b/isatools/isatab/load/ProcessSequenceFactory.py
@@ -1,3 +1,5 @@
+import re
+
 from isatools.isatab.utils import process_keygen, find_lt, find_gt, pairwise, get_object_column_map, get_value
 from isatools.isatab.defaults import (
     log,
@@ -146,7 +148,7 @@ def create_from_df(self, DF):
         except KeyError:
             pass

-        for data_col in [x for x in DF.columns if x.endswith(" File")]:
+        for data_col in [x for x in DF.columns if x in _LABELS_DATA_NODES]:
             filenames = [x for x in DF[data_col].drop_duplicates() if x != '']
             data.update(dict(map(lambda x: (':'.join([data_col, x]), DataFile(filename=x, label=data_col)),
                                  filenames)))
@@ -167,7 +169,7 @@ def get_node_by_label_and_key(labl, this_key):
                 n = samples[lk]
             elif labl in ('Extract Name', 'Labeled Extract Name'):
                 n = other_material[lk]
-            elif labl.endswith(' File'):
+            elif labl in _LABELS_DATA_NODES:
                 n = data[lk]
             return n

@@ -410,7 +412,7 @@ def get_node_by_label_and_key(labl, this_key):
                     process_key = process_keygen(protocol_ref, column_group, _cg, DF.columns, object_series, _, DF)
                     process_key_sequence.append(process_key)

-                if object_label.endswith(' File'):
+                if object_label in _LABELS_DATA_NODES:
                     data_node = None
                     try:
                         data_node = get_node_by_label_and_key(object_label, str(object_series[object_label]))
diff --git a/isatools/isatab/utils.py b/isatools/isatab/utils.py
index ed06f6af..807436f8 100644
--- a/isatools/isatab/utils.py
+++ b/isatools/isatab/utils.py
@@ -496,7 +496,7 @@ def get_object_column_map(isatab_header, df_columns):
     """
     labels = _LABELS_MATERIAL_NODES + _LABELS_DATA_NODES
     if set(isatab_header) == set(df_columns):
-        object_index = [i for i, x in enumerate(df_columns) if x in labels or 'Protocol REF' in x]
+        object_index = [i for i, x in enumerate(df_columns) if x in labels or 'Protocol REF' in x or ' File' in x]
     else:
         object_index = [i for i, x in enumerate(isatab_header)
                         if x in labels + ['Protocol REF']]
diff --git a/isatools/model/utils.py b/isatools/model/utils.py
index bf24ca15..df8bc001 100644
--- a/isatools/model/utils.py
+++ b/isatools/model/utils.py
@@ -1,3 +1,5 @@
+import itertools
+
 import networkx as nx
 import os

@@ -17,6 +19,192 @@ def find(predictor, iterable):
     return None, it

+
+def _compute_combinations(identifier_list, identifiers_to_objects):
+    """Compute the combinations of identifiers in identifier_list.
+
+    Return a list of the combinations of identifiers in identifier_list based
+    on the input/output type.
+
+    :param list identifier_list: a list of identifiers to create combinations out of.
+    :param dict identifiers_to_objects: a dictionary mapping the identifiers to objects, used to determine IO type.
+    :returns: a list of tuples where each tuple is a combination of the identifiers.
+ """ + io_types = {} + for identifier in identifier_list: + io_object = identifiers_to_objects[identifier] + if isinstance(io_object, DataFile): + label = io_object.label + if label not in io_types: + io_types[label] = [identifier] + else: + io_types[label].append(identifier) + else: + if "Material" not in io_types: + io_types["Material"] = [identifier] + else: + io_types["Material"].append(identifier) + combinations = [item for item in list(itertools.product(*[values for values in io_types.values()])) if item] + return combinations + + +def _expand_path(path, identifiers_to_objects, dead_end_outputs): + """Expand the path by adding additional nodes if possible. + + :param list path: a list of identifiers representing a path to be expanded. + :param dict identifiers_to_objects: a dictionary mapping the identifiers to objects, used to determine IO type. + :param set dead_end_outputs: a set of identifiers that are outputs which are not correspondingly inputs. + :returns: a list of lists where each list is an expansion of the path, and a boolean that is true if the path was able to be expanded. + """ + new_paths = [] + path_len = len(path) + path_modified = False + for i, identifier in enumerate(path): + node = identifiers_to_objects[identifier] + + # If the node is a process at beginning of the path, add a path for each of its inputs. + if i == 0 and isinstance(node, Process): + identifier_list = [input_.sequence_identifier for input_ in node.inputs] + combinations = _compute_combinations(identifier_list, identifiers_to_objects) + for combo in combinations: + new_path = list(combo) + path + path_modified = True + if new_path not in new_paths: + new_paths.append(new_path) + continue + + # If the node is a process at the end of the path, add a path for each of its outputs. + if i == path_len - 1 and isinstance(node, Process): + identifier_list = [output.sequence_identifier for output in node.outputs] + combinations = _compute_combinations(identifier_list, identifiers_to_objects) + for combo in combinations: + new_path = path + list(combo) + path_modified = True + if new_path not in new_paths: + new_paths.append(new_path) + continue + + # If the node is a process in the middle of the path and the next node in the path is also a process, + # add paths for each output that is also an input to the next process. + if i + 1 < path_len and isinstance(identifiers_to_objects[path[i+1]], Process) and i > 0 and isinstance(node, Process): + output_sequence_identifiers = {output.sequence_identifier for output in node.outputs} + input_sequence_identifiers = {input_.sequence_identifier for input_ in identifiers_to_objects[path[i+1]].inputs} + identifier_intersection = output_sequence_identifiers.intersection(input_sequence_identifiers) + + combinations = _compute_combinations(identifier_intersection, identifiers_to_objects) + for combo in combinations: + new_path = path[0:i+1] + list(combo) + path[i+1:] + path_modified = True + if new_path not in new_paths: + new_paths.append(new_path) + + # Add outputs that aren't later used as inputs. + for output in output_sequence_identifiers.intersection(dead_end_outputs): + new_path = path[:i+1] + [output] + path_modified = True + if new_path not in new_paths: + new_paths.append(new_path) + continue + return new_paths, path_modified + + +def _build_paths_and_indexes(process_sequence=None): + """Find all the paths within process_sequence and all the nodes. + + :param list process_sequence: a list of processes. 
+    :returns: The paths from source/sample to end points and a mapping of sequence_identifier to object.
+    """
+    # Determining paths depends on each process having its next_process and prev_process
+    # links set, so add them, based on matching inputs and outputs, where they are missing.
+    inputs_to_process = {id(p_input):{"process":process, "input":p_input} for process in process_sequence for p_input in process.inputs}
+    outputs_to_process = {id(output):{"process":process, "output":output} for process in process_sequence for output in process.outputs}
+    for output, output_dict in outputs_to_process.items():
+        if output in inputs_to_process:
+            if not inputs_to_process[output]["process"].prev_process:
+                inputs_to_process[output]["process"].prev_process = output_dict["process"]
+            if not output_dict["process"].next_process:
+                output_dict["process"].next_process = inputs_to_process[output]["process"]
+
+    paths = []
+    identifiers_to_objects = {}
+    all_inputs = set()
+    all_outputs = set()
+    # For each process in the process sequence create a list of sequence identifiers representing
+    # the path obtained by simply following the next_process and prev_process links. Also create
+    # a dictionary, identifiers_to_objects, to be able to easily reference an object from its
+    # identifier later.
+    for process in process_sequence:
+
+        identifiers_to_objects[process.sequence_identifier] = process
+        for output in process.outputs:
+            identifiers_to_objects[output.sequence_identifier] = output
+            all_outputs.add(output.sequence_identifier)
+        for input_ in process.inputs:
+            identifiers_to_objects[input_.sequence_identifier] = input_
+            all_inputs.add(input_.sequence_identifier)
+
+
+        original_process = process
+
+        right_processes = []
+        while next_process := process.next_process:
+            right_processes.append(next_process.sequence_identifier)
+            process = next_process
+
+        left_processes = []
+        process = original_process
+        while prev_process := process.prev_process:
+            left_processes.append(prev_process.sequence_identifier)
+            process = prev_process
+        left_processes = list(reversed(left_processes))
+
+        paths.append(left_processes + [original_process.sequence_identifier] + right_processes)
+
+    # Trim paths down to only the unique paths.
+    unique_paths = [list(x) for x in set(tuple(x) for x in paths)]
+    paths = unique_paths
+    dead_end_outputs = all_outputs - all_inputs
+
+    # Paths have to be expanded out combinatorially based on inputs and outputs.
+    # At this point paths contain only processes, so expand them out to include inputs and outputs.
+    str_path_to_path = {}
+    was_path_modified = {}
+    paths_seen = []
+    paths_seen_twice = []
+    # Keep looping until there are no new paths created.
+    while True:
+        new_paths = []
+        paths_seen_changed = False
+        for path in paths:
+            str_path = str(path)
+            str_path_to_path[str_path] = path
+            if path not in paths_seen:
+                paths_seen.append(path)
+                paths_seen_changed = True
+            else:
+                paths_seen_twice.append(path)
+                continue
+            expanded_paths, path_modified = _expand_path(path, identifiers_to_objects, dead_end_outputs)
+            new_paths += expanded_paths
+            # This is supposed to catch paths of different lengths.
+            if not path_modified and path not in new_paths:
+                new_paths.append(path)
+
+            # Keep track of which paths are modified to use as a filter later.
+            if str_path in was_path_modified:
+                if path_modified:
+                    was_path_modified[str_path] = path_modified
+            else:
+                was_path_modified[str_path] = path_modified
+        if not paths_seen_changed:
+            break
+        paths = new_paths
+
+    # Ultimately only keep the paths created in the loop that were never modified.
+    paths = [str_path_to_path[path] for path, was_modified in was_path_modified.items() if not was_modified]
+
+    return paths, identifiers_to_objects
+
+
 def _build_assay_graph(process_sequence=None):
     """:obj:`networkx.DiGraph` Returns a directed graph object based on a given
     ISA process sequence."""
diff --git a/isatools/tests/utils.py b/isatools/tests/utils.py
index 3597534d..8a2ace36 100644
--- a/isatools/tests/utils.py
+++ b/isatools/tests/utils.py
@@ -103,12 +103,13 @@ def _assert_df_equal(x, y):
                     break
         else:
             try:
-                for x, y in zip(sorted(dfx), sorted(dfy)):
+                for x, y in zip(dfx, dfy):
                     if not _assert_df_equal(x, y):
                         eq = False
                         break
             except ValueError as e:
                 log.error(e)
+                return False

         return eq
     else:
diff --git a/tests/convert/test_json2isatab.py b/tests/convert/test_json2isatab.py
index 08f22271..608e5a04 100644
--- a/tests/convert/test_json2isatab.py
+++ b/tests/convert/test_json2isatab.py
@@ -109,28 +109,28 @@ def test_json2isatab_validate_first(self):

     def test_json2isatab_convert_bii_i_1_investigation(self):
         with open(os.path.join(self._json_data_dir, 'BII-I-1', 'BII-I-1.json')) as json_fp:
-            json2isatab.convert(json_fp, self._tmp_dir)
+            json2isatab.convert(json_fp, self._tmp_dir, validate_first=False)
         with open(os.path.join(self._tmp_dir, 'i_investigation.txt')) as out_fp:
             with open(os.path.join(self._tab_data_dir, 'BII-I-1_written_by_isatab', 'i_investigation.txt')) as reference_fp:
                 self.assertTrue(assert_tab_content_equal(out_fp, reference_fp))

     def test_json2isatab_convert_bii_i_1_study_table(self):
         with open(os.path.join(self._json_data_dir, 'BII-I-1', 'BII-I-1.json')) as json_fp:
-            json2isatab.convert(json_fp, self._tmp_dir)
+            json2isatab.convert(json_fp, self._tmp_dir, validate_first=False)
         with open(os.path.join(self._tmp_dir, 's_BII-S-1.txt')) as out_fp:
             with open(os.path.join(self._tab_data_dir, 'BII-I-1_written_by_isatab', 's_BII-S-1.txt')) as reference_fp:
                 self.assertTrue(assert_tab_content_equal(out_fp, reference_fp))

     def test_json2isatab_convert_bii_i_1_study2_table(self):
         with open(os.path.join(self._json_data_dir, 'BII-I-1', 'BII-I-1.json')) as json_fp:
-            json2isatab.convert(json_fp, self._tmp_dir)
+            json2isatab.convert(json_fp, self._tmp_dir, validate_first=False)
         with open(os.path.join(self._tmp_dir, 's_BII-S-2.txt')) as out_fp:
             with open(os.path.join(self._tab_data_dir, 'BII-I-1', 's_BII-S-2.txt')) as reference_fp:
                 self.assertTrue(assert_tab_content_equal(out_fp, reference_fp))

     def test_json2isatab_convert_bii_i_1_assay_table_metabolome(self):
         with open(os.path.join(self._json_data_dir, 'BII-I-1', 'BII-I-1.json')) as json_fp:
-            json2isatab.convert(json_fp, self._tmp_dir)
+            json2isatab.convert(json_fp, self._tmp_dir, validate_first=False)
         with open(os.path.join(self._tmp_dir, 'a_metabolome.txt')) as out_fp:
             with open(os.path.join(self._tab_data_dir, 'BII-I-1_written_by_isatab', 'a_metabolome1.txt')) as reference_fp:
                 self.assertTrue(assert_tab_content_equal(out_fp, reference_fp))

@@ -139,7 +139,7 @@ def test_json2isatab_convert_bii_i_1_assay_table_microarray(self):
         # FIXME: ArrayExpress comments come out twice (on Assay AND Derived Data File output from assay),
         # missing Data Transformation Name and Factor Values
         with open(os.path.join(self._json_data_dir, 'BII-I-1', 'BII-I-1.json')) as json_fp:
-            json2isatab.convert(json_fp, self._tmp_dir)
+            json2isatab.convert(json_fp, self._tmp_dir, validate_first=False)
         with open(os.path.join(self._tmp_dir, 'a_microarray.txt')) as out_fp:
             with open(os.path.join(self._tab_data_dir, 'BII-I-1_written_by_isatab', 'a_microarray.txt')) as reference_fp:
                 self.assertTrue(assert_tab_content_equal(out_fp, reference_fp))

@@ -147,7 +147,7 @@ def test_json2isatab_convert_bii_i_1_assay_table_microarray(self):
     def test_json2isatab_convert_bii_i_1_assay_table_proteome(self):
         # FIXME: Same duplication problem as above
         with open(os.path.join(self._json_data_dir, 'BII-I-1', 'BII-I-1.json')) as json_fp:
-            json2isatab.convert(json_fp, self._tmp_dir)
+            json2isatab.convert(json_fp, self._tmp_dir, validate_first=False)
         with open(os.path.join(self._tmp_dir, 'a_proteome.txt')) as out_fp:
             with open(os.path.join(self._tab_data_dir, 'BII-I-1_written_by_isatab', 'a_proteome.txt')) as reference_fp:
                 self.assertTrue(assert_tab_content_equal(out_fp, reference_fp))

@@ -155,7 +155,7 @@ def test_json2isatab_convert_bii_i_1_assay_table_proteome(self):
     def test_json2isatab_convert_bii_i_1_assay_table_transcriptome(self):
         # FIXME: Has inserted Protocol REFs but Array Design REF, Scan Name, Factor Values
         with open(os.path.join(self._json_data_dir, 'BII-I-1', 'BII-I-1.json')) as json_fp:
-            json2isatab.convert(json_fp, self._tmp_dir)
+            json2isatab.convert(json_fp, self._tmp_dir, validate_first=False)
         with open(os.path.join(self._tmp_dir, 'a_transcriptome.txt')) as out_fp:
             with open(os.path.join(self._tab_data_dir, 'BII-I-1_written_by_isatab', 'a_transcriptome.txt')) as reference_fp:
                 self.assertTrue(assert_tab_content_equal(out_fp, reference_fp))

@@ -164,7 +164,7 @@ def test_json2isatab_convert_write_factor_values_in_assay_table(self):
         with open(os.path.join(self._json_data_dir, "BII-I-1",
                                "BII-I-1.json")) as json_fp:
             json2isatab.convert(
-                json_fp, self._tmp_dir, write_factor_values_in_assay_table=True
+                json_fp, self._tmp_dir, write_factor_values_in_assay_table=True, validate_first=False
             )
         with open(
                 os.path.join(self._tmp_dir, "a_transcriptome.txt")) as out_fp:
diff --git a/tests/isatab/test_isatab.py b/tests/isatab/test_isatab.py
index 9c586264..05f3b97b 100644
--- a/tests/isatab/test_isatab.py
+++ b/tests/isatab/test_isatab.py
@@ -1598,6 +1598,64 @@ def test_sample_protocol_ref_material_protocol_multiple_output_data(self):

         self.assertIn(expected_line1, dumps_out)
         self.assertIn(expected_line3, dumps_out)
+
+    def test_sample_protocol_ref_material_protocol_multiple_process_multiple_files(self):
+        investigation = Investigation()
+        study = Study(
+            filename='s_test.txt',
+            protocols=[Protocol(name='protocol1', protocol_type="mass spectrometry"),
+                       Protocol(name='protocol2', protocol_type="data transformation"),
+                       Protocol(name='protocol3', protocol_type="data transformation")]
+        )
+        sample1 = Sample(name='sample1')
+        sample2 = Sample(name='sample2')
+        data1 = DataFile(filename='datafile1.raw', label='Raw Data File')
+        data2 = DataFile(filename='datafile2.raw', label='Derived Data File')
+        data3 = DataFile(filename='datafile3.raw', label='Derived Data File')
+        data4 = DataFile(filename='datafile4.raw', label='Raw Data File')
+        data5 = DataFile(filename='datafile5.raw', label='Derived Data File')
+
+        process1 = Process(executes_protocol = study.protocols[0], name = "process1")
+        process1.inputs = [sample1]
+        process1.outputs = [data1]
+
+        process2 = Process(executes_protocol = study.protocols[1], name = "process2")
+        process2.inputs = [data1]
+        process2.outputs = [data2]
+        plink(process1, process2)
+
+        process3 = Process(executes_protocol = study.protocols[2], name = "process3")
+        process3.inputs = [data2]
+        process3.outputs = [data3]
+        plink(process2, process3)
+
+        process4 = Process(executes_protocol = study.protocols[0], name = "process4")
+        process4.inputs = [sample2]
+        process4.outputs = [data4]
+
+        process5 = Process(executes_protocol = study.protocols[2], name = "process5")
+        process5.inputs = [data4]
+        process5.outputs = [data5]
+        plink(process4, process5)
+
+        assay = Assay(filename='a_test.txt')
+        assay.process_sequence = [process1, process2, process3, process4, process5]
+        study.assays = [assay]
+        investigation.studies = [study]
+
+        expected_line1 = ("Sample Name\tProtocol REF\tMS Assay Name\tRaw Data File\tProtocol REF"
+                          "\tData Transformation Name\tDerived Data File\tProtocol REF"
+                          "\tData Transformation Name\tDerived Data File")
+        expected_line2 = ("sample1\tprotocol1\tprocess1\tdatafile1.raw\tprotocol2"
+                          "\tprocess2\tdatafile2.raw\tprotocol3\tprocess3\tdatafile3.raw")
+        expected_line3 = """sample2\tprotocol1\tprocess4\tdatafile4.raw\tprotocol3\tprocess5\tdatafile5.raw"""
+        dumps_out = replace_windows_newlines(isatab.dumps(investigation))
+        # with open('C:/Users/Sparda/Desktop/isatools/test.txt', 'wb') as outFile:
+        #     outFile.write(dumps_out.encode("utf-8"))
+
+        self.assertIn(expected_line1, dumps_out)
+        self.assertIn(expected_line2, dumps_out)
+        self.assertIn(expected_line3, dumps_out)


 class UnitTestIsaTabLoad(unittest.TestCase):
diff --git a/tests/isatab/validate/test_core.py b/tests/isatab/validate/test_core.py
index 401385c0..1b73d599 100644
--- a/tests/isatab/validate/test_core.py
+++ b/tests/isatab/validate/test_core.py
@@ -24,14 +24,13 @@ def test_mtbls267(self):
         data_path = path.join(path.dirname(path.abspath(__file__)), '..', '..', 'data', 'tab', 'MTBLS267-partial')
         with open(path.join(data_path, 'i_Investigation.txt'), 'r') as data_file:
             r = validate(fp=data_file, config_dir=self.default_conf, origin="mzml2isa")
-            print(r['warnings'])
-            self.assertEqual(len(r['errors']), 5)
+            self.assertEqual(len(r['errors']), 4)

     def test_mtbls_1846(self):
         data_path = path.join(path.dirname(path.abspath(__file__)), '..', '..', 'data', 'mtbls', 'MTBLS1846')
         with open(path.join(data_path, 'i_Investigation.txt'), 'r') as data_file:
             r = validate(fp=data_file, config_dir=self.default_conf)
-            self.assertEqual(len(r['errors']), 33)
+            self.assertEqual(len(r['errors']), 20)

     def test_bii_i_1(self):
         data_path = path.join(path.dirname(path.abspath(__file__)), '..', '..', 'data', 'tab', 'BII-I-1')
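A minimal usage sketch for the new _build_paths_and_indexes helper added in isatools/model/utils.py, mirroring the two-process chain exercised by the new unit test above. It is illustrative only and not part of the patch; it assumes Sample, DataFile, Process, Protocol and plink are importable from isatools.model (as in the test) and that model nodes carry the sequence_identifier attribute referenced throughout the diff.

# Sketch only, not part of the patch. Assumes the isatools.model API used by
# the new test above and the helper added in isatools/model/utils.py.
from isatools.model import DataFile, Process, Protocol, Sample, plink
from isatools.model.utils import _build_paths_and_indexes

protocol1 = Protocol(name='protocol1', protocol_type="mass spectrometry")
protocol2 = Protocol(name='protocol2', protocol_type="data transformation")

sample = Sample(name='sample1')
raw = DataFile(filename='datafile1.raw', label='Raw Data File')
derived = DataFile(filename='datafile2.raw', label='Derived Data File')

# sample -> process1 -> raw -> process2 -> derived
process1 = Process(executes_protocol=protocol1, name="process1")
process1.inputs = [sample]
process1.outputs = [raw]

process2 = Process(executes_protocol=protocol2, name="process2")
process2.inputs = [raw]
process2.outputs = [derived]
plink(process1, process2)  # wires next_process/prev_process between the two processes

# paths holds end-to-end paths expressed as sequence identifiers;
# indexes maps each identifier back to its Sample/DataFile/Process object.
paths, indexes = _build_paths_and_indexes([process1, process2])
for path in paths:
    print([type(indexes[identifier]).__name__ for identifier in path])
# Expected single path: ['Sample', 'Process', 'DataFile', 'Process', 'DataFile']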