diff --git a/metax/peptide_annotator/pep2taxafunc.py b/metax/peptide_annotator/pep_to_taxafunc.py similarity index 97% rename from metax/peptide_annotator/pep2taxafunc.py rename to metax/peptide_annotator/pep_to_taxafunc.py index cc4cb42..f5a3159 100644 --- a/metax/peptide_annotator/pep2taxafunc.py +++ b/metax/peptide_annotator/pep_to_taxafunc.py @@ -53,14 +53,21 @@ import sqlite3 class Pep2TaxaFunc: - def __init__(self, db_path: str|None = None, threshold: float = 1.0, genome_mode: bool = True, conn = None): + def __init__( + self, + db_path: str | None = None, + threshold: float = 1.0, + genome_mode: bool = True, + conn=None, + protein_genome_separator="_", + ): if db_path is None and conn is None: raise ValueError('Please provide the path of the database or the connection of the database') self.db_path = db_path self.threshold = threshold self.genome_mode = genome_mode - self.protein_genome_separator = '_' + self.protein_genome_separator = protein_genome_separator self.conn = conn or self.open_eggnog_db() diff --git a/metax/peptide_annotator/peptable_annotator.py b/metax/peptide_annotator/peptable_annotator.py index c27289b..48ade6a 100644 --- a/metax/peptide_annotator/peptable_annotator.py +++ b/metax/peptide_annotator/peptable_annotator.py @@ -7,16 +7,16 @@ import threading import sqlite3 if __name__ == '__main__': - from pep2taxafunc import Pep2TaxaFunc + from pep_to_taxafunc import Pep2TaxaFunc from convert_id_to_name import add_pathway_name_to_df, add_ec_name_to_df, add_ko_name_to_df else: - from .pep2taxafunc import Pep2TaxaFunc + from .pep_to_taxafunc import Pep2TaxaFunc from .convert_id_to_name import add_pathway_name_to_df, add_ec_name_to_df, add_ko_name_to_df class PeptideAnnotator: def __init__(self, db_path:str, peptide_path: str, output_path: str, - threshold=1.0, genome_mode=True, protein_separator=';', + threshold=1.0, genome_mode=True, protein_separator=';', protein_genome_separator = '_', protein_col='Proteins', peptide_col='Sequence', sample_col_prefix='Intensity_'): self.db_path = db_path @@ -25,7 +25,8 @@ def __init__(self, db_path:str, peptide_path: str, output_path: str, self.threshold = round(float(threshold), 4) self.genome_mode = genome_mode - self.protein_separator = protein_separator + self.protein_separator = protein_separator # the separator between proteins in the proteins group column + self.protein_genome_separator = protein_genome_separator # the separator between protein and genome in each protein ID self.protein_col = protein_col self.peptide_col = peptide_col self.sample_col_prefix = sample_col_prefix @@ -43,6 +44,7 @@ def get_pep2taxafunc(self): threshold=self.threshold, genome_mode=self.genome_mode, conn=self.get_connection(), + protein_genome_separator = self.protein_genome_separator ) return self.thread_local.p2tf @@ -184,9 +186,9 @@ def run_annotate(self): return df_res if __name__ == '__main__': - final_peptides_path = 'C:/Users/max/Desktop/MetaX_Suite/MetaX/metax/metax/data/example_data/Example_final_peptide.tsv' - output_path = 'C:/Users/max/Desktop/Example_OTF.tsv' - db_path = 'C:/Users/max/Desktop/MetaX_Suite/MetaX-human-gut-new.db' + final_peptides_path = 'peptides.tsv' + output_path = 'OTF.tsv' + db_path = 'metax.db' threshold = 1 t0 = time.time() @@ -197,9 +199,10 @@ def run_annotate(self): threshold=threshold, genome_mode=True, protein_separator=';', + protein_genome_separator = '-', protein_col='Proteins', peptide_col='Sequence', - sample_col_prefix='Intensity_' + sample_col_prefix='CHFL' ) annotator.run_annotate()