-
Notifications
You must be signed in to change notification settings - Fork 0
/
NeoLizard_cli.py
executable file
·136 lines (116 loc) · 5.1 KB
/
NeoLizard_cli.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
#!/usr/bin/env python
import os
import logging
from lib.logger_config import configure_lib_logger, configure_logger
from lib.argparse_config import parse_the_args
from lib.path_handler import PathHandler
from lib.cmd_runner import CommandRunner
from lib.qc import QCPipeline
from lib.m2a import MAFtoAVInputConverter
from lib.annovar_functions import AnnovarPipeline
from lib.cropping_flanks import CroppingFlanksPipeline
from lib.MHCflurry_prediction import MHCflurryPipeline
from lib.lizard import print_lizard
from lib.cutadapt import CutadaptPipeline
from lib.data_gathering import PipelineData
from lib.HLA import HLAPipeline
from lib.database_operations import DatabaseOperations
def main():
    """
    Main function of the NeoLizard pipeline.

    Parses CLI arguments, configures logging, validates the input/output
    paths, then runs each requested pipeline step (QC, cutadapt, custom
    commands, MAF->AVInput conversion, ANNOVAR annotation, MHCflurry
    prediction, database storage) in argument-declaration order.

    author : mxvp
    """
    args = parse_the_args()  # Argparse
    os.makedirs(args.output, exist_ok=True)
    logfile = os.path.join(args.output, "NeoLizard.log")  # File to write ALL logs to
    configure_logger(logfile)  # Configure the root logger
    configure_lib_logger(logfile)  # Configure the logger for lib files

    pathing = PathHandler(args.input, args.output)
    if not pathing.validate_paths():
        logging.error("Invalid input paths. Aborting!")
        return

    command_runner = CommandRunner()
    pipeline_data = PipelineData(pathing)
    HLA_pipeline = HLAPipeline(pathing, command_runner)

    # Iterate over the parsed args in declaration order; each recognized
    # flag triggers its pipeline step. Boolean flags come from argparse
    # store_true actions, so plain truthiness is the correct test.
    for arg, value in vars(args).items():
        if arg == "qc" and value:
            # Perform quality control
            qc_pipeline = QCPipeline(pathing, command_runner)
            qc_pipeline.run_pipeline()
        if arg == "cutadapt" and value:
            # Perform cutadapt adapter trimming
            cutadapt_pipeline = CutadaptPipeline(pathing, command_runner)
            cutadapt_pipeline.run_cutadapt_pipeline(
                args.cutadapt_commands, args.cutadapt_remove
            )
        if arg == "cmd" and value is not None:
            # Perform custom command.
            # Cmd is a list of lists; every inner list contains the
            # requested command string(s). The first string names the
            # output subfolder.
            output_folder = pathing.output_subfolder(value[0][0])
            command_runner.configure_command(
                pathing.input_path, output_folder, value[0]
            )
            # NOTE(review): method name looks like a typo for
            # "update_input_path" — confirm against PathHandler.
            pathing.update_input_input(output_folder)
            # Consume the subfolder name so only the command remains.
            args.cmd[0].pop(0)
        if arg == "m2a" and value:
            if args.TCGA_alleles:
                # Gather HLA alleles from maf files
                pipeline_data.link_HLA_ID_TCGA_to_MAF_samples()
                # Temp custom source...
                HLA_dict = HLA_pipeline.process_TCGA_HLA(
                    custom_source="./resources/panCancer_hla.tsv"
                )
                pipeline_data.link_HLA_TCGA_to_samples(HLA_dict)
            # Perform MAF to AVInput conversion
            m2a_pipeline = MAFtoAVInputConverter(pathing)
            m2a_pipeline.run_pipeline()
            # Gather sample names and mutation names here
            # (filename to "." and filename to ". + lineX")
            pipeline_data.link_samples_to_mutation_from_avinput()
        if arg == "annovar_annotate_variation" and value:
            # Perform annovar annotate_variation
            annovar_pipeline = AnnovarPipeline(pathing, command_runner)
            annovar_pipeline.run_annotate_variation_pipeline(
                args.annovar_annotate_variation_commands
            )
        if arg == "annovar_coding_change" and value:
            # Perform annovar coding_change
            annovar_pipeline = AnnovarPipeline(pathing, command_runner)
            annovar_pipeline.run_coding_change_pipeline(
                args.annovar_coding_change_commands
            )
            # Link mutations to transcripts
            pipeline_data.link_mutation_to_transcripts()
            # Link transcripts to HLA alleles (TCGA-derived only)
            if args.TCGA_alleles:
                pipeline_data.link_transcript_to_TCGA_HLA_alleles()
        if arg == "mhcflurry" and value:
            # Perform MHCflurry binding affinity prediction.
            # add_flanks and alleles will be used in the pipeline.
            cropping_flanks_pipeline = CroppingFlanksPipeline(pathing)
            mhcflurry_pipeline = MHCflurryPipeline(pathing)
            # Flank must be one shorter than the smallest peptide length.
            flank_length = min(args.peptide_lengths) - 1
            sequences, flanks = cropping_flanks_pipeline.cropping_flanks_pipeline_run(
                flank_length
            )
            if args.TCGA_alleles:
                alleles = pipeline_data.transcripts_alleles
            else:
                alleles = args.custom_alleles
            mhcflurry_pipeline.run_mhcflurry_pipeline(
                sequences, flanks, args.peptide_lengths, args.add_flanks, alleles
            )
        if arg == "store_db" and value:
            # Add gathered pipeline data to the PostgreSQL database.
            database_operations = DatabaseOperations(
                args.db_username,
                args.db_password,
                args.db_host,
                args.db_name,
                pipeline_data,
                pathing,
            )
            database_operations.run_add_data_pipeline()
# Script entry point: run the pipeline, then print the ASCII-art lizard
# sign-off once all requested steps have completed.
if __name__ == "__main__":
    main()
    print_lizard()