-
Notifications
You must be signed in to change notification settings - Fork 9
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
dc3c172
commit 5ca857b
Showing
92 changed files
with
9,998 additions
and
2,245 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -12,3 +12,7 @@ IRIS.egg-info/ | |
build/ | ||
dist/ | ||
__pycache__/ | ||
conda_env_2/ | ||
conda_env_3/ | ||
/.snakemake/ | ||
/readline/ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,220 @@ | ||
import numpy as np | ||
import sys | ||
import os, glob, pyBigWig, argparse | ||
from scipy import stats | ||
import statsmodels.stats.weightstats as smw | ||
from . import config | ||
import warnings | ||
warnings.filterwarnings("ignore") | ||
|
||
#python retreive_SJ_info.py test_sj.para SE event_list_test.txt sj_info | ||
def read_SJMatrix_index(fn,outdir): | ||
index = {} | ||
for line in open(outdir+'/'+fn.split('/')[-1]+'.idx', 'r'): | ||
ele = line.strip().split() | ||
index[ele[0]] = int(ele[1]) | ||
return index | ||
|
||
def fetch_SJMatrix(eid, fn, delim, index, head_only): | ||
with open(fn, 'r') as f: | ||
if head_only: | ||
ele = f.readline().strip().split(delim) | ||
retrieved_text = np.asarray([ x.split('.aln')[0] for x in ele ]) | ||
else: | ||
f.seek(index[eid], 0) | ||
retrieved_text = np.asarray(f.readline().strip().split(delim)) | ||
return retrieved_text | ||
|
||
|
||
def loadParametersRow(filter_para, panel_list): | ||
filter_cutoffs='' | ||
if filter_para.strip()!='': | ||
filter_cutoffs = map(float,filter_para.strip().split(' ')[0].split(',')) | ||
filter_panel_list = filter_para.strip().split(' ')[1].split(',') | ||
panel_list+=filter_panel_list | ||
else: | ||
filter_panel_list =[] | ||
return filter_cutoffs, filter_panel_list, panel_list | ||
|
||
|
||
|
||
def readEventRow(row, header_line): | ||
if header_line=='' or header_line==False: | ||
rs=row.strip().split('\t') | ||
return rs | ||
else: | ||
rs=row.strip().split('\t') | ||
return dict(zip(header_line, rs)) | ||
|
||
def convertAS2SJevent(line_dict, splicing_event_type):#only for this script. Diff | ||
as_event=line_dict['as_event'] | ||
event_s=as_event.split(':') | ||
if splicing_event_type=='SE': | ||
event_row_list=[event_s[2]+':'+str(int(event_s[6])+1)+':'+event_s[4], event_s[2]+':'+str(int(event_s[5])+1)+':'+event_s[7], event_s[2]+':'+str(int(event_s[6])+1)+':'+event_s[7]] | ||
|
||
elif splicing_event_type=='A5SS':# Only use one junction for inc. Need to improve by updating db later | ||
event_row_list=[line_dict['chr']+':'+str(int(event_s[5])+1)+':'+event_s[8], line_dict['chr']+':'+str(int(event_s[7])+1)+':'+event_s[8]] | ||
|
||
elif splicing_event_type=='A3SS': # Only use one junction for inc. Need to improve by updating db later | ||
event_row_list=[line_dict['chr']+':'+str(int(event_s[9])+1)+':'+event_s[4],line_dict['chr']+':'+str(int(event_s[9])+1)+':'+event_s[6]] | ||
|
||
else: | ||
exit('[Error] Splicing event type not supported. Exiting.') | ||
return event_row_list, as_event | ||
|
||
def summarizeSJ2ASevent(event_list_fin, splicing_event_type, sig_junction, outdir, out_prefix): | ||
fout_summary_fname=outdir+'/SJ.'+out_prefix+'.'+splicing_event_type+'.summary_by_sig_event.txt' | ||
fout_summary=open(fout_summary_fname,'w') | ||
for event_idx, event_row in enumerate(open(event_list_fin)): | ||
if event_idx==0: | ||
header_list=readEventRow(event_row,'') | ||
continue | ||
line_dict=readEventRow(event_row, header_list) | ||
event_row_list, as_event=convertAS2SJevent(line_dict, splicing_event_type) | ||
as_event_result=[] | ||
as_event_result_list=[] | ||
for k in event_row_list: | ||
if k not in sig_junction: | ||
as_event_result.append(False) | ||
else: | ||
as_event_result.append(True) | ||
as_event_result_list.append(k) | ||
if as_event_result[0]==as_event_result[1]==as_event_result[2]==True: | ||
fout_summary.write(as_event+'\tAll junctions\t'+';'.join(as_event_result_list)+'\n') | ||
elif as_event_result[0]==as_event_result[1]==as_event_result[2]==False: | ||
continue | ||
else: | ||
if as_event_result[0]==as_event_result[1]!=as_event_result[2]: | ||
fout_summary.write(as_event+'\tOnly alternative junctions\t'+';'.join(as_event_result_list)+'\n') | ||
else: | ||
fout_summary.write(as_event+'\tOther combination\t'+';'.join(as_event_result_list)+'\n') | ||
fout_summary.close() | ||
return fout_summary_fname | ||
|
||
|
||
|
||
|
||
def main(args): | ||
###Loading Parameters#### | ||
para_fin=args.parameter_file | ||
splicing_event_type=args.splicing_event_type | ||
if splicing_event_type!='SE': | ||
exit('[Error] Invalid AS event type.') | ||
event_list_fin=args.screening_result_event_list | ||
outdir=args.outdir.rstrip('/') | ||
|
||
os.system('mkdir -p '+outdir) | ||
fetching_sj_col=1 | ||
out_prefix,db_dir,filter1_para,filter2_para,filter3_para=[l.strip() for l in open(para_fin)][:5] | ||
db_dir=db_dir.rstrip('/') | ||
if os.path.isdir(db_dir+'_sjc'): #automatically use db_sjc if in the same dir. Otherwise, use the user input db_dir | ||
db_dir=db_dir+'_sjc' | ||
panel_list=[out_prefix] | ||
|
||
filter1_cutoffs, filter1_panel_list, panel_list = loadParametersRow(filter1_para, panel_list) | ||
filter2_cutoffs, filter2_panel_list, panel_list = loadParametersRow(filter2_para, panel_list) | ||
filter3_cutoffs, filter3_panel_list, panel_list = loadParametersRow(filter3_para, panel_list) | ||
tumor_dict=dict.fromkeys(filter2_panel_list,'') | ||
tumor_dict[out_prefix]='' | ||
pvalue_cutoff_normal=''; pvalue_cutoff_tumor='' | ||
filter1_group_cutoff=''; filter2_group_cutoff=''; filter3_group_cutoff=''; | ||
if filter1_cutoffs!='': | ||
pvalue_cutoff_normal,filter1_group_cutoff=filter1_cutoffs[3:] | ||
if filter2_cutoffs!='': | ||
pvalue_cutoff_tumor,filter2_group_cutoff=filter2_cutoffs[3:] | ||
if filter3_cutoffs!='': | ||
pvalue_cutoff_normal,filter3_group_cutoff=filter3_cutoffs[3:] | ||
|
||
inc_read_cov_cutoff=int(args.inc_read_cov_cutoff)#2 | ||
event_read_cov_cutoff=int(args.event_read_cov_cutoff)#10 | ||
|
||
##Load IRIS reference panels to 'fin_list', 'index' | ||
index={} | ||
fin_list={} | ||
for group_name in panel_list: | ||
fin_list[group_name]=db_dir+'/'+group_name+'/sjc_matrix/SJ_count.'+group_name+'.txt' | ||
for group in fin_list.keys(): | ||
if not os.path.isfile(fin_list[group]+'.idx'): | ||
exit('[Error] Need to index '+fin_list[group]) | ||
index[group]=read_SJMatrix_index(fin_list[group],'/'.join(fin_list[group].split('/')[:-1])) | ||
|
||
print('[INFO] Done loading index '+' '.join(panel_list)) | ||
tot=config.file_len(event_list_fin)-1 | ||
fout_ijc=open(outdir+'/'+event_list_fin.split('/')[-1]+'.ijc_info.txt','w') | ||
header_line=[] | ||
sample_size={} | ||
|
||
header_line=['ijc_ratio', 'mean_ijc_by_group', 'percent_sample_imbalanced', 'sample_imbalanced_by_group'] | ||
header_list=[] | ||
print('[INFO] Retrieving inclusion junction info') | ||
for event_idx, event_row in enumerate(open(event_list_fin)): | ||
config.update_progress(event_idx/(0.0+tot)) | ||
if event_idx==0: | ||
header_list=readEventRow(event_row,'') | ||
fout_ijc.write(event_row.strip()+'\t'+'\t'.join(header_line)+'\n') | ||
continue | ||
line_dict=readEventRow(event_row, header_list) | ||
event_row_list, as_event=convertAS2SJevent(line_dict, splicing_event_type) | ||
|
||
forms={} | ||
if splicing_event_type=='SE': | ||
inc1, inc2, skp= event_row_list | ||
forms={'inc1':inc1,'inc2':inc2,'skp':skp} | ||
else: | ||
exit('[Error] Invalid AS event type.') | ||
#Initiate psi matrix by each row to 'sj' | ||
|
||
sj={} | ||
for form in forms: | ||
k=forms[form] | ||
sj[form]={} | ||
for group in panel_list: | ||
random_key=index[group].keys()[0] | ||
sample_names=map(str,fetch_SJMatrix(random_key,fin_list[group],'\t',index[group],True)[fetching_sj_col:]) | ||
sample_size[group]=len(sample_names) | ||
if k in index[group]: | ||
sj[form][group]=map(int,fetch_SJMatrix(k,fin_list[group],'\t',index[group], False)[fetching_sj_col:]) | ||
else: | ||
sj[form][group]=[0]*sample_size[group] | ||
|
||
imbalance={} | ||
total_sample={} | ||
inc_ratio={} | ||
i1_list={} | ||
i2_list={} | ||
for group in panel_list: | ||
i1=map(int,sj['inc1'][group]) | ||
i2=map(int,sj['inc2'][group]) | ||
s=map(int,sj['skp'][group]) | ||
i1_list[group]=[] | ||
i2_list[group]=[] | ||
imbalance[group]=0 | ||
total_sample[group]=0 | ||
for pidx in range(0, len(sj['inc1'][group])): | ||
if i1[pidx]+i2[pidx]<inc_read_cov_cutoff or i1[pidx]+i2[pidx]+s[pidx]<event_read_cov_cutoff: | ||
continue | ||
total_sample[group]+=1 | ||
ratio=i1[pidx]/(i2[pidx]+0.01) | ||
if ratio>2 or ratio<0.5: | ||
imbalance[group]+=1 | ||
i1_list[group].append(i1[pidx]) | ||
i2_list[group].append(i2[pidx]) | ||
|
||
i_by_group='|'.join([str(round(np.mean(i1_list[group]),1))+','+str(round(np.mean(i2_list[group]),1)) for group in panel_list]) | ||
i1_sum=sum(sum(i1_list[g]) for g in panel_list) | ||
i2_sum=sum(sum(i2_list[g]) for g in panel_list) | ||
ratio=round(i1_sum/(i2_sum+0.01),3) | ||
|
||
imbalance_by_group='|'.join([str(imbalance[group])+','+str(total_sample[group]) for group in panel_list]) | ||
imb_count=sum(imbalance[group] for group in panel_list) | ||
tot_count=sum(total_sample[group] for group in panel_list) | ||
imb_perc='-' | ||
if tot_count!=0: | ||
imb_perc=round(imb_count/(tot_count+0.0),3) | ||
fout_ijc.write(event_row.rstrip()+'\t'+'\t'.join([as_event,str(ratio), i_by_group, str(imb_perc),imbalance_by_group])+'\n') | ||
|
||
fout_ijc.close() | ||
|
||
|
||
if __name__ == '__main__': | ||
main() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,57 @@ | ||
import sys, glob, os | ||
from . import config | ||
#append annotation to all screening result and epitope result in a specified screening folder | ||
def loadPTsummary(fin): | ||
PT_event={} | ||
for l in open(fin): | ||
ls=l.strip().split('\t') | ||
PT_event[ls[0]]=ls[1]+'|'+ls[2]+'|'+ls[3]+'|'+ls[4] | ||
return PT_event | ||
|
||
|
||
def annotateRAbyPTEvent(fin, event_col, CPM_event, fout_fname): | ||
fout=open(fout_fname,'w') | ||
for n,l in enumerate(open(fin)): | ||
if n==0: | ||
annotation='cpm_test_summary' | ||
fout.write(l.strip()+'\t'+annotation+'\n') | ||
continue | ||
ls=l.strip().split('\t') | ||
annotation='-' | ||
if CPM_event.has_key(ls[event_col]): | ||
annotation=CPM_event[ls[event_col]] | ||
fout.write(l.strip()+'\t'+annotation+'\n') | ||
fout.close() | ||
|
||
def main(args): | ||
CPMsummary=args.cpm_summary | ||
CPM_event=loadPTsummary(CPMsummary) | ||
print('[INFO] Number of event in CPM screen summary:'+str(len(CPM_event))) | ||
splicing_event_type=args.splicing_event_type | ||
screening_result_dir=args.outdir.rstrip('/') | ||
|
||
epitope_file_junction_file_list= glob.glob(screening_result_dir+'/'+splicing_event_type+'.*/epitope_summary.junction-based.txt') | ||
epitope_file_peptide_file_list= glob.glob(screening_result_dir+'/'+splicing_event_type+'.*/epitope_summary.peptide-based.txt') | ||
extracellular_as_file_list= glob.glob(screening_result_dir+'/*'+splicing_event_type+'.*.ExtraCellularAS.txt') | ||
screening_file_list=glob.glob(screening_result_dir+'/*.'+splicing_event_type+'.tier1.txt')+glob.glob(screening_result_dir+'/*.'+splicing_event_type+'.tier2tier3.txt') | ||
|
||
|
||
for fin_fname in screening_file_list: | ||
print '[INFO] Integrating CPM test result to', fin_fname | ||
annotateRAbyPTEvent(fin_fname, 0, CPM_event, fin_fname+'.integratedCPM.txt') | ||
|
||
for fin_fname in extracellular_as_file_list: | ||
print '[INFO] Integrating CPM test result to', fin_fname | ||
annotateRAbyPTEvent(fin_fname, 0, CPM_event, fin_fname+'.integratedCPM.txt') | ||
|
||
for fin_fname in epitope_file_peptide_file_list: | ||
print '[INFO] Integrating CPM test result to', fin_fname | ||
annotateRAbyPTEvent(fin_fname, 1, CPM_event, fin_fname+'.integratedCPM.txt') | ||
|
||
for fin_fname in epitope_file_junction_file_list: | ||
print '[INFO] Integrating CPM test result to', fin_fname | ||
annotateRAbyPTEvent(fin_fname, 0, CPM_event, fin_fname+'.integratedCPM.txt') | ||
|
||
|
||
if __name__ == '__main__': | ||
main() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,95 @@ | ||
import sys, glob, os | ||
from . import config | ||
#append annotation to all screening result and epitope result in a specified screening folder | ||
def loadPTsummary(fin): | ||
PT_event={} | ||
for l in open(fin): | ||
ls=l.strip().split('\t') | ||
PT_event[ls[0]]=ls[1]+'|'+ls[2] | ||
return PT_event | ||
|
||
def loadIJCsummary(fin): | ||
IJC_info={} | ||
for i,l in enumerate(open(fin)): | ||
if i==0: | ||
continue | ||
ls=l.strip().split('\t') | ||
IJC_info[ls[0]]=ls[-4]+'\t'+ls[-3]+'\t'+ls[-2]+'\t'+ls[-1] | ||
return IJC_info | ||
|
||
def annotateRAbyPTEvent(fin, event_col, PT_event, fout_fname, IJC_info): | ||
fout=open(fout_fname,'w') | ||
for n,l in enumerate(open(fin)): | ||
if n==0: | ||
annotation='sjc_test_summary' | ||
if IJC_info!={}: | ||
annotation+='\t'+'\t'.join(['ijc_ratio', 'mean_ijc_by_group', 'percent_sample_imbalanced', 'sample_imbalanced_by_group']) | ||
fout.write(l.strip()+'\t'+annotation+'\n') | ||
continue | ||
ls=l.strip().split('\t') | ||
annotation='-' | ||
if PT_event.has_key(ls[event_col]): | ||
annotation=PT_event[ls[event_col]] | ||
if IJC_info!={}: | ||
if IJC_info.has_key(ls[event_col]): | ||
annotation+='\t'+IJC_info[ls[event_col]] | ||
else: | ||
annotation+='\t'+'\t'.join(['-']*4) | ||
fout.write(l.strip()+'\t'+annotation+'\n') | ||
fout.close() | ||
|
||
def main(args): | ||
PTsummary=args.sjc_summary | ||
PT_event=loadPTsummary(PTsummary) | ||
print('[INFO] Number of event in SJ count screen summary:'+str(len(PT_event))) | ||
splicing_event_type=args.splicing_event_type | ||
screening_result_dir=args.outdir.rstrip('/') | ||
|
||
epitope_file_junction_file_list= glob.glob(screening_result_dir+'/'+splicing_event_type+'.*/epitope_summary.junction-based.txt') | ||
epitope_file_peptide_file_list= glob.glob(screening_result_dir+'/'+splicing_event_type+'.*/epitope_summary.peptide-based.txt') | ||
extracellular_as_file_list= glob.glob(screening_result_dir+'/*'+splicing_event_type+'.*.ExtraCellularAS.txt') | ||
screening_file_list=glob.glob(screening_result_dir+'/*.'+splicing_event_type+'.tier1.txt')+glob.glob(screening_result_dir+'/*.'+splicing_event_type+'.tier2tier3.txt') | ||
|
||
#optional, if ijc imbalance | ||
add_ijc_info=args.add_ijc_info | ||
use_existing_result=args.use_existing_result#use existing ijc result | ||
para_file=args.parameter_file | ||
event_list_file=args.screening_result_event_list | ||
inc_read_cov_cutoff=args.inc_read_cov_cutoff#2 | ||
event_read_cov_cutoff=args.event_read_cov_cutoff#10 | ||
IJC_info={} | ||
if add_ijc_info or use_existing_result: | ||
if para_file=='' or event_list_file=='': | ||
exit('[Error] Specify parameters and event list file for retrieving inclusion junction information.') | ||
|
||
if add_ijc_info and use_existing_result==False: | ||
cmd='IRIS annotate_ijc -p '+para_file+' --splicing-event-type '+splicing_event_type+' -e '+event_list_file+' -o '+screening_result_dir+' --inc-read-cov-cutoff '+str(inc_read_cov_cutoff)+' --event-read-cov-cutoff '+str(event_read_cov_cutoff) | ||
print('[INFO] Annotating inclusion junction info to '+event_list_file) | ||
print(cmd) | ||
os.system(cmd) | ||
if use_existing_result or add_ijc_info: | ||
file_path=screening_result_dir+'/'+event_list_file.split('/')[-1]+'.ijc_info.txt' | ||
if os.path.exists(file_path)==False: | ||
exit('[Error] Result IJC annotation file not found in path '+file_path) | ||
print('[INFO] Loading inclusion junction info from '+file_path) | ||
IJC_info=loadIJCsummary(file_path) | ||
|
||
for fin_fname in screening_file_list: | ||
print '[INFO] Integrating SJC test result to', fin_fname | ||
annotateRAbyPTEvent(fin_fname, 0, PT_event, fin_fname+'.integratedSJC.txt',IJC_info) | ||
|
||
for fin_fname in extracellular_as_file_list: | ||
print '[INFO] Integrating SJC test result to', fin_fname | ||
annotateRAbyPTEvent(fin_fname, 0, PT_event, fin_fname+'.integratedSJC.txt',IJC_info) | ||
|
||
for fin_fname in epitope_file_peptide_file_list: | ||
print '[INFO] Integrating SJC test result to', fin_fname | ||
annotateRAbyPTEvent(fin_fname, 1, PT_event, fin_fname+'.integratedSJC.txt',IJC_info) | ||
|
||
for fin_fname in epitope_file_junction_file_list: | ||
print '[INFO] Integrating SJC test result to', fin_fname | ||
annotateRAbyPTEvent(fin_fname, 0, PT_event, fin_fname+'.integratedSJC.txt',IJC_info) | ||
|
||
|
||
if __name__ == '__main__': | ||
main() |
Oops, something went wrong.