-
Notifications
You must be signed in to change notification settings - Fork 1
/
ise_stats_with_nis.py
83 lines (59 loc) · 2.24 KB
/
ise_stats_with_nis.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Mon Oct 9 18:11:16 2023
@author: ahmed
"""
#########################################
import pandas as pd
import glob
import os
import argparse
import warnings
#########################################
# ---------------------------------------------------------------------------
# Summarise a directory of whitespace-delimited ``*.sum`` tables (presumably
# ISEScan per-isolate summaries -- confirm) into two CSV files:
#
#   <prefix>_frequency.csv  total "copy" per element, highest first
#   <prefix>_heatmap.csv    isolate x element matrix of "copy" (0 where absent)
#
# Usage:
#   python ise_stats_with_nis.py -i ./summ_folder -p hello
#
# Warnings are no longer silenced process-wide; instead the constructs that
# commonly warn (assignment into a slice of another frame, the builtin ``sum``
# passed as ``aggfunc``, the non-raw ``'\s+'`` literal) were rewritten.
# ---------------------------------------------------------------------------


def _build_parser():
    """Return the command-line parser for ``-i/--input_dir`` and ``-p/--prefix``."""
    parser = argparse.ArgumentParser(description='Welcome!')
    parser.add_argument(
        '-i', '--input_dir',
        action='store',
        metavar='input_dir',
        type=str,
        # Without a directory os.path.join() would fail with a TypeError, so
        # let argparse report the missing option instead.
        required=True,
        help="input_dir",
    )
    # NOTE: the prefix stays optional for backward compatibility; when it is
    # omitted the outputs are named "None_frequency.csv" / "None_heatmap.csv".
    parser.add_argument(
        '-p', '--prefix',
        action='store',
        metavar='prefix',
        type=str,
        help="prefix",
    )
    return parser


def _read_summaries(sum_files):
    """Load every file in *sum_files* into one DataFrame.

    Each row gets a ``filename`` column holding the base name of the file it
    came from, so rows can later be attributed to an isolate.
    """
    frames = []
    for sum_file in sum_files:
        # Raw string: "\s" in a plain literal is an invalid escape sequence.
        frame = pd.read_csv(sum_file, sep=r'\s+')
        frame['filename'] = os.path.basename(sum_file)
        frames.append(frame)
    return pd.concat(frames, ignore_index=True)


def _summarise(records):
    """Return ``(frequency, heatmap)`` tables built from *records*.

    *records* must provide the columns ``seqid``, ``family`` and ``filename``.
    Rows whose ``seqid`` equals ``"total"`` are dropped before aggregating.

    NOTE(review): as in the original script, the ``seqid`` column supplies the
    element names and the ``family`` column supplies the values that are
    summed. Confirm this matches the real header layout of the ``.sum`` files.
    """
    records = records[records['seqid'] != 'total']

    # Build a fresh frame rather than assigning into a slice of ``records``.
    counts = pd.DataFrame({
        "Elements": records['seqid'],
        "copy": records['family'],
        "Isolate": records['filename'].str.removesuffix(".fna.sum"),
    })

    per_isolate = (
        counts.groupby(by=["Isolate", "Elements"])['copy'].sum().reset_index()
    )
    frequency = (
        counts.groupby(by=["Elements"])['copy'].sum().reset_index()
        .sort_values('copy', ascending=False)
    )
    heatmap = pd.crosstab(
        index=per_isolate['Isolate'],
        columns=per_isolate["Elements"],
        values=per_isolate['copy'],
        aggfunc="sum",  # the string form replaces the deprecated builtin sum
    ).fillna(0)  # isolate/element pairs that never occur count as 0
    return frequency, heatmap


def main(argv=None):
    """Parse the command line, aggregate the summaries and write both CSVs.

    Parameters
    ----------
    argv : list of str, optional
        Command-line arguments without the program name. ``None`` (default)
        makes argparse read ``sys.argv``.

    Raises
    ------
    SystemExit
        Raised by argparse when ``-i`` is missing or when the input directory
        contains no ``*.sum`` file.
    """
    parser = _build_parser()
    args = parser.parse_args(argv)

    # Sorted so the read order does not depend on the file system.
    sum_files = sorted(glob.glob(os.path.join(args.input_dir, "*.sum")))
    if not sum_files:
        # pd.concat([]) would otherwise fail with "No objects to concatenate".
        parser.error("no *.sum files found in %r" % args.input_dir)

    frequency, heatmap = _summarise(_read_summaries(sum_files))
    frequency.to_csv(f"{args.prefix}_frequency.csv", index=False, sep=',')
    heatmap.to_csv(f"{args.prefix}_heatmap.csv", index=True, sep=',')


if __name__ == "__main__":
    main()