-
Notifications
You must be signed in to change notification settings - Fork 0
/
fill_database.py
71 lines (58 loc) · 2.44 KB
/
fill_database.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker
import codecs
#from prokarotic_declarative import Species, Base, Gene, Bacteria_Gene, Primer
from prokarotic_declarative import Species, Base
# Engine backed by the SQLite database file prokarotic.db.
engine = create_engine('sqlite:///prokarotic.db')
# Set text_factory to str on the DBAPI connection behind one raw connection.
# NOTE(review): this only touches the single raw connection obtained here;
# whether connections used later by the session share the setting depends on
# the engine's connection pool -- confirm it has the intended effect.
engine.raw_connection().connection.text_factory = str
# Bind the engine to the metadata of the Base class so that the
# declaratives can be accessed through a DBSession instance.
Base.metadata.bind = engine
# Session factory bound to the engine above.
DBSession = sessionmaker(bind=engine)
# A DBSession() instance establishes all conversations with the database
# and represents a "staging zone" for all the objects loaded into the
# database session object. Any change made against the objects in the
# session won't be persisted into the database until you call
# session.commit(). If you're not happy about the changes, you can
# revert all of them back to the last commit by calling
# session.rollback().
# Module-level session shared by the insert code below.
session = DBSession()
def is_na(key, value):
    """Convert a raw text field to a number, mapping "na" markers to 0.

    Despite the name, this does not return a boolean: it returns the
    numeric value that should be stored for the field.

    Args:
        key: Label of the column the value came from. Only ``'GC'`` is
            special-cased (parsed as a float); every other key is parsed
            as an int.
        value: Raw field text. Thousands-separator commas are removed
            before parsing.

    Returns:
        ``0`` when ``value`` is an "na" marker in any letter case
        (surrounding whitespace ignored); otherwise a ``float`` for the
        ``'GC'`` key and an ``int`` for all other keys.

    Raises:
        ValueError: If the text is neither an "na" marker nor parseable
            as the target numeric type.
    """
    text = value.strip()
    # Compare case-insensitively so spellings such as 'nA' or ' NA ' are
    # treated as missing instead of crashing the numeric conversion.
    if text.lower() == 'na':
        return 0
    digits = text.replace(',', '')
    # GC is currently the only fractional column; this will need to be
    # updated if more float columns are added.
    if key == 'GC':
        return float(digits)
    return int(digits)
def insert_values(fileLine):
    """Insert one tab-separated input row as a ``Species`` record.

    The row is split on tabs and its first nine fields are mapped, in
    order, to: serotype, strain, accession, genome size, plasmid count,
    GC percentage, gene count, sequencing technology and description.
    The genus name is always stored as ``'E. coli'``. The four numeric
    fields are converted with ``is_na``.

    The new record is added to the module-level ``session`` and committed
    immediately.

    Args:
        fileLine: One data line of the input file, without its trailing
            newline.

    Raises:
        IndexError: If the line has fewer than nine tab-separated fields.
        ValueError: If a numeric field cannot be converted by ``is_na``.
        Exception: Any error raised by the commit is re-raised after the
            session has been rolled back.
    """
    sLine = fileLine.split('\t')
    # Fields are read in column order so a malformed row fails on the
    # first bad column.
    new_species = Species(
        genus_name='E. coli',
        serotype=sLine[0],
        strain=sLine[1],
        accession=sLine[2],
        genome_size=is_na('G_size', sLine[3]),
        plasmid_count=is_na('plasmid_count', sLine[4]),
        GC_percentage=is_na('GC', sLine[5]),
        gene_count=is_na('gene_count', sLine[6]),
        sequencing_technology=sLine[7],
        description=sLine[8],
    )
    try:
        session.add(new_species)
        session.commit()
    except Exception:
        # The session is shared at module level; roll back so a failed
        # commit does not leave it unusable for whoever handles the error.
        session.rollback()
        raise
    # TODO: gene rows are not inserted yet, e.g.
    # Gene(gene_name='stx2', species=new_species) added to the session.
# Load every data row of mimiDb.txt into the database, printing the file
# line number of each row as progress output.
with codecs.open('mimiDb.txt', 'r', encoding='utf8') as inputFile:
    inputFile.readline()  # first line is the header row: read and discard it
    # Data starts on line 2 of the file, so numbering starts at 2. Iterating
    # the file directly avoids loading all lines into memory at once.
    for count, line in enumerate(inputFile, 2):
        # Strip the whole line terminator: removing only '\n' would leave a
        # stray '\r' in the last column for files with CRLF line endings.
        line = line.rstrip('\r\n')
        if not line.strip():
            # Blank lines carry no record and would otherwise crash the
            # column lookup in insert_values.
            continue
        print(count)
        insert_values(line)