Skip to content

Commit

Permalink
Merge pull request #8 from VEuPathDB/complex_edit_fix
Browse files Browse the repository at this point in the history
adding solution for complex split-merge.
  • Loading branch information
mbc32 authored Mar 29, 2021
2 parents 498967f + 140b575 commit 61531a4
Show file tree
Hide file tree
Showing 4 changed files with 72 additions and 15 deletions.
15 changes: 10 additions & 5 deletions allocation_service/annotation_events.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,13 +70,16 @@ def setup(self, index):
def _create_genes(self, event, index):
genes = list()
for gene_model in event:
gene = ProteinCodingGene(gene_model, index)
if gene_model['id'] not in index:
gene = ProteinCodingGene(gene_model, index)
if gene.source != 'reference':
self.new_gene_count += 1
self.created_genes.append(gene)
else:
gene = index[gene_model['id']]
genes.append(gene)
self.gene_event_index[gene.source_id] = genes

if gene.source != 'reference':
self.new_gene_count += 1
self.created_genes.append(gene)
return genes

def _allocate_to_gene(self, osid_id, gene_id):
Expand All @@ -98,10 +101,12 @@ def update_ancestors(self):
ancestors = list()
for gene in event:
if gene.source == 'reference':
gene.known_events.add(self.event_type)
ancestors.append(gene)
for gene in event:
if gene.source != 'reference':
gene.ancestors = ancestors
for ancestor_gene in ancestors:
gene.ancestors.add(ancestor_gene)


class CreateGeneModelEvent(AnnotationEvent):
Expand Down
12 changes: 8 additions & 4 deletions allocation_service/event_output.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,15 +95,18 @@ def write_event_file(self):
for annotation_event_type in self.event_collection.annotation_event_list:
event_type = annotation_event_type.event_type
for event in annotation_event_type.event_list:
flag = event_type + '_written_to_history_file'
for gene in event:
if gene.source_id != 'reference':
if gene.source != 'reference' and flag not in gene.status_flags:
gene.status_flags.add(flag)
if len(gene.ancestors) == 0 and gene.allocated_id:
self.file_handle.write(gene.allocated_id
+ "\t" + event_type + "\t" + '' + "\n")
else:
for ancestor in gene.ancestors:
self.file_handle.write(gene.allocated_id
+ "\t" + event_type + "\t" + ancestor.source_id + "\n")
if event_type in ancestor.known_events:
self.file_handle.write(gene.allocated_id
+ "\t" + event_type + "\t" + ancestor.source_id + "\n")

self.file_handle.close()

Expand All @@ -121,8 +124,9 @@ def __init__(self, session_database, application_id, production_database_id, com

for event in annotation_event_type.event_list:
for gene in event:
if gene.source_id != 'reference' and gene.allocated_id:
if gene.allocated_id and "written_to_session_database" not in gene.status_flags:
self.add_feature(gene, 'gene')
gene.status_flags.add("written_to_session_database")
for mrna in gene.mrnas:
self.add_feature(mrna, 'transcript')

Expand Down
4 changes: 3 additions & 1 deletion allocation_service/genomic_features.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,9 @@ def __init__(self, model, index):
self.source_id = str()
self.allocated_id = str()
self.osid_id = int
self.ancestors = list()
self.ancestors = set()
self.status_flags = set()
self.known_events = set()
self.setup_model(model)
self._register_my_self(index)

Expand Down
56 changes: 51 additions & 5 deletions allocation_service/tests/test_allocation_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,19 +19,28 @@ def get_organism_id(organism_name):
@staticmethod
def get_gene_id(organism_id, generate_genes):
_ = organism_id

if generate_genes == 2:
if generate_genes == 3:
return 1, [{"geneId": "ABC00015", "transcripts": [], "proteins": []},
{"geneId": "ABC00016", "transcripts": [], "proteins": []},
{"geneId": "ABC00017", "transcripts": [], "proteins": []}]
elif generate_genes == 2:
return 1, [{"geneId": "ABC00015", "transcripts": [], "proteins": []},
{"geneId": "ABC00016", "transcripts": [], "proteins": []}]
{"geneId": "ABC00016", "transcripts": [], "proteins": []}]
elif generate_genes == 1:
return 1, [{"geneId": "ABC00015", "transcripts": [], "proteins": []}]
else:
print("Error " + str(generate_genes))

@staticmethod
def get_transcripts(id_set_id, transcript_patch):
_ = id_set_id
if len(transcript_patch) == 2:
if len(transcript_patch) == 3:
return [{"geneId": "ABC00015", "transcripts": ['ABC00015_R001'], "proteins": ['ABC00015_P001']},
{"geneId": "ABC00016", "transcripts": ['ABC00016_R001'], "proteins": ['ABC00016_P001']},
{"geneId": "ABC00017", "transcripts": ['ABC00017_R001'], "proteins": ['ABC00017_P001']}]
elif len(transcript_patch) == 2:
return [{"geneId": "ABC00015", "transcripts": ['ABC00015_R001'], "proteins": ['ABC00015_P001']},
{"geneId": "ABC00016", "transcripts": ['ABC00016_R001'], "proteins": ['ABC00016_P001']}]
{"geneId": "ABC00016", "transcripts": ['ABC00016_R001'], "proteins": ['ABC00016_P001']}]
elif len(transcript_patch) == 1:
return [{"geneId": "ABC00015", "transcripts": ['ABC00015_R001'], "proteins": ['ABC00015_P001']}]

Expand All @@ -54,6 +63,39 @@ def get_annotations_events(event_type):
merge_1 = [ref_model1, ref_model2, merge_model1]
events.append(merge_1)

return events
elif event_type == 'complex_split':
events = list()
ref_model_s1 = {"source": "reference", "id": "AARA004952", "children": [{"id": "AARA004952_R0001", "version": 2,
"children": [{"id": "AARA004952_P0001", "version": 2}]}]}
split_model_s1a = {"source": "apollo", "id": "dd6f006e-613d-4507-84ec-d00e2097cd88", "children": [{"id": "DHEYODH-DHYERS-dd6f006e", "version": 2,
"children": [{"id": None, "version": 2}]}]}
split_model_s1b = {"source": "apollo", "id": "5d6f2e78-566e-4a3b-8534-d3422b77734d", "children": [{"id": "DHEYODH-DHYERS-5d6f2e78", "version": 2,
"children": [{"id": None, "version": 2}]}]}
complex_split1 = [ref_model_s1, split_model_s1a, split_model_s1b]
events.append(complex_split1)

ref_model_s2 = {"source": "reference", "id": "AARA004953", "children": [{"id": "AARA004953_R0001", "version": 2,
"children": [{"id": "AARA004953_P0001", "version": 2}]}]}
split_model_s2a = {"source": "apollo", "id": "fd03de20-5f52-49a7-88b8-6f79443ff90b", "children": [{"id": "DHEYODH-DHYERS-fd03de20", "version": 2,
"children": [{"id": None, "version": 2}]}]}
split_model_s2b = {"source": "apollo", "id": "5d6f2e78-566e-4a3b-8534-d3422b77734d", "children": [{"id": "DHEYODH-DHYERS-5d6f2e78", "version": 2,
"children": [{"id": None, "version": 2}]}]}
complex_split2 = [ref_model_s2, split_model_s2a, split_model_s2b]
events.append(complex_split2)

return events

elif event_type == 'complex_merge':
events = list()
ref_model_m1 = {"source": "reference", "id": "AARA004952", "children": [{"id": "AARA004952_R0001", "version": 2,
"children": [{"id": "AARA004952_P0001", "version": 2}]}]}
ref_model_m2 = {"source": "reference", "id": "AARA004953", "children": [{"id": "AARA004953_R0001", "version": 2,
"children": [{"id": "AARA004953_P0001", "version": 2}]}]}
merge_model_m12 = {"source": "apollo", "id": "5d6f2e78-566e-4a3b-8534-d3422b77734d", "children": [{"id": "DHEYODH-DHYERS-5d6f2e78", "version": 2,
"children": [{"id": None, "version": 2}]}]}
complex_merge = [ref_model_m1, ref_model_m2, merge_model_m12]
events.append(complex_merge)
return events
else:
return False
Expand Down Expand Up @@ -81,6 +123,10 @@ def test_create_event_collection(self):
self.assertEqual('ABC00015_R001', event_collection.get_allocated_id('DHEYODH-DHYERS'))
self.assertEqual('ABC00015_P001', event_collection.get_allocated_id('DHEYODH-DHYERS-CDS'))

event_collection = EventCollection('test', event_connection, stable_id_service)
event_collection.event_types = {'complex_split', 'complex_merge'}
event_collection.create()


class EventFileTestCase(unittest.TestCase):

Expand Down

0 comments on commit 61531a4

Please sign in to comment.