Skip to content

Commit

Permalink
fixing cohorts
Browse files Browse the repository at this point in the history
  • Loading branch information
costero-e committed Dec 16, 2024
1 parent bf040f0 commit 10a2609
Show file tree
Hide file tree
Showing 7 changed files with 407 additions and 37 deletions.
44 changes: 33 additions & 11 deletions cohorts_csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,9 +116,9 @@ def generate(dict_properties, list_of_headers):
for propk, propv in dict_of_properties.items():
if propk == new_item:
propv = re.sub(r'\s', '', propv)
respropv = json.loads(propv)
respropv = json.loads(propv)
item_dict[ki][ki1]=respropv
else:
else:
for ki2, vi2 in vi1.items():
new_item = ""
new_item = key + "|" + ki + "|" + ki1 + "|" + ki2
Expand All @@ -132,11 +132,12 @@ def generate(dict_properties, list_of_headers):
new_item = key + "|" + ki + "|" + ki1
for propk, propv in dict_of_properties.items():
if propk == new_item:
#print(propk)
if propk.endswith('availability'):
print(propv)
vi_dict[ki1]=bool(propv)
item_dict[ki]=vi_dict
elif propk.endswith('availabilityCount'):
print(propv)
vi_dict[ki1]=int(propv)
item_dict[ki]=vi_dict
else:
Expand Down Expand Up @@ -251,11 +252,14 @@ def generate(dict_properties, list_of_headers):
for propk, propv in dict_of_properties.items():
if propk == new_item:
try:
value_dict[kd][0][kd1]={}
value_dict[kd][0][kd1][kd2]=propv
except Exception:
value_dict[kd][kd1]={}
value_dict[kd][kd1][kd2]=propv
try:
value_dict[kd][0][kd1]={}
value_dict[kd][0][kd1][kd2]=propv
except Exception:
value_dict[kd][kd1]={}
value_dict[kd][kd1][kd2]=propv
elif isinstance(vd1, list):
arrayofkdvs=[]
new_item = ""
Expand Down Expand Up @@ -285,7 +289,14 @@ def generate(dict_properties, list_of_headers):
else:
try:
jsonedpropv=json.loads(propv)
dicty[kd1]=[jsonedpropv]
if kd1 == 'ageOfOnset':
dicty[kd1]=jsonedpropv
elif kd1 == 'onset':
dicty[kd1]=jsonedpropv
elif kd1 == 'resolution':
dicty[kd1]=jsonedpropv
else:
dicty[kd1]=[jsonedpropv]
if dicty not in arrayofkdvs:
arrayofkdvs.append(dicty)
value_dict[kd]=arrayofkdvs
Expand Down Expand Up @@ -329,10 +340,16 @@ def generate(dict_properties, list_of_headers):
if kd not in arrayofkdvs:
value_dict[kd]=[]
try:
if dicty not in value_dict[kd]:
value_dict[kd].append(dicty)
value_dict[kd].append(dicty)
except Exception:
value_dict[kd]=dicty
if key == 'inclusionCriteria' or key == 'exclusionCriteria':
try:
value_dict[kd].append(dicty)
except Exception:
value_dict[kd]=[]
value_dict[kd].append(dicty)
else:
value_dict[kd]=dicty


if value_dict != {}:
Expand Down Expand Up @@ -376,7 +393,10 @@ def generate(dict_properties, list_of_headers):
for propk, propv in dict_of_properties.items():
if propk == new_item:
value_dict[kd]=propv
definitivedict[key]=value_dict
if key == 'cohortDataTypes':
definitivedict[key]=[value_dict]
else:
definitivedict[key]=value_dict
if value == {}:
new_item = ""
new_item = key
Expand All @@ -397,6 +417,8 @@ def generate(dict_properties, list_of_headers):
except Exception:
propv = propv
definitivedict[key]=propv

print(definitivedict)
Cohorts(**definitivedict)
definitivedict["datasetId"]=conf.datasetId
total_dict.append(definitivedict)
Expand Down
4 changes: 2 additions & 2 deletions csv/examples/cohorts.csv
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
cohortDataTypes|id,cohortDataTypes|label,cohortDesign|id,cohortDesign|label,cohortSize,cohortType,collectionEvents|eventAgeRange|availability,collectionEvents|eventAgeRange|availabilityCount,collectionEvents|eventAgeRange|distribution,collectionEvents|eventDataTypes|availability,collectionEvents|eventDataTypes|availabilityCount,collectionEvents|eventDataTypes|distribution,collectionEvents|eventDiseases|availability,collectionEvents|eventDiseases|availabilityCount,collectionEvents|eventDiseases|distribution,collectionEvents|eventEthnicities|availability,collectionEvents|eventEthnicities|availabilityCount,collectionEvents|eventEthnicities|distribution,collectionEvents|eventGenders|availability,collectionEvents|eventGenders|availabilityCount,collectionEvents|eventGenders|distribution,collectionEvents|eventLocations|availability,collectionEvents|eventLocations|availabilityCount,collectionEvents|eventLocations|distribution,collectionEvents|eventPhenotypes|availability,collectionEvents|eventPhenotypes|availabilityCount,collectionEvents|eventPhenotypes|distribution,collectionEvents|eventTimeline|availability,collectionEvents|eventTimeline|availabilityCount,collectionEvents|eventTimeline|distribution,exclusionCriteria|ageRange|end|iso8601duration,exclusionCriteria|ageRange|start|iso8601duration,exclusionCriteria|diseaseConditions|ageOfOnset,exclusionCriteria|diseaseConditions|diseaseCode|id,exclusionCriteria|diseaseConditions|diseaseCode|label,exclusionCriteria|diseaseConditions|familyHistory,exclusionCriteria|diseaseConditions|notes,exclusionCriteria|diseaseConditions|severity|id,exclusionCriteria|diseaseConditions|severity|label,exclusionCriteria|diseaseConditions|stage|id,exclusionCriteria|diseaseConditions|stage|label,exclusionCriteria|ethnicities|id,exclusionCriteria|ethnicities|label,exclusionCriteria|genders|id,exclusionCriteria|genders|label,exclusionCriteria|locations|id,exclusionCriteria|locations|label,exclusionCriteria|phenotypicConditions|evidence|evidenceCode,exclusionCriteria|phenotypicConditions|evidence|reference,exclusionCriteria|phenotypicConditions|excluded,exclusionCriteria|phenotypicConditions|featureType|id,exclusionCriteria|phenotypicConditions|featureType|label,exclusionCriteria|phenotypicConditions|modifiers,exclusionCriteria|phenotypicConditions|notes,exclusionCriteria|phenotypicConditions|onset,exclusionCriteria|phenotypicConditions|resolution,exclusionCriteria|phenotypicConditions|severity|id,exclusionCriteria|phenotypicConditions|severity|label,exclusionCriteria|type|availability,exclusionCriteria|type|availabilityCount,id,inclusionCriteria|ageRange|end|iso8601duration,inclusionCriteria|ageRange|start|iso8601duration,inclusionCriteria|diseaseConditions|ageOfOnset,inclusionCriteria|diseaseConditions|diseaseCode|id,inclusionCriteria|diseaseConditions|diseaseCode|label,inclusionCriteria|diseaseConditions|familyHistory,inclusionCriteria|diseaseConditions|notes,inclusionCriteria|diseaseConditions|severity|id,inclusionCriteria|diseaseConditions|severity|label,inclusionCriteria|diseaseConditions|stage|id,inclusionCriteria|diseaseConditions|stage|label,inclusionCriteria|ethnicities|id,inclusionCriteria|ethnicities|label,inclusionCriteria|genders|id,inclusionCriteria|genders|label,inclusionCriteria|locations|id,inclusionCriteria|locations|label,inclusionCriteria|phenotypicConditions|evidence|evidenceCode,inclusionCriteria|phenotypicConditions|evidence|reference,inclusionCriteria|phenotypicConditions|excluded,inclusionCriteria|phenotypicConditions|featureType|id,inclusionCriteria|phenotypicConditions|featureType|label,inclusionCriteria|phenotypicConditions|modifiers,inclusionCriteria|phenotypicConditions|notes,inclusionCriteria|phenotypicConditions|onset,inclusionCriteria|phenotypicConditions|resolution,inclusionCriteria|phenotypicConditions|severity|id,inclusionCriteria|phenotypicConditions|severity|label,inclusionCriteria|type|availability,inclusionCriteria|type|availabilityCount,name
,,,,,study-defined,,,,,,,TRUE,1705,"{""diseases"": {""acute bronchitis"": 121,""agranulocytosis"": 111,""asthma"": 134,""bipolar affective disorder"": 134,""cardiomyopathy"": 133,""dental caries"": 139,""eating disorders"": 134,""fibrosis and cirrhosis of liver"": 132,""gastro-oesophageal reflux disease"": 140,""haemorrhoids"": 127,""influenza due to certain identified influenza virus"": 135,""insulin-dependent diabetes mellitus"": 165,""iron deficiency anaemia"": 142,""multiple sclerosis"": 125,""obesity"": 136,""sarcoidosis"": 136,""schizophrenia"": 138,""thyroiditis"": 141,""varicose veins of lower extremities"": 139}}",TRUE,2287,"{""ethnicities"": {""African"": 119,""Any other Asian background"": 120,""Any other Black background"": 104,""Any other mixed background"": 92,""Any other white background"": 114,""Asian or Asian British"": 125,""Bangladeshi"": 96,""Black or Black British"": 131,""British"": 114,""Caribbean"": 127,""Chinese"": 100,""Indian"": 110,""Irish"": 111,""Mixed"": 127,""Other ethnic group"": 116,""Pakistani"": 115,""White"": 105,""White and Asian"": 114,""White and Black African"": 115,""White and Black Caribbean"": 132}}",TRUE,1597,"{""genders"": {""female"": 1271,""male"": 1233}}",TRUE,1597,"{""locations"": {""England"": 322,""Northern Ireland"": 317,""Republic of Ireland"": 311,""Scotland"": 308,""Wales"": 339}}",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,CINECA_synthetic_cohort_UK1,P65Y,P18Y,,,,,,,,,,,,NCIT:C16576|NCIT:C20197,female|male,GAZ:00150372,UK,,,,,,,,,,,,,,CINECA synthetic cohort UK1
cohortDataTypes|id,cohortDataTypes|label,cohortDesign|id,cohortDesign|label,cohortSize,cohortType,collectionEvents|eventAgeRange|availability,collectionEvents|eventAgeRange|availabilityCount,collectionEvents|eventAgeRange|distribution,collectionEvents|eventDataTypes|availability,collectionEvents|eventDataTypes|availabilityCount,collectionEvents|eventDataTypes|distribution,collectionEvents|eventDiseases|availability,collectionEvents|eventDiseases|availabilityCount,collectionEvents|eventDiseases|distribution,collectionEvents|eventEthnicities|availability,collectionEvents|eventEthnicities|availabilityCount,collectionEvents|eventEthnicities|distribution,collectionEvents|eventGenders|availability,collectionEvents|eventGenders|availabilityCount,collectionEvents|eventGenders|distribution,collectionEvents|eventLocations|availability,collectionEvents|eventLocations|availabilityCount,collectionEvents|eventLocations|distribution,collectionEvents|eventPhenotypes|availability,collectionEvents|eventPhenotypes|availabilityCount,collectionEvents|eventPhenotypes|distribution,collectionEvents|eventTimeline|availability,collectionEvents|eventTimeline|availabilityCount,collectionEvents|eventTimeline|distribution,exclusionCriteria|ageRange|end|iso8601duration,exclusionCriteria|ageRange|start|iso8601duration,exclusionCriteria|diseaseConditions|ageOfOnset,exclusionCriteria|diseaseConditions|diseaseCode|id,exclusionCriteria|diseaseConditions|diseaseCode|label,exclusionCriteria|diseaseConditions|familyHistory,exclusionCriteria|diseaseConditions|notes,exclusionCriteria|diseaseConditions|severity|id,exclusionCriteria|diseaseConditions|severity|label,exclusionCriteria|diseaseConditions|stage|id,exclusionCriteria|diseaseConditions|stage|label,exclusionCriteria|ethnicities|id,exclusionCriteria|ethnicities|label,exclusionCriteria|genders|id,exclusionCriteria|genders|label,exclusionCriteria|locations|id,exclusionCriteria|locations|label,exclusionCriteria|phenotypicConditions|evidence|evidenceCode,exclusionCriteria|phenotypicConditions|evidence|reference,exclusionCriteria|phenotypicConditions|excluded,exclusionCriteria|phenotypicConditions|featureType|id,exclusionCriteria|phenotypicConditions|featureType|label,exclusionCriteria|phenotypicConditions|modifiers,exclusionCriteria|phenotypicConditions|notes,exclusionCriteria|phenotypicConditions|onset,exclusionCriteria|phenotypicConditions|resolution,exclusionCriteria|phenotypicConditions|severity|id,exclusionCriteria|phenotypicConditions|severity|label,id,inclusionCriteria|ageRange|end|iso8601duration,inclusionCriteria|ageRange|start|iso8601duration,inclusionCriteria|diseaseConditions|ageOfOnset,inclusionCriteria|diseaseConditions|diseaseCode|id,inclusionCriteria|diseaseConditions|diseaseCode|label,inclusionCriteria|diseaseConditions|familyHistory,inclusionCriteria|diseaseConditions|notes,inclusionCriteria|diseaseConditions|severity|id,inclusionCriteria|diseaseConditions|severity|label,inclusionCriteria|diseaseConditions|stage|id,inclusionCriteria|diseaseConditions|stage|label,inclusionCriteria|ethnicities|id,inclusionCriteria|ethnicities|label,inclusionCriteria|genders|id,inclusionCriteria|genders|label,inclusionCriteria|locations|id,inclusionCriteria|locations|label,inclusionCriteria|phenotypicConditions|evidence|evidenceCode,inclusionCriteria|phenotypicConditions|evidence|reference,inclusionCriteria|phenotypicConditions|excluded,inclusionCriteria|phenotypicConditions|featureType|id,inclusionCriteria|phenotypicConditions|featureType|label,inclusionCriteria|phenotypicConditions|modifiers,inclusionCriteria|phenotypicConditions|notes,inclusionCriteria|phenotypicConditions|onset,inclusionCriteria|phenotypicConditions|resolution,inclusionCriteria|phenotypicConditions|severity|id,inclusionCriteria|phenotypicConditions|severity|label,name
,,,,,study-defined,,,,,,,TRUE,1705,"{""diseases"": {""acute bronchitis"": 121,""agranulocytosis"": 111,""asthma"": 134,""bipolar affective disorder"": 134,""cardiomyopathy"": 133,""dental caries"": 139,""eating disorders"": 134,""fibrosis and cirrhosis of liver"": 132,""gastro-oesophageal reflux disease"": 140,""haemorrhoids"": 127,""influenza due to certain identified influenza virus"": 135,""insulin-dependent diabetes mellitus"": 165,""iron deficiency anaemia"": 142,""multiple sclerosis"": 125,""obesity"": 136,""sarcoidosis"": 136,""schizophrenia"": 138,""thyroiditis"": 141,""varicose veins of lower extremities"": 139}}",TRUE,2287,"{""ethnicities"": {""African"": 119,""Any other Asian background"": 120,""Any other Black background"": 104,""Any other mixed background"": 92,""Any other white background"": 114,""Asian or Asian British"": 125,""Bangladeshi"": 96,""Black or Black British"": 131,""British"": 114,""Caribbean"": 127,""Chinese"": 100,""Indian"": 110,""Irish"": 111,""Mixed"": 127,""Other ethnic group"": 116,""Pakistani"": 115,""White"": 105,""White and Asian"": 114,""White and Black African"": 115,""White and Black Caribbean"": 132}}",TRUE,1597,"{""genders"": {""female"": 1271,""male"": 1233}}",TRUE,1597,"{""locations"": {""England"": 322,""Northern Ireland"": 317,""Republic of Ireland"": 311,""Scotland"": 308,""Wales"": 339}}",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,CINECA_synthetic_cohort_UK1,P65Y,P18Y,,,,,,,,,,,,NCIT:C16576|NCIT:C20197,female|male,GAZ:00150372,UK,,,,,,,,,,,,CINECA synthetic cohort UK1
Loading

0 comments on commit 10a2609

Please sign in to comment.