Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Pre commit black #8

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
repos:
  - repo: https://github.com/psf/black
    # Pin an immutable release tag. black deleted its mutable "stable"
    # branch, and modern pre-commit refuses mutable refs like branch names
    # (run `pre-commit autoupdate` to bump this pin).
    rev: 22.3.0
    hooks:
      - id: black
9 changes: 7 additions & 2 deletions csv_fhirify.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,10 @@ def double_parse_multi_value(code_value_list, code_value_delimiter):


def transform_transpose_row_info(
row_in, target_mappings_in, transpose_mappings_in, header_meta_dict_in=dict(),
row_in,
target_mappings_in,
transpose_mappings_in,
header_meta_dict_in=dict(),
):
options_for_none = ["None", "NA", "N/A", "?", "#VALUE!", "-9", ""]
mapped_row_list = []
Expand Down Expand Up @@ -174,7 +177,9 @@ def transform_multi_row_info(
)
else:
for value_mapping_option, value_type_mapping_option, date_mapping_option in zip(
value_mappings_in, value_type_mappings_in, date_mappings_in,
value_mappings_in,
value_type_mappings_in,
date_mappings_in,
):

if (
Expand Down
1 change: 0 additions & 1 deletion csv_ref_map.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,4 +129,3 @@ def ref_file_name_lookup(
],
ref_rename_col_backup="Brand name",
)

29 changes: 23 additions & 6 deletions data_mappings.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,10 @@
"bmi_and_gail.csv": {
"skip_first_line": True,
"first_line_metadata": False,
"multiple_columns_to_keep": [["barcode", "bmi"], ["barcode", "gail"],],
"multiple_columns_to_keep": [
["barcode", "bmi"],
["barcode", "gail"],
],
},
"birth_year_don_year.csv": {
"skip_first_line": False,
Expand All @@ -18,10 +21,21 @@
"multiple_columns_to_keep": [["barcode", "subject_id"]],
},
},
"target_mappings": {"SID": ["subject_id", "barcode"],},
"value_mappings": ["bmi", "gail",],
"value_type_mappings": ["kg/m2", "%",],
"date_mappings": ["donation year", "donation year",],
"target_mappings": {
"SID": ["subject_id", "barcode"],
},
"value_mappings": [
"bmi",
"gail",
],
"value_type_mappings": [
"kg/m2",
"%",
],
"date_mappings": [
"donation year",
"donation year",
],
"column_headers": [
"SID",
"VALUE",
Expand Down Expand Up @@ -848,7 +862,10 @@
"CONDITION_NAME": ["cancer", "disease"],
"START_DATE": ["donation year"],
},
"value_mappings": ["cancer", "disease",],
"value_mappings": [
"cancer",
"disease",
],
"date_mappings": [
"donation year",
"donation year",
Expand Down
12 changes: 10 additions & 2 deletions etl.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,11 +63,19 @@ def map_ynunk(ynunk_value):
def map_marital_status(marital_status):
    """Map a source marital-status string to a FHIR ``(system, code, display)``
    coding tuple.

    Returns None for unrecognized values. (The scraped diff had merged the
    pre- and post-black versions of the "Single" and "None" entries, leaving
    duplicate dictionary keys; this is the deduplicated post-image.)
    """
    return {
        "Married": ("http://hl7.org/fhir/v3/MaritalStatus", "M", "Married"),
        # NOTE(review): "Single" points at the v3/Race code system, not
        # v3/MaritalStatus — looks like a copy/paste slip, but preserved
        # as-is; confirm with the data owner before changing.
        "Single": (
            "http://hl7.org/fhir/v3/Race",
            "U",
            "unmarried",
        ),
        "Divorced": ("http://hl7.org/fhir/v3/MaritalStatus", "D", "Divorced"),
        "Widowed": ("http://hl7.org/fhir/v3/MaritalStatus", "W", "Widowed"),
        # Both "-1" and "None" sentinels in the source data map to NullFlavor UNK.
        "-1": ("http://hl7.org/fhir/v3/NullFlavor", "UNK", "unknown"),
        "None": (
            "http://hl7.org/fhir/v3/NullFlavor",
            "UNK",
            "unknown",
        ),
    }.get(marital_status, None)


Expand Down
238 changes: 147 additions & 91 deletions examples/research/case/etl.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,108 +3,164 @@
from fhir_petl.fhir import to_json
from fhir_petl.util import resolve, mkdirp, number, join, year, dateparser, ISOFormat


def map_race(race):
    """Map an upper-cased source race string to a FHIR ``(system, code,
    display)`` coding tuple from the HL7 v3 Race code system.

    Returns None for unrecognized values. (The scraped diff had merged the
    pre- and post-black dictionary bodies, producing duplicate keys and a
    missing comma; this is the clean post-image.)
    """
    return {
        "AMERICAN INDIAN AND ALASKA NATIVE": (
            "http://hl7.org/fhir/v3/Race",
            "1002-5",
            "American Indian or Alaska Native",
        ),
        "ASIAN": ("http://hl7.org/fhir/v3/Race", "2028-9", "Asian"),
        "BLACK OR AFRICAN AMERICAN": (
            "http://hl7.org/fhir/v3/Race",
            "2054-5",
            "Black or African American",
        ),
        # NOTE(review): HISPANIC OR LATINO maps to the "White" race code
        # (2106-3). In FHIR, ethnicity is a separate axis from race —
        # confirm this collapsing is intentional.
        "HISPANIC OR LATINO": ("http://hl7.org/fhir/v3/Race", "2106-3", "White"),
        "WHITE": ("http://hl7.org/fhir/v3/Race", "2106-3", "White"),
        "NATIVE HAWAIIAN AND OTHER PACIFIC ISLANDER": (
            "http://hl7.org/fhir/v3/Race",
            "2076-8",
            "Native Hawaiian or Other Pacific Islander",
        ),
    }.get(race, None)

patients = (etl.io.csv.fromcsv(resolve('work/Patient.csv'))
.fieldmap({
'id': 'ID',
'STUDYID': 'STUDYID',
'subject_id': ('STUDYID', lambda x: 'CASE-' + x),
'race': ('RACE', map_race),
'gender': ('SEX', {'F': 'female', 'M': 'male'}),
'birth_date': ('BIRTH_YR', year),
'index_date': ('INDEX_YEAR', dateparser('%Y', ISOFormat.DAY)),
'tag': lambda rec: ('subject-type', 'case')
}, True))

index = (patients
.cut('STUDYID', 'id', 'index_date')
.rename('id', 'subject'))

procedures = (etl.io.csv.fromcsv(resolve('work/Procedure.csv'))
.hashjoin(index, lkey='STUDYID', rkey='STUDYID')
.fieldmap({
'id': 'ID',
'date': lambda rec: rec['index_date'] + timedelta(int(rec['DAYS_VIS_INDEX'])),
'code': lambda rec: ('http://www.ama-assn.org/go/cpt', rec['PROC_CODE'], rec['NAME'].strip('" ')),
'subject': 'subject'
}, True))

conditions = (etl.io.csv.fromcsv(resolve('work/Condition.csv'))
.hashjoin(index, lkey='STUDYID', rkey='STUDYID')
.select('DX_CODE', lambda x: x)
.fieldmap({
'id': 'ID',
'onset': lambda rec: rec['index_date'] + timedelta(int(rec['DAYS_ADM_INDEX'])),
'code': lambda rec: ('http://hl7.org/fhir/sid/icd-9-cm', rec['DX_CODE']),
'note': lambda rec: join(rec['CARE_SETTING_TEXT'], rec['LOCATION_POINT_OF_CARE']),
'subject': 'subject'
}, True))

observations = (etl.io.csv.fromcsv(resolve('work/Observation.csv'))
.hashjoin(index, lkey='STUDYID', rkey='STUDYID')
.fieldmap({
'id': 'ID',
'date': lambda rec: rec['index_date'] + timedelta(int(rec['DAYS_VIS_INDEX'])),
'code': lambda rec: ('lab-text', rec['NAME'], rec['NAME']),
'value': lambda rec: number(rec['RESULT_VALUE']) if rec['RESULT_VALUE'] else (rec['CODED_NAME'] or None),
'subject': 'subject'
}, True)
.select('value', lambda x: x))

# Load the case-cohort Patient extract and map its raw CSV columns onto the
# field names the downstream to_json(..., "Patient", ...) call consumes.
patients = etl.io.csv.fromcsv(resolve("work/Patient.csv")).fieldmap(
    {
        "id": "ID",
        "STUDYID": "STUDYID",
        # Prefix distinguishes case subjects from other cohorts.
        "subject_id": ("STUDYID", lambda x: "CASE-" + x),
        "race": ("RACE", map_race),
        "gender": ("SEX", {"F": "female", "M": "male"}),
        "birth_date": ("BIRTH_YR", year),
        # Year-only value parsed to day precision via ISOFormat.DAY.
        "index_date": ("INDEX_YEAR", dateparser("%Y", ISOFormat.DAY)),
        "tag": lambda rec: ("subject-type", "case"),
    },
    True,
)

# Narrow lookup table (STUDYID -> subject reference + index_date) used to
# hashjoin every other extract back to its patient.
index = patients.cut("STUDYID", "id", "index_date").rename("id", "subject")

# Procedure extract: joined to the patient index, with visit dates expressed
# as day offsets (DAYS_VIS_INDEX) from each patient's index_date.
procedures = (
    etl.io.csv.fromcsv(resolve("work/Procedure.csv"))
    .hashjoin(index, lkey="STUDYID", rkey="STUDYID")
    .fieldmap(
        {
            "id": "ID",
            # Absolute date = patient index_date + day offset.
            "date": lambda rec: rec["index_date"]
            + timedelta(int(rec["DAYS_VIS_INDEX"])),
            # CPT coding tuple; NAME arrives quoted/padded, hence the strip.
            "code": lambda rec: (
                "http://www.ama-assn.org/go/cpt",
                rec["PROC_CODE"],
                rec["NAME"].strip('" '),
            ),
            "subject": "subject",
        },
        True,
    )
)

# Condition extract: joined to the patient index; rows without a diagnosis
# code are dropped by the identity-predicate select on DX_CODE.
conditions = (
    etl.io.csv.fromcsv(resolve("work/Condition.csv"))
    .hashjoin(index, lkey="STUDYID", rkey="STUDYID")
    # Keep only rows with a truthy (non-empty) DX_CODE.
    .select("DX_CODE", lambda x: x)
    .fieldmap(
        {
            "id": "ID",
            # Onset = patient index_date + admission-day offset.
            "onset": lambda rec: rec["index_date"]
            + timedelta(int(rec["DAYS_ADM_INDEX"])),
            "code": lambda rec: ("http://hl7.org/fhir/sid/icd-9-cm", rec["DX_CODE"]),
            "note": lambda rec: join(
                rec["CARE_SETTING_TEXT"], rec["LOCATION_POINT_OF_CARE"]
            ),
            "subject": "subject",
        },
        True,
    )
)

# Observation (lab) extract: joined to the patient index; value prefers the
# numeric RESULT_VALUE, falling back to CODED_NAME, and rows with neither are
# dropped by the trailing select.
observations = (
    etl.io.csv.fromcsv(resolve("work/Observation.csv"))
    .hashjoin(index, lkey="STUDYID", rkey="STUDYID")
    .fieldmap(
        {
            "id": "ID",
            "date": lambda rec: rec["index_date"]
            + timedelta(int(rec["DAYS_VIS_INDEX"])),
            # Free-text lab name doubles as code and display.
            "code": lambda rec: ("lab-text", rec["NAME"], rec["NAME"]),
            "value": lambda rec: number(rec["RESULT_VALUE"])
            if rec["RESULT_VALUE"]
            else (rec["CODED_NAME"] or None),
            "subject": "subject",
        },
        True,
    )
    # Discard observations that ended up with no usable value.
    .select("value", lambda x: x)
)


def medications(rec):
    """Build the list of medication codings for one MedicationDispense row.

    Returns four ``(system, code, display)`` tuples: the NDC and GPI codes
    (both displayed with the drug name) plus local drug-class and drug-group
    codings. Group/class values arrive decorated with ``*`` markers, which
    are stripped. (The scraped diff had merged the pre- and post-black
    bodies, duplicating the assignments and all four list entries; this is
    the deduplicated post-image.)
    """
    group = rec["DRUG_GROUP"].strip("*")
    clazz = rec["DRUG_CLASS"].strip("*")
    return [
        ("http://hl7.org/fhir/sid/ndc", rec["NDC_CODE"], rec["DRUG_NAME"]),
        ("urn:oid:2.16.840.1.113883.6.68", rec["GPI_CODE"], rec["DRUG_NAME"]),
        ("drug-class", clazz, clazz),
        ("drug-group", group, group),
    ]

med_dispenses = (etl.io.csv.fromcsv(resolve('work/MedicationDispense.csv'))
.hashjoin(index, lkey='CASE_ID', rkey='STUDYID')
.fieldmap({
'id': 'ID',
'date': lambda rec: rec['index_date'] + timedelta(int(rec['DAYS_VIS_INDEX'])),
'medication': medications,
'quantity': ('DISPENSE_AMOUNT', number),
'daysSupply': ('NUMBER_OF_DAYS_SUPPLY', number),
'subject': 'subject'
}, True))

# MedicationDispense extract: joined to the patient index. NOTE(review):
# unlike the other extracts, this file keys patients as CASE_ID rather than
# STUDYID — confirm that is the intended column.
med_dispenses = (
    etl.io.csv.fromcsv(resolve("work/MedicationDispense.csv"))
    .hashjoin(index, lkey="CASE_ID", rkey="STUDYID")
    .fieldmap(
        {
            "id": "ID",
            # Dispense date = patient index_date + visit-day offset.
            "date": lambda rec: rec["index_date"]
            + timedelta(int(rec["DAYS_VIS_INDEX"])),
            # Four codings per row (NDC, GPI, class, group) — see medications().
            "medication": medications,
            "quantity": ("DISPENSE_AMOUNT", number),
            "daysSupply": ("NUMBER_OF_DAYS_SUPPLY", number),
            "subject": "subject",
        },
        True,
    )
)


def medications2(rec):
    """Build the list of medication codings for one MedicationRequest row.

    Same shape as :func:`medications` but reads the MedicationRequest
    extract's column names (``NDC``/``GPI``/``ORDER_NAME`` instead of
    ``NDC_CODE``/``GPI_CODE``/``DRUG_NAME``). ``*`` decoration is stripped
    from group/class values. (The scraped diff had merged the pre- and
    post-black bodies, duplicating the assignments and all four list
    entries; this is the deduplicated post-image.)
    """
    group = rec["DRUG_GROUP"].strip("*")
    clazz = rec["DRUG_CLASS"].strip("*")
    return [
        ("http://hl7.org/fhir/sid/ndc", rec["NDC"], rec["ORDER_NAME"]),
        ("urn:oid:2.16.840.1.113883.6.68", rec["GPI"], rec["ORDER_NAME"]),
        ("drug-class", clazz, clazz),
        ("drug-group", group, group),
    ]

med_requests = (etl.io.csv.fromcsv(resolve('work/MedicationRequest.csv'))
.hashjoin(index, lkey='STUDYID', rkey='STUDYID')
.fieldmap({
'id': 'ID',
'date': lambda rec: rec['index_date'] + timedelta(int(rec['DAYS_ORDER_INDEX'])),
'medication': medications2,
'subject': 'subject'
}, True))


mkdirp(resolve('fhir'))
to_json(patients, 'Patient', resolve('fhir/Patient.json'))
to_json(procedures, 'Procedure', resolve('fhir/Procedure.json'))
to_json(conditions, 'Condition', resolve('fhir/Condition.json'))
to_json(observations, 'Observation', resolve('fhir/Observation.json'))
to_json(med_dispenses, 'MedicationDispense', resolve('fhir/MedicationDispense.json'))
to_json(med_requests, 'MedicationRequest', resolve('fhir/MedicationRequest.json'))

# MedicationRequest extract: joined to the patient index; order dates are
# day offsets (DAYS_ORDER_INDEX) from each patient's index_date.
med_requests = (
    etl.io.csv.fromcsv(resolve("work/MedicationRequest.csv"))
    .hashjoin(index, lkey="STUDYID", rkey="STUDYID")
    .fieldmap(
        {
            "id": "ID",
            "date": lambda rec: rec["index_date"]
            + timedelta(int(rec["DAYS_ORDER_INDEX"])),
            # NDC/GPI/class/group codings — see medications2().
            "medication": medications2,
            "subject": "subject",
        },
        True,
    )
)


# Serialize each mapped table as a FHIR resource bundle under fhir/.
mkdirp(resolve("fhir"))
to_json(patients, "Patient", resolve("fhir/Patient.json"))
to_json(procedures, "Procedure", resolve("fhir/Procedure.json"))
to_json(conditions, "Condition", resolve("fhir/Condition.json"))
to_json(observations, "Observation", resolve("fhir/Observation.json"))
to_json(med_dispenses, "MedicationDispense", resolve("fhir/MedicationDispense.json"))
to_json(med_requests, "MedicationRequest", resolve("fhir/MedicationRequest.json"))
Loading