# Image for the quip_reports command-line report generators.
#
# The container stays alive through run.sh, and the reports are started on
# demand with `docker exec quip-reports images ...` or
# `docker exec quip-reports annotations ...`.

# Pinned: the wrapper scripts call `python3.6`, which is only packaged for the
# 18.04 release. An unpinned `ubuntu` tag follows the newest LTS, where
# `apt-get install python3.6` fails.
FROM ubuntu:18.04

# One layer for update + install so a cached `apt-get update` is never paired
# with a newer package list. DEBIAN_FRONTEND keeps any package configuration
# prompt from blocking the build.
RUN apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y \
        git \
        openslide-tools \
        python3-pip \
        python3.6 \
        vim \
    && rm -rf /var/lib/apt/lists/*

COPY ./*.py /opt/
COPY requirements.txt /opt/

# Expose the wrapper scripts as `images` and `annotations` on the PATH.
COPY ./images.sh /usr/bin/images
COPY ./annotations.sh /usr/bin/annotations
RUN chmod 0755 /usr/bin/images /usr/bin/annotations

RUN pip3 install -r /opt/requirements.txt

COPY run.sh /tmp/run.sh
RUN chmod 755 /tmp/run.sh

CMD ["sh", "/tmp/run.sh"]
"""Write a CSV report of the annotations attached to every image of one collection.

Usage: ``annotations.py username password "collection name"``

For each image of the collection the report lists the segmentation execution
IDs, the heatmap execution IDs and the featuremap IDs, one per row. Each kind
of ID is written in its own column, and ``None`` is written when an image has
no entry of that kind.
"""
import csv
import datetime
import re
import sys

from mongoapi import heatmaps, segmentations
from pathdbapi import MyApi

HOST = "http://quip-pathdb"
OUT_DIR = '/data/reports/'
HEADER = ['Collection', 'Study ID', 'Subject ID', 'Image ID',
          'Segmentation ExecID', 'Heatmap ExecID', 'Featuremap ID']


def usage():
    """Print the command-line synopsis and exit with status 1."""
    print('Usage: ' + sys.argv[0] + ' [username] [password] [collection name]')
    sys.exit(1)


def _write_ids(csv_writer, image_columns, column_offset, ids):
    """Write one row per ID, or a single 'None' row when there are no IDs.

    ``column_offset`` is the number of blank (single space) cells placed
    between the image columns and the ID, so that each kind of ID lands in
    its own report column.
    """
    padding = [' '] * column_offset
    for value in (ids or ['None']):
        csv_writer.writerow(image_columns + padding + [value])


def main():
    """Parse the command line, query PathDB/Mongo and write the CSV report."""
    if len(sys.argv) != 4:
        usage()
    username, password, collection = sys.argv[1:4]

    # This report works on a single collection only. Compare the whole
    # argument: a substring test would also reject any collection whose name
    # merely contains the letters "all".
    if collection.strip().lower() == 'all':
        usage()

    api = MyApi(HOST, username, password)

    collection_id, collection_name = api.get_collection_info(collection)
    if len(collection_name) == 0:
        print("Could not find collection:", collection)
        sys.exit(1)

    images_url = '/listofimages/' + str(collection_id) + '?_format=json'
    name = re.sub(r'\W+', '', collection_name).lower()
    stamp = datetime.datetime.now().strftime('%Y%m%d_%H%M%S')
    out_path = OUT_DIR + 'annotations_' + name + '_' + stamp + '.csv'

    # newline='' lets the csv module control line endings itself.
    with open(out_path, mode='w', newline='') as csv_file:
        csv_writer = csv.writer(csv_file, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
        csv_writer.writerow(HEADER)

        first_nid = None
        page = 0
        while True:
            page += 1

            # NOTE(review): the first request carries no page parameter and
            # the second one asks for page=2. If the server numbers pages
            # from zero this skips a page -- confirm against the PathDB view.
            url = images_url
            if page > 1:
                url += '&page=' + str(page)

            response = api.get_data(url)
            # An empty list ends the listing; None means the server did not
            # answer with JSON. Either way there is nothing more to read.
            if not response:
                break

            # Stop when a later page starts with the same image as the first
            # page, i.e. the listing has started to repeat.
            page_nid = response[0]['nid'][0]['value']
            if page == 1:
                first_nid = page_nid
            elif page_nid == first_nid:
                break

            for r in response:
                nid = r['nid'][0]['value']
                imageid = r['imageid'][0]['value']
                studyid = r['studyid'][0]['value']
                subjectid = r['clinicaltrialsubjectid'][0]['value']
                image_columns = [collection_name, studyid, subjectid, imageid]

                _write_ids(csv_writer, image_columns, 0,
                           segmentations(nid, imageid, studyid, subjectid))
                _write_ids(csv_writer, image_columns, 1,
                           heatmaps(nid, imageid, studyid, subjectid))
                _write_ids(csv_writer, image_columns, 2,
                           api.get_featuremaps(nid))


if __name__ == '__main__':
    main()
    sys.exit(0)
"""Write a CSV report listing the images of one collection, or of all collections.

Usage: ``images.py username password "collection name"``

Pass ``all`` instead of a collection name to list the images of every
collection. Each row holds Collection, Study ID, Subject ID, Image ID and the
creation date of the image.
"""
import csv
import datetime
import re
import sys

from pathdbapi import MyApi

HOST = "http://quip-pathdb"
OUT_DIR = '/data/reports/'
HEADER = ['Collection', 'Study ID', 'Subject ID', 'Image ID', 'Date']


def usage():
    """Print the command-line synopsis and exit with status 1."""
    print('Usage: ' + sys.argv[0] + ' [username] [password] [collection name | "all"]')
    sys.exit(1)


def _collection_name_of(record, lookup_table):
    """Return the collection name of an image record, or '' when it has none.

    An image without a collection reference, or with a reference that is not
    in ``lookup_table``, yields an empty name so that one odd record cannot
    abort a report that is already half written.
    """
    references = record.get('field_collection') or []
    if not references:
        return ''
    return lookup_table.get(references[0]['target_id'], '')


def main():
    """Parse the command line, query PathDB and write the CSV report."""
    if len(sys.argv) != 4:
        usage()
    username, password, collection = sys.argv[1:4]

    api = MyApi(HOST, username, password)

    # Compare the whole argument: a substring test would treat any collection
    # whose name merely contains the letters "all" as a request for everything.
    everything = collection.strip().lower() == 'all'
    lookup_table = {}
    collection_name = ''

    if everything:
        images_url = '/listofimages?_format=json'
        lookup_table = api.get_collection_lookup_table()
        name = 'all'
    else:
        collection_id, collection_name = api.get_collection_info(collection)
        if len(collection_name) == 0:
            print("Could not find collection:", collection)
            sys.exit(1)
        images_url = '/listofimages/' + str(collection_id) + '?_format=json'
        name = re.sub(r'\W+', '', collection_name).lower()

    stamp = datetime.datetime.now().strftime('%Y%m%d_%H%M%S')
    out_path = OUT_DIR + 'images_' + name + '_' + stamp + '.csv'

    # newline='' lets the csv module control line endings itself.
    with open(out_path, mode='w', newline='') as csv_file:
        csv_writer = csv.writer(csv_file, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
        csv_writer.writerow(HEADER)

        first_nid = None
        page = 0
        while True:
            page += 1

            # NOTE(review): the first request carries no page parameter and
            # the second one asks for page=2. If the server numbers pages
            # from zero this skips a page -- confirm against the PathDB view.
            url = images_url
            if page > 1:
                url += '&page=' + str(page)

            response = api.get_data(url)
            # An empty list ends the listing; None means the server did not
            # answer with JSON. Either way there is nothing more to read.
            if not response:
                break

            # Stop when a later page starts with the same image as the first
            # page, i.e. the listing has started to repeat.
            page_nid = response[0]['nid'][0]['value']
            if page == 1:
                first_nid = page_nid
            elif page_nid == first_nid:
                break

            for r in response:
                # "2019-01-31T12:00:00+00:00" -> "2019-01-31 12:00:00"
                datetime_str = r['created'][0]['value'].replace("T", " ")
                datetime_str = datetime_str.replace("+00:00", "")

                if everything:
                    collection_name = _collection_name_of(r, lookup_table)

                csv_writer.writerow([collection_name,
                                     r['studyid'][0]['value'],
                                     r['clinicaltrialsubjectid'][0]['value'],
                                     r['imageid'][0]['value'],
                                     datetime_str])


if __name__ == '__main__':
    main()
    sys.exit(0)
class MyApi:
    """Small client for the PathDB REST endpoints used by the report scripts.

    Logging in happens in the constructor. A failed login is reported on
    stdout and leaves ``cookie`` as ``None``; it does not raise.
    """
    host = None
    username = None
    password = None
    cookie = None

    def __init__(self, host, username, password):
        """Store the connection settings and log in to obtain session cookies.

        :param host: base URL of the PathDB server, without a trailing slash
        :param username: PathDB user name
        :param password: PathDB password
        """
        self.host = host
        self.username = username
        self.password = password

        self.cookie = self.get_cookie()
        if self.cookie is None:
            print("Exception: {}".format("Request for cookie failed."))

    def get_cookie(self):
        """Log in and return the cookies of the login response.

        :return: the cookie jar of the login response, or ``None`` when the
            request could not be sent or the server rejected the login
        """
        import json

        # json.dumps escapes quotes and backslashes in the credentials;
        # building the JSON text by hand produced an invalid body for them.
        auth = json.dumps({"name": self.username, "pass": self.password})
        try:
            r1 = requests.post(self.host + '/user/login?_format=json', data=auth)
        except Exception as e:
            # Deliberately broad: a failed login is reported, never raised.
            print("Exception: {}".format(e))
            return None

        if not r1.ok:
            print("Exception: Login failed with HTTP status {}".format(r1.status_code))
            return None
        return r1.cookies

    def get_data(self, url):
        """GET ``host + url`` and return the decoded JSON body.

        :param url: path and query string, starting with ``/``
        :return: the decoded JSON, or ``None`` when the response is not JSON
        """
        r2 = requests.get(self.host + url, cookies=self.cookie)
        # A response without a Content-Type header counts as "not JSON".
        content_type = r2.headers.get('Content-Type') or ''
        if 'json' in content_type:
            return r2.json()

        print("Didn't get json. Response headers: {}".format(r2.headers))
        return None

    def get_collection_info(self, collection):
        """Find a collection by name, ignoring case.

        An exact match wins. Otherwise the first collection whose name is
        contained in ``collection`` is used. Collections with an empty name
        never match.

        :param collection: collection name as typed by the user
        :return: ``(collection_id, collection_name)``, or ``(0, "")`` when no
            collection matches
        """
        wanted = collection.strip().lower()
        table = self.get_collection_lookup_table()

        for key, name in table.items():
            if str(name).strip().lower() == wanted:
                return key, name

        for key, name in table.items():
            candidate = str(name).strip().lower()
            if candidate and candidate in wanted:
                return key, name

        return 0, ""

    def get_collection_lookup_table(self):
        """Return a dict mapping collection ID to collection name.

        :return: the lookup table; empty when the server returned no data
        """
        response = self.get_data('/collections?_format=json')
        if not response:
            return {}

        return {r['tid'][0]['value']: r['name'][0]['value'] for r in response}

    def get_featuremaps(self, slide_id):
        """Return the set of featuremap IDs of a slide.

        For each map the execution ID is used. A map without an execution ID
        contributes its map type instead, and a map with neither is skipped.

        :param slide_id: node ID of the slide
        :return: set of IDs; empty when the server returned no data
        """
        my_set = set()

        response = self.get_data('/maps/' + str(slide_id) + '?_format=json')
        if not response:
            return my_set

        for r in response:
            exec_ids = r.get('execution_id') or []
            map_types = r.get('field_map_type') or []
            if exec_ids:
                my_set.add(exec_ids[0]['value'])
            elif map_types:
                my_set.add(map_types[0]['value'])

        return my_set