-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #35 from umccr/update/add-plotting-script-to-illumina-interop
Added plotting script to illumina interop container
- Loading branch information
Showing
2 changed files
with
159 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -5,14 +5,25 @@ LABEL author="Alexis Lucattini" \ | |
maintainer="[email protected]" | ||
|
||
# Pinned tool versions; each can be overridden with --build-arg
ARG ILLUMINA_INTEROP_VERSION="1.3.1"
ARG PANDAS_VERSION="2.2.2"
ARG MATPLOTLIB_VERSION="3.9.0"
ARG SEABORN_VERSION="0.13.2"

# Ship the plotting script next to the conda binaries, without its .py suffix
COPY interop_imaging_plot.py /opt/conda/bin/interop_imaging_plot

# Single layer: install interop + plotting dependencies, then strip static
# libraries, bytecode and conda caches, and make the plotting script executable.
# 'conda install' needs -y because the build has no terminal to confirm on.
RUN conda config --prepend channels conda-forge && \
    conda install -y -c bioconda illumina-interop=="${ILLUMINA_INTEROP_VERSION}" && \
    pip install --upgrade pip && \
    pip install \
      pandas=="${PANDAS_VERSION}" \
      matplotlib=="${MATPLOTLIB_VERSION}" \
      seaborn=="${SEABORN_VERSION}" && \
    find /opt/conda/ -follow -type f -name '*.a' -delete && \
    find /opt/conda/ -follow -type f -name '*.pyc' -delete && \
    /opt/conda/bin/conda clean --yes \
      --all \
      --force-pkgs-dirs && \
    chmod +x /opt/conda/bin/interop_imaging_plot

# Reset entrypoint to null for cwl
ENTRYPOINT []
|
146 changes: 146 additions & 0 deletions
146
repositories/illumina-interop/1.3.1/interop_imaging_plot.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,146 @@ | ||
#!/usr/bin/env python3 | ||
from functools import reduce | ||
|
||
# Imports | ||
import matplotlib.pyplot as plt | ||
import seaborn as sns | ||
import pandas as pd | ||
import sys | ||
import re | ||
|
||
from pathlib import Path | ||
from typing import Union, List | ||
|
||
|
||
""" | ||
Usage: | ||
interop_imaging_plot <input_csv> <output_png> <run_name> | ||
""" | ||
|
||
|
||
def header_regex_match(header_name) -> List[str]:
    """
    Expand a grouped header of the form Name<item1;item2;item3> into one
    column name per item, each formatted as Name_item.

    For example '% Base<A;C;G;T>' becomes
    ['% Base_A', '% Base_C', '% Base_G', '% Base_T'].
    A header without a <...> group is returned as a single-element list.

    :param header_name: Column label from the csv header; coerced to str
    :return: List of expanded column names
    """
    # Coerce up front so a non-string label cannot raise a TypeError in the regex
    header_name = str(header_name)

    # Check if header name is in the format Name<item1;item2;item3>
    header_match = re.match(r'(.*)<(.*)>', header_name)

    if not header_match:
        return [header_name]

    # Prefix every item in the group with the header name
    prefix, items = header_match.groups()
    return [f"{prefix}_{item}" for item in items.split(";")]
|
||
|
||
def read_csv(input_csv: Path) -> pd.DataFrame:
    """
    Read the input csv, and clean up the headers.

    Grouped headers such as '% Base<A;C;G;T>' are expanded into one column
    name per item via header_regex_match before the data is loaded.
    Rows that repeat the same 'Lane', '% Occupied' and '% Pass Filter' values
    are dropped, and 'Lane' is converted to a categorical column.

    :param input_csv: Path to the csv file; lines starting with '#' are skipped
    :return: The cleaned data frame
    """
    # Only the header row is needed here, so don't parse any data rows
    raw_headers = pd.read_csv(
        input_csv,
        comment='#',
        header=0,
        nrows=0
    ).columns.tolist()

    # Flatten the per-column expansions into one list of names
    imaging_df_headers_list = [
        expanded_name
        for column_name in raw_headers
        for expanded_name in header_regex_match(column_name)
    ]

    # Read in data
    imaging_df = pd.read_csv(
        input_csv,
        # Skip comments
        comment='#',
        # Consume the original header row; it is replaced by names
        header=0,
        # Set our own header
        names=imaging_df_headers_list
    ).drop_duplicates(
        subset=['Lane', '% Occupied', '% Pass Filter']
    ).assign(
        Lane=lambda df: df['Lane'].astype('category')
    )

    return imaging_df
|
||
|
||
def plot_data(imaging_df: pd.DataFrame, output_png: Path, run_id: str) -> None:
    """
    Scatter-plot '% Pass Filter' against '% Occupied', coloured by 'Lane',
    and save the figure to output_png.

    :param imaging_df: Frame with 'Lane', '% Occupied' and '% Pass Filter' columns
    :param output_png: Path the figure is written to
    :param run_id: Run identifier shown in the plot title
    :return: None
    """
    # Set grid style before the axes exist; it is not applied to axes
    # that have already been created
    sns.set_style('whitegrid')

    fig, ax = plt.subplots()

    try:
        # SNS Dot plot
        sns.scatterplot(
            x='% Occupied',
            y='% Pass Filter',
            data=imaging_df,
            hue='Lane',
            ax=ax,
            alpha=0.6
        )

        # Set title
        ax.set_title(f"Pct. Pass Filter vs. Pct. Occupied for run '{run_id}'")

        # Set axis labels
        ax.set_xlabel('% Occupied')
        ax.set_ylabel('% Pass Filter')

        # Both axes are percentages
        ax.set_xlim(left=0, right=100)
        ax.set_ylim(bottom=0, top=100)

        # Set legend
        ax.legend(title='Lane')

        # Save to the path we were given rather than re-reading sys.argv
        fig.savefig(output_png)
    finally:
        # Release the figure so repeated calls don't accumulate open figures
        plt.close(fig)
|
||
|
||
def main():
    """
    Command line entry point.

    Usage:
    interop_imaging_plot <input_csv> <output_png> <run_name>

    Exits with a usage message if fewer than three arguments are given.
    """
    # Fail with a readable message instead of an IndexError traceback
    if len(sys.argv) < 4:
        sys.exit("Usage: interop_imaging_plot <input_csv> <output_png> <run_name>")

    # Set io
    input_csv, output_png, run_id = sys.argv[1:4]

    # Read in data
    imaging_df = read_csv(Path(input_csv))

    # Plot data
    plot_data(imaging_df, Path(output_png), run_id)


if __name__ == "__main__":
    main()