Skip to content

Commit

Permalink
Merge pull request #35 from umccr/update/add-plotting-script-to-illum…
Browse files Browse the repository at this point in the history
…ina-interop

Added plotting script to illumina interop container
  • Loading branch information
alexiswl authored Jun 4, 2024
2 parents 8f71b0b + a0c2602 commit f668fba
Show file tree
Hide file tree
Showing 2 changed files with 159 additions and 2 deletions.
15 changes: 13 additions & 2 deletions repositories/illumina-interop/1.3.1/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,25 @@ LABEL author="Alexis Lucattini" \
maintainer="[email protected]"

# Pinned version of the illumina-interop conda package installed in the RUN step below
ARG ILLUMINA_INTEROP_VERSION="1.3.1"
# Pinned versions of the pip packages installed in the RUN step below
ARG PANDAS_VERSION="2.2.2"
ARG MATPLOTLIB_VERSION="3.9.0"
ARG SEABORN_VERSION="0.13.2"

# Copy the plotting script into the conda bin directory without its .py suffix;
# it is marked executable at the end of the RUN step below
COPY interop_imaging_plot.py /opt/conda/bin/interop_imaging_plot

RUN conda config --prepend channels conda-forge && \
conda install -c bioconda illumina-interop=="${ILLUMINA_INTEROP_VERSION}" && \
conda install -y -c bioconda illumina-interop=="${ILLUMINA_INTEROP_VERSION}" && \
pip install --upgrade pip && \
pip install \
pandas=="${PANDAS_VERSION}" \
matplotlib=="${MATPLOTLIB_VERSION}" \
seaborn=="${SEABORN_VERSION}" && \
find /opt/conda/ -follow -type f -name '*.a' -delete && \
find /opt/conda/ -follow -type f -name '*.pyc' -delete && \
/opt/conda/bin/conda clean --yes \
--all \
--force-pkgs-dirs
--force-pkgs-dirs && \
chmod +x /opt/conda/bin/interop_imaging_plot

# Reset entrypoint to null for cwl
ENTRYPOINT []
Expand Down
146 changes: 146 additions & 0 deletions repositories/illumina-interop/1.3.1/interop_imaging_plot.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,146 @@
#!/usr/bin/env python3
from functools import reduce

# Imports
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import sys
import re

from pathlib import Path
from typing import Union, List


"""
Usage:
interop_imaging_plot <input_csv> <output_png> <run_name>
"""


def header_regex_match(header_name) -> List[str]:
    """
    Expand a grouped header of the form Name<item1;item2;item3> into one
    column name per item.

    'Name<item1;item2>' becomes ['Name_item1', 'Name_item2'].
    A header without a '<...>' group is returned as a single-element list.

    :param header_name: Column header; coerced to str before matching
    :return: List of expanded column names
    """
    # Coerce up front: the fallback already returned str(header_name), but
    # matching a non-string raised TypeError before that branch was reached
    header_name = str(header_name)

    # Check if header name is in the format Name<item1;item2;item3>
    header_match = re.match(r'(.*)<(.*)>', header_name)

    if header_match is None:
        return [header_name]

    # Prefix every item inside the angle brackets with the header name
    prefix, items = header_match.groups()
    return [f"{prefix}_{item}" for item in items.split(";")]


def read_csv(input_csv: Path) -> pd.DataFrame:
    """
    Read the input csv, and clean up the headers.

    Grouped headers such as '% Base<A;C;G;T>' are expanded with
    header_regex_match into one name per item
    ('% Base_A', '% Base_C', '% Base_G', '% Base_T') and used as the
    column names for the data rows.

    :param input_csv: Path to the csv; text after a '#' is treated as a comment
    :return: Data frame de-duplicated on 'Lane', '% Occupied' and
             '% Pass Filter', with 'Lane' converted to a categorical column
    """
    # Only the header row is needed here, so nrows=0 avoids parsing
    # every data row a second time
    raw_headers = pd.read_csv(
        input_csv,
        comment='#',
        header=0,
        nrows=0
    ).columns.tolist()

    # Flatten the per-header expansions into a single list of column names
    imaging_df_headers_list = [
        expanded_name
        for raw_header in raw_headers
        for expanded_name in header_regex_match(raw_header)
    ]

    # Read in data
    imaging_df = pd.read_csv(
        input_csv,
        # Skip comments
        comment='#',
        # Consume the original header row, it is replaced by names
        header=0,
        # Set our own header
        names=imaging_df_headers_list
    )

    # Keep one row per distinct (lane, occupied, pass filter) combination
    imaging_df = imaging_df.drop_duplicates(
        subset=['Lane', '% Occupied', '% Pass Filter']
    )

    # Categorical lanes are plotted as discrete hues
    return imaging_df.assign(
        Lane=lambda df: df['Lane'].astype('category')
    )


def plot_data(imaging_df: pd.DataFrame, output_png: Path, run_id: str) -> None:
    """
    Plot '% Pass Filter' against '% Occupied' as a seaborn scatterplot,
    coloured by lane, and save the figure to output_png.

    :param imaging_df: Data frame with 'Lane', '% Occupied' and '% Pass Filter' columns
    :param output_png: Path the figure is written to
    :param run_id: Run name shown in the plot title
    :return:
    """
    # Set grid style before the axes are created,
    # the style is not applied to axes that already exist
    sns.set_style('whitegrid')

    fig, ax = plt.subplots()

    try:
        # SNS Dot plot
        sns.scatterplot(
            x='% Occupied',
            y='% Pass Filter',
            data=imaging_df,
            hue='Lane',
            ax=ax,
            alpha=0.6
        )

        # Set title
        ax.set_title(f"Pct. Pass Filter vs. Pct. Occupied for run '{run_id}'")

        # Set axis labels
        ax.set_xlabel('% Occupied')
        ax.set_ylabel('% Pass Filter')

        # Both axes are percentages
        ax.set_xlim(left=0, right=100)
        ax.set_ylim(bottom=0, top=100)

        # Set legend
        ax.legend(title='Lane')

        # Save to the path passed in by the caller rather than re-reading sys.argv
        fig.savefig(output_png)
    finally:
        # Release the figure so repeated calls do not accumulate open figures
        plt.close(fig)


def main():
    """
    Command line entry point.

    Usage:
    interop_imaging_plot <input_csv> <output_png> <run_name>
    """
    # Exit with the usage text rather than a bare IndexError when arguments are missing
    if len(sys.argv) < 4:
        sys.exit("Usage: interop_imaging_plot <input_csv> <output_png> <run_name>")

    # Set io
    input_csv, output_png, run_id = sys.argv[1:4]

    # Read in data
    imaging_df = read_csv(Path(input_csv))

    # Plot data
    plot_data(imaging_df, Path(output_png), run_id)


if __name__ == "__main__":
    main()

0 comments on commit f668fba

Please sign in to comment.