Skip to content

Commit

Permalink
Add option to find regions with zeros - important for VGP where some …
Browse files Browse the repository at this point in the history
…large 200k+ regions on one or both haplotypes have absolutely no coverage from Pacbio hifi reads.

Possibly from bionano optical stuff - and it may not really exist since absolutely nothing maps there!
  • Loading branch information
fubar2 committed Sep 30, 2024
1 parent e9dbc13 commit e0d1157
Show file tree
Hide file tree
Showing 2 changed files with 45 additions and 9 deletions.
29 changes: 26 additions & 3 deletions tools/bigwig_outlier_bed/bigwig_outlier_bed.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,7 @@ def __init__(self, args):
self.bedouthilo = args.bedouthilo
self.tableoutfile = args.tableoutfile
self.bedwin = args.minwin
self.bedoutzero = args.bedoutzero
self.qlo = None
self.qhi = None
if args.outbeds != "outtab":
Expand All @@ -133,7 +134,7 @@ def __init__(self, args):
self.bwlabels += ["Nolabel"] * (nbw - nlab)
self.makeBed()

def processVals(self, bw, isTop):
def processVals(self, bw, isTop, isZero):
"""
idea from http://gregoryzynda.com/python/numpy/contiguous/interval/2019/11/29/contiguous-regions.html
Fast segmentation into regions by taking np.diff on the boolean array of over (under) cutpoint indicators in bwex.
Expand All @@ -143,6 +144,8 @@ def processVals(self, bw, isTop):
"""
if isTop:
bwex = np.r_[False, bw >= self.bwtop, False] # extend with 0s
elif isZero:
bwex = np.r_[False, bw == 0, False] # extend with 0s
else:
bwex = np.r_[False, bw <= self.bwbot, False]
bwexd = np.diff(bwex)
Expand Down Expand Up @@ -190,6 +193,7 @@ def makeTableRow(self, bw, bwlabel, chr):
def makeBed(self):
bedhi = []
bedlo = []
bedzero = []
restab = []
bwlabels = self.bwlabels
bwnames = self.bwnames
Expand Down Expand Up @@ -229,9 +233,24 @@ def makeBed(self):
+ histo
)
bw = bw[~np.isnan(bw)] # some have NaN if parts of a contig not covered
if self.bedoutzero is not None:
bwzero = self.processVals(bw, isTop=False, isZero=True)
for j, seg in enumerate(bwzero):
seglen = seg[1] - seg[0]
if seglen >= self.bedwin:
score = seglen
bedzero.append(
(
chr,
seg[0],
seg[1],
"%s_%d" % (bwlabel, score),
score,
)
)
if self.qhi is not None:
self.bwtop = np.quantile(bw, self.qhi)
bwhi = self.processVals(bw, isTop=True)
bwhi = self.processVals(bw, isTop=True, isZero=False)
for j, seg in enumerate(bwhi):
seglen = seg[1] - seg[0]
if seglen >= self.bedwin:
Expand All @@ -247,7 +266,7 @@ def makeBed(self):
)
if self.qlo is not None:
self.bwbot = np.quantile(bw, self.qlo)
bwlo = self.processVals(bw, isTop=False)
bwlo = self.processVals(bw, isTop=False, isZero=False)
for j, seg in enumerate(bwlo):
seglen = seg[1] - seg[0]
if seg[1] - seg[0] >= self.bedwin:
Expand Down Expand Up @@ -280,6 +299,9 @@ def makeBed(self):
t.write(stable)
t.write("\n")
some = False
if self.outbeds in ["outzero"]:
self.writeBed(bedzero, self.bedoutzeo)
some = True
if self.qlo:
if self.outbeds in ["outall", "outlo", "outlohi"]:
self.writeBed(bedlo, self.bedoutlo)
Expand Down Expand Up @@ -308,6 +330,7 @@ def makeBed(self):
a("--bedouthi", default=None)
a("--bedoutlo", default=None)
a("--bedouthilo", default=None)
a("--bedoutzero", default=None)
a("-w", "--bigwig", nargs="+")
a("-n", "--bigwiglabels", nargs="+")
a("-o", "--outbeds", default="outhilo", help="optional high and low combined bed")
Expand Down
25 changes: 19 additions & 6 deletions tools/bigwig_outlier_bed/bigwig_outlier_bed.xml
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
<tool name="Bigwig extremes to bed features" id="bigwig_outlier_bed" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="22.05">
<tool name="Bigwig outliers to bed features" id="bigwig_outlier_bed" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="22.05">
<description>Writes high and low bigwig runs as features in a bed file</description>
<macros>
<token name="@TOOL_VERSION@">0.2.0</token>
<token name="@PYTHON_VERSION@">3.12.3</token>
<token name="@VERSION_SUFFIX@">1</token>
<token name="@VERSION_SUFFIX@">2</token>
</macros>
<edam_topics>
<edam_topic>topic_0157</edam_topic>
Expand Down Expand Up @@ -43,6 +43,9 @@
#if $outbeds in ['outlo', 'outall', 'outlohi']:
--bedoutlo '$bedoutlo'
#end if
#if $outbeds in ['outzero']:
--bedoutzero '$bedoutzero'
#end if
--minwin '$minwin'
#if $qhi:
--qhi '$qhi'
Expand All @@ -67,9 +70,10 @@
<option value="outhilo" selected="true">Make 1 bed output with both low and high regions</option>
<option value="outhi">Make 1 bed output with high regions only</option>
<option value="outlo">Make 1 bed output with low regions only</option>
<option value="outzero">Make 1 bed output for regions with contiguous zero values only</option>
<option value="outall">Make 3 bed outputs with low and high together in one, high in one and low in the other</option>
<option value="outlohi">Make 2 bed outputs with high in one and low in the other</option>
<option value="outtab">NO bed outputs. Report bigwig value distribution only</option>
<option value="outtab">NO bed outputs. Report bigwig value distribution table only</option>
</param>
<param name="tableout" type="select" label="Write a table showing contig statistics for each bigwig input" help="">
<option value="donotmake">Do not create this report</option>
Expand All @@ -86,6 +90,9 @@
<data name="bedoutlo" format="bed" label="Low bed">
<filter>outbeds in ["outall", "outlohi", "outlo"]</filter>
</data>
<data name="bedoutzero" format="bed" label="Zeros only bed">
<filter>outbeds in ["outzero"]</filter>
</data>
<data name="tableoutfile" format="txt" label="Contig statistics">
<filter>tableout == "create" or outbeds == "outtab"</filter>
</data>
Expand Down Expand Up @@ -150,9 +157,15 @@
<param name="qlo" value="0.5"/>
<param name="tableout" value="create"/>
</test>



<test expect_num_outputs="1">
<output name="bedoutzero" value="bedoutzero_sample" compare="diff" lines_diff="0"/>
<param name="outbeds" value="outzero"/>
<param name="bigwig" value="fake.bigwig,bigwig_sample"/>
<param name="minwin" value="2"/>
<param name="qhi" value=""/>
<param name="qlo" value=""/>
<param name="tableout" value="donotmake"/>
</test>
</tests>
<help><![CDATA[
Expand Down

0 comments on commit e0d1157

Please sign in to comment.