Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add chainsort from ucsc/kent #6252

Merged
merged 1 commit into from
Aug 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions tools/ucsc_tools/ucsc_chainsort/.shed.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Tool Shed repository metadata for the chainSort wrapper.
name: ucsc_chainsort
owner: iuc
description: Sort chains. By default sorts by score.
long_description: Note this loads all chains into memory, so it is not suitable for large sets. Instead, run chainSort on multiple small files, followed by chainMergeSort.
homepage_url: http://hgdownload.cse.ucsc.edu/admin/exe/
# Must match the directory that actually holds this wrapper in the repository
# (tools/ucsc_tools/ucsc_chainsort), otherwise the link shown in the Tool Shed is dead.
remote_repository_url: https://github.com/galaxyproject/tools-iuc/tree/master/tools/ucsc_tools/ucsc_chainsort
type: unrestricted
categories:
- Sequence Analysis
55 changes: 55 additions & 0 deletions tools/ucsc_tools/ucsc_chainsort/chainsort.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
<tool id="ucsc_chainsort" name="chainSort" version="@TOOL_VERSION@+galaxy0" profile="21.05" license="MIT">
    <!-- Galaxy wrapper around the UCSC chainSort utility: takes one chain dataset and writes a sorted chain dataset. -->
    <description>Sort chains</description>
    <macros>
        <!-- Single source for the version string used by the tool version, the requirement and version_command. -->
        <token name="@TOOL_VERSION@">455</token>
    </macros>
    <xrefs>
        <xref type="bio.tools">UCSC_Genome_Browser_Utilities</xref>
    </xrefs>
    <requirements>
        <requirement type="package" version="@TOOL_VERSION@">ucsc-chainsort</requirement>
    </requirements>
    <!-- The version is echoed from the token rather than queried from the binary. -->
    <version_command><![CDATA[ echo "@TOOL_VERSION@" ]]></version_command>
    <!-- $sort_by expands to nothing (default: sort by score), "-target" or "-query"; it is left unquoted on purpose so the empty value does not become an empty argument. -->
    <command detect_errors="exit_code"><![CDATA[
chainSort
'$in_chain'
$sort_by
'$out'
    ]]></command>
    <inputs>
        <param name="in_chain" format="chain" type="data" label="Chain dataset" />
        <param name="sort_by" type="select" label="Sort">
            <!-- Option values are passed verbatim to the command line. -->
            <option value="" selected="true">by score</option>
            <option value="-target">on target start</option>
            <option value="-query">on query start</option>
        </param>
    </inputs>
    <outputs>
        <data name="out" format="chain" />
    </outputs>
    <tests>
        <!-- Default mode: sort by score. -->
        <test>
            <param name="in_chain" value="input.chain" />
            <output name="out" file="sorted_score.chain" />
        </test>
        <!-- Sort on query start. -->
        <test>
            <param name="in_chain" value="input.chain" />
            <param name="sort_by" value="-query" />
            <output name="out" file="sorted_query.chain" />
        </test>
        <!-- Sort on target start: exercises the remaining option; content is checked by assertion rather than against a fixture file, so ordering itself is not verified here. -->
        <test>
            <param name="in_chain" value="input.chain" />
            <param name="sort_by" value="-target" />
            <output name="out" ftype="chain">
                <assert_contents>
                    <has_text text="##matrix=axtChain" />
                    <has_text text="chain 64329 hg38.chr20:10004760-11615810" />
                    <has_text text="chain 1012 hg38.chr20:10004760-11615810" />
                </assert_contents>
            </output>
        </test>
    </tests>
    <help><![CDATA[
**What it does**

`chainSort`_ is a tool to sort chains.

Note this tool loads all chains into memory, so it is not suitable for large datasets. Instead, run chainSort on multiple small datasets, followed by chainMergeSort.

.. _chainSort: http://hgdownload.cse.ucsc.edu/admin/exe/linux.x86_64/FOOTER.txt
    ]]></help>
    <citations>
        <citation type="doi">10.1093/bib/bbs038</citation>
    </citations>
</tool>
150 changes: 150 additions & 0 deletions tools/ucsc_tools/ucsc_chainsort/test-data/input.chain
Original file line number Diff line number Diff line change
@@ -0,0 +1,150 @@
##matrix=axtChain 16 91,-114,-31,-123,-114,100,-125,-31,-31,-125,100,-114,-123,-31,-114,91
##gapPenalties=axtChain O=400 E=30
chain 64329 hg38.chr20:10004760-11615810 1611050 + 879848 882025 mm39.chr2:136351215-137822786 1471571 - 648557 650527 2
24 0 1
52 12 0
46 0 6
24 1 0
63 0 1
6 0 9
78 14 0
25 0 4
14 4 0
4 19 0
12 9 0
43 77 0
19 2 0
37 2 0
61 1 0
16 0 4
74 0 7
51 1 0
30 17 0
18 1 0
28 0 2
125 0 13
8 0 14
8 0 3
88 5 0
35 0 1
5 0 1
6 0 7
21 0 1
27 4 0
64 10 0
46 2 0
42 1 0
54 0 1
12 1 0
115 3 0
10 0 1
27 1 0
20 1 0
42 21 0
20 2 0
50 11 0
8 27 0
45 1 0
26 11 0
5 3 0
28 2 0
34 1 0
24 0 2
39 1 0
29 3 0
68 17 0
4 0 3
29

chain 13332 hg38.chr20:10004760-11615810 1611050 + 266744 267005 mm39.chr2:136351215-137822786 1471571 - 1226958 1227276 3
17 0 14
111 2 0
42 3 0
39 0 47
31 0 1
16

chain 9256 hg38.chr20:10004760-11615810 1611050 + 288278 288554 mm39.chr2:136351215-137822786 1471571 + 260289 260601 4
34 8 0
38 0 2
9 0 17
11 0 24
15 0 1
161

chain 1201 hg38.chr20:10004760-11615810 1611050 + 580524 580629 mm39.chr2:136351215-137822786 1471571 + 516292 516362 22
14 3 0
16 19 0
19 13 0
21

chain 1170 hg38.chr20:10004760-11615810 1611050 + 806859 806881 mm39.chr2:136351215-137822786 1471571 + 737566 737588 23
22

chain 1030 hg38.chr20:10004760-11615810 1611050 + 1532147 1532161 mm39.chr2:136351215-137822786 1471571 + 1423235 1423249 24
14

chain 1012 hg38.chr20:10004760-11615810 1611050 + 361832 361870 mm39.chr2:136351215-137822786 1471571 + 345076 345108 25
20 6 0
12

chain 6961 hg38.chr20:10004760-11615810 1611050 + 84589 84726 mm39.chr2:136351215-137822786 1471571 + 87751 87887 6
83 1 0
53

chain 6916 hg38.chr20:10004760-11615810 1611050 + 87669 87836 mm39.chr2:136351215-137822786 1471571 + 87101 87269 7
58 0 1
109

chain 5958 hg38.chr20:10004760-11615810 1611050 + 288132 288220 mm39.chr2:136351215-137822786 1471571 + 258580 258668 9
88

chain 5873 hg38.chr20:10004760-11615810 1611050 + 796556 796658 mm39.chr2:136351215-137822786 1471571 + 722616 722717 10
31 1 0
70

chain 5421 hg38.chr20:10004760-11615810 1611050 + 890884 891032 mm39.chr2:136351215-137822786 1471571 - 646945 647095 11
25 0 2
123

chain 4864 hg38.chr20:10004760-11615810 1611050 + 1425778 1425898 mm39.chr2:136351215-137822786 1471571 + 1342678 1342798 12
120

chain 4568 hg38.chr20:10004760-11615810 1611050 + 49202 49378 mm39.chr2:136351215-137822786 1471571 + 42266 42438 13
99 4 0
73

chain 2152 hg38.chr20:10004760-11615810 1611050 + 148667 148736 mm39.chr2:136351215-137822786 1471571 + 158262 158331 15
10 1 0
38 0 1
20

chain 1838 hg38.chr20:10004760-11615810 1611050 + 469454 469477 mm39.chr2:136351215-137822786 1471571 + 409807 409830 16
23

chain 1399 hg38.chr20:10004760-11615810 1611050 + 796671 796752 mm39.chr2:136351215-137822786 1471571 + 722717 722799 17
30 5 0
8 3 0
27 0 9
8

chain 1336 hg38.chr20:10004760-11615810 1611050 + 195854 195904 mm39.chr2:136351215-137822786 1471571 + 189780 189830 18
50

chain 1280 hg38.chr20:10004760-11615810 1611050 + 398052 398107 mm39.chr2:136351215-137822786 1471571 + 364812 364861 19
11 1 0
7 0 1
15 6 0
15

chain 1248 hg38.chr20:10004760-11615810 1611050 + 195952 195990 mm39.chr2:136351215-137822786 1471571 + 189050 189087 20
22 2 0
5 0 1
9

chain 1208 hg38.chr20:10004760-11615810 1611050 + 1121156 1121254 mm39.chr2:136351215-137822786 1471571 + 1062631 1062720 21
9 0 1
12 7 0
12 4 0
13 0 1
41
151 changes: 151 additions & 0 deletions tools/ucsc_tools/ucsc_chainsort/test-data/sorted_query.chain
Original file line number Diff line number Diff line change
@@ -0,0 +1,151 @@
##matrix=axtChain 16 91,-114,-31,-123,-114,100,-125,-31,-31,-125,100,-114,-123,-31,-114,91
##gapPenalties=axtChain O=400 E=30
chain 4568 hg38.chr20:10004760-11615810 1611050 + 49202 49378 mm39.chr2:136351215-137822786 1471571 + 42266 42438 13
99 4 0
73

chain 6916 hg38.chr20:10004760-11615810 1611050 + 87669 87836 mm39.chr2:136351215-137822786 1471571 + 87101 87269 7
58 0 1
109

chain 6961 hg38.chr20:10004760-11615810 1611050 + 84589 84726 mm39.chr2:136351215-137822786 1471571 + 87751 87887 6
83 1 0
53

chain 2152 hg38.chr20:10004760-11615810 1611050 + 148667 148736 mm39.chr2:136351215-137822786 1471571 + 158262 158331 15
10 1 0
38 0 1
20

chain 1248 hg38.chr20:10004760-11615810 1611050 + 195952 195990 mm39.chr2:136351215-137822786 1471571 + 189050 189087 20
22 2 0
5 0 1
9

chain 1336 hg38.chr20:10004760-11615810 1611050 + 195854 195904 mm39.chr2:136351215-137822786 1471571 + 189780 189830 18
50

chain 5958 hg38.chr20:10004760-11615810 1611050 + 288132 288220 mm39.chr2:136351215-137822786 1471571 + 258580 258668 9
88

chain 9256 hg38.chr20:10004760-11615810 1611050 + 288278 288554 mm39.chr2:136351215-137822786 1471571 + 260289 260601 4
34 8 0
38 0 2
9 0 17
11 0 24
15 0 1
161

chain 1012 hg38.chr20:10004760-11615810 1611050 + 361832 361870 mm39.chr2:136351215-137822786 1471571 + 345076 345108 25
20 6 0
12

chain 1280 hg38.chr20:10004760-11615810 1611050 + 398052 398107 mm39.chr2:136351215-137822786 1471571 + 364812 364861 19
11 1 0
7 0 1
15 6 0
15

chain 1838 hg38.chr20:10004760-11615810 1611050 + 469454 469477 mm39.chr2:136351215-137822786 1471571 + 409807 409830 16
23

chain 1201 hg38.chr20:10004760-11615810 1611050 + 580524 580629 mm39.chr2:136351215-137822786 1471571 + 516292 516362 22
14 3 0
16 19 0
19 13 0
21

chain 5421 hg38.chr20:10004760-11615810 1611050 + 890884 891032 mm39.chr2:136351215-137822786 1471571 - 646945 647095 11
25 0 2
123

chain 64329 hg38.chr20:10004760-11615810 1611050 + 879848 882025 mm39.chr2:136351215-137822786 1471571 - 648557 650527 2
24 0 1
52 12 0
46 0 6
24 1 0
63 0 1
6 0 9
78 14 0
25 0 4
14 4 0
4 19 0
12 9 0
43 77 0
19 2 0
37 2 0
61 1 0
16 0 4
74 0 7
51 1 0
30 17 0
18 1 0
28 0 2
125 0 13
8 0 14
8 0 3
88 5 0
35 0 1
5 0 1
6 0 7
21 0 1
27 4 0
64 10 0
46 2 0
42 1 0
54 0 1
12 1 0
115 3 0
10 0 1
27 1 0
20 1 0
42 21 0
20 2 0
50 11 0
8 27 0
45 1 0
26 11 0
5 3 0
28 2 0
34 1 0
24 0 2
39 1 0
29 3 0
68 17 0
4 0 3
29

chain 5873 hg38.chr20:10004760-11615810 1611050 + 796556 796658 mm39.chr2:136351215-137822786 1471571 + 722616 722717 10
31 1 0
70

chain 1399 hg38.chr20:10004760-11615810 1611050 + 796671 796752 mm39.chr2:136351215-137822786 1471571 + 722717 722799 17
30 5 0
8 3 0
27 0 9
8

chain 1170 hg38.chr20:10004760-11615810 1611050 + 806859 806881 mm39.chr2:136351215-137822786 1471571 + 737566 737588 23
22

chain 1208 hg38.chr20:10004760-11615810 1611050 + 1121156 1121254 mm39.chr2:136351215-137822786 1471571 + 1062631 1062720 21
9 0 1
12 7 0
12 4 0
13 0 1
41

chain 13332 hg38.chr20:10004760-11615810 1611050 + 266744 267005 mm39.chr2:136351215-137822786 1471571 - 1226958 1227276 3
17 0 14
111 2 0
42 3 0
39 0 47
31 0 1
16

chain 4864 hg38.chr20:10004760-11615810 1611050 + 1425778 1425898 mm39.chr2:136351215-137822786 1471571 + 1342678 1342798 12
120

chain 1030 hg38.chr20:10004760-11615810 1611050 + 1532147 1532161 mm39.chr2:136351215-137822786 1471571 + 1423235 1423249 24
14

Loading