From 0d0680db59107ea55b241aa3050cf30bdf0ba1a4 Mon Sep 17 00:00:00 2001 From: Martin Cech Date: Sun, 18 Aug 2024 14:12:21 +0200 Subject: [PATCH] add chainsort from ucsc/kent MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit standardise tool id, simplify params Co-authored-by: Björn Grüning fix test match folder name and tool id match folder and tool id --- tools/ucsc_tools/ucsc_chainsort/.shed.yml | 9 ++ tools/ucsc_tools/ucsc_chainsort/chainsort.xml | 55 +++++++ .../ucsc_chainsort/test-data/input.chain | 150 +++++++++++++++++ .../test-data/sorted_query.chain | 151 ++++++++++++++++++ .../test-data/sorted_score.chain | 151 ++++++++++++++++++ 5 files changed, 516 insertions(+) create mode 100644 tools/ucsc_tools/ucsc_chainsort/.shed.yml create mode 100644 tools/ucsc_tools/ucsc_chainsort/chainsort.xml create mode 100644 tools/ucsc_tools/ucsc_chainsort/test-data/input.chain create mode 100644 tools/ucsc_tools/ucsc_chainsort/test-data/sorted_query.chain create mode 100644 tools/ucsc_tools/ucsc_chainsort/test-data/sorted_score.chain diff --git a/tools/ucsc_tools/ucsc_chainsort/.shed.yml b/tools/ucsc_tools/ucsc_chainsort/.shed.yml new file mode 100644 index 00000000000..0e268a8518b --- /dev/null +++ b/tools/ucsc_tools/ucsc_chainsort/.shed.yml @@ -0,0 +1,9 @@ +name: ucsc_chainsort +owner: iuc +description: Sort chains. By default sorts by score. +long_description: Note this loads all chains into memory, so it is not suitable for large sets. Instead, run chainSort on multiple small files, followed by chainMergeSort. +homepage_url: http://hgdownload.cse.ucsc.edu/admin/exe/ +remote_repository_url: https://github.com/galaxyproject/tools-iuc/tree/master/tools/ucsc-tools/chainsort +type: unrestricted +categories: + - Sequence Analysis diff --git a/tools/ucsc_tools/ucsc_chainsort/chainsort.xml b/tools/ucsc_tools/ucsc_chainsort/chainsort.xml new file mode 100644 index 00000000000..0e9150572ba --- /dev/null +++ b/tools/ucsc_tools/ucsc_chainsort/chainsort.xml @@ -0,0 +1,55 @@ + + + Sort chains + + + 455 + + + UCSC_Genome_Browser_Utilities + + + ucsc-chainsort + + + + + + + + + + + + + + + + + + + + + + + + + + + + 10.1093/bib/bbs038 + + diff --git a/tools/ucsc_tools/ucsc_chainsort/test-data/input.chain b/tools/ucsc_tools/ucsc_chainsort/test-data/input.chain new file mode 100644 index 00000000000..ce9f4d1e4f0 --- /dev/null +++ b/tools/ucsc_tools/ucsc_chainsort/test-data/input.chain @@ -0,0 +1,150 @@ +##matrix=axtChain 16 91,-114,-31,-123,-114,100,-125,-31,-31,-125,100,-114,-123,-31,-114,91 +##gapPenalties=axtChain O=400 E=30 +chain 64329 hg38.chr20:10004760-11615810 1611050 + 879848 882025 mm39.chr2:136351215-137822786 1471571 - 648557 650527 2 +24 0 1 +52 12 0 +46 0 6 +24 1 0 +63 0 1 +6 0 9 +78 14 0 +25 0 4 +14 4 0 +4 19 0 +12 9 0 +43 77 0 +19 2 0 +37 2 0 +61 1 0 +16 0 4 +74 0 7 +51 1 0 +30 17 0 +18 1 0 +28 0 2 +125 0 13 +8 0 14 +8 0 3 +88 5 0 +35 0 1 +5 0 1 +6 0 7 +21 0 1 +27 4 0 +64 10 0 +46 2 0 +42 1 0 +54 0 1 +12 1 0 +115 3 0 +10 0 1 +27 1 0 +20 1 0 +42 21 0 +20 2 0 +50 11 0 +8 27 0 +45 1 0 +26 11 0 +5 3 0 +28 2 0 +34 1 0 +24 0 2 +39 1 0 +29 3 0 +68 17 0 +4 0 3 +29 + +chain 13332 hg38.chr20:10004760-11615810 1611050 + 266744 267005 mm39.chr2:136351215-137822786 1471571 - 1226958 1227276 3 +17 0 14 +111 2 0 +42 3 0 +39 0 47 +31 0 1 +16 + +chain 9256 hg38.chr20:10004760-11615810 1611050 + 288278 288554 mm39.chr2:136351215-137822786 1471571 + 260289 260601 4 +34 8 0 +38 0 2 +9 0 17 +11 0 24 +15 0 1 +161 + +chain 1201 hg38.chr20:10004760-11615810 1611050 + 580524 580629 mm39.chr2:136351215-137822786 1471571 + 516292 516362 22 +14 3 0 +16 19 0 +19 13 0 +21 + +chain 1170 hg38.chr20:10004760-11615810 1611050 + 806859 806881 mm39.chr2:136351215-137822786 1471571 + 737566 737588 23 +22 + +chain 1030 hg38.chr20:10004760-11615810 1611050 + 1532147 1532161 mm39.chr2:136351215-137822786 1471571 + 1423235 1423249 24 +14 + +chain 1012 hg38.chr20:10004760-11615810 1611050 + 361832 361870 mm39.chr2:136351215-137822786 1471571 + 345076 345108 25 +20 6 0 +12 + +chain 6961 hg38.chr20:10004760-11615810 1611050 + 84589 84726 mm39.chr2:136351215-137822786 1471571 + 87751 87887 6 +83 1 0 +53 + +chain 6916 hg38.chr20:10004760-11615810 1611050 + 87669 87836 mm39.chr2:136351215-137822786 1471571 + 87101 87269 7 +58 0 1 +109 + +chain 5958 hg38.chr20:10004760-11615810 1611050 + 288132 288220 mm39.chr2:136351215-137822786 1471571 + 258580 258668 9 +88 + +chain 5873 hg38.chr20:10004760-11615810 1611050 + 796556 796658 mm39.chr2:136351215-137822786 1471571 + 722616 722717 10 +31 1 0 +70 + +chain 5421 hg38.chr20:10004760-11615810 1611050 + 890884 891032 mm39.chr2:136351215-137822786 1471571 - 646945 647095 11 +25 0 2 +123 + +chain 4864 hg38.chr20:10004760-11615810 1611050 + 1425778 1425898 mm39.chr2:136351215-137822786 1471571 + 1342678 1342798 12 +120 + +chain 4568 hg38.chr20:10004760-11615810 1611050 + 49202 49378 mm39.chr2:136351215-137822786 1471571 + 42266 42438 13 +99 4 0 +73 + +chain 2152 hg38.chr20:10004760-11615810 1611050 + 148667 148736 mm39.chr2:136351215-137822786 1471571 + 158262 158331 15 +10 1 0 +38 0 1 +20 + +chain 1838 hg38.chr20:10004760-11615810 1611050 + 469454 469477 mm39.chr2:136351215-137822786 1471571 + 409807 409830 16 +23 + +chain 1399 hg38.chr20:10004760-11615810 1611050 + 796671 796752 mm39.chr2:136351215-137822786 1471571 + 722717 722799 17 +30 5 0 +8 3 0 +27 0 9 +8 + +chain 1336 hg38.chr20:10004760-11615810 1611050 + 195854 195904 mm39.chr2:136351215-137822786 1471571 + 189780 189830 18 +50 + +chain 1280 hg38.chr20:10004760-11615810 1611050 + 398052 398107 mm39.chr2:136351215-137822786 1471571 + 364812 364861 19 +11 1 0 +7 0 1 +15 6 0 +15 + +chain 1248 hg38.chr20:10004760-11615810 1611050 + 195952 195990 mm39.chr2:136351215-137822786 1471571 + 189050 189087 20 +22 2 0 +5 0 1 +9 + +chain 1208 hg38.chr20:10004760-11615810 1611050 + 1121156 1121254 mm39.chr2:136351215-137822786 1471571 + 1062631 1062720 21 +9 0 1 +12 7 0 +12 4 0 +13 0 1 +41 \ No newline at end of file diff --git a/tools/ucsc_tools/ucsc_chainsort/test-data/sorted_query.chain b/tools/ucsc_tools/ucsc_chainsort/test-data/sorted_query.chain new file mode 100644 index 00000000000..f75b68f8fe1 --- /dev/null +++ b/tools/ucsc_tools/ucsc_chainsort/test-data/sorted_query.chain @@ -0,0 +1,151 @@ +##matrix=axtChain 16 91,-114,-31,-123,-114,100,-125,-31,-31,-125,100,-114,-123,-31,-114,91 +##gapPenalties=axtChain O=400 E=30 +chain 4568 hg38.chr20:10004760-11615810 1611050 + 49202 49378 mm39.chr2:136351215-137822786 1471571 + 42266 42438 13 +99 4 0 +73 + +chain 6916 hg38.chr20:10004760-11615810 1611050 + 87669 87836 mm39.chr2:136351215-137822786 1471571 + 87101 87269 7 +58 0 1 +109 + +chain 6961 hg38.chr20:10004760-11615810 1611050 + 84589 84726 mm39.chr2:136351215-137822786 1471571 + 87751 87887 6 +83 1 0 +53 + +chain 2152 hg38.chr20:10004760-11615810 1611050 + 148667 148736 mm39.chr2:136351215-137822786 1471571 + 158262 158331 15 +10 1 0 +38 0 1 +20 + +chain 1248 hg38.chr20:10004760-11615810 1611050 + 195952 195990 mm39.chr2:136351215-137822786 1471571 + 189050 189087 20 +22 2 0 +5 0 1 +9 + +chain 1336 hg38.chr20:10004760-11615810 1611050 + 195854 195904 mm39.chr2:136351215-137822786 1471571 + 189780 189830 18 +50 + +chain 5958 hg38.chr20:10004760-11615810 1611050 + 288132 288220 mm39.chr2:136351215-137822786 1471571 + 258580 258668 9 +88 + +chain 9256 hg38.chr20:10004760-11615810 1611050 + 288278 288554 mm39.chr2:136351215-137822786 1471571 + 260289 260601 4 +34 8 0 +38 0 2 +9 0 17 +11 0 24 +15 0 1 +161 + +chain 1012 hg38.chr20:10004760-11615810 1611050 + 361832 361870 mm39.chr2:136351215-137822786 1471571 + 345076 345108 25 +20 6 0 +12 + +chain 1280 hg38.chr20:10004760-11615810 1611050 + 398052 398107 mm39.chr2:136351215-137822786 1471571 + 364812 364861 19 +11 1 0 +7 0 1 +15 6 0 +15 + +chain 1838 hg38.chr20:10004760-11615810 1611050 + 469454 469477 mm39.chr2:136351215-137822786 1471571 + 409807 409830 16 +23 + +chain 1201 hg38.chr20:10004760-11615810 1611050 + 580524 580629 mm39.chr2:136351215-137822786 1471571 + 516292 516362 22 +14 3 0 +16 19 0 +19 13 0 +21 + +chain 5421 hg38.chr20:10004760-11615810 1611050 + 890884 891032 mm39.chr2:136351215-137822786 1471571 - 646945 647095 11 +25 0 2 +123 + +chain 64329 hg38.chr20:10004760-11615810 1611050 + 879848 882025 mm39.chr2:136351215-137822786 1471571 - 648557 650527 2 +24 0 1 +52 12 0 +46 0 6 +24 1 0 +63 0 1 +6 0 9 +78 14 0 +25 0 4 +14 4 0 +4 19 0 +12 9 0 +43 77 0 +19 2 0 +37 2 0 +61 1 0 +16 0 4 +74 0 7 +51 1 0 +30 17 0 +18 1 0 +28 0 2 +125 0 13 +8 0 14 +8 0 3 +88 5 0 +35 0 1 +5 0 1 +6 0 7 +21 0 1 +27 4 0 +64 10 0 +46 2 0 +42 1 0 +54 0 1 +12 1 0 +115 3 0 +10 0 1 +27 1 0 +20 1 0 +42 21 0 +20 2 0 +50 11 0 +8 27 0 +45 1 0 +26 11 0 +5 3 0 +28 2 0 +34 1 0 +24 0 2 +39 1 0 +29 3 0 +68 17 0 +4 0 3 +29 + +chain 5873 hg38.chr20:10004760-11615810 1611050 + 796556 796658 mm39.chr2:136351215-137822786 1471571 + 722616 722717 10 +31 1 0 +70 + +chain 1399 hg38.chr20:10004760-11615810 1611050 + 796671 796752 mm39.chr2:136351215-137822786 1471571 + 722717 722799 17 +30 5 0 +8 3 0 +27 0 9 +8 + +chain 1170 hg38.chr20:10004760-11615810 1611050 + 806859 806881 mm39.chr2:136351215-137822786 1471571 + 737566 737588 23 +22 + +chain 1208 hg38.chr20:10004760-11615810 1611050 + 1121156 1121254 mm39.chr2:136351215-137822786 1471571 + 1062631 1062720 21 +9 0 1 +12 7 0 +12 4 0 +13 0 1 +41 + +chain 13332 hg38.chr20:10004760-11615810 1611050 + 266744 267005 mm39.chr2:136351215-137822786 1471571 - 1226958 1227276 3 +17 0 14 +111 2 0 +42 3 0 +39 0 47 +31 0 1 +16 + +chain 4864 hg38.chr20:10004760-11615810 1611050 + 1425778 1425898 mm39.chr2:136351215-137822786 1471571 + 1342678 1342798 12 +120 + +chain 1030 hg38.chr20:10004760-11615810 1611050 + 1532147 1532161 mm39.chr2:136351215-137822786 1471571 + 1423235 1423249 24 +14 + diff --git a/tools/ucsc_tools/ucsc_chainsort/test-data/sorted_score.chain b/tools/ucsc_tools/ucsc_chainsort/test-data/sorted_score.chain new file mode 100644 index 00000000000..fe5ecee5832 --- /dev/null +++ b/tools/ucsc_tools/ucsc_chainsort/test-data/sorted_score.chain @@ -0,0 +1,151 @@ +##matrix=axtChain 16 91,-114,-31,-123,-114,100,-125,-31,-31,-125,100,-114,-123,-31,-114,91 +##gapPenalties=axtChain O=400 E=30 +chain 64329 hg38.chr20:10004760-11615810 1611050 + 879848 882025 mm39.chr2:136351215-137822786 1471571 - 648557 650527 2 +24 0 1 +52 12 0 +46 0 6 +24 1 0 +63 0 1 +6 0 9 +78 14 0 +25 0 4 +14 4 0 +4 19 0 +12 9 0 +43 77 0 +19 2 0 +37 2 0 +61 1 0 +16 0 4 +74 0 7 +51 1 0 +30 17 0 +18 1 0 +28 0 2 +125 0 13 +8 0 14 +8 0 3 +88 5 0 +35 0 1 +5 0 1 +6 0 7 +21 0 1 +27 4 0 +64 10 0 +46 2 0 +42 1 0 +54 0 1 +12 1 0 +115 3 0 +10 0 1 +27 1 0 +20 1 0 +42 21 0 +20 2 0 +50 11 0 +8 27 0 +45 1 0 +26 11 0 +5 3 0 +28 2 0 +34 1 0 +24 0 2 +39 1 0 +29 3 0 +68 17 0 +4 0 3 +29 + +chain 13332 hg38.chr20:10004760-11615810 1611050 + 266744 267005 mm39.chr2:136351215-137822786 1471571 - 1226958 1227276 3 +17 0 14 +111 2 0 +42 3 0 +39 0 47 +31 0 1 +16 + +chain 9256 hg38.chr20:10004760-11615810 1611050 + 288278 288554 mm39.chr2:136351215-137822786 1471571 + 260289 260601 4 +34 8 0 +38 0 2 +9 0 17 +11 0 24 +15 0 1 +161 + +chain 6961 hg38.chr20:10004760-11615810 1611050 + 84589 84726 mm39.chr2:136351215-137822786 1471571 + 87751 87887 6 +83 1 0 +53 + +chain 6916 hg38.chr20:10004760-11615810 1611050 + 87669 87836 mm39.chr2:136351215-137822786 1471571 + 87101 87269 7 +58 0 1 +109 + +chain 5958 hg38.chr20:10004760-11615810 1611050 + 288132 288220 mm39.chr2:136351215-137822786 1471571 + 258580 258668 9 +88 + +chain 5873 hg38.chr20:10004760-11615810 1611050 + 796556 796658 mm39.chr2:136351215-137822786 1471571 + 722616 722717 10 +31 1 0 +70 + +chain 5421 hg38.chr20:10004760-11615810 1611050 + 890884 891032 mm39.chr2:136351215-137822786 1471571 - 646945 647095 11 +25 0 2 +123 + +chain 4864 hg38.chr20:10004760-11615810 1611050 + 1425778 1425898 mm39.chr2:136351215-137822786 1471571 + 1342678 1342798 12 +120 + +chain 4568 hg38.chr20:10004760-11615810 1611050 + 49202 49378 mm39.chr2:136351215-137822786 1471571 + 42266 42438 13 +99 4 0 +73 + +chain 2152 hg38.chr20:10004760-11615810 1611050 + 148667 148736 mm39.chr2:136351215-137822786 1471571 + 158262 158331 15 +10 1 0 +38 0 1 +20 + +chain 1838 hg38.chr20:10004760-11615810 1611050 + 469454 469477 mm39.chr2:136351215-137822786 1471571 + 409807 409830 16 +23 + +chain 1399 hg38.chr20:10004760-11615810 1611050 + 796671 796752 mm39.chr2:136351215-137822786 1471571 + 722717 722799 17 +30 5 0 +8 3 0 +27 0 9 +8 + +chain 1336 hg38.chr20:10004760-11615810 1611050 + 195854 195904 mm39.chr2:136351215-137822786 1471571 + 189780 189830 18 +50 + +chain 1280 hg38.chr20:10004760-11615810 1611050 + 398052 398107 mm39.chr2:136351215-137822786 1471571 + 364812 364861 19 +11 1 0 +7 0 1 +15 6 0 +15 + +chain 1248 hg38.chr20:10004760-11615810 1611050 + 195952 195990 mm39.chr2:136351215-137822786 1471571 + 189050 189087 20 +22 2 0 +5 0 1 +9 + +chain 1208 hg38.chr20:10004760-11615810 1611050 + 1121156 1121254 mm39.chr2:136351215-137822786 1471571 + 1062631 1062720 21 +9 0 1 +12 7 0 +12 4 0 +13 0 1 +41 + +chain 1201 hg38.chr20:10004760-11615810 1611050 + 580524 580629 mm39.chr2:136351215-137822786 1471571 + 516292 516362 22 +14 3 0 +16 19 0 +19 13 0 +21 + +chain 1170 hg38.chr20:10004760-11615810 1611050 + 806859 806881 mm39.chr2:136351215-137822786 1471571 + 737566 737588 23 +22 + +chain 1030 hg38.chr20:10004760-11615810 1611050 + 1532147 1532161 mm39.chr2:136351215-137822786 1471571 + 1423235 1423249 24 +14 + +chain 1012 hg38.chr20:10004760-11615810 1611050 + 361832 361870 mm39.chr2:136351215-137822786 1471571 + 345076 345108 25 +20 6 0 +12 +