From ad678f19e8b9a1f43cceff484bdd5084d403ef3f Mon Sep 17 00:00:00 2001 From: Jakob Nybo Nissen Date: Mon, 22 Jul 2024 12:47:35 +0200 Subject: [PATCH] Add tests for parsemarkers --- test/data/marker.fna | 2 ++ test/test_parsemarkers.py | 37 +++++++++++++++++++++++++++++++++++++ 2 files changed, 39 insertions(+) create mode 100644 test/data/marker.fna create mode 100644 test/test_parsemarkers.py diff --git a/test/data/marker.fna b/test/data/marker.fna new file mode 100644 index 00000000..e929c753 --- /dev/null +++ b/test/data/marker.fna @@ -0,0 +1,2 @@ +>abc +GTGGTGAGTGCTTGGACGTGTGTGGTGATGCTCATGGTGTCTTCCCCCCTTCCCCGTGTTGTTACCACCCACTCTACCCGCTCACACTTTCGAATACAAGGGTATTTTTCGAAATGGAAAAGCTGTCGAGCACTCACATGTTCGACAGCTTCATATGTCCTTGAACTACGTAACTTTTTCCTTACAATCAGCGCAAAGTCCAAAGATTTCGGCCTCGTGACTACTGAGAGCAAAGCCGTTTTTAGTGGCAATTTCCTGTGCCCATGTCTCTACTGGACCGCCATCGATTTCGACTGTGTGACCGCAATTGGTGCAGACCAAGTGATGGTGGTGTCCCTCGTCGTGGCATTGGCGGTACAGAGTTTCTCCACCTGTGACGGTAAGTACGTCGACTGCCCTGATGTCCGCGAGGGATTGGAGGGTTCGGTAGACGGTTGTGAGGCCGACGTTGTGTTCCCTGGTGGATAGCTCGTGATGGATTTCTTTGGCGGAAGCGAAGTTATCGATTTCCTCAAGAACGTCAATTACGGCTTTTCGCTGTCTGGTGCTTCGCACTCCCAGCTTCGGGGCAGAGCCTTGGCTGATGCGATTGATACCCACCGTTGATCCTCCTCAATGACACAAAATGTACTTCGATAGTCTACCCAGATGTGTCAACCCCTGCGTTTAGTGCCAGGAGAAGTATGTCGAGGACGAGTGGTTCGGCAAGTGAATAAGTCATTTGCCGGCCTTGACGTTCTGCGTCGACGATACCTGCAGTTTTAAGGACTTTGAGGTGTTGGCTCACTAGTGGTTGCGAACTTTTTACTAGCTTGACCAATTCGTGGACGTAGTGGGGCCTTTCGTTGAGGGCGAGGATGATTTCGATTCTTAAGGGGGAATCTAGTGCCCTAATCAGCAGGCTGATCGCTTTGATGTTTTTTGCTGTTGCAAGTTTCTGAAGCTCAGCTGATGCTGTGGATTCGGACTCTTCTGCAGGGGTGACGAAATTCCGATTTGAGTGTTGAGCCACGGGGAAGTCCTTCCGTCCTTAGGCTAGGTCTGGAATGGATCTAGCACGCTTGCTATTTTACCTTCTATATAAACCTTTTATGAGGGAAATGAAAAAATAGTTATTAGAACTAGTTTACATCGCGAAGGCCGCAAAATGACGGGGTCAGCGGAAGCAACATCGTTAGTTGGGCTAGGATTGGTTGGGTATGTCCTAAAAGGGACGGTTATTTTTTCATTCGACGTGGAGGAGAGCATCCGACGTGGCTCAGCAATCGATCATCGACACCGTGGTTAACCTGTGTAAACGACGTGGACTGGTGTACCCCTGTGGTGAGATCTACGGCGGTACCCGCTCTGCGTGGGACTACGGCCCGCTGGGTGTGGAGCTGAAGGAAAACATCAAGCGCCAGTGGTGGCGTTCTATGGTTACTTCCCGCCCAGAT
GTTGTGGGTGTTGATACTTCTGTCATCCTTCCTCGCCAGGTGTGGGTAACTTCCGGCCACGTTGAGGTCTTCACTGACCCACTGGTTGAGTCTTTGAACACCCACAAGCGTTACCGTGCGGACCACCTGCTGGAGCAGTACGAAGAGAAGCATGGTCACCCACCTGTAAACGGCTTGGCTGACATCAACGATCCAGAGACCGGCCAGCCAGGTAACTGGACTGAGCCTAAGGCGTTCTCTGGTCTTCTGAAGACTTTCTTGGGACCTGTGGACGACGAAGAGGGTCTGCACTACCTGCGCCCTGAAACTGCTCAGGGTATCTTCGTGAACTTCAAGAACGTGATGAACACTTCACGCATGAAGCCACCTTTCGGTATCGCGAACATCGGTAAGTCTTTCCGTAACGAGATCACCCCAGGTAACTTCATTTTCCGTACTCGTGAGTTCGAGCAGATGGAGATGGAGTTCTTCGTCAAGCCTGGTGAGGACGAAGAGTGGCACCAGCACTGGATTGATACTCGCCTGCAGTGGTACATCAACCTGGGCATTAAGCCTGAGAACCTGCGTCTGTACGAGCACCCTCAGGAGAAGCTGTCTCACTACTCCAAGCGCACTGTTGATATTGAGTACGCATTCAACTTTGCTAACACCAAGTGGGGCGAGTTAGAGGGTATCGCGAACCGTACTGATTACGATCTTCGCGTGCACTCTGAGGGCTCTGGTGAGGACCTGTCATTCTTCGATCAGGAGACCAATGAGCGTTGGATTCCTTTCGTAATCGAGCCTGCTGCAGGTCTTGGTCGCGCAATGATGATGTTCCTGATGGATGCTTATCACGAGGACGAGGCACCAAACTCAAAGGGTGGCGTCGATAAGCGTGTTGTTCTGAAGCTTGACCGTCGCCTTGCGCCGGTTAAGGTTGCGGTCTTGCCGCTGTCAAAGAAGGACACTTTGACGCCTTTGGCGGAAAAGCTCGCAGCAGAGCTGCGTGAATTCTGGAACGTTGATTACGACACTTCAGGTGCGATTGGTCGCCGTTACCGTCGTCAGGACGAGATCGGTACTCCATTCTGCGTCACCGTTGACTTTGATTCTCTCGAGGACAACGCTGTGACCGTGCGTGAGCGCGACACCATGGAGCAGGTTCGTGTTCCACTTGATGAGCTGCAGGGTTACTTGGCTCAGCGCCTCATCGGCTGCTAAACGGCAACCAATAGAGCGATAATTCGCTAAGACGAATGTAATCGCAGCAACATATAGCACCGGCTTAACAGGCCGGTGCTATTCTGTTCGCATGACTTCGAAGGATCTGATTGTGACCTCCTATACGTCTTGGGGCAAGCGTTTCAAGAATGACGGGAAGCTTTTTATTAACCTACTTCGCAGCACCACTGATAGTGCTGATGAAAAGGTTTTAGCCACTTTCGGTGAAGTTCCCAGCAAATCATTTGAAACCACCGCAACGGTTGATGAGCAGCAGTGGGAACTGTCCTTCAATATTGATGGAACGGCAACTGCCAAGCTTCCTGATGGTCGTGTGTTCAGCGCGAATGCAGGTGAGAAGACCTTTACCAAGTCCAAGCGGATTGAAATCGACATGGACGGCACCGCGATGGCTGCTGTTAATGAAGATAAAAACAATTGGATTATCGACGATTCTGAAGAGAATAAAGTCGCTCAGTTTACCGGTATGAACAACGGTGTGCGTCGCGCGATTGTGGAGTTTGAGCCTGACGTAGAAGTCACCCAGGAGCAGGAAATTTTCTTGTCGTGGGTTGCTCGGAAAACTCTGGAATCCCGCATGTTGGGCTCCAGTTGGGGACTGACTCTGTTTTTGATCATTTTGACGCCAATCATTATTTTTCTCACTTTCAGCTAAAAGGACCATGCAATGGTAGACGCTCAGCGCCCCAAAGCAGGCATCTTCGGTAGCCACACAGAAGAAACATGGGTGTGGCTCGGTAATGAACTTTTCGACGAGTCCGGCGAGGTCATCGCCG
ACGTTCGCTCCGACGTCCTCTACGTGGATCGCGAACGACTACTCATCGAATCCACCCCCGGCACCATGCGTTTTCGTTGCCGCGCAACACTGTCCGGGGGTGAGGTCTATACAATGACTCAGAATTCTTTCACTGTGGGGGATCTCACTGCGGTGTGCGGGCGCCGGACGTATTCGCTAAAAAGGGTGTCGCCGTGGCGTAAAGAACGCCTGATCACCAACAATGGGGTGGAAGTGGCGCGACTTCGCCCGATGACCAGCGGTAAAGTCGAATTCATTGTGGGCACCGCGGACAGCGAGGCGTTGCCGTTCGTCGACGCAGTATTTTTGAGCTGGGCGTGCGTCCTGGTGGATTCGGCCGTGCGCCGGCCGAAAATTTAAAAGCTTTTTGCTTATCGACGCACCCCTCCACCTGTTTTTTGTAGCCGGGGGATCATTTCCTTTGAAGGATCCAATCTCCGCACTTAGTTTCCTTCGGTGTGAAGGAAAGAGTTCCGTAAAGACCTCTATCTCATTTAAAGAAGTGGAGGATTAGGGTCGTTGACTCGCCTTCGGCACTAATTTGAGCCAAGTTCAAGTTTGCTGCCATCCCAGGTGACCGAAAATGTCCTATGCGAGGTCTCTTCGGTCACTTGGTTTTGCTCGTTTCAGGCTAGAAGCGGCCTCCGCGGAACCCTCCTCCGCCACCGCCACCACCGCTGAAGCCGCCACCGCCTCCACCGAAGCCTCCACCGAAACCGCCACCGCGGCCGCTGTTGAGAATCGAGTTGATCACCATGCCGGTGACAATCGCACCGGTGGTTCCGCCACCGGAATTGTGGCGATTGTTGTAGGTGGTGATGTCGTTTTGTGCTGACTTGCTGGCGCGTTGGGCTGCGACTGCTGCTTGACGTCCGTAATCAATTCCTGCACGGGTGTCGCGGGTGCGGTTTTGTTGTGCCATGGCGTACAGTTTTTGTGCGTTGGCCAGGTGGGTGCGGGCTTCGGATTTTACGATGCGACCGCGGGTGGAGATGAGGTCTTCGGCCTTTTGGATTTGGCTTCTTGCAGATTGCAGCTGTTGGTCGAATACGCGTAGCTGGCGGGCTTGATCAGCTGCGGTGGCGCGAAGTGTGTCAAGTTGAGTGTCGAGGGCGGAGTCGACATCGACAAGTTCTGTGTAGGTTCCGAGCGGATCCTTTTCGGCGTCTGCTGATGCGGTGGTTAGTGCTGCGCTGGCTGCGCGGACAGCATCGTCGAGGGAGGCCCAGTCGGCACGGGCACCGTCGGCTCCTGCGCTTTGTTTGAGTTGGCCGGCTTCGTTGATTTCGTCTGAGATTTCTTGAATCAGATCGGCAACGTTTGCTTTGGCTGTGGAGATGTTTTCATCGGCATGCTCGACGCCCTCGAGGAGTTTGTCTGCGGTAGTGATGGCGTGCTCGATGTGACGGATCGCGTCGATAAGCCCGCCCTGCTCGCCTGCGGGCATGGACTCTATCTTGTACGCCTGTGGCAGGACTTCTTCTGCTTCGTCGAGCGAAGCGCTGGCGAGGTCGACGTTGTCGTCGATGCTTTCAAGGACCTCTGCTGAGTAGCGAGCGCGCAGGCCAGCGAGTGTTTCTTGAGCCTTGGGGAGGCGGGTGCGCAGGTCGACGGATTTTTGGGTGAGAGCATCCAATTTGCTGCCCGCGTTGATCAGCAGGTTGCGCATATCGGCAAAGTTTTGGGCCTCGGCGTCGAGGGCATCGTCGGCTTGGCCACAGGATGAAATGATTTCTACCAGCATGGATCGACGTTCGGCTTCGGATTCTGGGATAGAATCGTTGAGGCGCTGCTGAATCTCAAAGGCTTTTTGCAGGGTGCCGGTGGAGTGGTTCATGGCGCGGTTGAAGCTGCGGGTGCGCTCTGGTCCGAACTCGGAGGTAGCGATAGCGAGCTCTTCTTTTCCGCGACGGATGGAGTCATCAGTGGAGGTGAGCTCTTCTTGGGCAAGGTGTTCGAGAGTTTCCATGGGAAGCTGCA
TGAGGCGGTTGGTATCGCGAGGGTCGATCTCACGTGCATCTTCCAAGGTTGCAGCACTTGTTTTCTTCTTGCGGCTGCGGGAATAGGCCCAAATTCCGCCACCAGCGGCCACTGTGCCAACGCCCGCAGCAGCCAACCAAGCGCCGGAAGATCCAGAAGAGCCTGAGGTTCCTGAGGCACCAGAACTAGAACCAACTGATTCTGCCAGCGCTAGTGCGGAGCCTGCCCAATCTTCTTGGGAAAGCGCCTGGAAAGCAGCGTTGTTGGCGGCGTCGAGTTCAGCGTCGGTCCATTGAGTACCACCTTGGATGCCGTACTGCCGTTCCTCGGGAGCGAGTGCATAAACCAAGACGTTTCCGCCGCCGTTGGCTTGGAGTGCTTGCTGCGTCCACGTTTCAGGGTCAACTCCGTCGAAAGAGCTTAGGAAAACAACGAAAATAACCTTTTGTTCAGATGCCTTTACATCATCGATGGCAGCCTGAATGTTGGTGATATCGGACGAGGAAATCTGGCCGGTGTAGTCAGTGACATTGTCTTGGTAAAATTCTGGTGATTCAGCCAAGACATATGTTTCTGTGGCTTCTGCAGTGTGAGCAGTAAAAAATGGTCCACTGATAAGGAGCGCGCCAGCTCCAATTGCCACAGTGACCGATACACGGCGGACGTTTTCCCGAAGATGCACCAAACTAAAGTTCATGGTCCCCACCTTAGACGAGTCCAGCTGGCACACTAGTTAACGTGAGAAGATTTTTAGCCAAGAGTTTACTCTTAACCGCAGTAGCGCAACCAGCCCTGAGGGTGGTCGCGTATTCGATGCTCAGAACGCCTAATAATCGGCACAAAATTGATTCAATTTTGGTGTTGGGCACAGCTCAATATGATGGGGTTCCATCGAGGCAGTTTGCTGCTCGTTTGAGGCATGCCGCGAAGCTGTGGCGTCTTCATGAAATCCAGCATGTATATACTGTCGGCGGAAAACTTCCTGGTGATCGTTTCACCGAAGCAGAAGTCGCGCGGGAGTATTTGATCAAAGAGGGCGTGGATCCGGATCTGATTTTTGTCTCTGCAGTTGGCAATGACACTGTCTCCTCCTATGAGGCGCTTGATCCGGAAAAGCTTGGTCGGGTGCTGATTGTTACTGATCCGAACCATTCGTATCGGGCGGTGCGCATCGCGCGACGCATGGGCTTTGACGCGAAACCTTCCCCGACAACCTATAGTCCCGCGAAGTTTCCGTCGATAGTTTATTTTCTGACCTTGTCCCATGAGTGGGGCGGGGTAGTGGTACAGGACGTGTCGTGGCTCTTGGGCGAACGGGTGGCCGATAAGGTGGAAGCATCTTTGCGAACTATCCAAGGCCTGCTGCGCCCTTCGAGGCGTGCGCGCCATGAGCAACTTCGGAGGCTGAAAAAGTAGATGTACCCCTATTCCGACGCAGACGCTTTTCGACGCCACCCTGAGCGCGCCAAGTCCAGCCAACTGCGCACCAGCGCCGTAGACACCCGCAGCGCGTTCGCCCGCGACCGGGCTCGCGTGCTGCATTCTGCTGCTCTTCGACGCCTCGCGGATAAAACCCAAGTGGTTGGCCCCAATGATGGTGATACTCCGCGCACCCGGCTGACGCACTCTTTGGAAGTAGCTCAAATTGCACGGGGAATCGGAGCTGGACTGGATTTGGATCCTGATCTGTGCGATCTGGCAGGGCTGTGCCATGACATTGGGCATCCGCCGTATGGACACAACGGTGAAAACGCGTTGAATGAAGTTGCTGCGGCCTGTGGAGGATTTGAGGGCAACGCCCAAACCTTGCGTATTCTCACGCGGCTGGAGCCAAAAATTGTCTCTGATGAGGGGGAGAGCTTTGGGCTGAACTTGTCGCGGGCTGCTCTTGATGCTGCATGTAAGTATCCGTGGGCTAAAACAAATGCGGATGGCAGTGTCAATAAGAAATACAGTGCTTATGATGAGGACGCAGAAATTCTTGCTTGGATCAGGCAAGG
CCATGAAGACCTCAGACCACCAATCGAAGCTCAGGTCATGGACTTTTCCGATGATATTGCCTACTCAGTACACGATGTAGAAGACGGCATTGTTTCCGGTCGCATCGATTTGAAAGTGCTGTGGGACCTGGTGGAATTAGCAGCACTGGCGGACAAAGGCGCAGCAGCTTTCGGAGGCTCGCCTGCAGAACTCATCGAGGGCGCAGCCTCGTTGCGGGAGCTTCCTGTGGTAGCGGCCGCTGCAGATTTTGATTTCTCACTGCGTTCCTACGCTGCGCTGAAGGCCATGACCTCAGAACTAGTGGGAAGATACGTTGGCTCTACCATCGAGTCAACAAAGAAAACACACGCCGGCATTGATGTGGGACGCATGCACGGCGATTTGATCATTCCAGAAACAGCGGCCAGTGAAGTAAAACTGCTCAAAACGTTAGCGGTTCTCTATGTGATGGATGACCCAGGGCACCTTGCGCGCCAAAACAGGCAACGGGATCGTATCTTCCGGGTTTTTGACTACCTGGTGCTGGGGGCTCCGGGATCGTTGGATCCGATGTATCGCCAGTGGTTTATTGAAGCGGATTCAGAATCGGAACAGATCCGTGTGATTGTTGATCAGATTGCGTCGATGACGGAGTCTCGTCTGGAACGCCTTGCCCGGAATGCTGCTGACATCTCAGGATTCTTGGGATAGTTGGTTAGAGCAGCAGCGATTTTTAGTAAGGCCAATAACATGTTTTGGCTTAAACCTGTGTCGTGTCAGATGGTGGCGAAGTAGAGTTCGCAAAGCTAGCGAACATGAATTCGTGTTCAGGAACTTAACAGGGATCAAACAGAGAACAGAGAACAGATCACGCTGCCCAAAAATCGCACTTTTAAGGTTTGTGGGCGTCTGTGTGTGGTTTGCCGCTGTAAAGTATCACCACGTTATGCGCCCTGGTGTGATCAAGCGTTCGTTCTGGGTCGAAACCCCAAAAGTCACAATTCCCCAGAAGCGGGTCAAACCCATTTAGCTTATTGCTTACATATCGAGGGTTTAGAAAAGTGATTTGTCGGATCAGTCGGTTTCTGCCAAGTAAATAGAACTTTATAAATTTTGTGGCTCTCAAATCTTAGGCCACGGCTTCCGATTTGAACCGGAGGTTCAAAAGGCTTATATAGACAAGATTCTGCATCGTCTCACGAGCCCCTCATTGCCTGACACGGTCAATCGTGTGGGAGGTACCAATCCGTGAGATTTCTGCCAACGAGCGATTCATTGGCCCCGCTGCAGAGCTGGCAGAACACGGACATAACCCAAATAATCTGAGGTCTGCCGTTTGCAGCAGCATTAGCGTTTGATGTGGAAGGTGATGCAGAGGCTGTTGATCTGCAAGCGCGTCTTTCCCAAGCACGGGGGAACCCTGAAGCATCGGATGCTCTAGTTGCTGAGCTGACTGGTGTTACTGCTAATCATCCGTTGGTCAGTGCTTGTCTGAAGTTTCCGCTCAATCCTAAGCTTCTCAAGATTTCGTAAAAAAGCTGCCAACTACCGTAAAACCGCACTACTAGAGGAGTGCGTTTTTCGTTCCTGAACACATTGCGTGCTGCAACTTAATTATGGTCCTCCCAGCTCAGTGTGCTGTGTGGATTGTTTATTCTCGTCCATTAAGTGATCGAGAAAAAGTTGTTGTAAAGTCATGCGCATGTGTGGAATTGTTGGATATATTGGCCAGGCGGGCGACTCCCGTGATTACTTTGCGCTTGACGTCGTTTTAGAAGGACTGCGCCGACTTGAATACCGCGGTTATGATTCCGCAGGTGTAGCTGTTCATGCGAACGGTGAAATCAGCTACCGAAAGAAGGCTGGAAAGGTAGCTGCGCTGGACGCTGAGATCGCTCGCGCTCCTTTGGCGGATTCCATTTTGGCTATTGGTCACACCCGGTGGGCAACTCACGGTGGACCAACCGATGCAAATGCACACCCCCATGTTGTTGATGGCGGCAAGTTAGCTGTCGTACAC
import io
import shutil
import tempfile
import unittest
from pathlib import Path

import testtools
import vamb


class TestParseMarkers(unittest.TestCase):
    """Tests for creating, saving and reloading ``vamb.parsemarkers.Markers``."""

    def test_instantiate(self):
        """Build markers from the FASTA fixture and round-trip them via save/load."""
        # from_files is given a directory path that does not exist yet;
        # presumably it creates that directory itself (TODO confirm against
        # vamb.parsemarkers). Own a private parent directory and point at a
        # fresh child inside it: the name cannot be claimed by another process
        # in the meantime, and everything written below the parent is removed
        # when the test ends, whether it passes or fails.
        scratch_root = Path(tempfile.mkdtemp())
        self.addCleanup(shutil.rmtree, scratch_root, ignore_errors=True)
        tmp_path = scratch_root.joinpath("markers")

        markers = vamb.parsemarkers.Markers.from_files(
            Path(testtools.DATADIR).joinpath("marker.fna"),
            Path(testtools.PARENTDIR).joinpath("vamb").joinpath("marker.hmm"),
            ["abc"],
            tmp_path,
            4,
            None,
        )

        # Assert the length before indexing so that an empty result fails with
        # a readable assertion message rather than an IndexError.
        self.assertEqual(len(markers.markers), 1)
        self.assertIsNotNone(markers.markers[0])
        self.assertEqual(set(markers.markers[0]), {39})
        self.assertEqual(
            markers.refhash, vamb.vambtools.RefHasher.hash_refnames(["abc"])
        )

        # Serialise into an in-memory text buffer and read it straight back.
        buf = io.StringIO()
        markers.save(buf)
        buf.seek(0)

        markers2 = vamb.parsemarkers.Markers.load(buf, markers.refhash)
        self.assertEqual(len(markers.markers), len(markers2.markers))
        self.assertEqual(set(markers.markers[0]), set(markers2.markers[0]))
        self.assertEqual(markers.marker_names, markers2.marker_names)