diff --git a/.github/workflows/coverage.yml b/.github/workflows/coverage.yml index 754f743..d1ef469 100644 --- a/.github/workflows/coverage.yml +++ b/.github/workflows/coverage.yml @@ -28,14 +28,37 @@ jobs: RUSTDOCFLAGS: '-Zprofile -Ccodegen-units=1 -Cinline-threshold=0 -Clink-dead-code -Coverflow-checks=off -Cpanic=abort -Zpanic_abort_tests' - name: Gather coverage information (fa-compression) - id: coverage + id: coverage-fa-compression uses: actions-rs/grcov@v0.1 - name: Upload coverage reports to Codecov (fa-compression) uses: codecov/codecov-action@v4.0.1 with: token: ${{ secrets.CODECOV_TOKEN }} - file: ${{ steps.coverage.outputs.report }} + file: ${{ steps.coverage-fa-compression.outputs.report }} flags: fa-compression verbose: true fail_ci_if_error: true + + - name: Run cargo test (sa-mappings) + uses: actions-rs/cargo@v1 + with: + command: test + args: --all-features --no-fail-fast -p sa-mappings + env: + CARGO_INCREMENTAL: 0 + RUSTFLAGS: '-Zprofile -Ccodegen-units=1 -Cinline-threshold=0 -Clink-dead-code -Coverflow-checks=off -Cpanic=abort -Zpanic_abort_tests' + RUSTDOCFLAGS: '-Zprofile -Ccodegen-units=1 -Cinline-threshold=0 -Clink-dead-code -Coverflow-checks=off -Cpanic=abort -Zpanic_abort_tests' + + - name: Gather coverage information (sa-mappings) + id: coverage-sa-mappings + uses: actions-rs/grcov@v0.1 + + - name: Upload coverage reports to Codecov (sa-mappings) + uses: codecov/codecov-action@v4.0.1 + with: + token: ${{ secrets.CODECOV_TOKEN }} + file: ${{ steps.coverage-sa-mappings.outputs.report }} + flags: sa-mappings + verbose: true + fail_ci_if_error: true diff --git a/Cargo.lock b/Cargo.lock index 7cda751..3ec5475 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,6 +2,21 @@ # It is not intended for manual editing. 
version = 3 +[[package]] +name = "addr2line" +version = "0.21.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a30b2e23b9e17a9f90641c7ab1549cd9b44f296d3ccbf309d2863cfe398a0cb" +dependencies = [ + "gimli", +] + +[[package]] +name = "adler" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" + [[package]] name = "aho-corasick" version = "1.1.3" @@ -17,30 +32,121 @@ version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299" +[[package]] +name = "ansi_term" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d52a9bb7ec0cf484c551830a7ce27bd20d67eac647e1befb56b0be4ee39a55d2" +dependencies = [ + "winapi", +] + [[package]] name = "anstyle" version = "1.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8901269c6307e8d93993578286ac0edf7f195079ffff5ebdeea6a59ffb7e36bc" +[[package]] +name = "attohttpc" +version = "0.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fe174d1b67f7b2bafed829c09db039301eb5841f66e43be2cf60b326e7f8e2cc" +dependencies = [ + "flate2", + "http", + "log", + "native-tls", + "openssl", + "serde", + "serde_json", + "url", +] + +[[package]] +name = "atty" +version = "0.2.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8" +dependencies = [ + "hermit-abi 0.1.19", + "libc", + "winapi", +] + [[package]] name = "autocfg" version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f1fdabc7756949593fe60f30ec81974b613357de856987752631dea1e3394c80" +[[package]] +name = "backtrace" +version = "0.3.71" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"26b05800d2e817c8b3b4b54abd461726265fa9789ae34330622f2db9ee696f9d" +dependencies = [ + "addr2line", + "cc", + "cfg-if", + "libc", + "miniz_oxide", + "object", + "rustc-demangle", +] + +[[package]] +name = "bitflags" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" + +[[package]] +name = "bitflags" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cf4b9d6a944f767f8e5e0db018570623c85f3d925ac718db4e06d0187adb21c1" + [[package]] name = "bumpalo" version = "3.15.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7ff69b9dd49fd426c69a0db9fc04dd934cdb6645ff000864d98f7e2af8830eaa" +[[package]] +name = "bytelines" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1297656b3c221f5251560da47ce530d981345d3dabe822067c18ecb36e67aacb" +dependencies = [ + "futures-util", + "tokio", +] + +[[package]] +name = "byteorder" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" + +[[package]] +name = "bytes" +version = "1.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "514de17de45fdb8dc022b1a7975556c53c86f9f0aa5f534b98977b171857c2c9" + [[package]] name = "cast" version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" +[[package]] +name = "cc" +version = "1.0.90" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8cd6604a82acf3039f1144f54b8eb34e91ffba622051189e71b781822d5ee1f5" + [[package]] name = "cfg-if" version = "1.0.0" @@ -74,6 +180,21 @@ dependencies = [ "half", ] +[[package]] +name = "clap" +version = "2.34.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "a0610544180c38b88101fecf2dd634b174a62eef6946f84dfc6a7127512b381c" +dependencies = [ + "ansi_term", + "atty", + "bitflags 1.3.2", + "strsim", + "textwrap", + "unicode-width", + "vec_map", +] + [[package]] name = "clap" version = "4.5.4" @@ -99,6 +220,31 @@ version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "98cc8fbded0c607b7ba9dd60cd98df59af97e84d24e49c8557331cfc26d301ce" +[[package]] +name = "core-foundation" +version = "0.9.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91e195e091a93c46f7102ec7818a2aa394e1e1771c3ab4825963fa03e45afb8f" +dependencies = [ + "core-foundation-sys", + "libc", +] + +[[package]] +name = "core-foundation-sys" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06ea2b9bc92be3c2baa9334a323ebca2d6f074ff852cd1d7b11064035cd3868f" + +[[package]] +name = "crc32fast" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b3855a8a784b474f333699ef2bbca9db2c4a1f6d9088a90a2d25b1eb53111eaa" +dependencies = [ + "cfg-if", +] + [[package]] name = "criterion" version = "0.5.1" @@ -108,7 +254,7 @@ dependencies = [ "anes", "cast", "ciborium", - "clap", + "clap 4.5.4", "criterion-plot", "is-terminal", "itertools", @@ -166,18 +312,145 @@ version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7a81dae078cea95a014a339291cec439d2f232ebe854a9d672b796c6afafa9b7" +[[package]] +name = "csv" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac574ff4d437a7b5ad237ef331c17ccca63c46479e5b5453eb8e10bb99a759fe" +dependencies = [ + "csv-core", + "itoa", + "ryu", + "serde", +] + +[[package]] +name = "csv-core" +version = "0.1.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"5efa2b3d7902f4b634a20cae3c9c4e6209dc4779feb6863329607560143efa70" +dependencies = [ + "memchr", +] + [[package]] name = "either" version = "1.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "11157ac094ffbdde99aa67b23417ebdd801842852b500e395a45a9c0aac03e4a" +[[package]] +name = "errno" +version = "0.3.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a258e46cdc063eb8519c00b9fc845fc47bcfca4130e2f08e88665ceda8474245" +dependencies = [ + "libc", + "windows-sys", +] + +[[package]] +name = "error-chain" +version = "0.12.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2d2f06b9cac1506ece98fe3231e3cc9c4410ec3d5b1f24ae1c8946f0742cdefc" +dependencies = [ + "backtrace", + "version_check", +] + [[package]] name = "fa-compression" version = "0.1.0" dependencies = [ "criterion", - "rand", + "rand 0.8.5", +] + +[[package]] +name = "fastrand" +version = "2.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "658bd65b1cf4c852a3cc96f18a8ce7b5640f6b703f905c7d74532294c2a63984" + +[[package]] +name = "flate2" +version = "1.0.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "46303f565772937ffe1d394a4fac6f411c6013172fadde9dcdb1e147a086940e" +dependencies = [ + "crc32fast", + "miniz_oxide", +] + +[[package]] +name = "fnv" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" + +[[package]] +name = "foreign-types" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f6f339eb8adc052cd2ca78910fda869aefa38d22d5cb648e6485e4d3fc06f3b1" +dependencies = [ + "foreign-types-shared", +] + +[[package]] +name = "foreign-types-shared" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"00b0228411908ca8685dba7fc2cdd70ec9990a6e753e89b6ac91a84c40fbaf4b" + +[[package]] +name = "form_urlencoded" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e13624c2627564efccf4934284bdd98cbaa14e79b0b5a141218e507b3a823456" +dependencies = [ + "percent-encoding", +] + +[[package]] +name = "fst" +version = "0.3.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "927fb434ff9f0115b215dc0efd2e4fbdd7448522a92a1aa37c77d6a2f8f1ebd6" +dependencies = [ + "byteorder", + "memmap", +] + +[[package]] +name = "fuchsia-cprng" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a06f77d526c1a601b7c4cdd98f54b5eaabffc14d5f2f0296febdc7f357c6d3ba" + +[[package]] +name = "futures-core" +version = "0.3.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dfc6580bb841c5a68e9ef15c77ccc837b40a7504914d52e47b8b0e9bbda25a1d" + +[[package]] +name = "futures-task" +version = "0.3.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "38d84fa142264698cdce1a9f9172cf383a0c82de1bddcf3092901442c4097004" + +[[package]] +name = "futures-util" +version = "0.3.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d6401deb83407ab3da39eba7e33987a73c3df0c82b4bb5813ee871c19c41d48" +dependencies = [ + "futures-core", + "futures-task", + "pin-project-lite", + "pin-utils", ] [[package]] @@ -191,6 +464,12 @@ dependencies = [ "wasi", ] +[[package]] +name = "gimli" +version = "0.28.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4271d37baee1b8c7e4b708028c57d816cf9d2434acb33a549475f78c181f6253" + [[package]] name = "half" version = "2.4.0" @@ -201,19 +480,58 @@ dependencies = [ "crunchy", ] +[[package]] +name = "heck" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"6d621efb26863f0e9924c6ac577e8275e5e6b77455db64ffa6c65c904e9e132c" +dependencies = [ + "unicode-segmentation", +] + +[[package]] +name = "hermit-abi" +version = "0.1.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33" +dependencies = [ + "libc", +] + [[package]] name = "hermit-abi" version = "0.3.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d231dfb89cfffdbc30e7fc41579ed6066ad03abda9e567ccafae602b97ec5024" +[[package]] +name = "http" +version = "0.2.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "601cbb57e577e2f5ef5be8e7b83f0f63994f25aa94d673e54a92d5c516d101f1" +dependencies = [ + "bytes", + "fnv", + "itoa", +] + +[[package]] +name = "idna" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "634d9b1461af396cad843f47fdba5597a4f9e6ddd4bfb6ff5d85028c25cb12f6" +dependencies = [ + "unicode-bidi", + "unicode-normalization", +] + [[package]] name = "is-terminal" version = "0.4.12" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f23ff5ef2b80d608d61efee834934d862cd92461afc0560dedf493e4c033738b" dependencies = [ - "hermit-abi", + "hermit-abi 0.3.9", "libc", "windows-sys", ] @@ -242,12 +560,24 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "lazy_static" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" + [[package]] name = "libc" version = "0.2.153" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9c198f91728a82281a64e1f4f9eeb25d82cb32a5de251c6bd1b5154d63a8e7bd" +[[package]] +name = "linux-raw-sys" +version = "0.4.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "01cda141df6706de531b6c46c3a33ecca755538219bd484262fa09410c13539c" + [[package]] name = "log" 
version = "0.4.21" @@ -260,6 +590,43 @@ version = "2.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6c8640c5d730cb13ebd907d8d04b52f55ac9a2eec55b440c8892f40d56c76c1d" +[[package]] +name = "memmap" +version = "0.6.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2ffa2c986de11a9df78620c01eeaaf27d94d3ff02bf81bfcca953102dd0c6ff" +dependencies = [ + "libc", + "winapi", +] + +[[package]] +name = "miniz_oxide" +version = "0.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d811f3e15f28568be3407c8e7fdb6514c1cda3cb30683f15b6a1a1dc4ea14a7" +dependencies = [ + "adler", +] + +[[package]] +name = "native-tls" +version = "0.2.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "07226173c32f2926027b63cce4bcd8076c3552846cbe7925f3aaffeac0a3b92e" +dependencies = [ + "lazy_static", + "libc", + "log", + "openssl", + "openssl-probe", + "openssl-sys", + "schannel", + "security-framework", + "security-framework-sys", + "tempfile", +] + [[package]] name = "num-traits" version = "0.2.18" @@ -269,6 +636,15 @@ dependencies = [ "autocfg", ] +[[package]] +name = "object" +version = "0.32.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a6a622008b6e321afc04970976f62ee297fdbaa6f95318ca343e3eebb9648441" +dependencies = [ + "memchr", +] + [[package]] name = "once_cell" version = "1.19.0" @@ -281,6 +657,83 @@ version = "11.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0ab1bc2a289d34bd04a330323ac98a1b4bc82c9d9fcb1e66b63caa84da26b575" +[[package]] +name = "openssl" +version = "0.10.64" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "95a0481286a310808298130d22dd1fef0fa571e05a8f44ec801801e84b216b1f" +dependencies = [ + "bitflags 2.5.0", + "cfg-if", + "foreign-types", + "libc", + "once_cell", + "openssl-macros", + "openssl-sys", +] + +[[package]] +name = "openssl-macros" 
+version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.58", +] + +[[package]] +name = "openssl-probe" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ff011a302c396a5197692431fc1948019154afc178baf7d8e37367442a4601cf" + +[[package]] +name = "openssl-sys" +version = "0.9.102" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c597637d56fbc83893a35eb0dd04b2b8e7a50c91e64e9493e398b5df4fb45fa2" +dependencies = [ + "cc", + "libc", + "pkg-config", + "vcpkg", +] + +[[package]] +name = "ordered-float" +version = "1.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3305af35278dd29f46fcdd139e0b1fbfae2153f0e5928b39b035542dd31e37b7" +dependencies = [ + "num-traits", +] + +[[package]] +name = "percent-encoding" +version = "2.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e" + +[[package]] +name = "pin-project-lite" +version = "0.2.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bda66fc9667c18cb2758a2ac84d1167245054bcf85d5d1aaa6923f45801bdd02" + +[[package]] +name = "pin-utils" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" + +[[package]] +name = "pkg-config" +version = "0.3.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d231b230927b5e4ad203db57bbcbee2802f6bce620b1e4a9024a07d94e2907ec" + [[package]] name = "plotters" version = "0.3.5" @@ -315,6 +768,30 @@ version = "0.2.17" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de" +[[package]] 
+name = "proc-macro-error" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da25490ff9892aab3fcf7c36f08cfb902dd3e71ca0f9f9517bea02a73a5ce38c" +dependencies = [ + "proc-macro-error-attr", + "proc-macro2", + "quote", + "syn 1.0.109", + "version_check", +] + +[[package]] +name = "proc-macro-error-attr" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1be40180e52ecc98ad80b184934baf3d0d29f979574e439af5a55274b35f869" +dependencies = [ + "proc-macro2", + "quote", + "version_check", +] + [[package]] name = "proc-macro2" version = "1.0.79" @@ -333,6 +810,19 @@ dependencies = [ "proc-macro2", ] +[[package]] +name = "rand" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "552840b97013b1a26992c11eac34bdd778e464601a4c2054b5f0bff7c6761293" +dependencies = [ + "fuchsia-cprng", + "libc", + "rand_core 0.3.1", + "rdrand", + "winapi", +] + [[package]] name = "rand" version = "0.8.5" @@ -341,7 +831,7 @@ checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" dependencies = [ "libc", "rand_chacha", - "rand_core", + "rand_core 0.6.4", ] [[package]] @@ -351,9 +841,24 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" dependencies = [ "ppv-lite86", - "rand_core", + "rand_core 0.6.4", +] + +[[package]] +name = "rand_core" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a6fdeb83b075e8266dcc8762c22776f6877a63111121f5f8c7411e5be7eed4b" +dependencies = [ + "rand_core 0.4.2", ] +[[package]] +name = "rand_core" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c33a3c44ca05fa6f1807d8e6743f3824e8509beca625669633be0acbdf509dc" + [[package]] name = "rand_core" version = "0.6.4" @@ -383,6 +888,15 @@ dependencies = [ "crossbeam-utils", ] 
+[[package]] +name = "rdrand" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "678054eb77286b51581ba43620cc911abf02758c91f93f479767aed0f90458b2" +dependencies = [ + "rand_core 0.3.1", +] + [[package]] name = "regex" version = "1.10.4" @@ -412,12 +926,50 @@ version = "0.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "adad44e29e4c806119491a7f06f03de4d1af22c3a680dd47f1e6e179439d1f56" +[[package]] +name = "remove_dir_all" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3acd125665422973a33ac9d3dd2df85edad0f4ae9b00dafb1a05e43a9f5ef8e7" +dependencies = [ + "winapi", +] + +[[package]] +name = "rustc-demangle" +version = "0.1.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d626bb9dae77e28219937af045c257c28bfd3f69333c512553507f5f9798cb76" + +[[package]] +name = "rustix" +version = "0.38.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "65e04861e65f21776e67888bfbea442b3642beaa0138fdb1dd7a84a52dffdb89" +dependencies = [ + "bitflags 2.5.0", + "errno", + "libc", + "linux-raw-sys", + "windows-sys", +] + [[package]] name = "ryu" version = "1.0.17" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e86697c916019a8588c99b5fac3cead74ec0b4b819707a682fd4d23fa0ce1ba1" +[[package]] +name = "sa-mappings" +version = "0.1.0" +dependencies = [ + "bytelines", + "fa-compression", + "tempdir", + "umgap", +] + [[package]] name = "same-file" version = "1.0.6" @@ -427,6 +979,38 @@ dependencies = [ "winapi-util", ] +[[package]] +name = "schannel" +version = "0.1.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fbc91545643bcf3a0bbb6569265615222618bdf33ce4ffbbd13c4bbd4c093534" +dependencies = [ + "windows-sys", +] + +[[package]] +name = "security-framework" +version = "2.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "770452e37cad93e0a50d5abc3990d2bc351c36d0328f86cefec2f2fb206eaef6" +dependencies = [ + "bitflags 1.3.2", + "core-foundation", + "core-foundation-sys", + "libc", + "security-framework-sys", +] + +[[package]] +name = "security-framework-sys" +version = "2.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41f3cc463c0ef97e11c3461a9d3787412d30e8e7eb907c79180c4a57bf7c04ef" +dependencies = [ + "core-foundation-sys", + "libc", +] + [[package]] name = "serde" version = "1.0.197" @@ -444,7 +1028,7 @@ checksum = "7eb0b34b42edc17f6b7cac84a52a1c5f0e1bb2227e997ca9011ea3dd34e8610b" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.58", ] [[package]] @@ -458,6 +1042,65 @@ dependencies = [ "serde", ] +[[package]] +name = "strsim" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ea5119cdb4c55b55d432abb513a0429384878c15dde60cc77b1c99de1a95a6a" + +[[package]] +name = "structopt" +version = "0.3.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c6b5c64445ba8094a6ab0c3cd2ad323e07171012d9c98b0b15651daf1787a10" +dependencies = [ + "clap 2.34.0", + "lazy_static", + "structopt-derive", +] + +[[package]] +name = "structopt-derive" +version = "0.4.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dcb5ae327f9cc13b68763b5749770cb9e048a99bd9dfdfa58d0cf05d5f64afe0" +dependencies = [ + "heck", + "proc-macro-error", + "proc-macro2", + "quote", + "syn 1.0.109", +] + +[[package]] +name = "strum" +version = "0.17.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "530efb820d53b712f4e347916c5e7ed20deb76a4f0457943b3182fb889b06d2c" + +[[package]] +name = "strum_macros" +version = "0.17.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e6e163a520367c465f59e0a61a23cfae3b10b6546d78b6f672a382be79f7110" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "syn 1.0.109", 
+] + +[[package]] +name = "syn" +version = "1.0.109" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + [[package]] name = "syn" version = "2.0.58" @@ -469,6 +1112,37 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "tempdir" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "15f2b5fb00ccdf689e0149d1b1b3c03fead81c2b37735d812fa8bddbbf41b6d8" +dependencies = [ + "rand 0.4.6", + "remove_dir_all", +] + +[[package]] +name = "tempfile" +version = "3.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85b77fafb263dd9d05cbeac119526425676db3784113aa9295c88498cbf8bff1" +dependencies = [ + "cfg-if", + "fastrand", + "rustix", + "windows-sys", +] + +[[package]] +name = "textwrap" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d326610f408c7a4eb6f51c37c330e496b08506c9457c9d34287ecc38809fb060" +dependencies = [ + "unicode-width", +] + [[package]] name = "tinytemplate" version = "1.2.1" @@ -479,12 +1153,115 @@ dependencies = [ "serde_json", ] +[[package]] +name = "tinyvec" +version = "1.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87cc5ceb3875bb20c2890005a4e226a4651264a5c75edb2421b52861a0a0cb50" +dependencies = [ + "tinyvec_macros", +] + +[[package]] +name = "tinyvec_macros" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" + +[[package]] +name = "tokio" +version = "1.37.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1adbebffeca75fcfd058afa480fb6c0b81e165a0323f9c9d39c9697e37c46787" +dependencies = [ + "backtrace", + "bytes", + "pin-project-lite", +] + +[[package]] +name = "umgap" +version = 
"1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "db403119ca827e0e097aa3268283072a373e65733f0c9edcdec06ea163741421" +dependencies = [ + "attohttpc", + "clap 2.34.0", + "csv", + "error-chain", + "fst", + "lazy_static", + "ordered-float", + "rayon", + "regex", + "serde_json", + "structopt", + "strum", + "strum_macros", +] + +[[package]] +name = "unicode-bidi" +version = "0.3.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08f95100a766bf4f8f28f90d77e0a5461bbdb219042e7679bebe79004fed8d75" + [[package]] name = "unicode-ident" version = "1.0.12" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" +[[package]] +name = "unicode-normalization" +version = "0.1.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a56d1686db2308d901306f92a263857ef59ea39678a5458e7cb17f01415101f5" +dependencies = [ + "tinyvec", +] + +[[package]] +name = "unicode-segmentation" +version = "1.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d4c87d22b6e3f4a18d4d40ef354e97c90fcb14dd91d7dc0aa9d8a1172ebf7202" + +[[package]] +name = "unicode-width" +version = "0.1.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e51733f11c9c4f72aa0c160008246859e340b00807569a0da0e7a1079b27ba85" + +[[package]] +name = "url" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "31e6302e3bb753d46e83516cae55ae196fc0c309407cf11ab35cc51a4c2a4633" +dependencies = [ + "form_urlencoded", + "idna", + "percent-encoding", +] + +[[package]] +name = "vcpkg" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" + +[[package]] +name = "vec_map" +version = "0.8.2" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "f1bddf1187be692e79c5ffeab891132dfb0f236ed36a43c7ed39f1165ee20191" + +[[package]] +name = "version_check" +version = "0.9.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" + [[package]] name = "walkdir" version = "2.5.0" @@ -522,7 +1299,7 @@ dependencies = [ "once_cell", "proc-macro2", "quote", - "syn", + "syn 2.0.58", "wasm-bindgen-shared", ] @@ -544,7 +1321,7 @@ checksum = "e94f17b526d0a461a191c78ea52bbce64071ed5c04c9ffe424dcb38f74171bb7" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.58", "wasm-bindgen-backend", "wasm-bindgen-shared", ] diff --git a/Cargo.toml b/Cargo.toml index 71ffef2..6c02f58 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,7 @@ [workspace] resolver = "2" -members = [ - "fa-compression" +members = [ + "fa-compression", + "sa-mappings" ] diff --git a/codecov.yml b/codecov.yml index ec42d7c..030fcbe 100644 --- a/codecov.yml +++ b/codecov.yml @@ -7,6 +7,10 @@ coverage: target: 90% flags: - fa-compression + sa-mappings: + target: 90% + flags: + - sa-mappings patch: default: target: 90% @@ -14,9 +18,17 @@ coverage: target: 90% flags: - fa-compression + sa-mappings: + target: 90% + flags: + - sa-mappings flags: fa-compression: paths: - fa-compression carryforward: true + sa-mappings: + paths: + - sa-mappings + carryforward: true diff --git a/sa-mappings/Cargo.toml b/sa-mappings/Cargo.toml new file mode 100644 index 0000000..f02934a --- /dev/null +++ b/sa-mappings/Cargo.toml @@ -0,0 +1,14 @@ +[package] +name = "sa-mappings" +version = "0.1.0" +edition = "2021" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dev-dependencies] +tempdir = "0.3.7" + +[dependencies] +fa-compression = { path = "../fa-compression" } +bytelines = "2.5.0" +umgap = "1.1.0" diff --git a/sa-mappings/src/functionality.rs 
b/sa-mappings/src/functionality.rs new file mode 100644 index 0000000..b26152b --- /dev/null +++ b/sa-mappings/src/functionality.rs @@ -0,0 +1,59 @@ +//! This module contains the FunctionAggregator struct that is responsible for aggregating the +//! functional annotations of proteins. + +use crate::proteins::Protein; + +/// A struct that represents a function aggregator +pub struct FunctionAggregator {} + +impl FunctionAggregator { + /// Aggregates the functional annotations of proteins + /// + /// # Arguments + /// * `proteins` - A vector of proteins + /// + /// # Returns + /// + /// Returns the decoded functional annotations of all proteins, joined by `;` + pub fn aggregate(&self, proteins: Vec<Protein>) -> String { + proteins + .iter() + .map(|protein| protein.get_functional_annotations()) + .collect::<Vec<String>>() + .join(";") + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_aggregate() { + let proteins = vec![ + Protein { + uniprot_id: "uniprot1".to_string(), + sequence: (0, 3), + taxon_id: 1, + functional_annotations: vec![ + 0xD1, 0x11, 0xA3, 0x8A, 0xD1, 0x27, 0x47, 0x5E, 0x11, 0x99, 0x27, + ] + }, + Protein { + uniprot_id: "uniprot2".to_string(), + sequence: (4, 3), + taxon_id: 2, + functional_annotations: vec![ + 0xD1, 0x11, 0xA3, 0x8A, 0xD1, 0x27, 0x47, 0x5E, 0x11, 0x99, 0x27, + ] + }, + ]; + + let function_aggregator = FunctionAggregator {}; + + assert_eq!( + function_aggregator.aggregate(proteins), + "GO:0009279;IPR:IPR016364;IPR:IPR008816;GO:0009279;IPR:IPR016364;IPR:IPR008816" + ); + } +} diff --git a/sa-mappings/src/lib.rs b/sa-mappings/src/lib.rs new file mode 100644 index 0000000..6986c13 --- /dev/null +++ b/sa-mappings/src/lib.rs @@ -0,0 +1,8 @@ +//! This library provides functionality to map protein sequences to their respective taxonomic +//! identifiers and functional annotations. 
+
+#![warn(missing_docs)]
+
+pub mod functionality;
+pub mod proteins;
+pub mod taxonomy;
diff --git a/sa-mappings/src/proteins.rs b/sa-mappings/src/proteins.rs
new file mode 100644
index 0000000..a79dd3d
--- /dev/null
+++ b/sa-mappings/src/proteins.rs
@@ -0,0 +1,335 @@
+//! This module contains the `Protein` and `Proteins` structs, which are used to represent proteins
+//! and collections of proteins, respectively.
+
+use std::{
+    error::Error,
+    fs::File,
+    io::BufReader,
+    ops::Index,
+    str::from_utf8
+};
+
+use bytelines::ByteLines;
+use fa_compression::algorithm1::decode;
+use umgap::taxon::TaxonId;
+
+use crate::taxonomy::TaxonAggregator;
+
+/// The separation character used in the input string
+pub static SEPARATION_CHARACTER: u8 = b'-';
+
+/// The termination character used in the input string
+/// This character should be smaller than the separation character
+pub static TERMINATION_CHARACTER: u8 = b'$';
+
+/// A struct that represents a protein and its linked information
+#[derive(Debug)]
+pub struct Protein {
+    /// The id of the protein
+    pub uniprot_id: String,
+
+    /// start position and length of the protein in the input string
+    pub sequence: (usize, u32),
+
+    /// the taxon id of the protein
+    pub taxon_id: TaxonId,
+
+    /// The encoded functional annotations of the protein
+    pub functional_annotations: Vec<u8>
+}
+
+/// A struct that represents a collection of proteins
+#[derive(Debug)]
+pub struct Proteins {
+    /// The input string containing all proteins: the uppercased sequences, separated by
+    /// `SEPARATION_CHARACTER` and closed by `TERMINATION_CHARACTER`
+    input_string: Vec<u8>,
+
+    /// The proteins in the input string
+    proteins: Vec<Protein>
+}
+
+impl Protein {
+    /// Returns the decoded functional annotations of the protein
+    pub fn get_functional_annotations(&self) -> String {
+        decode(&self.functional_annotations)
+    }
+}
+
+/// Returns the next tab-separated field of a database line, or an error that names the
+/// missing field when the line ends too early
+fn next_field<'a>(
+    fields: &mut impl Iterator<Item = &'a [u8]>,
+    name: &str
+) -> Result<&'a [u8], Box<dyn Error>> {
+    fields
+        .next()
+        .ok_or_else(|| format!("Malformed database line: missing {name} field").into())
+}
+
+impl Proteins {
+    /// Creates a new `Proteins` struct from a database file and a `TaxonAggregator`
+    ///
+    /// Every line of the database file holds four tab-separated fields: the uniprot id, the
+    /// taxon id, the sequence and the encoded functional annotations. Proteins whose taxon id
+    /// is unknown to the `TaxonAggregator` are skipped.
+    ///
+    /// # Arguments
+    /// * `file` - The path to the database file
+    /// * `taxon_aggregator` - The `TaxonAggregator` to use
+    ///
+    /// # Returns
+    ///
+    /// Returns a `Result` containing the `Proteins` struct
+    ///
+    /// # Errors
+    ///
+    /// Returns a `Box<dyn Error>` if the database file could not be opened or read, if a line
+    /// has fewer than four fields, if the uniprot id, taxon id or sequence is not valid utf8, if
+    /// the taxon id could not be parsed or if the length of a sequence does not fit in a `u32`
+    pub fn try_from_database_file(
+        file: &str,
+        taxon_aggregator: &TaxonAggregator
+    ) -> Result<Self, Box<dyn Error>> {
+        let mut input_string: String = String::new();
+        let mut proteins: Vec<Protein> = Vec::new();
+
+        let file = File::open(file)?;
+
+        let mut start_index = 0;
+
+        // Read the lines as bytes, since the input string is not guaranteed to be utf8
+        // because of the encoded functional annotations
+        let mut lines = ByteLines::new(BufReader::new(file));
+
+        while let Some(line) = lines.next() {
+            // Propagate read errors instead of silently treating them as the end of the file
+            let line = line?;
+            let mut fields = line.split(|b| *b == b'\t');
+
+            // uniprot_id, taxon_id and sequence should always contain valid utf8
+            let uniprot_id = from_utf8(next_field(&mut fields, "uniprot id")?)?;
+            let taxon_id = from_utf8(next_field(&mut fields, "taxon id")?)?.parse::<TaxonId>()?;
+            let sequence = from_utf8(next_field(&mut fields, "sequence")?)?;
+            let functional_annotations: Vec<u8> =
+                next_field(&mut fields, "functional annotations")?.to_vec();
+
+            if !taxon_aggregator.taxon_exists(taxon_id) {
+                continue;
+            }
+
+            // Measure the uppercased sequence, since that is the text that ends up in the
+            // input string and that the stored start positions refer to
+            let sequence = sequence.to_uppercase();
+            let sequence_length = u32::try_from(sequence.len())?;
+
+            input_string.push_str(&sequence);
+            input_string.push(SEPARATION_CHARACTER.into());
+
+            proteins.push(Protein {
+                uniprot_id: uniprot_id.to_string(),
+                sequence: (start_index, sequence_length),
+                taxon_id,
+                functional_annotations
+            });
+
+            start_index += sequence.len() + 1;
+        }
+
+        // Replace the separator behind the last protein by the termination character
+        input_string.pop();
+        input_string.push(TERMINATION_CHARACTER.into());
+
+        Ok(Self {
+            input_string: input_string.into_bytes(),
+            proteins
+        })
+    }
+
+    /// Returns the sequence of a protein
+    ///
+    /// # Arguments
+    /// * `protein` - The protein to get the sequence from
+    ///
+    /// # Returns
+    ///
+    /// Returns a string slice containing the sequence of the protein
+    ///
+    /// # Panics
+    ///
+    /// Panics if the start position and length of the protein do not describe a valid utf8
+    /// range inside the input string of this collection
+    pub fn get_sequence(&self, protein: &Protein) -> &str {
+        let (start, length) = protein.sequence;
+        let end = start + length as usize;
+
+        // should never fail since the input string will always be utf8
+        from_utf8(&self.input_string[start .. end])
+            .expect("the input string should only contain valid utf8")
+    }
+}
+
+impl Index<usize> for Proteins {
+    type Output = Protein;
+
+    /// Returns the protein at position `index`, panics if `index` is out of bounds
+    fn index(&self, index: usize) -> &Self::Output {
+        &self.proteins[index]
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use std::{
+        fs::File,
+        io::Write,
+        path::PathBuf
+    };
+
+    use fa_compression::algorithm1::decode;
+    use tempdir::TempDir;
+
+    use super::*;
+    use crate::taxonomy::AggregationMethod;
+
+    fn create_database_file(tmp_dir: &TempDir) -> PathBuf {
+        let database_file = tmp_dir.path().join("database.tsv");
+        let mut file = File::create(&database_file).unwrap();
+
+        // Every test protein carries the same encoded functional annotations
+        let annotations: [u8; 11] =
+            [0xD1, 0x11, 0xA3, 0x8A, 0xD1, 0x27, 0x47, 0x5E, 0x11, 0x99, 0x27];
+        let entries = [
+            "P12345\t1\tMLPGLALLLLAAWTARALEV\t",
+            "P54321\t2\tPTDGNAGLLAEPQIAMFCGRLNMHMNVQNG\t",
+            "P67890\t6\tKWDSDPSGTKTCIDT\t",
+            "P13579\t17\tKEGILQYCQEVYPELQITNVVEANQPVTIQNWCKRGRKQCKTHPH\t"
+        ];
+
+        for entry in entries {
+            // `write_all` instead of `write`, since a plain `write` may only write a part
+            file.write_all(entry.as_bytes()).unwrap();
+            file.write_all(&annotations).unwrap();
+            file.write_all(b"\n").unwrap();
+        }
+
+        database_file
+    }
+
+    fn create_taxonomy_file(tmp_dir: &TempDir) -> PathBuf {
+        let taxonomy_file = tmp_dir.path().join("taxonomy.tsv");
+        let mut file = File::create(&taxonomy_file).unwrap();
+
+        writeln!(file, "1\troot\tno rank\t1\t\x01").unwrap();
+        writeln!(file, "2\tBacteria\tsuperkingdom\t1\t\x01").unwrap();
+        writeln!(file, "6\tAzorhizobium\tgenus\t1\t\x01").unwrap();
+        writeln!(file, "7\tAzorhizobium caulinodans\tspecies\t6\t\x01").unwrap();
+        writeln!(file, "9\tBuchnera aphidicola\tspecies\t6\t\x01").unwrap();
+        writeln!(file, "10\tCellvibrio\tgenus\t6\t\x01").unwrap();
+        writeln!(file, "11\tCellulomonas gilvus\tspecies\t10\t\x01").unwrap();
+        writeln!(file, "13\tDictyoglomus\tgenus\t11\t\x01").unwrap();
+        writeln!(file, "14\tDictyoglomus thermophilum\tspecies\t10\t\x01").unwrap();
+        writeln!(file, "16\tMethylophilus\tgenus\t14\t\x01").unwrap();
+        writeln!(file, "17\tMethylophilus methylotrophus\tspecies\t16\t\x01").unwrap();
+        writeln!(file, "18\tPelobacter\tgenus\t17\t\x01").unwrap();
+        writeln!(file, "19\tSyntrophotalea carbinolica\tspecies\t17\t\x01").unwrap();
+        writeln!(file, "20\tPhenylobacterium\tgenus\t19\t\x01").unwrap();
+
+        taxonomy_file
+    }
+
+    #[test]
+    fn test_new_protein() {
+        let protein = Protein {
+            uniprot_id: "P12345".to_string(),
+            sequence: (0, 3),
+            taxon_id: 1,
+            functional_annotations: vec![0xD1, 0x11]
+        };
+
+        assert_eq!(protein.uniprot_id, "P12345");
+        assert_eq!(protein.sequence, (0, 3));
+        assert_eq!(protein.taxon_id, 1);
+        assert_eq!(protein.functional_annotations, vec![0xD1, 0x11]);
+    }
+
+    #[test]
+    fn test_new_proteins() {
+        let proteins = Proteins {
+            input_string: "MLPGLALLLLAAWTARALEV-PTDGNAGLLAEPQIAMFCGRLNMHMNVQNG"
+                .as_bytes()
+                .to_vec(),
+            proteins: vec![
+                Protein {
+                    uniprot_id: "P12345".to_string(),
+                    sequence: (0, 3),
+                    taxon_id: 1,
+                    functional_annotations: vec![0xD1, 0x11]
+                },
+                Protein {
+                    uniprot_id: "P54321".to_string(),
+                    sequence: (4, 3),
+                    taxon_id: 2,
+                    functional_annotations: vec![0xD1, 0x11]
+                },
+            ]
+        };
+
+        assert_eq!(
+            proteins.input_string,
+            "MLPGLALLLLAAWTARALEV-PTDGNAGLLAEPQIAMFCGRLNMHMNVQNG".as_bytes()
+        );
+        assert_eq!(proteins.proteins.len(), 2);
+        assert_eq!(proteins.proteins[0].uniprot_id, "P12345");
+        assert_eq!(proteins.proteins[0].sequence, (0, 3));
+        assert_eq!(proteins.proteins[0].taxon_id, 1);
+        assert_eq!(proteins.proteins[0].functional_annotations, vec![0xD1, 0x11]);
+        assert_eq!(proteins.proteins[1].uniprot_id, "P54321");
+        assert_eq!(proteins.proteins[1].sequence, (4, 3));
+        assert_eq!(proteins.proteins[1].taxon_id, 2);
+        assert_eq!(proteins.proteins[1].functional_annotations, vec![0xD1, 0x11]);
+    }
+
+    #[test]
+    fn test_get_sequence() {
+        // Create a temporary directory for this test
+        let tmp_dir = TempDir::new("test_get_sequences").unwrap();
+
+        let database_file = create_database_file(&tmp_dir);
+        let taxonomy_file = create_taxonomy_file(&tmp_dir);
+
+        let taxon_aggregator = TaxonAggregator::try_from_taxonomy_file(
+            taxonomy_file.to_str().unwrap(),
+            AggregationMethod::Lca
+        )
+        .unwrap();
+        let proteins =
+            Proteins::try_from_database_file(database_file.to_str().unwrap(), &taxon_aggregator)
+                .unwrap();
+
+        assert_eq!(proteins.proteins.len(), 4);
+        assert_eq!(proteins.get_sequence(&proteins[0]), "MLPGLALLLLAAWTARALEV");
+        assert_eq!(proteins.get_sequence(&proteins[1]), "PTDGNAGLLAEPQIAMFCGRLNMHMNVQNG");
+        assert_eq!(proteins.get_sequence(&proteins[2]), "KWDSDPSGTKTCIDT");
+        assert_eq!(
+            proteins.get_sequence(&proteins[3]),
+            "KEGILQYCQEVYPELQITNVVEANQPVTIQNWCKRGRKQCKTHPH"
+        );
+    }
+
+    #[test]
+    fn test_get_taxon() {
+        // Create a temporary directory for this test
+        let tmp_dir = TempDir::new("test_get_taxon").unwrap();
+
+        let database_file = create_database_file(&tmp_dir);
+        let taxonomy_file = create_taxonomy_file(&tmp_dir);
+
+        let taxon_aggregator = TaxonAggregator::try_from_taxonomy_file(
+            taxonomy_file.to_str().unwrap(),
+            AggregationMethod::Lca
+        )
+        .unwrap();
+        let proteins =
+            Proteins::try_from_database_file(database_file.to_str().unwrap(), &taxon_aggregator)
+                .unwrap();
+
+        let taxa = vec![1, 2, 6, 17];
+        // Guard against a vacuous pass when no proteins were read at all
+        assert_eq!(proteins.proteins.len(), taxa.len());
+        for (i, protein) in proteins.proteins.iter().enumerate() {
+            assert_eq!(protein.taxon_id, taxa[i]);
+        }
+    }
+
+    #[test]
+    fn test_malformed_database_line() {
+        // Create a temporary directory for this test
+        let tmp_dir = TempDir::new("test_malformed_database_line").unwrap();
+
+        // A line that stops after the taxon id: the sequence and the annotations are missing
+        let database_file = tmp_dir.path().join("database.tsv");
+        let mut file = File::create(&database_file).unwrap();
+        file.write_all(b"P12345\t1\n").unwrap();
+
+        let taxonomy_file = create_taxonomy_file(&tmp_dir);
+
+        let taxon_aggregator = TaxonAggregator::try_from_taxonomy_file(
+            taxonomy_file.to_str().unwrap(),
+            AggregationMethod::Lca
+        )
+        .unwrap();
+
+        assert!(
+            Proteins::try_from_database_file(database_file.to_str().unwrap(), &taxon_aggregator)
+                .is_err()
+        );
+    }
+
+    #[test]
+    fn test_get_functional_annotations() {
+        // Create a temporary directory for this test
+        let tmp_dir = TempDir::new("test_get_fa").unwrap();
+
+        let database_file = create_database_file(&tmp_dir);
+        let taxonomy_file = create_taxonomy_file(&tmp_dir);
+
+        let taxon_aggregator = TaxonAggregator::try_from_taxonomy_file(
+            taxonomy_file.to_str().unwrap(),
+            AggregationMethod::Lca
+        )
+        .unwrap();
+        let proteins =
+            Proteins::try_from_database_file(database_file.to_str().unwrap(), &taxon_aggregator)
+                .unwrap();
+
+        for protein in proteins.proteins.iter() {
+            assert_eq!(
+                decode(&protein.functional_annotations),
+                "GO:0009279;IPR:IPR016364;IPR:IPR008816"
+            );
+        }
+    }
+}
diff --git a/sa-mappings/src/taxonomy.rs b/sa-mappings/src/taxonomy.rs
new file mode 100644
index 0000000..ada93ff
--- /dev/null
+++ b/sa-mappings/src/taxonomy.rs
@@ -0,0 +1,254 @@
+//! This module provides a `TaxonAggregator` struct that is used to aggregate taxonomic information.
+//! It uses a taxonomy file to create a taxonomic tree and performs aggregation using different
+//! methods.
+
+use std::error::Error;
+
+use umgap::{
+    agg::{
+        count,
+        MultiThreadSafeAggregator
+    },
+    rmq::{
+        lca::LCACalculator,
+        mix::MixCalculator
+    },
+    taxon::{
+        read_taxa_file,
+        TaxonId,
+        TaxonList,
+        TaxonTree
+    }
+};
+
+/// A struct that represents a taxon aggregator.
+pub struct TaxonAggregator {
+    /// A vector that contains the snapped taxon IDs.
+    snapping: Vec<Option<TaxonId>>,
+
+    /// The aggregator used to aggregate taxon IDs.
+    aggregator: Box<dyn MultiThreadSafeAggregator>,
+
+    /// The taxon list.
+    taxon_list: TaxonList
+}
+
+/// An enum that specifies the aggregation method to use.
+pub enum AggregationMethod {
+    /// The Lowest Common Ancestor (LCA) aggregation method.
+    Lca,
+
+    /// The LCA* aggregation method.
+    LcaStar
+}
+
+impl TaxonAggregator {
+    /// Creates a new `TaxonAggregator` from a taxonomy file and an aggregation method.
+    ///
+    /// # Arguments
+    ///
+    /// * `file` - A string slice that represents the path to the taxonomy file.
+    /// * `method` - An `AggregationMethod` enum that specifies the aggregation method to use.
+    ///
+    /// # Returns
+    ///
+    /// Returns a `Result` containing the `TaxonAggregator`
+    ///
+    /// # Errors
+    ///
+    /// Returns a `Box<dyn Error>` if an error occurred while reading the taxonomy file.
+    pub fn try_from_taxonomy_file(
+        file: &str,
+        method: AggregationMethod
+    ) -> Result<Self, Box<dyn Error>> {
+        let taxons = read_taxa_file(file)?;
+        let taxon_tree = TaxonTree::new(&taxons);
+        let taxon_list = TaxonList::new(taxons);
+        let snapping = taxon_tree.snapping(&taxon_list, true);
+
+        // NOTE(review): the mix factor of 1.0 is presumably what makes the `MixCalculator`
+        // behave as a plain LCA - confirm against the umgap documentation
+        let aggregator: Box<dyn MultiThreadSafeAggregator> = match method {
+            AggregationMethod::Lca => Box::new(MixCalculator::new(taxon_tree, 1.0)),
+            AggregationMethod::LcaStar => Box::new(LCACalculator::new(taxon_tree))
+        };
+
+        Ok(Self {
+            snapping,
+            aggregator,
+            taxon_list
+        })
+    }
+
+    /// Checks if a taxon exists in the taxon list.
+    ///
+    /// # Arguments
+    ///
+    /// * `taxon` - The taxon ID to check.
+    ///
+    /// # Returns
+    ///
+    /// Returns a boolean value indicating whether the taxon exists in the taxon list.
+    pub fn taxon_exists(&self, taxon: TaxonId) -> bool {
+        self.taxon_list.get(taxon).is_some()
+    }
+
+    /// Snaps a taxon to its closest ancestor in the taxonomic tree.
+    ///
+    /// # Arguments
+    ///
+    /// * `taxon` - The taxon ID to snap.
+    ///
+    /// # Returns
+    ///
+    /// Returns the snapped taxon ID.
+    ///
+    /// # Panics
+    ///
+    /// Panics with "Could not snap taxon with id ..." if the taxon cannot be snapped. This
+    /// includes taxon IDs that lie beyond the end of the snapping table.
+    pub fn snap_taxon(&self, taxon: TaxonId) -> TaxonId {
+        // `get` instead of indexing: an ID beyond the snapping table has to end in the same
+        // panic message as an ID without a snapped taxon, not in an out-of-bounds panic
+        self.snapping
+            .get(taxon)
+            .copied()
+            .flatten()
+            .unwrap_or_else(|| panic!("Could not snap taxon with id {taxon}"))
+    }
+
+    /// Aggregates a list of taxon IDs using the specified aggregation method.
+    ///
+    /// # Arguments
+    ///
+    /// * `taxa` - A vector of taxon IDs to aggregate.
+    ///
+    /// # Returns
+    ///
+    /// Returns the aggregated taxon ID.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the underlying aggregator returns an error for the given taxon IDs.
+    pub fn aggregate(&self, taxa: Vec<TaxonId>) -> TaxonId {
+        // Every occurrence of a taxon ID is counted with a weight of 1.0
+        let count = count(taxa.into_iter().map(|t| (t, 1.0)));
+        self.aggregator
+            .aggregate(&count)
+            .unwrap_or_else(|_| panic!("Could not aggregate following taxon ids: {:?}", &count))
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use std::{
+        fs::File,
+        io::Write,
+        path::PathBuf
+    };
+
+    use tempdir::TempDir;
+
+    use super::*;
+
+    fn create_taxonomy_file(tmp_dir: &TempDir) -> PathBuf {
+        let taxonomy_file = tmp_dir.path().join("taxonomy.tsv");
+        let mut file = File::create(&taxonomy_file).unwrap();
+
+        writeln!(file, "1\troot\tno rank\t1\t\x01").unwrap();
+        writeln!(file, "2\tBacteria\tsuperkingdom\t1\t\x01").unwrap();
+        writeln!(file, "6\tAzorhizobium\tgenus\t1\t\x01").unwrap();
+        writeln!(file, "7\tAzorhizobium caulinodans\tspecies\t6\t\x01").unwrap();
+        writeln!(file, "9\tBuchnera aphidicola\tspecies\t6\t\x01").unwrap();
+        writeln!(file, "10\tCellvibrio\tgenus\t6\t\x01").unwrap();
+        writeln!(file, "11\tCellulomonas gilvus\tspecies\t10\t\x01").unwrap();
+        writeln!(file, "13\tDictyoglomus\tgenus\t11\t\x01").unwrap();
+        writeln!(file, "14\tDictyoglomus thermophilum\tspecies\t10\t\x01").unwrap();
+        writeln!(file, "16\tMethylophilus\tgenus\t14\t\x01").unwrap();
+        writeln!(file, "17\tMethylophilus methylotrophus\tspecies\t16\t\x01").unwrap();
+        writeln!(file, "18\tPelobacter\tgenus\t17\t\x01").unwrap();
+        writeln!(file, "19\tSyntrophotalea carbinolica\tspecies\t17\t\x01").unwrap();
+        writeln!(file, "20\tPhenylobacterium\tgenus\t19\t\x01").unwrap();
+
+        taxonomy_file
+    }
+
+    #[test]
+    fn test_try_from_taxonomy_file() {
+        // Create a temporary directory for this test
+        let tmp_dir = TempDir::new("test_try_from_taxonomy_file").unwrap();
+
+        let taxonomy_file = create_taxonomy_file(&tmp_dir);
+
+        let _ = TaxonAggregator::try_from_taxonomy_file(
+            taxonomy_file.to_str().unwrap(),
+            AggregationMethod::Lca
+        )
+        .unwrap();
+        let _ = TaxonAggregator::try_from_taxonomy_file(
+            taxonomy_file.to_str().unwrap(),
+            AggregationMethod::LcaStar
+        )
+        .unwrap();
+    }
+
+    #[test]
+    fn test_taxon_exists() {
+        // Create a temporary directory for this test
+        let tmp_dir = TempDir::new("test_taxon_exists").unwrap();
+
+        let taxonomy_file = create_taxonomy_file(&tmp_dir);
+
+        let taxon_aggregator = TaxonAggregator::try_from_taxonomy_file(
+            taxonomy_file.to_str().unwrap(),
+            AggregationMethod::Lca
+        )
+        .unwrap();
+
+        for i in 0 ..= 20 {
+            if [0, 3, 4, 5, 8, 12, 15].contains(&i) {
+                assert!(!taxon_aggregator.taxon_exists(i));
+            } else {
+                assert!(taxon_aggregator.taxon_exists(i));
+            }
+        }
+    }
+
+    #[test]
+    fn test_snap_taxon() {
+        // Create a temporary directory for this test
+        let tmp_dir = TempDir::new("test_snap_taxon").unwrap();
+
+        let taxonomy_file = create_taxonomy_file(&tmp_dir);
+
+        let taxon_aggregator = TaxonAggregator::try_from_taxonomy_file(
+            taxonomy_file.to_str().unwrap(),
+            AggregationMethod::Lca
+        )
+        .unwrap();
+
+        for i in 0 ..= 20 {
+            if ![0, 3, 4, 5, 8, 12, 15].contains(&i) {
+                assert_eq!(taxon_aggregator.snap_taxon(i), i);
+            }
+        }
+    }
+
+    #[test]
+    #[should_panic(expected = "Could not snap taxon with id 1000")]
+    fn test_snap_taxon_out_of_range() {
+        // Create a temporary directory for this test
+        let tmp_dir = TempDir::new("test_snap_taxon_out_of_range").unwrap();
+
+        let taxonomy_file = create_taxonomy_file(&tmp_dir);
+
+        let taxon_aggregator = TaxonAggregator::try_from_taxonomy_file(
+            taxonomy_file.to_str().unwrap(),
+            AggregationMethod::Lca
+        )
+        .unwrap();
+
+        // The taxonomy only knows IDs up to 20, so 1000 lies beyond the snapping table
+        taxon_aggregator.snap_taxon(1000);
+    }
+
+    #[test]
+    fn test_aggregate_lca() {
+        // Create a temporary directory for this test
+        let tmp_dir = TempDir::new("test_aggregate_lca").unwrap();
+
+        let taxonomy_file = create_taxonomy_file(&tmp_dir);
+
+        let taxon_aggregator = TaxonAggregator::try_from_taxonomy_file(
+            taxonomy_file.to_str().unwrap(),
+            AggregationMethod::Lca
+        )
+        .unwrap();
+
+        assert_eq!(taxon_aggregator.aggregate(vec![7, 9]), 6);
+        assert_eq!(taxon_aggregator.aggregate(vec![11, 14]), 10);
+        assert_eq!(taxon_aggregator.aggregate(vec![17, 19]), 17);
+    }
+
+    #[test]
+    fn test_aggregate_lca_star() {
+        // Create a temporary directory for this test
+        let tmp_dir = TempDir::new("test_aggregate_lca_star").unwrap();
+
+        let taxonomy_file = create_taxonomy_file(&tmp_dir);
+
+        let taxon_aggregator = TaxonAggregator::try_from_taxonomy_file(
+            taxonomy_file.to_str().unwrap(),
+            AggregationMethod::LcaStar
+        )
+        .unwrap();
+
+        assert_eq!(taxon_aggregator.aggregate(vec![7, 9]), 6);
+        assert_eq!(taxon_aggregator.aggregate(vec![11, 14]), 10);
+        assert_eq!(taxon_aggregator.aggregate(vec![17, 19]), 19);
+    }
+}