From 248b19c26fbe923421f9d400afdf6adfcca483af Mon Sep 17 00:00:00 2001 From: Tibo Vande Moortele <34175340+tibvdm@users.noreply.github.com> Date: Wed, 5 Jun 2024 16:52:47 +0200 Subject: [PATCH] Add SA compression, fix some tests and refactor some code * bitarray compression * add coverage for bitarray and small README * document library * cargo formatting + better panic message in testing * add is_empty method for clippy * cargo formatting * function to compress data without having to store everything in memory * compressed integration + some major refactoring * dynamic resuired bits per value * update tests and documentation * update coverage config * fmt + clippy * format --all * improve testing * formatting * fix bitarray tests * some additional smaller tests * update codecov config * ignore main files when covering code * test failing reader and writer in test module * small cleanup * small cleanup * some debug information * allow preliminary enzyme numbers (x.x.x.nx) * encode functional annotations on load * more debug info + some movement of SA code --- .github/workflows/coverage.yml | 105 +++++- .gitignore | 3 + Cargo.lock | 18 +- Cargo.toml | 4 +- README.md | 10 +- bitarray/Cargo.toml | 8 + bitarray/README.md | 21 ++ bitarray/src/binary.rs | 203 ++++++++++++ bitarray/src/lib.rs | 410 +++++++++++++++++++++++ codecov.yml | 42 ++- fa-compression/src/algorithm1/decode.rs | 18 +- fa-compression/src/algorithm1/encode.rs | 16 +- fa-compression/src/algorithm1/mod.rs | 24 +- sa-builder/Cargo.toml | 2 + sa-builder/README.md | 28 ++ sa-builder/src/binary.rs | 149 --------- sa-builder/src/lib.rs | 198 +++++++++-- sa-builder/src/main.rs | 102 ++++-- sa-compression/Cargo.toml | 10 + sa-compression/src/lib.rs | 271 +++++++++++++++ sa-index/Cargo.toml | 3 +- sa-index/src/binary.rs | 416 ++++++++++++++++++++++++ sa-index/src/lib.rs | 364 +++++++++------------ sa-index/src/main.rs | 13 - sa-index/src/peptide_search.rs | 72 +++- sa-index/src/sa_searcher.rs | 290 ++++++++++------- sa-index/src/suffix_to_protein_index.rs | 16 +- sa-index/src/util.rs | 63 ---- sa-mappings/src/functionality.rs | 76 ++++- sa-mappings/src/proteins.rs | 40 ++- sa-mappings/src/taxonomy.rs | 72 +++- sa-server/Cargo.toml | 3 +- sa-server/src/main.rs | 59 +++- 33 files changed, 2381 insertions(+), 748 deletions(-) create mode 100644 bitarray/Cargo.toml create mode 100644 bitarray/README.md create mode 100644 bitarray/src/binary.rs create mode 100644 bitarray/src/lib.rs create mode 100644 sa-builder/README.md delete mode 100644 sa-builder/src/binary.rs create mode 100644 sa-compression/Cargo.toml create mode 100644 sa-compression/src/lib.rs create mode 100644 sa-index/src/binary.rs delete mode 100644 sa-index/src/main.rs delete mode 100644 sa-index/src/util.rs diff --git a/.github/workflows/coverage.yml b/.github/workflows/coverage.yml index bf98de4..f015f40 100644 --- a/.github/workflows/coverage.yml +++ b/.github/workflows/coverage.yml @@ -19,6 +19,33 @@ jobs: toolchain: nightly override: true + + + - name: Run cargo test (bitarray) + uses: actions-rs/cargo@v1 + with: + command: test + args: --all-features --no-fail-fast -p bitarray + env: + CARGO_INCREMENTAL: 0 + RUSTFLAGS: '-Zprofile -Ccodegen-units=1 -Cinline-threshold=0 -Clink-dead-code -Coverflow-checks=off -Cpanic=abort -Zpanic_abort_tests' + RUSTDOCFLAGS: '-Zprofile -Ccodegen-units=1 -Cinline-threshold=0 -Clink-dead-code -Coverflow-checks=off -Cpanic=abort -Zpanic_abort_tests' + + - name: Gather coverage information (bitarray) + id: coverage-bitarray + uses: 
actions-rs/grcov@v0.1 + + - name: Upload coverage reports to Codecov (bitarray) + uses: codecov/codecov-action@v4.0.1 + with: + token: ${{ secrets.CODECOV_TOKEN }} + file: ${{ steps.coverage-bitarray.outputs.report }} + flags: bitarray + verbose: true + fail_ci_if_error: true + + + - name: Run cargo test (fa-compression) uses: actions-rs/cargo@v1 with: @@ -42,6 +69,82 @@ jobs: verbose: true fail_ci_if_error: true + + + - name: Run cargo test (sa-builder) + uses: actions-rs/cargo@v1 + with: + command: test + args: --all-features --no-fail-fast -p sa-builder + env: + CARGO_INCREMENTAL: 0 + RUSTFLAGS: '-Zprofile -Ccodegen-units=1 -Cinline-threshold=0 -Clink-dead-code -Coverflow-checks=off -Cpanic=abort -Zpanic_abort_tests' + RUSTDOCFLAGS: '-Zprofile -Ccodegen-units=1 -Cinline-threshold=0 -Clink-dead-code -Coverflow-checks=off -Cpanic=abort -Zpanic_abort_tests' + + - name: Gather coverage information (sa-builder) + id: coverage-sa-builder + uses: actions-rs/grcov@v0.1 + + - name: Upload coverage reports to Codecov (sa-builder) + uses: codecov/codecov-action@v4.0.1 + with: + token: ${{ secrets.CODECOV_TOKEN }} + file: ${{ steps.coverage-sa-builder.outputs.report }} + flags: sa-builder + verbose: true + fail_ci_if_error: true + + + + - name: Run cargo test (sa-compression) + uses: actions-rs/cargo@v1 + with: + command: test + args: --all-features --no-fail-fast -p sa-compression + env: + CARGO_INCREMENTAL: 0 + RUSTFLAGS: '-Zprofile -Ccodegen-units=1 -Cinline-threshold=0 -Clink-dead-code -Coverflow-checks=off -Cpanic=abort -Zpanic_abort_tests' + RUSTDOCFLAGS: '-Zprofile -Ccodegen-units=1 -Cinline-threshold=0 -Clink-dead-code -Coverflow-checks=off -Cpanic=abort -Zpanic_abort_tests' + + - name: Gather coverage information (sa-compression) + id: coverage-sa-compression + uses: actions-rs/grcov@v0.1 + + - name: Upload coverage reports to Codecov (sa-compression) + uses: codecov/codecov-action@v4.0.1 + with: + token: ${{ secrets.CODECOV_TOKEN }} + file: ${{ steps.coverage-sa-compression.outputs.report }} + flags: sa-compression + verbose: true + fail_ci_if_error: true + + + + - name: Run cargo test (sa-index) + uses: actions-rs/cargo@v1 + with: + command: test + args: --all-features --no-fail-fast -p sa-index + env: + CARGO_INCREMENTAL: 0 + RUSTFLAGS: '-Zprofile -Ccodegen-units=1 -Cinline-threshold=0 -Clink-dead-code -Coverflow-checks=off -Cpanic=abort -Zpanic_abort_tests' + RUSTDOCFLAGS: '-Zprofile -Ccodegen-units=1 -Cinline-threshold=0 -Clink-dead-code -Coverflow-checks=off -Cpanic=abort -Zpanic_abort_tests' + + - name: Gather coverage information (sa-index) + id: coverage-sa-index + uses: actions-rs/grcov@v0.1 + + - name: Upload coverage reports to Codecov (sa-index) + uses: codecov/codecov-action@v4.0.1 + with: + token: ${{ secrets.CODECOV_TOKEN }} + file: ${{ steps.coverage-sa-index.outputs.report }} + flags: sa-index + verbose: true + fail_ci_if_error: true + + - name: Run cargo test (sa-mappings) uses: actions-rs/cargo@v1 with: @@ -63,4 +166,4 @@ jobs: file: ${{ steps.coverage-sa-mappings.outputs.report }} flags: sa-mappings verbose: true - fail_ci_if_error: true + fail_ci_if_error: true \ No newline at end of file diff --git a/.gitignore b/.gitignore index 2f7896d..f534053 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,4 @@ target/ +data/ + +.DS_Store diff --git a/Cargo.lock b/Cargo.lock index d5ed545..900c218 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -240,6 +240,10 @@ dependencies = [ "which", ] +[[package]] +name = "bitarray" +version = "0.1.0" + [[package]] name = "bitflags" version = 
"1.3.2" @@ -1408,16 +1412,27 @@ dependencies = [ "clap 4.5.4", "libdivsufsort-rs", "libsais64-rs", + "sa-compression", + "sa-index", "sa-mappings", ] +[[package]] +name = "sa-compression" +version = "0.1.0" +dependencies = [ + "bitarray", + "sa-index", +] + [[package]] name = "sa-index" version = "0.1.0" dependencies = [ + "bitarray", "clap 4.5.4", + "fa-compression", "rayon", - "sa-builder", "sa-mappings", "serde", "serde_json", @@ -1444,6 +1459,7 @@ dependencies = [ "axum", "clap 4.5.4", "sa-builder", + "sa-compression", "sa-index", "sa-mappings", "serde", diff --git a/Cargo.toml b/Cargo.toml index 728b06e..fec447c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,10 +1,10 @@ [workspace] resolver = "2" -members = [ +members = [ "bitarray", "fa-compression", "libsais64-rs", - "sa-builder", + "sa-builder", "sa-compression", "sa-index", "sa-mappings", "sa-server" diff --git a/README.md b/README.md index 9a4607c..15f8897 100644 --- a/README.md +++ b/README.md @@ -2,18 +2,20 @@ ![Codecov](https://img.shields.io/codecov/c/github/unipept/unipept-index?token=IZ75A2FY98&logo=Codecov) -The unipept index written entirely in `Rust`. This repository consists of multiple different Rust projects that depend on -each other. More information about each project can be found in their respective `README.md` file. +The unipept index written entirely in `Rust`. This repository consists of multiple different Rust projects that depend on each other. More information about each project can be found in their respective `README.md` file. ## Installation -Clone this repository with the following command: +> [!NOTE] +> To build and use the Unipept Index, you need to have Rust installed. If you don't have Rust installed, you can get it from [rust-lang.org](https://www.rust-lang.org/). + +Clone this repository by executing the following command: ```bash git clone https://github.com/unipept/unipept-index.git ``` -And build the projects using: +Next, build everything by executing the following command in the root of the repository. ```bash cargo build --release diff --git a/bitarray/Cargo.toml b/bitarray/Cargo.toml new file mode 100644 index 0000000..8176d57 --- /dev/null +++ b/bitarray/Cargo.toml @@ -0,0 +1,8 @@ +[package] +name = "bitarray" +version = "0.1.0" +edition = "2021" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] diff --git a/bitarray/README.md b/bitarray/README.md new file mode 100644 index 0000000..074f7c5 --- /dev/null +++ b/bitarray/README.md @@ -0,0 +1,21 @@ +# Bitarray + +![GitHub Actions Workflow Status](https://img.shields.io/github/actions/workflow/status/unipept/unipept-index/test.yml?logo=github) +![Codecov](https://img.shields.io/codecov/c/github/unipept/unipept-index?token=IZ75A2FY98&flag=bitarray&logo=codecov) +![Static Badge](https://img.shields.io/badge/doc-rustdoc-blue) + +The `bitarray` offers a special array where each item is represented by a specified amount of bits (smaller than 64). The bitarray uses a pre-alocated vector and allows you to `set` or `get` a value from the array. + +## Example + +```rust +use bitarray; + +fn main() { + let bitarray = BitArray::<40>::with_capacity(4); + + bitarray.set(0, 0b0001110011111010110001000111111100110010); + + assert_eq!(bitarray.get(0), 0b0001110011111010110001000111111100110010); +} +``` diff --git a/bitarray/src/binary.rs b/bitarray/src/binary.rs new file mode 100644 index 0000000..2609c3b --- /dev/null +++ b/bitarray/src/binary.rs @@ -0,0 +1,203 @@ +//! 
This module provides utilities for reading and writing the bitarray as binary. + +use std::io::{ + BufRead, + Read, + Result, + Write +}; + +use crate::BitArray; + +/// The `Binary` trait provides methods for reading and writing a struct as binary. +pub trait Binary { + /// Writes the struct as binary to the given writer. + /// + /// # Arguments + /// + /// * `writer` - The writer to write the binary data to. + /// + /// # Returns + /// + /// Returns `Ok(())` if the write operation is successful, or an `Err` if an error occurs. + fn write_binary(&self, writer: &mut W) -> Result<()>; + + /// Reads binary data into a struct from the given reader. + /// + /// # Arguments + /// + /// * `reader` - The reader to read the binary data from. + /// + /// # Returns + /// + /// Returns `Ok(())` if the read operation is successful, or an `Err` if an error occurs. + fn read_binary(&mut self, reader: R) -> Result<()>; +} + +/// Implementation of the `Binary` trait for the `BitArray` struct. +impl Binary for BitArray { + /// Writes the binary representation of the `BitArray` to the given writer. + /// + /// # Arguments + /// + /// * `writer` - The writer to which the binary data will be written. + /// + /// # Errors + /// + /// Returns an error if there was a problem writing to the writer. + fn write_binary(&self, writer: &mut W) -> Result<()> { + for value in self.data.iter() { + writer.write_all(&value.to_le_bytes())?; + } + + Ok(()) + } + + /// Reads the binary representation of the `BitArray` from the given reader. + /// + /// # Arguments + /// + /// * `reader` - The reader from which the binary data will be read. + /// + /// # Errors + /// + /// Returns an error if there was a problem reading from the reader. + fn read_binary(&mut self, mut reader: R) -> Result<()> { + self.data.clear(); + + let mut buffer = vec![0; 8 * 1024]; + + loop { + let (finished, bytes_read) = fill_buffer(&mut reader, &mut buffer)?; + for buffer_slice in buffer[.. bytes_read].chunks_exact(8) { + self.data + .push(u64::from_le_bytes(buffer_slice.try_into().unwrap())); + } + + if finished { + break; + } + } + + Ok(()) + } +} + +/// Fills the buffer with data read from the input. +/// +/// # Arguments +/// +/// * `input` - The input source to read data from. +/// * `buffer` - The buffer to fill with data. +/// +/// # Returns +/// +/// Returns a tuple `(finished, bytes_read)` where `finished` indicates whether the end of the input +/// is reached, and `bytes_read` is the number of bytes read into the buffer. 
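The `fill_buffer` helper below backs `read_binary`; to make the `Binary` trait itself concrete, here is a minimal, hypothetical round-trip sketch (the function name is illustrative only, the API is the one added in this patch):

```rust
use bitarray::{Binary, BitArray};

// Hypothetical round-trip: serialize a BitArray's u64 words as little-endian
// bytes, then rebuild an equally sized array from those bytes.
fn roundtrip() -> std::io::Result<()> {
    let mut original = BitArray::with_capacity(4, 40);
    original.set(0, 0x1234567890);
    original.set(1, 0xabcdef0123);

    let mut buffer: Vec<u8> = Vec::new();
    original.write_binary(&mut buffer)?;

    let mut restored = BitArray::with_capacity(4, 40);
    restored.read_binary(&buffer[..])?;

    assert_eq!(restored.get(0), 0x1234567890);
    assert_eq!(restored.get(1), 0xabcdef0123);
    Ok(())
}
```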
+fn fill_buffer(input: &mut T, buffer: &mut Vec) -> std::io::Result<(bool, usize)> { + // Store the buffer size in advance, because rust will complain + // about the buffer being borrowed mutably while it's borrowed + let buffer_size = buffer.len(); + + let mut writable_buffer_space = buffer.as_mut(); + + loop { + match input.read(writable_buffer_space) { + // No bytes written, which means we've completely filled the buffer + // or we've reached the end of the file + Ok(0) => { + return Ok(( + !writable_buffer_space.is_empty(), + buffer_size - writable_buffer_space.len() + )); + } + + // We've read {bytes_read} bytes + Ok(bytes_read) => { + // Shrink the writable buffer slice + writable_buffer_space = writable_buffer_space[bytes_read ..].as_mut(); + } + + // An error occurred while reading + Err(e) => { + return Err(e); + } + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + pub struct ErrorInput; + + impl Read for ErrorInput { + fn read(&mut self, _buf: &mut [u8]) -> std::io::Result { + Err(std::io::Error::new(std::io::ErrorKind::Other, "read error")) + } + } + + #[test] + fn test_fill_buffer() { + let input_str = "a".repeat(8_000); + let mut input = input_str.as_bytes(); + + let mut buffer = vec![0; 800]; + + loop { + let (finished, bytes_read) = fill_buffer(&mut input, &mut buffer).unwrap(); + + if finished { + assert!(bytes_read < 800); + break; + } else { + assert_eq!(bytes_read, 800); + } + } + } + + #[test] + fn test_fill_buffer_read_error() { + let mut input = ErrorInput; + let mut buffer = vec![0; 800]; + + assert!(fill_buffer(&mut input, &mut buffer).is_err()); + } + + #[test] + fn test_write_binary() { + let mut bitarray = BitArray::with_capacity(4, 40); + bitarray.set(0, 0x1234567890); + bitarray.set(1, 0xabcdef0123); + bitarray.set(2, 0x4567890abc); + bitarray.set(3, 0xdef0123456); + + let mut buffer = Vec::new(); + bitarray.write_binary(&mut buffer).unwrap(); + + assert_eq!( + buffer, + vec![ + 0xef, 0xcd, 0xab, 0x90, 0x78, 0x56, 0x34, 0x12, 0xde, 0xbc, 0x0a, 0x89, 0x67, 0x45, + 0x23, 0x01, 0x00, 0x00, 0x00, 0x00, 0x56, 0x34, 0x12, 0xf0 + ] + ); + } + + #[test] + fn test_read_binary() { + let buffer = vec![ + 0xef, 0xcd, 0xab, 0x90, 0x78, 0x56, 0x34, 0x12, 0xde, 0xbc, 0x0a, 0x89, 0x67, 0x45, + 0x23, 0x01, 0x00, 0x00, 0x00, 0x00, 0x56, 0x34, 0x12, 0xf0, + ]; + + let mut bitarray = BitArray::with_capacity(4, 40); + bitarray.read_binary(&buffer[..]).unwrap(); + + assert_eq!(bitarray.get(0), 0x1234567890); + assert_eq!(bitarray.get(1), 0xabcdef0123); + assert_eq!(bitarray.get(2), 0x4567890abc); + assert_eq!(bitarray.get(3), 0xdef0123456); + } +} diff --git a/bitarray/src/lib.rs b/bitarray/src/lib.rs new file mode 100644 index 0000000..0a5f647 --- /dev/null +++ b/bitarray/src/lib.rs @@ -0,0 +1,410 @@ +//! This module contains the `BitArray` struct and its associated methods. + +mod binary; + +use std::{ + cmp::max, + io::{ + Result, + Write + } +}; + +/// Re-export the `Binary` trait. +pub use binary::Binary; + +/// A fixed-size bit array implementation. +pub struct BitArray { + /// The underlying data storage for the bit array. + data: Vec, + /// The mask used to extract the relevant bits from each element in the data vector. + mask: u64, + /// The length of the bit array. + len: usize, + /// The number of bits in a single element of the data vector. + bits_per_value: usize +} + +impl BitArray { + /// Creates a new `BitArray` with the specified capacity. + /// + /// # Arguments + /// + /// * `capacity` - The number of bits the `BitArray` can hold. 
+ /// * `bits_per_value` - The number of bits in a single value. + /// + /// # Returns + /// + /// A new `BitArray` with the specified capacity. + pub fn with_capacity(capacity: usize, bits_per_value: usize) -> Self { + let extra = if capacity * bits_per_value % 64 == 0 { + 0 + } else { + 1 + }; + Self { + data: vec![0; capacity * bits_per_value / 64 + extra], + mask: (1 << bits_per_value) - 1, + len: capacity, + bits_per_value + } + } + + /// Retrieves the value at the specified index in the `BitArray`. + /// + /// # Arguments + /// + /// * `index` - The index of the value to retrieve. + /// + /// # Returns + /// + /// The value at the specified index. + pub fn get(&self, index: usize) -> u64 { + let start_block = index * self.bits_per_value / 64; + let start_block_offset = index * self.bits_per_value % 64; + + // If the value is contained within a single block + if start_block_offset + self.bits_per_value <= 64 { + // Shift the value to the right so that the relevant bits are in the least significant + // position Then mask out the irrelevant bits + return self.data[start_block] >> (64 - start_block_offset - self.bits_per_value) + & self.mask; + } + + let end_block = (index + 1) * self.bits_per_value / 64; + let end_block_offset = (index + 1) * self.bits_per_value % 64; + + // Extract the relevant bits from the start block and shift them {end_block_offset} bits to + // the left + let a = self.data[start_block] << end_block_offset; + + // Extract the relevant bits from the end block and shift them to the least significant + // position + let b = self.data[end_block] >> (64 - end_block_offset); + + // Paste the two values together and mask out the irrelevant bits + (a | b) & self.mask + } + + /// Sets the value at the specified index in the `BitArray`. + /// + /// # Arguments + /// + /// * `index` - The index of the value to set. + /// * `value` - The value to set at the specified index. + pub fn set(&mut self, index: usize, value: u64) { + let start_block = index * self.bits_per_value / 64; + let start_block_offset = index * self.bits_per_value % 64; + + // If the value is contained within a single block + if start_block_offset + self.bits_per_value <= 64 { + // Clear the relevant bits in the start block + self.data[start_block] &= + !(self.mask << (64 - start_block_offset - self.bits_per_value)); + // Set the relevant bits in the start block + self.data[start_block] |= value << (64 - start_block_offset - self.bits_per_value); + return; + } + + let end_block = (index + 1) * self.bits_per_value / 64; + let end_block_offset = (index + 1) * self.bits_per_value % 64; + + // Clear the relevant bits in the start block + self.data[start_block] &= !(self.mask >> start_block_offset); + // Set the relevant bits in the start block + self.data[start_block] |= value >> end_block_offset; + + // Clear the relevant bits in the end block + self.data[end_block] &= !(self.mask << (64 - end_block_offset)); + // Set the relevant bits in the end block + self.data[end_block] |= value << (64 - end_block_offset); + } + + /// Returns the number of bits in a single value. + /// + /// # Returns + /// + /// The number of bits in a single value. + pub fn bits_per_value(&self) -> usize { + self.bits_per_value + } + + /// Returns the length of the `BitArray`. + /// + /// # Returns + /// + /// The length of the `BitArray`. + pub fn len(&self) -> usize { + self.len + } + + /// Checks if the `BitArray` is empty. + /// + /// # Returns + /// + /// `true` if the `BitArray` is empty, `false` otherwise. 
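Before the remaining accessors, a worked illustration of the block-spanning case handled by `get` and `set` above — a sketch assuming 40 bits per value: the value at index 1 starts at bit offset 40, so its top 24 bits land in `data[0]` and its remaining 16 bits in `data[1]`.

```rust
use bitarray::BitArray;

// Index 0 fits entirely in data[0]; index 1 straddles data[0] and data[1].
fn spanning_example() {
    let mut ba = BitArray::with_capacity(2, 40);
    ba.set(0, 0xaaaaaaaaaa);
    ba.set(1, 0xbbbbbbbbbb); // 24 bits go to data[0], 16 bits to data[1]

    assert_eq!(ba.get(0), 0xaaaaaaaaaa);
    assert_eq!(ba.get(1), 0xbbbbbbbbbb);
}
```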
+ pub fn is_empty(&self) -> bool { + self.len == 0 + } + + /// Clears the `BitArray`, setting all bits to 0. + pub fn clear(&mut self) { + self.data.iter_mut().for_each(|x| *x = 0); + } +} + +/// Writes the data to a writer in a binary format using a bit array. This function is helpfull +/// when writing large amounts of data to a writer in chunks. The data is written in chunks of the +/// specified capacity, so memory usage is minimized. +/// +/// # Arguments +/// +/// * `data` - The data to write. +/// * `bits_per_value` - The number of bits in a single value. +/// * `max_capacity` - The maximum amount of elements that may be stored in the bit array. +/// * `writer` - The writer to write the data to. +/// +/// # Returns +/// +/// A `Result` indicating whether the write operation was successful or not. +pub fn data_to_writer( + data: Vec, + bits_per_value: usize, + max_capacity: usize, + writer: &mut impl Write +) -> Result<()> { + // Update the max capacity to be a multiple of the greatest common divisor of the bits per value + // and 64. This is done to ensure that the bit array can store the data entirely + let greates_common_divisor = gcd(bits_per_value, 64); + let capacity = + max(greates_common_divisor, max_capacity / greates_common_divisor * greates_common_divisor); + + // If amount of data is less than the max capacity, write the data to the writer in a single + // chunk + if data.len() <= capacity { + let mut bitarray = BitArray::with_capacity(data.len(), bits_per_value); + + for (i, &value) in data.iter().enumerate() { + bitarray.set(i, value as u64); + } + bitarray.write_binary(writer)?; + + return Ok(()); + } + + // Create a bit array that can store a single chunk of data + let mut bitarray = BitArray::with_capacity(capacity, bits_per_value); + + // Write the data to the writer in chunks of the specified capacity + let chunks = data.chunks_exact(capacity); + + // Store the remainder before looping over the chunks + let remainder = chunks.remainder(); + + for chunk in chunks { + for (i, &value) in chunk.iter().enumerate() { + bitarray.set(i, value as u64); + } + bitarray.write_binary(writer)?; + bitarray.clear(); + } + + // Create a new bit array with the remainder capacity + bitarray = BitArray::with_capacity(remainder.len(), bits_per_value); + + for (i, &value) in remainder.iter().enumerate() { + bitarray.set(i, value as u64); + } + bitarray.write_binary(writer)?; + + Ok(()) +} + +/// Calculates the greatest common divisor of two numbers. +/// +/// # Arguments +/// +/// * `a` - The first number. +/// * `b` - The second number. +/// +/// # Returns +/// +/// The greatest common divisor of the two numbers. 
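The `gcd` helper below drives the chunk-size rounding in `data_to_writer`; a small sketch makes that behaviour concrete (toy input, assuming the API from this patch):

```rust
use bitarray::data_to_writer;

// With 40-bit values, gcd(40, 64) = 8, so the chunk capacity is rounded to a
// multiple of 8 values; 8 values * 40 bits = 320 bits = exactly five u64
// words, so every chunk flushes on a whole-word boundary.
fn example() -> std::io::Result<()> {
    let values: Vec<i64> = (0..20_000).collect();
    let mut serialized: Vec<u8> = Vec::new();

    // At most 8 * 1024 values are buffered at once, keeping memory usage flat.
    data_to_writer(values, 40, 8 * 1024, &mut serialized)?;

    // 20_000 values * 40 bits = 100_000 bytes written in total.
    assert_eq!(serialized.len(), 100_000);
    Ok(())
}
```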
+fn gcd(mut a: usize, mut b: usize) -> usize { + while b != 0 { + if b < a { + std::mem::swap(&mut b, &mut a); + } + b %= a; + } + a +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_bitarray_with_capacity() { + let bitarray = BitArray::with_capacity(4, 40); + assert_eq!(bitarray.data, vec![0, 0, 0]); + assert_eq!(bitarray.mask, 0xff_ffff_ffff); + assert_eq!(bitarray.len, 4); + } + + #[test] + fn test_bitarray_get() { + let mut bitarray = BitArray::with_capacity(4, 40); + bitarray.data = vec![0x1cfac47f32c25261, 0x4dc9f34db6ba5108, 0x9144eb9ca32eb4a4]; + + assert_eq!(bitarray.get(0), 0b0001110011111010110001000111111100110010); + assert_eq!(bitarray.get(1), 0b1100001001010010011000010100110111001001); + assert_eq!(bitarray.get(2), 0b1111001101001101101101101011101001010001); + assert_eq!(bitarray.get(3), 0b0000100010010001010001001110101110011100); + } + + #[test] + fn test_bitarray_set() { + let mut bitarray = BitArray::with_capacity(4, 40); + + bitarray.set(0, 0b0001110011111010110001000111111100110010); + bitarray.set(1, 0b1100001001010010011000010100110111001001); + bitarray.set(2, 0b1111001101001101101101101011101001010001); + bitarray.set(3, 0b0000100010010001010001001110101110011100); + + assert_eq!(bitarray.data, vec![0x1cfac47f32c25261, 0x4dc9f34db6ba5108, 0x9144EB9C00000000]); + } + + #[test] + fn test_bitarray_bits_per_value() { + let bitarray = BitArray::with_capacity(4, 40); + assert_eq!(bitarray.bits_per_value(), 40); + } + + #[test] + fn test_bitarray_len() { + let bitarray = BitArray::with_capacity(4, 40); + assert_eq!(bitarray.len(), 4); + } + + #[test] + fn test_bitarray_is_empty() { + let bitarray = BitArray::with_capacity(0, 40); + assert!(bitarray.is_empty()); + } + + #[test] + fn test_bitarray_is_not_empty() { + let bitarray = BitArray::with_capacity(4, 40); + assert!(!bitarray.is_empty()); + } + + #[test] + fn test_bitarray_clear() { + let mut bitarray = BitArray::with_capacity(4, 40); + bitarray.data = vec![0x1cfac47f32c25261, 0x4dc9f34db6ba5108, 0x9144eb9ca32eb4a4]; + + bitarray.clear(); + + assert_eq!(bitarray.data, vec![0, 0, 0]); + } + + #[test] + fn test_data_to_writer_no_chunks_needed() { + let data = vec![0x1234567890, 0xabcdef0123, 0x4567890abc, 0xdef0123456]; + let mut writer = Vec::new(); + + data_to_writer(data, 40, 2, &mut writer).unwrap(); + + assert_eq!( + writer, + vec![ + 0xef, 0xcd, 0xab, 0x90, 0x78, 0x56, 0x34, 0x12, 0xde, 0xbc, 0x0a, 0x89, 0x67, 0x45, + 0x23, 0x01, 0x00, 0x00, 0x00, 0x00, 0x56, 0x34, 0x12, 0xf0 + ] + ); + } + + #[test] + fn test_data_to_writer_chunks_needed_no_remainder() { + let data = vec![ + 0x11111111, 0x22222222, 0x33333333, 0x44444444, 0x55555555, 0x66666666, 0x77777777, + 0x88888888, 0x99999999, 0xaaaaaaaa, 0xbbbbbbbb, 0xcccccccc, 0xdddddddd, 0xeeeeeeee, + 0xffffffff, 0x00000000, 0x11111111, 0x22222222, 0x33333333, 0x44444444, 0x55555555, + 0x66666666, 0x77777777, 0x88888888, 0x99999999, 0xaaaaaaaa, 0xbbbbbbbb, 0xcccccccc, + 0xdddddddd, 0xeeeeeeee, 0xffffffff, 0x00000000, 0x11111111, 0x22222222, 0x33333333, + 0x44444444, 0x55555555, 0x66666666, 0x77777777, 0x88888888, 0x99999999, 0xaaaaaaaa, + 0xbbbbbbbb, 0xcccccccc, 0xdddddddd, 0xeeeeeeee, 0xffffffff, 0x00000000, 0x11111111, + 0x22222222, 0x33333333, 0x44444444, 0x55555555, 0x66666666, 0x77777777, 0x88888888, + 0x99999999, 0xaaaaaaaa, 0xbbbbbbbb, 0xcccccccc, 0xdddddddd, 0xeeeeeeee, 0xffffffff, + 0x00000000, + ]; + let mut writer = Vec::new(); + + data_to_writer(data, 32, 8, &mut writer).unwrap(); + + assert_eq!( + writer, + vec![ + 0x22, 0x22, 0x22, 
0x22, 0x11, 0x11, 0x11, 0x11, 0x44, 0x44, 0x44, 0x44, 0x33, 0x33, + 0x33, 0x33, 0x66, 0x66, 0x66, 0x66, 0x55, 0x55, 0x55, 0x55, 0x88, 0x88, 0x88, 0x88, + 0x77, 0x77, 0x77, 0x77, 0xaa, 0xaa, 0xaa, 0xaa, 0x99, 0x99, 0x99, 0x99, 0xcc, 0xcc, + 0xcc, 0xcc, 0xbb, 0xbb, 0xbb, 0xbb, 0xee, 0xee, 0xee, 0xee, 0xdd, 0xdd, 0xdd, 0xdd, + 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0x22, 0x22, 0x22, 0x22, 0x11, 0x11, + 0x11, 0x11, 0x44, 0x44, 0x44, 0x44, 0x33, 0x33, 0x33, 0x33, 0x66, 0x66, 0x66, 0x66, + 0x55, 0x55, 0x55, 0x55, 0x88, 0x88, 0x88, 0x88, 0x77, 0x77, 0x77, 0x77, 0xaa, 0xaa, + 0xaa, 0xaa, 0x99, 0x99, 0x99, 0x99, 0xcc, 0xcc, 0xcc, 0xcc, 0xbb, 0xbb, 0xbb, 0xbb, + 0xee, 0xee, 0xee, 0xee, 0xdd, 0xdd, 0xdd, 0xdd, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, + 0xff, 0xff, 0x22, 0x22, 0x22, 0x22, 0x11, 0x11, 0x11, 0x11, 0x44, 0x44, 0x44, 0x44, + 0x33, 0x33, 0x33, 0x33, 0x66, 0x66, 0x66, 0x66, 0x55, 0x55, 0x55, 0x55, 0x88, 0x88, + 0x88, 0x88, 0x77, 0x77, 0x77, 0x77, 0xaa, 0xaa, 0xaa, 0xaa, 0x99, 0x99, 0x99, 0x99, + 0xcc, 0xcc, 0xcc, 0xcc, 0xbb, 0xbb, 0xbb, 0xbb, 0xee, 0xee, 0xee, 0xee, 0xdd, 0xdd, + 0xdd, 0xdd, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0x22, 0x22, 0x22, 0x22, + 0x11, 0x11, 0x11, 0x11, 0x44, 0x44, 0x44, 0x44, 0x33, 0x33, 0x33, 0x33, 0x66, 0x66, + 0x66, 0x66, 0x55, 0x55, 0x55, 0x55, 0x88, 0x88, 0x88, 0x88, 0x77, 0x77, 0x77, 0x77, + 0xaa, 0xaa, 0xaa, 0xaa, 0x99, 0x99, 0x99, 0x99, 0xcc, 0xcc, 0xcc, 0xcc, 0xbb, 0xbb, + 0xbb, 0xbb, 0xee, 0xee, 0xee, 0xee, 0xdd, 0xdd, 0xdd, 0xdd, 0x00, 0x00, 0x00, 0x00, + 0xff, 0xff, 0xff, 0xff + ] + ); + } + + #[test] + fn test_data_to_writer_chunks_needed_plus_remainder() { + let data = vec![ + 0x11111111, 0x22222222, 0x33333333, 0x44444444, 0x55555555, 0x66666666, 0x77777777, + 0x88888888, 0x99999999, 0xaaaaaaaa, 0xbbbbbbbb, 0xcccccccc, 0xdddddddd, 0xeeeeeeee, + 0xffffffff, 0x00000000, 0x11111111, 0x22222222, 0x33333333, 0x44444444, 0x55555555, + 0x66666666, 0x77777777, 0x88888888, 0x99999999, 0xaaaaaaaa, 0xbbbbbbbb, 0xcccccccc, + 0xdddddddd, 0xeeeeeeee, 0xffffffff, 0x00000000, 0x11111111, 0x22222222, 0x33333333, + ]; + let mut writer = Vec::new(); + + data_to_writer(data, 32, 8, &mut writer).unwrap(); + + assert_eq!( + writer, + vec![ + 0x22, 0x22, 0x22, 0x22, 0x11, 0x11, 0x11, 0x11, 0x44, 0x44, 0x44, 0x44, 0x33, 0x33, + 0x33, 0x33, 0x66, 0x66, 0x66, 0x66, 0x55, 0x55, 0x55, 0x55, 0x88, 0x88, 0x88, 0x88, + 0x77, 0x77, 0x77, 0x77, 0xaa, 0xaa, 0xaa, 0xaa, 0x99, 0x99, 0x99, 0x99, 0xcc, 0xcc, + 0xcc, 0xcc, 0xbb, 0xbb, 0xbb, 0xbb, 0xee, 0xee, 0xee, 0xee, 0xdd, 0xdd, 0xdd, 0xdd, + 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0x22, 0x22, 0x22, 0x22, 0x11, 0x11, + 0x11, 0x11, 0x44, 0x44, 0x44, 0x44, 0x33, 0x33, 0x33, 0x33, 0x66, 0x66, 0x66, 0x66, + 0x55, 0x55, 0x55, 0x55, 0x88, 0x88, 0x88, 0x88, 0x77, 0x77, 0x77, 0x77, 0xaa, 0xaa, + 0xaa, 0xaa, 0x99, 0x99, 0x99, 0x99, 0xcc, 0xcc, 0xcc, 0xcc, 0xbb, 0xbb, 0xbb, 0xbb, + 0xee, 0xee, 0xee, 0xee, 0xdd, 0xdd, 0xdd, 0xdd, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, + 0xff, 0xff, 0x22, 0x22, 0x22, 0x22, 0x11, 0x11, 0x11, 0x11, 0x00, 0x00, 0x00, 0x00, + 0x33, 0x33, 0x33, 0x33 + ] + ); + } + + #[test] + fn test_gcd() { + assert_eq!(gcd(40, 64), 8); + assert_eq!(gcd(64, 40), 8); + assert_eq!(gcd(64, 64), 64); + assert_eq!(gcd(32, 64), 32); + } +} diff --git a/codecov.yml b/codecov.yml index 030fcbe..0a92f62 100644 --- a/codecov.yml +++ b/codecov.yml @@ -3,32 +3,52 @@ coverage: project: default: target: 90% + bitarray: + flags: + - bitarray fa-compression: - target: 90% flags: - fa-compression - sa-mappings: - target: 90% + 
sa-builder: flags: - - sa-mappings - patch: - default: - target: 90% - fa-compression: - target: 90% + - sa-builder + sa-compression: flags: - - fa-compression + - sa-compression + sa-index: + target: 80% + flags: + - sa-index sa-mappings: - target: 90% flags: - sa-mappings + patch: off flags: + bitarray: + paths: + - bitarray + carryforward: true fa-compression: paths: - fa-compression carryforward: true + sa-builder: + paths: + - sa-builder + carryforward: true + sa-compression: + paths: + - sa-compression + carryforward: true + sa-index: + paths: + - sa-index + carryforward: true sa-mappings: paths: - sa-mappings carryforward: true + +ignore: + - "**/main.rs" # Ignore main.rs files diff --git a/fa-compression/src/algorithm1/decode.rs b/fa-compression/src/algorithm1/decode.rs index 453f107..5295a58 100644 --- a/fa-compression/src/algorithm1/decode.rs +++ b/fa-compression/src/algorithm1/decode.rs @@ -28,9 +28,9 @@ static PREFIXES: [&str; 3] = ["EC:", "GO:", "IPR:IPR"]; /// ``` /// use fa_compression::algorithm1::decode; /// -/// let input = &[ 44, 44, 44, 189, 17, 26, 56, 173, 18, 116, 117, 225, 67, 116, 110, 17, 153, 39 ]; +/// let input = &[ 44, 44, 44, 190, 17, 26, 56, 174, 18, 116, 117 ]; /// let result = decode(input); -/// assert_eq!(result, "EC:1.1.1.-;GO:0009279;IPR:IPR016364;IPR:IPR032635;IPR:IPR008816"); +/// assert_eq!(result, "EC:1.1.1.-;GO:0009279;IPR:IPR016364"); /// ``` pub fn decode(input: &[u8]) -> String { if input.is_empty() { @@ -82,23 +82,23 @@ mod tests { #[test] fn test_decode_single_ec() { - assert_eq!(decode(&[44, 44, 44, 189, 208]), "EC:1.1.1.-") + assert_eq!(decode(&[44, 44, 44, 190, 224]), "EC:1.1.1.-") } #[test] fn test_decode_single_go() { - assert_eq!(decode(&[209, 17, 163, 138, 208]), "GO:0009279") + assert_eq!(decode(&[225, 17, 163, 138, 224]), "GO:0009279") } #[test] fn test_decode_single_ipr() { - assert_eq!(decode(&[221, 18, 116, 117]), "IPR:IPR016364") + assert_eq!(decode(&[238, 18, 116, 117]), "IPR:IPR016364") } #[test] fn test_decode_no_ec() { assert_eq!( - decode(&[209, 17, 163, 138, 209, 39, 71, 94, 17, 153, 39]), + decode(&[225, 17, 163, 138, 225, 39, 71, 95, 17, 153, 39]), "GO:0009279;IPR:IPR016364;IPR:IPR008816" ) } @@ -106,7 +106,7 @@ mod tests { #[test] fn test_decode_no_go() { assert_eq!( - decode(&[44, 44, 44, 190, 44, 60, 44, 141, 209, 39, 71, 80]), + decode(&[44, 44, 44, 191, 44, 60, 44, 142, 225, 39, 71, 80]), "EC:1.1.1.-;EC:1.2.1.7;IPR:IPR016364" ) } @@ -114,7 +114,7 @@ mod tests { #[test] fn test_decode_no_ipr() { assert_eq!( - decode(&[44, 44, 44, 189, 17, 26, 56, 174, 17, 26, 56, 173]), + decode(&[44, 44, 44, 190, 17, 26, 56, 175, 17, 26, 56, 174]), "EC:1.1.1.-;GO:0009279;GO:0009279" ) } @@ -123,7 +123,7 @@ mod tests { fn test_decode_all() { assert_eq!( decode(&[ - 44, 44, 44, 189, 17, 26, 56, 173, 18, 116, 117, 225, 67, 116, 110, 17, 153, 39 + 44, 44, 44, 190, 17, 26, 56, 174, 18, 116, 117, 241, 67, 116, 111, 17, 153, 39 ]), "EC:1.1.1.-;GO:0009279;IPR:IPR016364;IPR:IPR032635;IPR:IPR008816" ) diff --git a/fa-compression/src/algorithm1/encode.rs b/fa-compression/src/algorithm1/encode.rs index 8cce9cd..0877c9a 100644 --- a/fa-compression/src/algorithm1/encode.rs +++ b/fa-compression/src/algorithm1/encode.rs @@ -28,7 +28,7 @@ use super::{ /// let input = "IPR:IPR016364;EC:1.1.1.-;GO:0009279"; /// let encoded = encode(input); /// -/// assert_eq!(encoded, vec![ 44, 44, 44, 189, 17, 26, 56, 173, 18, 116, 117 ]); +/// assert_eq!(encoded, vec![ 44, 44, 44, 190, 17, 26, 56, 174, 18, 116, 117 ]); /// ``` pub fn encode(input: &str) -> Vec { if 
input.is_empty() { @@ -94,24 +94,24 @@ mod tests { #[test] fn test_encode_single_ec() { - assert_eq!(encode("EC:1.1.1.-"), vec![44, 44, 44, 189, 208]) + assert_eq!(encode("EC:1.1.1.-"), vec![44, 44, 44, 190, 224]) } #[test] fn test_encode_single_go() { - assert_eq!(encode("GO:0009279"), vec![209, 17, 163, 138, 208]) + assert_eq!(encode("GO:0009279"), vec![225, 17, 163, 138, 224]) } #[test] fn test_encode_single_ipr() { - assert_eq!(encode("IPR:IPR016364"), vec![221, 18, 116, 117]) + assert_eq!(encode("IPR:IPR016364"), vec![238, 18, 116, 117]) } #[test] fn test_encode_no_ec() { assert_eq!( encode("IPR:IPR016364;GO:0009279;IPR:IPR008816"), - vec![209, 17, 163, 138, 209, 39, 71, 94, 17, 153, 39] + vec![225, 17, 163, 138, 225, 39, 71, 95, 17, 153, 39] ) } @@ -119,7 +119,7 @@ mod tests { fn test_encode_no_go() { assert_eq!( encode("IPR:IPR016364;EC:1.1.1.-;EC:1.2.1.7"), - vec![44, 44, 44, 190, 44, 60, 44, 141, 209, 39, 71, 80] + vec![44, 44, 44, 191, 44, 60, 44, 142, 225, 39, 71, 80] ) } @@ -127,7 +127,7 @@ mod tests { fn test_encode_no_ipr() { assert_eq!( encode("EC:1.1.1.-;GO:0009279;GO:0009279"), - vec![44, 44, 44, 189, 17, 26, 56, 174, 17, 26, 56, 173] + vec![44, 44, 44, 190, 17, 26, 56, 175, 17, 26, 56, 174] ) } @@ -135,7 +135,7 @@ mod tests { fn test_encode_all() { assert_eq!( encode("IPR:IPR016364;EC:1.1.1.-;IPR:IPR032635;GO:0009279;IPR:IPR008816"), - vec![44, 44, 44, 189, 17, 26, 56, 173, 18, 116, 117, 225, 67, 116, 110, 17, 153, 39] + vec![44, 44, 44, 190, 17, 26, 56, 174, 18, 116, 117, 241, 67, 116, 111, 17, 153, 39] ) } } diff --git a/fa-compression/src/algorithm1/mod.rs b/fa-compression/src/algorithm1/mod.rs index 3a1d9b4..a495c9e 100644 --- a/fa-compression/src/algorithm1/mod.rs +++ b/fa-compression/src/algorithm1/mod.rs @@ -73,6 +73,7 @@ enum CharacterSet { /// Special Enzyme Commission characters Dash, Point, + Preliminary, /// Different annotation type separator Comma, @@ -106,6 +107,7 @@ impl Encode for CharacterSet { b'9' => CharacterSet::Nine, b'-' => CharacterSet::Dash, b'.' 
=> CharacterSet::Point, + b'n' => CharacterSet::Preliminary, b',' => CharacterSet::Comma, b';' => CharacterSet::Semicolon, _ => panic!("Invalid character") @@ -138,8 +140,9 @@ impl Decode for CharacterSet { 10 => '9', 11 => '-', 12 => '.', - 13 => ',', - 14 => ';', + 13 => 'n', + 14 => ',', + 15 => ';', _ => panic!("Invalid character") } } @@ -167,10 +170,12 @@ impl BitOr for CharacterSet { mod tests { use super::*; - static CHARACTERS: [u8; 15] = - [b'$', b'0', b'1', b'2', b'3', b'4', b'5', b'6', b'7', b'8', b'9', b'-', b'.', b',', b';']; + static CHARACTERS: [u8; 16] = [ + b'$', b'0', b'1', b'2', b'3', b'4', b'5', b'6', b'7', b'8', b'9', b'-', b'.', b'n', b',', + b';' + ]; - static CHARACTER_SETS: [CharacterSet; 15] = [ + static CHARACTER_SETS: [CharacterSet; 16] = [ CharacterSet::Empty, CharacterSet::Zero, CharacterSet::One, @@ -184,6 +189,7 @@ mod tests { CharacterSet::Nine, CharacterSet::Dash, CharacterSet::Point, + CharacterSet::Preliminary, CharacterSet::Comma, CharacterSet::Semicolon ]; @@ -233,13 +239,7 @@ mod tests { #[test] #[should_panic] fn test_decode_invalid() { - CharacterSet::decode(15); - } - - #[test] - #[should_panic] - fn test_decode_pair_invalid() { - CharacterSet::decode_pair(0b11111111); + CharacterSet::decode(16); } #[test] diff --git a/sa-builder/Cargo.toml b/sa-builder/Cargo.toml index 9937384..cb7d553 100644 --- a/sa-builder/Cargo.toml +++ b/sa-builder/Cargo.toml @@ -10,3 +10,5 @@ clap = { version = "4.4.8", features = ["derive"] } libsais64-rs = { path = "../libsais64-rs" } libdivsufsort-rs = "0.1.0" sa-mappings = { path = "../sa-mappings" } +sa-compression = { path = "../sa-compression" } +sa-index = { path = "../sa-index" } diff --git a/sa-builder/README.md b/sa-builder/README.md new file mode 100644 index 0000000..5f51b07 --- /dev/null +++ b/sa-builder/README.md @@ -0,0 +1,28 @@ +Suffix Array Builder +==================== + +A rust implementation to build large generalized suffix arrays. + +# Usage + +```plain +Build a (sparse, compressed) suffix array from the given text + +Usage: sa-builder [OPTIONS] --database-file --taxonomy --output + +Options: + -d, --database-file + File with the proteins used to build the suffix tree. All the proteins are expected to be concatenated using a hashtag `#` + -t, --taxonomy + The taxonomy to be used as a tsv file. 
This is a preprocessed version of the NCBI taxonomy + -o, --output + Output location where to store the suffix array + -s, --sparseness-factor + The sparseness_factor used on the suffix array (default value 1, which means every value in the SA is used) [default: 1] + -a, --construction-algorithm + The algorithm used to construct the suffix array (default value LibSais) [default: lib-sais] [possible values: lib-div-suf-sort, lib-sais] + -c, --compress-sa + If the suffix array should be compressed (default value true) + -h, --help + Print help +``` diff --git a/sa-builder/src/binary.rs b/sa-builder/src/binary.rs deleted file mode 100644 index ad417d3..0000000 --- a/sa-builder/src/binary.rs +++ /dev/null @@ -1,149 +0,0 @@ -use std::{ - cmp::min, - error::Error, - fs::{ - File, - OpenOptions - }, - io::{ - Read, - Write - } -}; - -const ONE_GIB: usize = 2usize.pow(30); - -/// Trait implemented by structs that are binary serializable -/// In our case this is will be a [i64] since the suffix array is a Vec -pub trait Serializable { - /// Serializes self into a vector of bytes - /// - /// # Returns - /// - /// Returns a vector of bytes - fn serialize(&self) -> Vec; -} - -impl Serializable for [i64] { - fn serialize(&self) -> Vec { - let mut res = vec![]; - self.iter() - .for_each(|entry| res.extend_from_slice(&entry.to_le_bytes())); - res - } -} - -/// Deserializes a vector of bytes into the suffix array -/// -/// # Arguments -/// * `data` - The raw bytes needed to be serialized into a suffix array -/// -/// # Returns -/// -/// Returns the suffix array, a Vec -fn deserialize_sa(data: &[u8]) -> Vec { - let mut res = vec![]; - if data.len() % 8 != 0 { - panic!("Serialized data is not a multiple of 8 bytes long!") - } - for start in (0 .. data.len()).step_by(8) { - res.push(i64::from_le_bytes(data[start .. start + 8].try_into().unwrap())); - } - res -} - -/// Writes the given suffix array with the `sparseness_factor` factor to the given file -/// -/// # Arguments -/// * `sparseness_factor` - The sparseness factor of the suffix array -/// * `suffix_array` - The suffix array -/// * `filename` - The name of the file we want to write the suffix array to -/// -/// # Returns -/// -/// Returns () if writing away the suffix array succeeded -/// -/// # Errors -/// -/// Returns an io::Error if writing away the suffix array failed -pub fn write_suffix_array( - sparseness_factor: u8, - suffix_array: &[i64], - filename: &str -) -> Result<(), std::io::Error> { - // create the file - let mut f = OpenOptions::new() - .create(true) - .write(true) - .truncate(true) // if the file already exists, empty the file - .open(filename)?; - f.write_all(&[sparseness_factor])?; // write the sample rate as the first byte - - // write 1 GiB at a time, to minimize extra used memory since we need to translate i64 to [u8; - // 8] - let sa_len = suffix_array.len(); - for start_index in (0 .. sa_len).step_by(ONE_GIB / 8) { - let end_index = min(start_index + ONE_GIB / 8, sa_len); - f.write_all(&suffix_array[start_index .. 
end_index].serialize())?; - } - - Ok(()) -} - -/// Loads the suffix array from the file with the given `filename` -/// -/// # Arguments -/// * `filename` - The filename of the file where the suffix array is stored -/// -/// # Returns -/// -/// Returns the sample rate of the suffix array, together with the suffix array -/// -/// # Errors -/// -/// Returns any error from opening the file or reading the file -pub fn load_suffix_array(filename: &str) -> Result<(u8, Vec), Box> { - let mut file = &File::open(filename)?; - let mut sparseness_factor_buffer = [0_u8; 1]; - file.read_exact(&mut sparseness_factor_buffer) - .map_err(|_| "Could not read the sample rate from the binary file")?; - let sparseness_factor = sparseness_factor_buffer[0]; - - let mut sa = vec![]; - loop { - let mut buffer = vec![]; - // use take in combination with read_to_end to ensure that the buffer will be completely - // filled (except when the file is smaller than the buffer) - let count = file.take(ONE_GIB as u64).read_to_end(&mut buffer)?; - if count == 0 { - break; - } - sa.extend_from_slice(&deserialize_sa(&buffer[.. count])); - } - - Ok((sparseness_factor, sa)) -} - -#[cfg(test)] -mod tests { - use crate::binary::{ - deserialize_sa, - Serializable - }; - - #[test] - fn test_serialize_deserialize() { - let data: Vec = vec![5, 2165487362, -12315135]; - let serialized = data.serialize(); - let deserialized = deserialize_sa(serialized.as_ref()); - assert_eq!(data, deserialized); - } - - #[test] - fn test_serialize_deserialize_empty() { - let data: Vec = vec![]; - let serialized = data.serialize(); - let deserialized = deserialize_sa(serialized.as_ref()); - assert_eq!(data, deserialized); - } -} diff --git a/sa-builder/src/lib.rs b/sa-builder/src/lib.rs index 65092e5..98ac63d 100644 --- a/sa-builder/src/lib.rs +++ b/sa-builder/src/lib.rs @@ -1,5 +1,3 @@ -pub mod binary; - use std::error::Error; use clap::{ @@ -7,25 +5,29 @@ use clap::{ ValueEnum }; -/// Enum that represents all possible commandline arguments +/// Build a (sparse, compressed) suffix array from the given text #[derive(Parser, Debug)] pub struct Arguments { /// File with the proteins used to build the suffix tree. All the proteins are expected to be - /// concatenated using a `#`. + /// concatenated using a hashtag `#`. #[arg(short, long)] pub database_file: String, - #[arg(short, long)] /// The taxonomy to be used as a tsv file. This is a preprocessed version of the NCBI taxonomy. + #[arg(short, long)] pub taxonomy: String, - /// Output file to store the built index. 
+ /// Output location where to store the suffix array #[arg(short, long)] pub output: String, /// The sparseness_factor used on the suffix array (default value 1, which means every value in /// the SA is used) - #[arg(long, default_value_t = 1)] + #[arg(short, long, default_value_t = 1)] pub sparseness_factor: u8, - #[arg(short, long, value_enum, default_value_t = SAConstructionAlgorithm::LibSais)] - pub construction_algorithm: SAConstructionAlgorithm + /// The algorithm used to construct the suffix array (default value LibSais) + #[arg(short('a'), long, value_enum, default_value_t = SAConstructionAlgorithm::LibSais)] + pub construction_algorithm: SAConstructionAlgorithm, + /// If the suffix array should be compressed (default value true) + #[arg(short, long, default_value_t = false)] + pub compress_sa: bool } /// Enum representing the two possible algorithms to construct the suffix array @@ -35,51 +37,185 @@ pub enum SAConstructionAlgorithm { LibSais } -/// Gets the current time in ms +/// Build a sparse suffix array from the given text /// /// # Arguments -/// * `data` - The text on which we want to build the suffix array +/// * `text` - The text on which we want to build the suffix array /// * `construction_algorithm` - The algorithm used during construction /// * `sparseness_factor` - The sparseness factor used on the suffix array /// /// # Returns /// -/// Returns the constructed suffix array +/// Returns the constructed (sparse) suffix array /// /// # Errors /// /// The errors that occurred during the building of the suffix array itself -pub fn build_sa( - data: &mut Vec, +pub fn build_ssa( + text: &mut Vec, construction_algorithm: &SAConstructionAlgorithm, sparseness_factor: u8 ) -> Result, Box> { // translate all L's to a I - for character in data.iter_mut() { + translate_l_to_i(text); + + // Build the suffix array using the selected algorithm + let mut sa = match construction_algorithm { + SAConstructionAlgorithm::LibSais => libsais64_rs::sais64(text), + SAConstructionAlgorithm::LibDivSufSort => libdivsufsort_rs::divsufsort64(text) + } + .ok_or("Building suffix array failed")?; + + // make the SA sparse and decrease the vector size if we have sampling (sampling_rate > 1) + sample_sa(&mut sa, sparseness_factor); + + Ok(sa) +} + +/// Translate all L's to I's in the given text +/// +/// # Arguments +/// * `text` - The text in which we want to translate the L's to I's +/// +/// # Returns +/// +/// The text with all L's translated to I's +fn translate_l_to_i(text: &mut [u8]) { + for character in text.iter_mut() { if *character == b'L' { *character = b'I' } } +} - let mut sa = match construction_algorithm { - SAConstructionAlgorithm::LibSais => libsais64_rs::sais64(data), - SAConstructionAlgorithm::LibDivSufSort => libdivsufsort_rs::divsufsort64(data) +/// Sample the suffix array with the given sparseness factor +/// +/// # Arguments +/// * `sa` - The suffix array that we want to sample +/// * `sparseness_factor` - The sparseness factor used for sampling +/// +/// # Returns +/// +/// The sampled suffix array +fn sample_sa(sa: &mut Vec, sparseness_factor: u8) { + if sparseness_factor <= 1 { + return; } - .ok_or("Building suffix array failed")?; - // make the SA sparse and decrease the vector size if we have sampling (== sampling_rate > 1) - if sparseness_factor > 1 { - let mut current_sampled_index = 0; - for i in 0 .. 
sa.len() { - let current_sa_val = sa[i]; - if current_sa_val % sparseness_factor as i64 == 0 { - sa[current_sampled_index] = current_sa_val; - current_sampled_index += 1; - } + let mut current_sampled_index = 0; + for i in 0 .. sa.len() { + let current_sa_val = sa[i]; + if current_sa_val % sparseness_factor as i64 == 0 { + sa[current_sampled_index] = current_sa_val; + current_sampled_index += 1; } - // make shorter - sa.resize(current_sampled_index, 0); } - Ok(sa) + // make shorter + sa.resize(current_sampled_index, 0); +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_arguments() { + let args = Arguments::parse_from(&[ + "sa-builder", + "--database-file", + "database.fa", + "--taxonomy", + "taxonomy.tsv", + "--output", + "output.fa", + "--sparseness-factor", + "2", + "--construction-algorithm", + "lib-div-suf-sort", + "--compress-sa" + ]); + + assert_eq!(args.database_file, "database.fa"); + assert_eq!(args.taxonomy, "taxonomy.tsv"); + assert_eq!(args.output, "output.fa"); + assert_eq!(args.sparseness_factor, 2); + assert_eq!(args.construction_algorithm, SAConstructionAlgorithm::LibDivSufSort); + assert_eq!(args.compress_sa, true); + } + + #[test] + fn test_sa_construction_algorithm() { + assert_eq!( + SAConstructionAlgorithm::from_str("lib-div-suf-sort", false), + Ok(SAConstructionAlgorithm::LibDivSufSort) + ); + assert_eq!( + SAConstructionAlgorithm::from_str("lib-sais", false), + Ok(SAConstructionAlgorithm::LibSais) + ); + } + + #[test] + fn test_build_ssa_libsais() { + let mut text = b"ABRACADABRA$".to_vec(); + let sa = build_ssa(&mut text, &SAConstructionAlgorithm::LibSais, 1).unwrap(); + assert_eq!(sa, vec![11, 10, 7, 0, 3, 5, 8, 1, 4, 6, 9, 2]); + } + + #[test] + fn test_build_ssa_libsais_empty() { + let mut text = b"".to_vec(); + let sa = build_ssa(&mut text, &SAConstructionAlgorithm::LibSais, 1).unwrap(); + assert_eq!(sa, vec![]); + } + + #[test] + fn test_build_ssa_libsais_sparse() { + let mut text = b"ABRACADABRA$".to_vec(); + let sa = build_ssa(&mut text, &SAConstructionAlgorithm::LibSais, 2).unwrap(); + assert_eq!(sa, vec![10, 0, 8, 4, 6, 2]); + } + + #[test] + fn test_build_ssa_libdivsufsort() { + let mut text = b"ABRACADABRA$".to_vec(); + let sa = build_ssa(&mut text, &SAConstructionAlgorithm::LibDivSufSort, 1).unwrap(); + assert_eq!(sa, vec![11, 10, 7, 0, 3, 5, 8, 1, 4, 6, 9, 2]); + } + + #[test] + fn test_build_ssa_libdivsufsort_empty() { + let mut text = b"".to_vec(); + let sa = build_ssa(&mut text, &SAConstructionAlgorithm::LibDivSufSort, 1).unwrap(); + assert_eq!(sa, vec![]); + } + + #[test] + fn test_build_ssa_libdivsufsort_sparse() { + let mut text = b"ABRACADABRA$".to_vec(); + let sa = build_ssa(&mut text, &SAConstructionAlgorithm::LibDivSufSort, 2).unwrap(); + assert_eq!(sa, vec![10, 0, 8, 4, 6, 2]); + } + + #[test] + fn test_translate_l_to_i() { + let mut text = b"ABCDEFGHIJKLMNOPQRSTUVWXYZ$-".to_vec(); + translate_l_to_i(&mut text); + assert_eq!(text, b"ABCDEFGHIJKIMNOPQRSTUVWXYZ$-".to_vec()); + } + + #[test] + fn test_sample_sa_1() { + let mut sa = vec![0, 1, 2, 3, 4, 5, 6, 7, 8, 9]; + sample_sa(&mut sa, 1); + assert_eq!(sa, vec![0, 1, 2, 3, 4, 5, 6, 7, 8, 9]); + } + + #[test] + fn test_sample_sa_2() { + let mut sa = vec![0, 1, 2, 3, 4, 5, 6, 7, 8, 9]; + sample_sa(&mut sa, 2); + assert_eq!(sa, vec![0, 2, 4, 6, 8]); + } } diff --git a/sa-builder/src/main.rs b/sa-builder/src/main.rs index e651673..812bd7d 100644 --- a/sa-builder/src/main.rs +++ b/sa-builder/src/main.rs @@ -1,9 +1,18 @@ +use std::{ + fs::{ + File, + OpenOptions + }, + io::Result 
+}; + use clap::Parser; use sa_builder::{ - binary::write_suffix_array, - build_sa, + build_ssa, Arguments }; +use sa_compression::dump_compressed_suffix_array; +use sa_index::binary::dump_suffix_array; use sa_mappings::{ proteins::Proteins, taxonomy::{ @@ -13,42 +22,75 @@ use sa_mappings::{ }; fn main() { - let args = Arguments::parse(); let Arguments { database_file, taxonomy, output, sparseness_factor, - construction_algorithm - } = args; + construction_algorithm, + compress_sa + } = Arguments::parse(); + + eprintln!(); + eprintln!("📋 Started loading the taxon file..."); let taxon_id_calculator = - TaxonAggregator::try_from_taxonomy_file(&taxonomy, AggregationMethod::LcaStar); - if let Err(err) = taxon_id_calculator { - eprintln!("{}", err); - std::process::exit(1); - } + TaxonAggregator::try_from_taxonomy_file(&taxonomy, AggregationMethod::LcaStar) + .unwrap_or_else(|err| eprint_and_exit(err.to_string().as_str())); + eprintln!("✅ Successfully loaded the taxon file!"); + eprintln!("\tAggregation method: LCA*"); - let taxon_id_calculator = taxon_id_calculator.unwrap(); + eprintln!(); + eprintln!("📋 Started loading the proteins..."); + let mut data = + Proteins::try_from_database_file_without_annotations(&database_file, &taxon_id_calculator) + .unwrap_or_else(|err| eprint_and_exit(err.to_string().as_str())); + eprintln!("✅ Successfully loaded the proteins!"); - // read input - let data = - Proteins::try_from_database_file_without_annotations(&database_file, &taxon_id_calculator); - if let Err(err) = data { - eprintln!("{}", err); - std::process::exit(1); - } - let mut data = data.unwrap(); - // calculate sa - let sa = build_sa(&mut data, &construction_algorithm, sparseness_factor); - if let Err(err) = sa { - eprintln!("{}", err); - std::process::exit(1); + eprintln!(); + eprintln!("📋 Started building the suffix array..."); + let sa = build_ssa(&mut data, &construction_algorithm, sparseness_factor) + .unwrap_or_else(|err| eprint_and_exit(err.to_string().as_str())); + eprintln!("✅ Successfully built the suffix array!"); + eprintln!("\tAmount of items: {}", sa.len()); + eprintln!("\tSample rate: {}", sparseness_factor); + + // open the output file + let mut file = + open_file(&output).unwrap_or_else(|err| eprint_and_exit(err.to_string().as_str())); + + eprintln!(); + eprintln!("📋 Started dumping the suffix array..."); + + if compress_sa { + let bits_per_value = (data.len() as f64).log2().ceil() as usize; + + if let Err(err) = + dump_compressed_suffix_array(sa, sparseness_factor, bits_per_value, &mut file) + { + eprint_and_exit(err.to_string().as_str()); + }; + + eprintln!("✅ Successfully dumped the suffix array!"); + eprintln!("\tAmount of bits per item: {}", bits_per_value); + } else { + if let Err(err) = dump_suffix_array(&sa, sparseness_factor, &mut file) { + eprint_and_exit(err.to_string().as_str()); + } + + eprintln!("✅ Successfully dumped the suffix array!"); + eprintln!("\tAmount of bits per item: 64"); } - let sa = sa.unwrap(); +} + +fn open_file(file: &str) -> Result { + OpenOptions::new() + .create(true) + .write(true) + .truncate(true) // if the file already exists, empty the file + .open(file) +} - // output the build SA - if let Err(err) = write_suffix_array(sparseness_factor, &sa, &output) { - eprintln!("{}", err); - std::process::exit(1); - }; +fn eprint_and_exit(err: &str) -> ! 
{ + eprintln!("{}", err); + std::process::exit(1); } diff --git a/sa-compression/Cargo.toml b/sa-compression/Cargo.toml new file mode 100644 index 0000000..70a6cbf --- /dev/null +++ b/sa-compression/Cargo.toml @@ -0,0 +1,10 @@ +[package] +name = "sa-compression" +version = "0.1.0" +edition = "2021" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +bitarray = { path = "../bitarray" } +sa-index = { path = "../sa-index" } diff --git a/sa-compression/src/lib.rs b/sa-compression/src/lib.rs new file mode 100644 index 0000000..85a41df --- /dev/null +++ b/sa-compression/src/lib.rs @@ -0,0 +1,271 @@ +use std::{ + error::Error, + io::{ + BufRead, + Write + } +}; + +use bitarray::{ + data_to_writer, + Binary, + BitArray +}; +use sa_index::SuffixArray; + +/// Writes the compressed suffix array to a writer. +/// +/// # Arguments +/// +/// * `sa` - The suffix array to be compressed. +/// * `sparseness_factor` - The sparseness factor used for compression. +/// * `bits_per_value` - The number of bits used to represent each value in the compressed array. +/// * `writer` - The writer to which the compressed array will be written. +/// +/// # Errors +/// +/// Returns an error if writing to the writer fails. +pub fn dump_compressed_suffix_array( + sa: Vec, + sparseness_factor: u8, + bits_per_value: usize, + writer: &mut impl Write +) -> Result<(), Box> { + // Write the flags to the writer + // 00000001 indicates that the suffix array is compressed + writer + .write(&[bits_per_value as u8]) + .map_err(|_| "Could not write the required bits to the writer")?; + + // Write the sparseness factor to the writer + writer + .write(&[sparseness_factor]) + .map_err(|_| "Could not write the sparseness factor to the writer")?; + + // Write the size of the suffix array to the writer + writer + .write(&(sa.len() as u64).to_le_bytes()) + .map_err(|_| "Could not write the size of the suffix array to the writer")?; + + // Compress the suffix array and write it to the writer + data_to_writer(sa, bits_per_value, 8 * 1024, writer) + .map_err(|_| "Could not write the compressed suffix array to the writer")?; + + Ok(()) +} + +/// Load the compressed suffix array from a reader. +/// +/// # Arguments +/// +/// * `reader` - The reader from which the compressed array will be read. +/// * `bits_per_value` - The number of bits used to represent each value in the compressed array. +/// +/// # Errors +/// +/// Returns an error if reading from the reader fails. 
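A minimal, hypothetical round-trip of `dump_compressed_suffix_array` and the loader defined below (toy values and sparseness factor; `sample_rate` and `get` are the `SuffixArray` accessors exercised by this patch's tests):

```rust
use sa_compression::{dump_compressed_suffix_array, load_compressed_suffix_array};

fn roundtrip() -> Result<(), Box<dyn std::error::Error>> {
    let sa: Vec<i64> = vec![0, 3, 6, 9, 12];

    // 8 bits per value is enough for these toy offsets; the builder normally
    // derives this as ceil(log2(text length)).
    let mut serialized = Vec::new();
    dump_compressed_suffix_array(sa, 3, 8, &mut serialized)?;

    // The leading bits_per_value byte doubles as the "compressed" flag and is
    // consumed by the caller, so the loader starts at the sparseness factor.
    let mut reader = std::io::BufReader::new(&serialized[1..]);
    let loaded = load_compressed_suffix_array(&mut reader, 8)?;

    assert_eq!(loaded.sample_rate(), 3);
    assert_eq!(loaded.get(4), 12);
    Ok(())
}
```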
+pub fn load_compressed_suffix_array( + reader: &mut impl BufRead, + bits_per_value: usize +) -> Result> { + // Read the sample rate from the binary file (1 byte) + let mut sample_rate_buffer = [0_u8; 1]; + reader + .read_exact(&mut sample_rate_buffer) + .map_err(|_| "Could not read the sample rate from the binary file")?; + let sample_rate = sample_rate_buffer[0]; + + // Read the size of the suffix array from the binary file (8 bytes) + let mut size_buffer = [0_u8; 8]; + reader + .read_exact(&mut size_buffer) + .map_err(|_| "Could not read the size of the suffix array from the binary file")?; + let size = u64::from_le_bytes(size_buffer) as usize; + + // Read the compressed suffix array from the binary file + let mut compressed_suffix_array = BitArray::with_capacity(size, bits_per_value); + compressed_suffix_array + .read_binary(reader) + .map_err(|_| "Could not read the compressed suffix array from the binary file")?; + + Ok(SuffixArray::Compressed(compressed_suffix_array, sample_rate)) +} + +#[cfg(test)] +mod tests { + use std::io::Read; + + use super::*; + + pub struct FailingWriter { + /// The number of times the write function can be called before it fails. + pub valid_write_count: usize + } + + impl Write for FailingWriter { + fn write(&mut self, _: &[u8]) -> Result { + if self.valid_write_count == 0 { + return Err(std::io::Error::new(std::io::ErrorKind::Other, "Write failed")); + } + + self.valid_write_count -= 1; + Ok(1) + } + + fn flush(&mut self) -> Result<(), std::io::Error> { + Ok(()) + } + } + + pub struct FailingReader { + /// The number of times the read function can be called before it fails. + pub valid_read_count: usize + } + + impl Read for FailingReader { + fn read(&mut self, buf: &mut [u8]) -> std::io::Result { + if self.valid_read_count == 0 { + return Err(std::io::Error::new(std::io::ErrorKind::Other, "Read failed")); + } + + self.valid_read_count -= 1; + Ok(buf.len()) + } + } + + impl BufRead for FailingReader { + fn fill_buf(&mut self) -> std::io::Result<&[u8]> { + Ok(&[]) + } + + fn consume(&mut self, _: usize) {} + } + + #[test] + fn test_dump_compressed_suffix_array() { + let sa = vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10]; + + let mut writer = vec![]; + dump_compressed_suffix_array(sa, 1, 8, &mut writer).unwrap(); + + assert_eq!( + writer, + vec![ + // bits per value + 8, // sparseness factor + 1, // size of the suffix array + 10, 0, 0, 0, 0, 0, 0, 0, // compressed suffix array + 8, 7, 6, 5, 4, 3, 2, 1, 0, 0, 0, 0, 0, 0, 10, 9 + ] + ); + } + + #[test] + #[should_panic(expected = "Could not write the required bits to the writer")] + fn test_dump_compressed_suffix_array_fail_required_bits() { + let mut writer = FailingWriter { + valid_write_count: 0 + }; + + dump_compressed_suffix_array(vec![], 1, 8, &mut writer).unwrap(); + } + + #[test] + #[should_panic(expected = "Could not write the sparseness factor to the writer")] + fn test_dump_compressed_suffix_array_fail_sparseness_factor() { + let mut writer = FailingWriter { + valid_write_count: 1 + }; + + dump_compressed_suffix_array(vec![], 1, 8, &mut writer).unwrap(); + } + + #[test] + #[should_panic(expected = "Could not write the size of the suffix array to the writer")] + fn test_dump_compressed_suffix_array_fail_size() { + let mut writer = FailingWriter { + valid_write_count: 2 + }; + + dump_compressed_suffix_array(vec![], 1, 8, &mut writer).unwrap(); + } + + #[test] + #[should_panic(expected = "Could not write the compressed suffix array to the writer")] + fn 
test_dump_compressed_suffix_array_fail_compressed_suffix_array() { + let mut writer = FailingWriter { + valid_write_count: 3 + }; + + dump_compressed_suffix_array(vec![1], 1, 8, &mut writer).unwrap(); + } + + #[test] + fn test_load_compressed_suffix_array() { + let data = vec![ + // sparseness factor + 1, // size of the suffix array + 10, 0, 0, 0, 0, 0, 0, 0, // compressed suffix array + 8, 7, 6, 5, 4, 3, 2, 1, 0, 0, 0, 0, 0, 0, 10, 9, + ]; + + let mut reader = std::io::BufReader::new(&data[..]); + let compressed_suffix_array = load_compressed_suffix_array(&mut reader, 8).unwrap(); + + assert_eq!(compressed_suffix_array.sample_rate(), 1); + for i in 0 .. 10 { + assert_eq!(compressed_suffix_array.get(i), i as i64 + 1); + } + } + + #[test] + #[should_panic(expected = "Could not read the sample rate from the binary file")] + fn test_load_compressed_suffix_array_fail_sample_rate() { + let mut reader = FailingReader { + valid_read_count: 0 + }; + + load_compressed_suffix_array(&mut reader, 8).unwrap(); + } + + #[test] + #[should_panic(expected = "Could not read the size of the suffix array from the binary file")] + fn test_load_compressed_suffix_array_fail_size() { + let mut reader = FailingReader { + valid_read_count: 1 + }; + + load_compressed_suffix_array(&mut reader, 8).unwrap(); + } + + #[test] + #[should_panic(expected = "Could not read the compressed suffix array from the binary file")] + fn test_load_compressed_suffix_array_fail_compressed_suffix_array() { + let mut reader = FailingReader { + valid_read_count: 2 + }; + + load_compressed_suffix_array(&mut reader, 8).unwrap(); + } + + #[test] + fn test_failing_writer() { + let mut writer = FailingWriter { + valid_write_count: 0 + }; + assert!(writer.flush().is_ok()); + assert!(writer.write(&[0]).is_err()); + } + + #[test] + fn test_failing_reader() { + let mut reader = FailingReader { + valid_read_count: 0 + }; + let right_buffer: [u8; 0] = []; + assert_eq!(reader.fill_buf().unwrap(), &right_buffer); + assert_eq!(reader.consume(0), ()); + let mut buffer = [0_u8; 1]; + assert!(reader.read(&mut buffer).is_err()); + } +} diff --git a/sa-index/Cargo.toml b/sa-index/Cargo.toml index 70acb67..557549c 100644 --- a/sa-index/Cargo.toml +++ b/sa-index/Cargo.toml @@ -7,12 +7,13 @@ edition = "2021" [dev-dependencies] tempdir = "0.3.7" +fa-compression = { path = "../fa-compression" } [dependencies] clap = { version = "4.4.8", features = ["derive"] } umgap = "1.1.0" rayon = "1.8.1" serde = { version = "1.0.197", features = ["derive"] } -sa-builder = { path = "../sa-builder" } sa-mappings = { path = "../sa-mappings" } +bitarray = { path = "../bitarray" } serde_json = "1.0.116" diff --git a/sa-index/src/binary.rs b/sa-index/src/binary.rs new file mode 100644 index 0000000..5688d4a --- /dev/null +++ b/sa-index/src/binary.rs @@ -0,0 +1,416 @@ +use std::{ + error::Error, + io::{ + BufRead, + Read, + Write + } +}; + +use crate::SuffixArray; + +/// The `Binary` trait provides methods for reading and writing a struct as binary. +pub trait Binary { + /// Writes the struct as binary to the given writer. + /// + /// # Arguments + /// + /// * `writer` - The writer to write the binary data to. + /// + /// # Returns + /// + /// Returns `Ok(())` if the write operation is successful, or an `Err` if an error occurs. + fn write_binary(&self, writer: &mut W) -> std::io::Result<()>; + + /// Reads binary data into a struct from the given reader. + /// + /// # Arguments + /// + /// * `reader` - The reader to read the binary data from. 
+ /// + /// # Returns + /// + /// Returns `Ok(())` if the read operation is successful, or an `Err` if an error occurs. + fn read_binary(&mut self, reader: R) -> std::io::Result<()>; +} + +/// Implements the `Binary` trait for `Vec`. +impl Binary for Vec { + /// Writes the elements of the vector to a binary file. + /// + /// # Arguments + /// + /// * `writer` - The writer to which the binary data will be written. + /// + /// # Returns + /// + /// Returns `Ok(())` if the write operation is successful, or an `std::io::Error` otherwise. + fn write_binary(&self, writer: &mut W) -> std::io::Result<()> { + for value in self { + writer.write_all(&value.to_le_bytes())?; + } + + Ok(()) + } + + /// Reads binary data from a reader and populates the vector with the read values. + /// + /// # Arguments + /// + /// * `reader` - The reader from which the binary data will be read. + /// + /// # Returns + /// + /// Returns `Ok(())` if the read operation is successful, or an `std::io::Error` otherwise. + fn read_binary(&mut self, mut reader: R) -> std::io::Result<()> { + self.clear(); + + let mut buffer = vec![0; 8 * 1024]; + + loop { + let (finished, bytes_read) = fill_buffer(&mut reader, &mut buffer)?; + for buffer_slice in buffer[.. bytes_read].chunks_exact(8) { + self.push(i64::from_le_bytes(buffer_slice.try_into().unwrap())); + } + + if finished { + break; + } + } + + Ok(()) + } +} + +/// Writes the suffix array to a binary file. +/// +/// # Arguments +/// +/// * `sa` - The suffix array to dump. +/// * `sparseness_factor` - The sparseness factor to write to the file. +/// * `writer` - The writer to write the binary data to. +/// +/// # Returns +/// +/// Returns `Ok(())` if the write operation is successful, or an `Err` if an error occurs. +pub fn dump_suffix_array( + sa: &Vec, + sparseness_factor: u8, + writer: &mut impl Write +) -> Result<(), Box> { + // Write the required bits to the writer + // 01000000 indicates that the suffix array is not compressed + writer + .write(&[64_u8]) + .map_err(|_| "Could not write the required bits to the writer")?; + + // Write the sparseness factor to the writer + writer + .write(&[sparseness_factor]) + .map_err(|_| "Could not write the sparseness factor to the writer")?; + + // Write the size of the suffix array to the writer + let sa_len = sa.len(); + writer + .write(&(sa_len).to_le_bytes()) + .map_err(|_| "Could not write the size of the suffix array to the writer")?; + + // Write the suffix array to the writer + sa.write_binary(writer) + .map_err(|_| "Could not write the suffix array to the writer")?; + + Ok(()) +} + +/// Loads the suffix array from the file with the given `filename` +/// +/// # Arguments +/// * `filename` - The filename of the file where the suffix array is stored +/// +/// # Returns +/// +/// Returns the sample rate of the suffix array, together with the suffix array +/// +/// # Errors +/// +/// Returns any error from opening the file or reading the file +pub fn load_suffix_array(reader: &mut impl BufRead) -> Result> { + // Read the sample rate from the binary file (1 byte) + let mut sample_rate_buffer = [0_u8; 1]; + reader + .read_exact(&mut sample_rate_buffer) + .map_err(|_| "Could not read the sample rate from the binary file")?; + let sample_rate = sample_rate_buffer[0]; + + // Read the size of the suffix array from the binary file (8 bytes) + let mut size_buffer = [0_u8; 8]; + reader + .read_exact(&mut size_buffer) + .map_err(|_| "Could not read the size of the suffix array from the binary file")?; + let size = 
u64::from_le_bytes(size_buffer) as usize; + + let mut sa = Vec::with_capacity(size); + sa.read_binary(reader) + .map_err(|_| "Could not read the suffix array from the binary file")?; + + Ok(SuffixArray::Original(sa, sample_rate)) +} + +/// Fills the buffer with data read from the input. +/// +/// # Arguments +/// +/// * `input` - The input source to read data from. +/// * `buffer` - The buffer to fill with data. +/// +/// # Returns +/// +/// Returns a tuple `(finished, bytes_read)` where `finished` indicates whether the end of the input +/// is reached, and `bytes_read` is the number of bytes read into the buffer. +fn fill_buffer(input: &mut T, buffer: &mut Vec) -> std::io::Result<(bool, usize)> { + // Store the buffer size in advance, because rust will complain + // about the buffer being borrowed mutably while it's borrowed + let buffer_size = buffer.len(); + + let mut writable_buffer_space = buffer.as_mut(); + + loop { + match input.read(writable_buffer_space) { + // No bytes written, which means we've completely filled the buffer + // or we've reached the end of the file + Ok(0) => { + return Ok(( + !writable_buffer_space.is_empty(), + buffer_size - writable_buffer_space.len() + )); + } + + // We've read {bytes_read} bytes + Ok(bytes_read) => { + // Shrink the writable buffer slice + writable_buffer_space = writable_buffer_space[bytes_read ..].as_mut(); + } + + // An error occurred while reading + Err(e) => { + return Err(e); + } + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + pub struct FailingWriter { + /// The number of times the write function can be called before it fails. + pub valid_write_count: usize + } + + impl Write for FailingWriter { + fn write(&mut self, _: &[u8]) -> Result { + if self.valid_write_count == 0 { + return Err(std::io::Error::new(std::io::ErrorKind::Other, "Write failed")); + } + + self.valid_write_count -= 1; + Ok(1) + } + + fn flush(&mut self) -> Result<(), std::io::Error> { + Ok(()) + } + } + + pub struct FailingReader { + /// The number of times the read function can be called before it fails. 
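// Illustrative sketch (not part of the patched files): the uncompressed layout produced
// by `dump_suffix_array` above: one flag byte (64, i.e. 64 bits per value), one
// sparseness-factor byte, the number of values as a little-endian integer (8 bytes on a
// 64-bit target), and then each value as a little-endian i64.
fn uncompressed_format_sketch() -> Result<(), Box<dyn std::error::Error>> {
    use sa_index::binary::dump_suffix_array;

    let mut buffer = Vec::new();
    dump_suffix_array(&vec![1_i64, 2], 3, &mut buffer)?;

    assert_eq!(buffer.len(), 1 + 1 + 8 + 2 * 8);
    assert_eq!(buffer[0], 64); // not compressed
    assert_eq!(buffer[1], 3); // sparseness factor
    assert_eq!(buffer[2..10], 2_u64.to_le_bytes()); // number of values
    Ok(())
}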
+ pub valid_read_count: usize + } + + impl Read for FailingReader { + fn read(&mut self, buf: &mut [u8]) -> std::io::Result { + if self.valid_read_count == 0 { + return Err(std::io::Error::new(std::io::ErrorKind::Other, "Read failed")); + } + + self.valid_read_count -= 1; + Ok(buf.len()) + } + } + + impl BufRead for FailingReader { + fn fill_buf(&mut self) -> std::io::Result<&[u8]> { + Ok(&[]) + } + + fn consume(&mut self, _: usize) {} + } + + #[test] + fn test_fill_buffer() { + let input_str = "a".repeat(8_000); + let mut input = input_str.as_bytes(); + + let mut buffer = vec![0; 800]; + + loop { + let (finished, bytes_read) = fill_buffer(&mut input, &mut buffer).unwrap(); + + if finished { + assert!(bytes_read < 800); + break; + } else { + assert_eq!(bytes_read, 800); + } + } + } + + #[test] + fn test_fill_buffer_read_error() { + let mut input = FailingReader { + valid_read_count: 0 + }; + let mut buffer = vec![0; 800]; + + assert!(fill_buffer(&mut input, &mut buffer).is_err()); + } + + #[test] + fn test_write_binary() { + let mut buffer = Vec::new(); + let values = vec![1, 2, 3, 4, 5]; + + values.write_binary(&mut buffer).unwrap(); + + assert_eq!( + buffer, + vec![ + 1, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, + 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0 + ] + ); + } + + #[test] + fn test_read_binary() { + let buffer = vec![ + 1, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 0, + 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, + ]; + + let mut values = Vec::new(); + values.read_binary(buffer.as_slice()).unwrap(); + + assert_eq!(values, vec![1, 2, 3, 4, 5]); + } + + #[test] + fn test_dump_suffix_array() { + let mut buffer = Vec::new(); + let sa = vec![1, 2, 3, 4, 5]; + + dump_suffix_array(&sa, 1, &mut buffer).unwrap(); + + assert_eq!( + buffer, + vec![ + // required bits + 64, // Sparseness factor + 1, // Size of the suffix array + 5, 0, 0, 0, 0, 0, 0, 0, // Suffix array + 1, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, + 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0 + ] + ); + } + + #[test] + #[should_panic(expected = "Could not write the required bits to the writer")] + fn test_dump_suffix_array_fail_required_bits() { + let mut writer = FailingWriter { + valid_write_count: 0 + }; + + dump_suffix_array(&vec![], 1, &mut writer).unwrap(); + } + + #[test] + #[should_panic(expected = "Could not write the sparseness factor to the writer")] + fn test_dump_suffix_array_fail_sparseness_factor() { + let mut writer = FailingWriter { + valid_write_count: 1 + }; + + dump_suffix_array(&vec![], 1, &mut writer).unwrap(); + } + + #[test] + #[should_panic(expected = "Could not write the size of the suffix array to the writer")] + fn test_dump_suffix_array_fail_size() { + let mut writer = FailingWriter { + valid_write_count: 2 + }; + + dump_suffix_array(&vec![], 1, &mut writer).unwrap(); + } + + #[test] + #[should_panic(expected = "Could not write the suffix array to the writer")] + fn test_dump_suffix_array_fail_suffix_array() { + let mut writer = FailingWriter { + valid_write_count: 3 + }; + + dump_suffix_array(&vec![1], 1, &mut writer).unwrap(); + } + + #[test] + fn test_load_suffix_array() { + let buffer = vec![ + // Sample rate + 1, // Size of the suffix array + 5, 0, 0, 0, 0, 0, 0, 0, // Suffix array + 1, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 0, + 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, + ]; + + let mut reader = buffer.as_slice(); + let sa = load_suffix_array(&mut reader).unwrap(); + + 
assert_eq!(sa.sample_rate(), 1); + for i in 0 .. 5 { + assert_eq!(sa.get(i), i as i64 + 1); + } + } + + #[test] + #[should_panic(expected = "Could not read the sample rate from the binary file")] + fn test_load_suffix_array_fail_sample_rate() { + let mut reader = FailingReader { + valid_read_count: 0 + }; + + load_suffix_array(&mut reader).unwrap(); + } + + #[test] + #[should_panic(expected = "Could not read the size of the suffix array from the binary file")] + fn test_load_suffix_array_fail_size() { + let mut reader = FailingReader { + valid_read_count: 1 + }; + + load_suffix_array(&mut reader).unwrap(); + } + + #[test] + #[should_panic(expected = "Could not read the suffix array from the binary file")] + fn test_load_suffix_array_fail_suffix_array() { + let mut reader = FailingReader { + valid_read_count: 2 + }; + + load_suffix_array(&mut reader).unwrap(); + } +} diff --git a/sa-index/src/lib.rs b/sa-index/src/lib.rs index b017880..f276906 100644 --- a/sa-index/src/lib.rs +++ b/sa-index/src/lib.rs @@ -1,248 +1,94 @@ -use std::{ - error::Error, - num::NonZeroUsize -}; - -use clap::{ - arg, - Parser, - ValueEnum -}; -use sa_builder::{ - binary::{ - load_suffix_array, - write_suffix_array - }, - build_sa, - SAConstructionAlgorithm -}; -use sa_mappings::{ - functionality::FunctionAggregator, - proteins::Proteins, - taxonomy::{ - AggregationMethod, - TaxonAggregator - } -}; - -use crate::{ - peptide_search::{ - analyse_all_peptides, - search_all_peptides - }, - sa_searcher::Searcher, - suffix_to_protein_index::{ - DenseSuffixToProtein, - SparseSuffixToProtein, - SuffixToProteinIndex, - SuffixToProteinMappingStyle - }, - util::{ - get_time_ms, - read_lines - } -}; +use bitarray::BitArray; +pub mod binary; pub mod peptide_search; pub mod sa_searcher; pub mod suffix_to_protein_index; -pub mod util; -/// Enum that represents the 2 kinds of search that are supported -#[derive(ValueEnum, Clone, Debug, PartialEq)] -pub enum SearchMode { - Search, - Analysis +/// Represents a suffix array. +pub enum SuffixArray { + /// The original suffix array. + Original(Vec, u8), + /// The compressed suffix array. + Compressed(BitArray, u8) } -/// Enum that represents all possible commandline arguments -#[derive(Parser, Debug)] -pub struct Arguments { - /// File with the proteins used to build the suffix tree. All the proteins are expected to be - /// concatenated using a `#`. - #[arg(short, long)] - database_file: String, - #[arg(short, long)] - search_file: Option, - #[arg(short, long)] - /// The taxonomy to be used as a tsv file. This is a preprocessed version of the NCBI taxonomy. - taxonomy: String, - /// This will only build the tree and stop after that is completed. Used during benchmarking. - #[arg(long)] - build_only: bool, - /// Output file to store the built index. - #[arg(short, long)] - output: Option, - /// The sparseness factor used on the suffix array (default value 1, which means every value in - /// the SA is used) - #[arg(long, default_value_t = 1)] - sparseness_factor: u8, - /// Set the style used to map back from the suffix to the protein. 2 options or - /// . 
Dense is default Dense uses O(n) memory with n the size of the input text, and - /// takes O(1) time to find the mapping Sparse uses O(m) memory with m the number of - /// proteins, and takes O(log m) to find the mapping - #[arg(long, value_enum, default_value_t = SuffixToProteinMappingStyle::Sparse)] - suffix_to_protein_mapping: SuffixToProteinMappingStyle, - #[arg(long)] - load_index: Option, - #[arg(short, long, value_enum, default_value_t = SAConstructionAlgorithm::LibSais)] - construction_algorithm: SAConstructionAlgorithm, - /// Assume the resulting taxon ID is root (1) whenever a peptide matches >= cutoff proteins - #[arg(long, default_value_t = 10000)] - cutoff: usize, - #[arg(long)] - threads: Option, - #[arg(long)] - equalize_i_and_l: bool, - #[arg(long)] - clean_taxa: bool, - #[arg(long, value_enum, default_value_t = SearchMode::Analysis)] - search_mode: SearchMode -} - -/// Run the suffix array program -/// -/// # Arguments -/// * `args` - The commandline arguments provided to the program -/// -/// # Returns -/// -/// Returns Unit -/// -/// # Errors -/// -/// Returns all possible errors that occurred during the program -pub fn run(mut args: Arguments) -> Result<(), Box> { - let taxon_id_calculator = - TaxonAggregator::try_from_taxonomy_file(&args.taxonomy, AggregationMethod::LcaStar)?; - - let sa = match &args.load_index { - // load SA from file - Some(index_file_name) => { - let (sparseness_factor, sa) = load_suffix_array(index_file_name)?; - args.sparseness_factor = sparseness_factor; - // TODO: some kind of security check that the loaded database file and SA match - sa - } - // build the SA - None => { - let protein_sequences = - Proteins::try_from_database_file(&args.database_file, &taxon_id_calculator)?; - build_sa( - &mut protein_sequences.input_string.clone(), - &args.construction_algorithm, - args.sparseness_factor - )? +impl SuffixArray { + /// Returns the length of the suffix array. + /// + /// # Returns + /// + /// The length of the suffix array. + pub fn len(&self) -> usize { + match self { + SuffixArray::Original(sa, _) => sa.len(), + SuffixArray::Compressed(sa, _) => sa.len() } - }; - - let proteins = Proteins::try_from_database_file(&args.database_file, &taxon_id_calculator)?; - - if let Some(output) = &args.output { - write_suffix_array(args.sparseness_factor, &sa, output)?; } - // option that only builds the tree, but does not allow for querying (easy for benchmark - // purposes) - if args.build_only { - return Ok(()); + /// Returns the number of bits per value in the suffix array. + /// + /// # Returns + /// + /// The number of bits per value in the suffix array. 
+ pub fn bits_per_value(&self) -> usize { + match self { + SuffixArray::Original(_, _) => 64, + SuffixArray::Compressed(sa, _) => sa.bits_per_value() + } } - // build the right mapping index, use box to be able to store both types in this variable - let suffix_index_to_protein: Box = - match args.suffix_to_protein_mapping { - SuffixToProteinMappingStyle::Dense => { - Box::new(DenseSuffixToProtein::new(&proteins.input_string)) - } - SuffixToProteinMappingStyle::Sparse => { - Box::new(SparseSuffixToProtein::new(&proteins.input_string)) - } - }; - - let functional_aggregator = FunctionAggregator {}; - - let searcher = Searcher::new( - sa, - args.sparseness_factor, - suffix_index_to_protein, - proteins, - taxon_id_calculator, - functional_aggregator - ); - - execute_search(&searcher, &args)?; - Ok(()) -} - -/// Execute the search using the provided programs -/// -/// # Arguments -/// * `searcher` - The Searcher which contains the protein database -/// * `args` - The arguments used to start the program -/// -/// # Returns -/// -/// Returns Unit -/// -/// # Errors -/// -/// Returns possible errors that occurred during search -fn execute_search(searcher: &Searcher, args: &Arguments) -> Result<(), Box> { - let cutoff = args.cutoff; - let search_file = args - .search_file - .as_ref() - .ok_or("No peptide file provided to search in the database")?; - - let start_time = get_time_ms()?; - let lines = read_lines(search_file)?; - let all_peptides: Vec = lines.map_while(Result::ok).collect(); - - // Explicitly set the number of threads to use if the commandline argument was set - if let Some(threads) = args.threads { - rayon::ThreadPoolBuilder::new() - .num_threads(threads.get()) - .build_global()?; + /// Returns the sample rate used for the suffix array. + /// + /// # Returns + /// + /// The sample rate used for the suffix array. + pub fn sample_rate(&self) -> u8 { + match self { + SuffixArray::Original(_, sample_rate) => *sample_rate, + SuffixArray::Compressed(_, sample_rate) => *sample_rate + } } - match args.search_mode { - SearchMode::Search => { - let search_result = search_all_peptides( - searcher, - &all_peptides, - cutoff, - args.equalize_i_and_l, - args.clean_taxa - ); - println!("{}", serde_json::to_string(&search_result)?); - } - SearchMode::Analysis => { - let search_result = analyse_all_peptides( - searcher, - &all_peptides, - cutoff, - args.equalize_i_and_l, - args.clean_taxa - ); - println!("{}", serde_json::to_string(&search_result)?); + /// Returns the suffix array value at the given index. + /// + /// # Arguments + /// + /// * `index` - The index of the suffix array. + /// + /// # Returns + /// + /// The suffix array at the given index. + pub fn get(&self, index: usize) -> i64 { + match self { + SuffixArray::Original(sa, _) => sa[index], + SuffixArray::Compressed(sa, _) => sa.get(index) as i64 } } - let end_time = get_time_ms()?; - - // output to other channel to prevent integrating it into the actual output - eprintln!("Spend {} ms to search the whole file", end_time - start_time); - - Ok(()) + /// Returns whether the suffix array is empty. + /// + /// # Returns + /// + /// Returns `true` if the suffix array is empty, `false` otherwise. + pub fn is_empty(&self) -> bool { + self.len() == 0 + } } /// Custom trait implemented by types that have a value that represents NULL pub trait Nullable { const NULL: T; + /// Returns whether the value is NULL. + /// + /// # Returns + /// + /// True if the value is NULL, false otherwise. 
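// Illustrative sketch (not part of the patched files): both variants expose the same
// accessors, so code such as the searcher never has to care whether the index was
// stored compressed. A BitArray using 40 bits per value holds the same positions in
// roughly 5/8 of the space of a plain Vec<i64>.
fn suffix_array_variants_sketch() {
    use bitarray::BitArray;
    use sa_index::SuffixArray;

    let original = SuffixArray::Original(vec![1, 2, 3], 1);

    let mut bits = BitArray::with_capacity(3, 40);
    bits.set(0, 1);
    bits.set(1, 2);
    bits.set(2, 3);
    let compressed = SuffixArray::Compressed(bits, 1);

    for sa in [&original, &compressed] {
        assert_eq!(sa.len(), 3);
        assert_eq!(sa.get(2), 3);
        assert_eq!(sa.sample_rate(), 1);
    }

    assert_eq!(original.bits_per_value(), 64);
    assert_eq!(compressed.bits_per_value(), 40);
}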
fn is_null(&self) -> bool; } +/// Implementation of the `Nullable` trait for the `u32` type. impl Nullable for u32 { const NULL: u32 = u32::MAX; @@ -250,3 +96,83 @@ impl Nullable for u32 { *self == Self::NULL } } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_suffix_array_original() { + let sa = SuffixArray::Original(vec![1, 2, 3, 4, 5], 1); + assert_eq!(sa.len(), 5); + assert_eq!(sa.get(0), 1); + assert_eq!(sa.get(1), 2); + assert_eq!(sa.get(2), 3); + assert_eq!(sa.get(3), 4); + assert_eq!(sa.get(4), 5); + } + + #[test] + fn test_suffix_array_compressed() { + let mut bitarray = BitArray::with_capacity(5, 40); + bitarray.set(0, 1); + bitarray.set(1, 2); + bitarray.set(2, 3); + bitarray.set(3, 4); + bitarray.set(4, 5); + + let sa = SuffixArray::Compressed(bitarray, 1); + assert_eq!(sa.len(), 5); + assert_eq!(sa.get(0), 1); + assert_eq!(sa.get(1), 2); + assert_eq!(sa.get(2), 3); + assert_eq!(sa.get(3), 4); + assert_eq!(sa.get(4), 5); + } + + #[test] + fn test_suffix_array_len() { + let sa = SuffixArray::Original(vec![1, 2, 3, 4, 5], 1); + assert_eq!(sa.len(), 5); + + let bitarray = BitArray::with_capacity(5, 40); + let sa = SuffixArray::Compressed(bitarray, 1); + assert_eq!(sa.len(), 5); + } + + #[test] + fn test_suffix_array_bits_per_value() { + let sa = SuffixArray::Original(vec![1, 2, 3, 4, 5], 1); + assert_eq!(sa.bits_per_value(), 64); + + let bitarray = BitArray::with_capacity(5, 40); + let sa = SuffixArray::Compressed(bitarray, 1); + assert_eq!(sa.bits_per_value(), 40); + } + + #[test] + fn test_suffix_array_sample_rate() { + let sa = SuffixArray::Original(vec![1, 2, 3, 4, 5], 1); + assert_eq!(sa.sample_rate(), 1); + + let bitarray = BitArray::with_capacity(5, 40); + let sa = SuffixArray::Compressed(bitarray, 1); + assert_eq!(sa.sample_rate(), 1); + } + + #[test] + fn test_suffix_array_is_empty() { + let sa = SuffixArray::Original(vec![], 1); + assert_eq!(sa.is_empty(), true); + + let bitarray = BitArray::with_capacity(0, 0); + let sa = SuffixArray::Compressed(bitarray, 1); + assert_eq!(sa.is_empty(), true); + } + + #[test] + fn test_nullable_is_null() { + assert_eq!(u32::NULL.is_null(), true); + assert_eq!(0u32.is_null(), false); + } +} diff --git a/sa-index/src/main.rs b/sa-index/src/main.rs deleted file mode 100644 index 8fe7fad..0000000 --- a/sa-index/src/main.rs +++ /dev/null @@ -1,13 +0,0 @@ -use clap::Parser; -use sa_index::{ - run, - Arguments -}; - -fn main() { - let args = Arguments::parse(); - if let Err(error) = run(args) { - eprintln!("{}", error); - std::process::exit(1); - }; -} diff --git a/sa-index/src/peptide_search.rs b/sa-index/src/peptide_search.rs index 8e2c0dd..7d83914 100644 --- a/sa-index/src/peptide_search.rs +++ b/sa-index/src/peptide_search.rs @@ -70,7 +70,7 @@ pub fn search_proteins_for_peptide<'a>( let peptide = peptide.strip_suffix('\n').unwrap_or(peptide).to_uppercase(); // words that are shorter than the sample rate are not searchable - if peptide.len() < searcher.sparseness_factor as usize { + if peptide.len() < searcher.sa.sample_rate() as usize { return None; } @@ -84,6 +84,7 @@ pub fn search_proteins_for_peptide<'a>( } SearchAllSuffixesResult::SearchResult(matched_suffixes) => matched_suffixes, SearchAllSuffixesResult::NoMatches => { + eprintln!("No matches found for peptide: {}", peptide); return None; } }; @@ -277,3 +278,72 @@ pub fn search_all_peptides( result: res } } + +#[cfg(test)] +mod tests { + use super::*; + + fn assert_json_eq(generated_json: &str, expected_json: &str) { + assert_eq!( + 
generated_json.parse::().unwrap(), + expected_json.parse::().unwrap(), + ); + } + + #[test] + fn test_serialize_output_data() { + let output_data = OutputData { + result: vec![1, 2, 3] + }; + + let generated_json = serde_json::to_string(&output_data).unwrap(); + let expected_json = "{\"result\":[1,2,3]}"; + + assert_json_eq(&generated_json, expected_json); + } + + #[test] + fn test_serialize_search_result_with_analysis() { + let search_result = SearchResultWithAnalysis { + sequence: "MSKIAALLPSV".to_string(), + lca: Some(1), + taxa: vec![1, 2, 3], + uniprot_accession_numbers: vec!["P12345".to_string(), "P23456".to_string()], + fa: None, + cutoff_used: true + }; + + let generated_json = serde_json::to_string(&search_result).unwrap(); + let expected_json = "{\"sequence\":\"MSKIAALLPSV\",\"lca\":1,\"taxa\":[1,2,3],\"uniprot_accession_numbers\":[\"P12345\",\"P23456\"],\"fa\":null,\"cutoff_used\":true}"; + + assert_json_eq(&generated_json, expected_json); + } + + #[test] + fn test_serialize_protein_info() { + let protein_info = ProteinInfo { + taxon: 1, + uniprot_accession: "P12345".to_string(), + functional_annotations: vec!["GO:0001234".to_string(), "GO:0005678".to_string()] + }; + + let generated_json = serde_json::to_string(&protein_info).unwrap(); + let expected_json = "{\"taxon\":1,\"uniprot_accession\":\"P12345\",\"functional_annotations\":[\"GO:0001234\",\"GO:0005678\"]}"; + + assert_json_eq(&generated_json, expected_json); + } + + #[test] + fn test_serialize_search_only_result() { + let search_result = SearchOnlyResult { + sequence: "MSKIAALLPSV".to_string(), + proteins: vec![], + cutoff_used: true + }; + + let generated_json = serde_json::to_string(&search_result).unwrap(); + let expected_json = "{\"sequence\":\"MSKIAALLPSV\",\"proteins\":[],\"cutoff_used\":true}"; + + assert_json_eq(&generated_json, expected_json); + } +} diff --git a/sa-index/src/sa_searcher.rs b/sa-index/src/sa_searcher.rs index 78cc043..29bbc9a 100644 --- a/sa-index/src/sa_searcher.rs +++ b/sa-index/src/sa_searcher.rs @@ -19,7 +19,8 @@ use crate::{ Minimum }, suffix_to_protein_index::SuffixToProteinIndex, - Nullable + Nullable, + SuffixArray }; /// Enum indicating if we are searching for the minimum, or maximum bound in the suffix array @@ -100,12 +101,11 @@ impl PartialEq for SearchAllSuffixesResult { /// * `function_aggregator` - Object used to retrieve the functional annotations and to calculate /// the functional analysis provided by Unipept pub struct Searcher { - sa: Vec, - pub sparseness_factor: u8, - suffix_index_to_protein: Box, - proteins: Proteins, - taxon_id_calculator: TaxonAggregator, - function_aggregator: FunctionAggregator + pub sa: SuffixArray, + pub suffix_index_to_protein: Box, + pub proteins: Proteins, + pub taxon_id_calculator: TaxonAggregator, + pub function_aggregator: FunctionAggregator } impl Searcher { @@ -126,8 +126,7 @@ impl Searcher { /// /// Returns a new Searcher object pub fn new( - sa: Vec, - sparseness_factor: u8, + sa: SuffixArray, suffix_index_to_protein: Box, proteins: Proteins, taxon_id_calculator: TaxonAggregator, @@ -135,7 +134,6 @@ impl Searcher { ) -> Self { Self { sa, - sparseness_factor, suffix_index_to_protein, proteins, taxon_id_calculator, @@ -240,7 +238,8 @@ impl Searcher { while right - left > 1 { let center = (left + right) / 2; let skip = min(lcp_left, lcp_right); - let (retval, lcp_center) = self.compare(search_string, self.sa[center], skip, bound); + let (retval, lcp_center) = + self.compare(search_string, self.sa.get(center), skip, bound); found |= lcp_center 
== search_string.len(); @@ -258,7 +257,7 @@ impl Searcher { // handle edge case to search at index 0 if right == 1 && left == 0 { let (retval, lcp_center) = - self.compare(search_string, self.sa[0], min(lcp_left, lcp_right), bound); + self.compare(search_string, self.sa.get(0), min(lcp_left, lcp_right), bound); found |= lcp_center == search_string.len(); @@ -322,7 +321,7 @@ impl Searcher { } let mut skip: usize = 0; - while skip < self.sparseness_factor as usize { + while skip < self.sa.sample_rate() as usize { let mut il_locations_start = 0; while il_locations_start < il_locations.len() && il_locations[il_locations_start] < skip { @@ -339,7 +338,7 @@ impl Searcher { // array (stop when our max number of matches is reached) let mut sa_index = min_bound; while sa_index < max_bound { - let suffix = self.sa[sa_index] as usize; + let suffix = self.sa.get(sa_index) as usize; // filter away matches where I was wrongfully equalized to L, and check the // unmatched prefix when I and L equalized, we only need to // check the prefix, not the whole match, when the prefix is 0, we don't need to @@ -549,20 +548,55 @@ impl Searcher { #[cfg(test)] mod tests { - use sa_mappings::functionality::FunctionAggregator; - use sa_mappings::proteins::{Protein, Proteins}; - use sa_mappings::taxonomy::{AggregationMethod, TaxonAggregator}; - use crate::sa_searcher::{ - BoundSearchResult, SearchAllSuffixesResult, Searcher, - }; - use crate::suffix_to_protein_index::SparseSuffixToProtein; - use tempdir::TempDir; use std::{ fs::File, io::Write, path::PathBuf }; + use sa_mappings::{ + functionality::FunctionAggregator, + proteins::{ + Protein, + Proteins + }, + taxonomy::{ + AggregationMethod, + TaxonAggregator + } + }; + use tempdir::TempDir; + + use crate::{ + sa_searcher::{ + BoundSearchResult, + SearchAllSuffixesResult, + Searcher + }, + suffix_to_protein_index::SparseSuffixToProtein, + SuffixArray + }; + + #[test] + fn test_partial_eq_search_all_suffixes_result() { + let search_all_suffixes_result_1 = SearchAllSuffixesResult::SearchResult(vec![1, 2, 3]); + let search_all_suffixes_result_2 = SearchAllSuffixesResult::SearchResult(vec![3, 2, 1]); + let search_all_suffixes_result_3 = SearchAllSuffixesResult::SearchResult(vec![1, 2, 4]); + let search_all_suffixes_result_4 = SearchAllSuffixesResult::MaxMatches(vec![1, 2, 3]); + let search_all_suffixes_result_5 = SearchAllSuffixesResult::MaxMatches(vec![3, 2, 1]); + let search_all_suffixes_result_6 = SearchAllSuffixesResult::MaxMatches(vec![1, 2, 4]); + let search_all_suffixes_result_7 = SearchAllSuffixesResult::NoMatches; + let search_all_suffixes_result_8 = SearchAllSuffixesResult::NoMatches; + + assert_eq!(search_all_suffixes_result_1, search_all_suffixes_result_2); + assert_ne!(search_all_suffixes_result_1, search_all_suffixes_result_3); + assert_eq!(search_all_suffixes_result_4, search_all_suffixes_result_5); + assert_ne!(search_all_suffixes_result_4, search_all_suffixes_result_6); + assert_eq!(search_all_suffixes_result_7, search_all_suffixes_result_8); + assert_ne!(search_all_suffixes_result_1, search_all_suffixes_result_7); + assert_ne!(search_all_suffixes_result_4, search_all_suffixes_result_7); + } + fn create_taxonomy_file(tmp_dir: &TempDir) -> PathBuf { let taxonomy_file = tmp_dir.path().join("taxonomy.tsv"); let mut file = File::create(&taxonomy_file).unwrap(); @@ -585,52 +619,55 @@ mod tests { taxonomy_file } - fn get_example_proteins() -> Proteins { let text = "AI-BLACVAA-AC-KCRLZ$".to_string().into_bytes(); Proteins { input_string: text, - proteins: vec![ + 
proteins: vec![ Protein { - uniprot_id: String::new(), - taxon_id: 0, - functional_annotations: vec![], + uniprot_id: String::new(), + taxon_id: 0, + functional_annotations: vec![] }, Protein { - uniprot_id: String::new(), - taxon_id: 0, - functional_annotations: vec![], + uniprot_id: String::new(), + taxon_id: 0, + functional_annotations: vec![] }, Protein { - uniprot_id: String::new(), - taxon_id: 0, - functional_annotations: vec![], + uniprot_id: String::new(), + taxon_id: 0, + functional_annotations: vec![] }, Protein { - uniprot_id: String::new(), - taxon_id: 0, - functional_annotations: vec![], + uniprot_id: String::new(), + taxon_id: 0, + functional_annotations: vec![] }, - ], + ] } } #[test] fn test_search_simple() { let proteins = get_example_proteins(); - let sa = vec![ - 19, 10, 2, 13, 9, 8, 11, 5, 0, 3, 12, 15, 6, 1, 4, 17, 14, 16, 7, 18, - ]; + let sa = SuffixArray::Original( + vec![19, 10, 2, 13, 9, 8, 11, 5, 0, 3, 12, 15, 6, 1, 4, 17, 14, 16, 7, 18], + 1 + ); let tmp_dir = TempDir::new("test_try_from_taxonomy_file").unwrap(); let taxonomy_file = create_taxonomy_file(&tmp_dir); let searcher = Searcher::new( sa, - 1, Box::new(SparseSuffixToProtein::new(&proteins.input_string)), proteins, - TaxonAggregator::try_from_taxonomy_file(taxonomy_file.to_str().unwrap(), AggregationMethod::LcaStar).unwrap(), + TaxonAggregator::try_from_taxonomy_file( + taxonomy_file.to_str().unwrap(), + AggregationMethod::LcaStar + ) + .unwrap(), FunctionAggregator {} ); @@ -650,52 +687,53 @@ mod tests { #[test] fn test_search_sparse() { let proteins = get_example_proteins(); - let sa = vec![9, 0, 3, 12, 15, 6, 18]; + let sa = SuffixArray::Original(vec![9, 0, 3, 12, 15, 6, 18], 3); let tmp_dir = TempDir::new("test_try_from_taxonomy_file").unwrap(); let taxonomy_file = create_taxonomy_file(&tmp_dir); let searcher = Searcher::new( sa, - 3, Box::new(SparseSuffixToProtein::new(&proteins.input_string)), proteins, - TaxonAggregator::try_from_taxonomy_file(taxonomy_file.to_str().unwrap(), AggregationMethod::LcaStar).unwrap(), + TaxonAggregator::try_from_taxonomy_file( + taxonomy_file.to_str().unwrap(), + AggregationMethod::LcaStar + ) + .unwrap(), FunctionAggregator {} ); // search suffix 'VAA' let found_suffixes = searcher.search_matching_suffixes(&[b'V', b'A', b'A'], usize::MAX, false); - assert_eq!( - found_suffixes, - SearchAllSuffixesResult::SearchResult(vec![7]) - ); + assert_eq!(found_suffixes, SearchAllSuffixesResult::SearchResult(vec![7])); // search suffix 'AC' let found_suffixes = searcher.search_matching_suffixes(&[b'A', b'C'], usize::MAX, false); - assert_eq!( - found_suffixes, - SearchAllSuffixesResult::SearchResult(vec![5, 11]) - ); + assert_eq!(found_suffixes, SearchAllSuffixesResult::SearchResult(vec![5, 11])); } #[test] fn test_il_equality() { let proteins = get_example_proteins(); - let sa = vec![ - 19, 10, 2, 13, 9, 8, 11, 5, 0, 3, 12, 15, 6, 1, 4, 17, 14, 16, 7, 18, - ]; + let sa = SuffixArray::Original( + vec![19, 10, 2, 13, 9, 8, 11, 5, 0, 3, 12, 15, 6, 1, 4, 17, 14, 16, 7, 18], + 1 + ); let tmp_dir = TempDir::new("test_try_from_taxonomy_file").unwrap(); let taxonomy_file = create_taxonomy_file(&tmp_dir); let searcher = Searcher::new( sa, - 1, Box::new(SparseSuffixToProtein::new(&proteins.input_string)), proteins, - TaxonAggregator::try_from_taxonomy_file(taxonomy_file.to_str().unwrap(), AggregationMethod::LcaStar).unwrap(), + TaxonAggregator::try_from_taxonomy_file( + taxonomy_file.to_str().unwrap(), + AggregationMethod::LcaStar + ) + .unwrap(), FunctionAggregator {} ); @@ -710,27 
+748,27 @@ mod tests { #[test] fn test_il_equality_sparse() { let proteins = get_example_proteins(); - let sa = vec![9, 0, 3, 12, 15, 6, 18]; + let sa = SuffixArray::Original(vec![9, 0, 3, 12, 15, 6, 18], 3); let tmp_dir = TempDir::new("test_try_from_taxonomy_file").unwrap(); let taxonomy_file = create_taxonomy_file(&tmp_dir); let searcher = Searcher::new( sa, - 3, Box::new(SparseSuffixToProtein::new(&proteins.input_string)), proteins, - TaxonAggregator::try_from_taxonomy_file(taxonomy_file.to_str().unwrap(), AggregationMethod::LcaStar).unwrap(), + TaxonAggregator::try_from_taxonomy_file( + taxonomy_file.to_str().unwrap(), + AggregationMethod::LcaStar + ) + .unwrap(), FunctionAggregator {} ); // search bounds 'RIZ' with equal I and L let found_suffixes = searcher.search_matching_suffixes(&[b'R', b'I', b'Z'], usize::MAX, true); - assert_eq!( - found_suffixes, - SearchAllSuffixesResult::SearchResult(vec![16]) - ); + assert_eq!(found_suffixes, SearchAllSuffixesResult::SearchResult(vec![16])); // search bounds 'RIZ' without equal I and L let found_suffixes = @@ -745,32 +783,32 @@ mod tests { let proteins = Proteins { input_string: text, - proteins: vec![Protein { - uniprot_id: String::new(), - taxon_id: 0, - functional_annotations: vec![], - }], + proteins: vec![Protein { + uniprot_id: String::new(), + taxon_id: 0, + functional_annotations: vec![] + }] }; let tmp_dir = TempDir::new("test_try_from_taxonomy_file").unwrap(); let taxonomy_file = create_taxonomy_file(&tmp_dir); - let sparse_sa = vec![0, 2, 4]; + let sparse_sa = SuffixArray::Original(vec![0, 2, 4], 2); let searcher = Searcher::new( sparse_sa, - 2, Box::new(SparseSuffixToProtein::new(&proteins.input_string)), proteins, - TaxonAggregator::try_from_taxonomy_file(taxonomy_file.to_str().unwrap(), AggregationMethod::LcaStar).unwrap(), + TaxonAggregator::try_from_taxonomy_file( + taxonomy_file.to_str().unwrap(), + AggregationMethod::LcaStar + ) + .unwrap(), FunctionAggregator {} ); // search bounds 'IM' with equal I and L let found_suffixes = searcher.search_matching_suffixes(&[b'I', b'M'], usize::MAX, true); - assert_eq!( - found_suffixes, - SearchAllSuffixesResult::SearchResult(vec![0]) - ); + assert_eq!(found_suffixes, SearchAllSuffixesResult::SearchResult(vec![0])); } #[test] @@ -779,31 +817,31 @@ mod tests { let proteins = Proteins { input_string: text, - proteins: vec![Protein { - uniprot_id: String::new(), - taxon_id: 0, - functional_annotations: vec![], - }], + proteins: vec![Protein { + uniprot_id: String::new(), + taxon_id: 0, + functional_annotations: vec![] + }] }; let tmp_dir = TempDir::new("test_try_from_taxonomy_file").unwrap(); let taxonomy_file = create_taxonomy_file(&tmp_dir); - let sparse_sa = vec![6, 0, 1, 5, 4, 3, 2]; + let sparse_sa = SuffixArray::Original(vec![6, 0, 1, 5, 4, 3, 2], 1); let searcher = Searcher::new( sparse_sa, - 1, Box::new(SparseSuffixToProtein::new(&proteins.input_string)), proteins, - TaxonAggregator::try_from_taxonomy_file(taxonomy_file.to_str().unwrap(), AggregationMethod::LcaStar).unwrap(), + TaxonAggregator::try_from_taxonomy_file( + taxonomy_file.to_str().unwrap(), + AggregationMethod::LcaStar + ) + .unwrap(), FunctionAggregator {} ); let found_suffixes = searcher.search_matching_suffixes(&[b'I'], usize::MAX, true); - assert_eq!( - found_suffixes, - SearchAllSuffixesResult::SearchResult(vec![2, 3, 4, 5]) - ); + assert_eq!(found_suffixes, SearchAllSuffixesResult::SearchResult(vec![2, 3, 4, 5])); } #[test] @@ -812,31 +850,31 @@ mod tests { let proteins = Proteins { input_string: text, - 
proteins: vec![Protein { - uniprot_id: String::new(), - taxon_id: 0, - functional_annotations: vec![], - }], + proteins: vec![Protein { + uniprot_id: String::new(), + taxon_id: 0, + functional_annotations: vec![] + }] }; let tmp_dir = TempDir::new("test_try_from_taxonomy_file").unwrap(); let taxonomy_file = create_taxonomy_file(&tmp_dir); - let sparse_sa = vec![6, 5, 4, 3, 2, 1, 0]; + let sparse_sa = SuffixArray::Original(vec![6, 5, 4, 3, 2, 1, 0], 1); let searcher = Searcher::new( sparse_sa, - 1, Box::new(SparseSuffixToProtein::new(&proteins.input_string)), proteins, - TaxonAggregator::try_from_taxonomy_file(taxonomy_file.to_str().unwrap(), AggregationMethod::LcaStar).unwrap(), + TaxonAggregator::try_from_taxonomy_file( + taxonomy_file.to_str().unwrap(), + AggregationMethod::LcaStar + ) + .unwrap(), FunctionAggregator {} ); let found_suffixes = searcher.search_matching_suffixes(&[b'I', b'I'], usize::MAX, true); - assert_eq!( - found_suffixes, - SearchAllSuffixesResult::SearchResult(vec![0, 1, 2, 3, 4]) - ); + assert_eq!(found_suffixes, SearchAllSuffixesResult::SearchResult(vec![0, 1, 2, 3, 4])); } #[test] @@ -845,33 +883,33 @@ mod tests { let proteins = Proteins { input_string: text, - proteins: vec![Protein { - uniprot_id: String::new(), - taxon_id: 0, - functional_annotations: vec![], - }], + proteins: vec![Protein { + uniprot_id: String::new(), + taxon_id: 0, + functional_annotations: vec![] + }] }; let tmp_dir = TempDir::new("test_try_from_taxonomy_file").unwrap(); let taxonomy_file = create_taxonomy_file(&tmp_dir); - let sparse_sa = vec![6, 4, 2, 0]; + let sparse_sa = SuffixArray::Original(vec![6, 4, 2, 0], 2); let searcher = Searcher::new( sparse_sa, - 2, Box::new(SparseSuffixToProtein::new(&proteins.input_string)), proteins, - TaxonAggregator::try_from_taxonomy_file(taxonomy_file.to_str().unwrap(), AggregationMethod::LcaStar).unwrap(), + TaxonAggregator::try_from_taxonomy_file( + taxonomy_file.to_str().unwrap(), + AggregationMethod::LcaStar + ) + .unwrap(), FunctionAggregator {} ); // search all places where II is in the string IIIILL, but with a sparse SA // this way we check if filtering the suffixes works as expected let found_suffixes = searcher.search_matching_suffixes(&[b'I', b'I'], usize::MAX, false); - assert_eq!( - found_suffixes, - SearchAllSuffixesResult::SearchResult(vec![0, 1, 2]) - ); + assert_eq!(found_suffixes, SearchAllSuffixesResult::SearchResult(vec![0, 1, 2])); } #[test] @@ -880,31 +918,31 @@ mod tests { let proteins = Proteins { input_string: text, - proteins: vec![Protein { - uniprot_id: String::new(), - taxon_id: 0, - functional_annotations: vec![], - }], + proteins: vec![Protein { + uniprot_id: String::new(), + taxon_id: 0, + functional_annotations: vec![] + }] }; let tmp_dir = TempDir::new("test_try_from_taxonomy_file").unwrap(); let taxonomy_file = create_taxonomy_file(&tmp_dir); - let sparse_sa = vec![6, 5, 4, 3, 2, 1, 0]; + let sparse_sa = SuffixArray::Original(vec![6, 5, 4, 3, 2, 1, 0], 1); let searcher = Searcher::new( sparse_sa, - 1, Box::new(SparseSuffixToProtein::new(&proteins.input_string)), proteins, - TaxonAggregator::try_from_taxonomy_file(taxonomy_file.to_str().unwrap(), AggregationMethod::LcaStar).unwrap(), + TaxonAggregator::try_from_taxonomy_file( + taxonomy_file.to_str().unwrap(), + AggregationMethod::LcaStar + ) + .unwrap(), FunctionAggregator {} ); // search bounds 'IM' with equal I and L let found_suffixes = searcher.search_matching_suffixes(&[b'I', b'I'], usize::MAX, true); - assert_eq!( - found_suffixes, - 
SearchAllSuffixesResult::SearchResult(vec![0, 1, 2, 3, 4]) - ); + assert_eq!(found_suffixes, SearchAllSuffixesResult::SearchResult(vec![0, 1, 2, 3, 4])); } -} \ No newline at end of file +} diff --git a/sa-index/src/suffix_to_protein_index.rs b/sa-index/src/suffix_to_protein_index.rs index 4144f38..0091fed 100644 --- a/sa-index/src/suffix_to_protein_index.rs +++ b/sa-index/src/suffix_to_protein_index.rs @@ -116,6 +116,7 @@ impl SparseSuffixToProtein { #[cfg(test)] mod tests { + use clap::ValueEnum; use sa_mappings::proteins::{ SEPARATION_CHARACTER, TERMINATION_CHARACTER @@ -125,7 +126,8 @@ mod tests { suffix_to_protein_index::{ DenseSuffixToProtein, SparseSuffixToProtein, - SuffixToProteinIndex + SuffixToProteinIndex, + SuffixToProteinMappingStyle }, Nullable }; @@ -136,6 +138,18 @@ mod tests { text.into_bytes() } + #[test] + fn test_suffix_to_protein_mapping_style() { + assert_eq!( + SuffixToProteinMappingStyle::Dense, + SuffixToProteinMappingStyle::from_str("dense", false).unwrap() + ); + assert_eq!( + SuffixToProteinMappingStyle::Sparse, + SuffixToProteinMappingStyle::from_str("sparse", false).unwrap() + ); + } + #[test] fn test_dense_build() { let u8_text = &build_text(); diff --git a/sa-index/src/util.rs b/sa-index/src/util.rs deleted file mode 100644 index 2f3f467..0000000 --- a/sa-index/src/util.rs +++ /dev/null @@ -1,63 +0,0 @@ -use std::{ - fs::File, - io, - io::BufRead, - path::Path, - time::{ - SystemTime, - SystemTimeError, - UNIX_EPOCH - } -}; - -use crate::sa_searcher::Searcher; - -/// Gets the current time in ms -/// -/// # Returns -/// -/// Returns the current time in ms -/// -/// # Errors -/// -/// Returns a SystemTimeError if getting the current time somehow fails -#[allow(unused)] -pub fn get_time_ms() -> Result { - Ok(SystemTime::now().duration_since(UNIX_EPOCH)?.as_nanos() as f64 * 1e-6) -} - -/// Times how long the function `f`, that has the searcher as only argument executed -/// -/// # Returns -/// -/// Returns the execution time of `f`in ms -/// -/// # Errors -/// -/// Returns a SystemTimeError if getting the start or end time failed -#[allow(unused)] -pub fn time_execution( - searcher: &mut Searcher, - f: &dyn Fn(&mut Searcher) -> bool -) -> Result<(bool, f64), SystemTimeError> { - let start_ms = get_time_ms()?; - let found = f(searcher); - let end_ms = get_time_ms()?; - Ok((found, end_ms - start_ms)) -} - -/// Opens `filename` and creates an iterator over it per line -/// -/// # Arguments -/// * `filename` - The file we want to iterate over per line -/// -/// # Returns -/// -/// Returns an Iterator to the Reader of the lines of the file. -pub fn read_lines

(filename: P) -> io::Result>> -where - P: AsRef -{ - let file = File::open(filename)?; - Ok(io::BufReader::new(file).lines()) -} diff --git a/sa-mappings/src/functionality.rs b/sa-mappings/src/functionality.rs index 8b60eb6..45c446e 100644 --- a/sa-mappings/src/functionality.rs +++ b/sa-mappings/src/functionality.rs @@ -14,9 +14,9 @@ use crate::proteins::Protein; #[derive(Debug, Serialize)] pub struct FunctionalAggregation { /// A HashMap representing how many GO, EC and IPR terms were found - pub counts: HashMap, + counts: HashMap, /// A HashMap representing how often a certain functional annotation was found - pub data: HashMap + data: HashMap } /// A struct that represents a function aggregator @@ -32,6 +32,9 @@ impl FunctionAggregator { /// /// Returns a JSON string containing the aggregated functional annotations pub fn aggregate(&self, proteins: Vec<&Protein>) -> FunctionalAggregation { + // Keep track of the proteins that have any annotation + let mut proteins_with_annotations: HashSet = HashSet::new(); + // Keep track of the proteins that have a certain annotation let mut proteins_with_ec: HashSet = HashSet::new(); let mut proteins_with_go: HashSet = HashSet::new(); @@ -43,10 +46,19 @@ impl FunctionAggregator { for protein in proteins.iter() { for annotation in protein.get_functional_annotations().split(';') { match annotation.chars().next() { - Some('E') => proteins_with_ec.insert(protein.uniprot_id.clone()), - Some('G') => proteins_with_go.insert(protein.uniprot_id.clone()), - Some('I') => proteins_with_ipr.insert(protein.uniprot_id.clone()), - _ => false + Some('E') => { + proteins_with_ec.insert(protein.uniprot_id.clone()); + proteins_with_annotations.insert(protein.uniprot_id.clone()); + } + Some('G') => { + proteins_with_go.insert(protein.uniprot_id.clone()); + proteins_with_annotations.insert(protein.uniprot_id.clone()); + } + Some('I') => { + proteins_with_ipr.insert(protein.uniprot_id.clone()); + proteins_with_annotations.insert(protein.uniprot_id.clone()); + } + _ => {} }; data.entry(annotation.to_string()) @@ -56,7 +68,7 @@ impl FunctionAggregator { } let mut counts: HashMap = HashMap::new(); - counts.insert("all".to_string(), proteins.len()); + counts.insert("all".to_string(), proteins_with_annotations.len()); counts.insert("EC".to_string(), proteins_with_ec.len()); counts.insert("GO".to_string(), proteins_with_go.len()); counts.insert("IPR".to_string(), proteins_with_ipr.len()); @@ -100,30 +112,51 @@ mod tests { #[test] fn test_aggregate() { let mut proteins: Vec = Vec::new(); - proteins.push(Protein { + + let protein1 = Protein { uniprot_id: "P12345".to_string(), taxon_id: 9606, functional_annotations: encode("GO:0001234;GO:0005678") - }); - proteins.push(Protein { + }; + let protein2 = Protein { uniprot_id: "P23456".to_string(), taxon_id: 9606, functional_annotations: encode("EC:1.1.1.-") - }); + }; + let protein3 = Protein { + uniprot_id: "P23876".to_string(), + taxon_id: 9606, + functional_annotations: encode("IPR:IPR123456;EC:1.1.1.-") + }; + let protein4 = Protein { + uniprot_id: "P23877".to_string(), + taxon_id: 9606, + functional_annotations: encode("2345") + }; + + proteins.push(protein1); + proteins.push(protein2); + proteins.push(protein3); + proteins.push(protein4); let function_aggregator = FunctionAggregator {}; let result = function_aggregator.aggregate(proteins.iter().collect()); - assert_eq!(result.counts.get("all"), Some(&2)); - assert_eq!(result.counts.get("EC"), Some(&1)); + assert_eq!(result.counts.get("all"), Some(&3)); + 
assert_eq!(result.counts.get("EC"), Some(&2)); assert_eq!(result.counts.get("GO"), Some(&1)); - assert_eq!(result.counts.get("IPR"), Some(&0)); + assert_eq!(result.counts.get("IPR"), Some(&1)); assert_eq!(result.counts.get("NOTHING"), None); - assert_eq!(result.data.get("GO:0001234"), Some(&1)); - assert_eq!(result.data.get("GO:0005678"), Some(&1)); - assert_eq!(result.data.get("EC:1.1.1.-"), Some(&1)); + assert_eq!(result.data, { + let mut map = HashMap::new(); + map.insert("GO:0001234".to_string(), 1); + map.insert("GO:0005678".to_string(), 1); + map.insert("EC:1.1.1.-".to_string(), 2); + map.insert("IPR:IPR123456".to_string(), 1); + map + }); assert_eq!(result.data.get("EC:1.1.2.-"), None); } @@ -141,17 +174,24 @@ mod tests { taxon_id: 9606, functional_annotations: encode("EC:1.1.1.-") }; + let protein3 = Protein { + uniprot_id: "P23876".to_string(), + taxon_id: 9606, + functional_annotations: encode("IPR:IPR123456;EC:1.1.1.-") + }; proteins.push(&protein1); proteins.push(&protein2); + proteins.push(&protein3); let function_aggregator = FunctionAggregator {}; let result = function_aggregator.get_all_functional_annotations(proteins.as_slice()); - assert_eq!(result.len(), 2); + assert_eq!(result.len(), 3); assert_eq!(result[0].len(), 2); assert_eq!(result[1].len(), 1); + assert_eq!(result[2].len(), 2); } #[test] diff --git a/sa-mappings/src/proteins.rs b/sa-mappings/src/proteins.rs index 900c531..e1d2f49 100644 --- a/sa-mappings/src/proteins.rs +++ b/sa-mappings/src/proteins.rs @@ -10,7 +10,10 @@ use std::{ }; use bytelines::ByteLines; -use fa_compression::algorithm1::decode; +use fa_compression::algorithm1::{ + decode, + encode +}; use umgap::taxon::TaxonId; use crate::taxonomy::TaxonAggregator; @@ -84,7 +87,7 @@ impl Proteins { let uniprot_id = from_utf8(fields.next().unwrap())?; let taxon_id = from_utf8(fields.next().unwrap())?.parse::()?; let sequence = from_utf8(fields.next().unwrap())?; - let functional_annotations: Vec = fields.next().unwrap().to_vec(); + let functional_annotations: Vec = encode(from_utf8(fields.next().unwrap())?); if !taxon_aggregator.taxon_exists(taxon_id) { continue; @@ -185,26 +188,21 @@ mod tests { let database_file = tmp_dir.path().join("database.tsv"); let mut file = File::create(&database_file).unwrap(); - file.write("P12345\t1\tMLPGLALLLLAAWTARALEV\t".as_bytes()) - .unwrap(); - file.write_all(&[0xD1, 0x11, 0xA3, 0x8A, 0xD1, 0x27, 0x47, 0x5E, 0x11, 0x99, 0x27]) - .unwrap(); - file.write("\n".as_bytes()).unwrap(); - file.write("P54321\t2\tPTDGNAGLLAEPQIAMFCGRLNMHMNVQNG\t".as_bytes()) - .unwrap(); - file.write_all(&[0xD1, 0x11, 0xA3, 0x8A, 0xD1, 0x27, 0x47, 0x5E, 0x11, 0x99, 0x27]) - .unwrap(); - file.write("\n".as_bytes()).unwrap(); - file.write("P67890\t6\tKWDSDPSGTKTCIDT\t".as_bytes()) - .unwrap(); - file.write_all(&[0xD1, 0x11, 0xA3, 0x8A, 0xD1, 0x27, 0x47, 0x5E, 0x11, 0x99, 0x27]) - .unwrap(); - file.write("\n".as_bytes()).unwrap(); - file.write("P13579\t17\tKEGILQYCQEVYPELQITNVVEANQPVTIQNWCKRGRKQCKTHPH\t".as_bytes()) - .unwrap(); - file.write_all(&[0xD1, 0x11, 0xA3, 0x8A, 0xD1, 0x27, 0x47, 0x5E, 0x11, 0x99, 0x27]) + file.write( + "P12345\t1\tMLPGLALLLLAAWTARALEV\tGO:0009279;IPR:IPR016364;IPR:IPR008816\n".as_bytes() + ) + .unwrap(); + file.write( + "P54321\t2\tPTDGNAGLLAEPQIAMFCGRLNMHMNVQNG\tGO:0009279;IPR:IPR016364;IPR:IPR008816\n" + .as_bytes() + ) + .unwrap(); + file.write( + "P67890\t6\tKWDSDPSGTKTCIDT\tGO:0009279;IPR:IPR016364;IPR:IPR008816\n".as_bytes() + ) + .unwrap(); + 
file.write("P13579\t17\tKEGILQYCQEVYPELQITNVVEANQPVTIQNWCKRGRKQCKTHPH\tGO:0009279;IPR:IPR016364;IPR:IPR008816\n".as_bytes()) .unwrap(); - file.write("\n".as_bytes()).unwrap(); database_file } diff --git a/sa-mappings/src/taxonomy.rs b/sa-mappings/src/taxonomy.rs index a1aabb7..70c65f6 100644 --- a/sa-mappings/src/taxonomy.rs +++ b/sa-mappings/src/taxonomy.rs @@ -15,6 +15,7 @@ use umgap::{ }, taxon::{ read_taxa_file, + Taxon, TaxonId, TaxonList, TaxonTree @@ -24,11 +25,9 @@ use umgap::{ /// A struct that represents a taxon aggregator. pub struct TaxonAggregator { /// A vector that contains the snapped taxon IDs. - snapping: Vec>, - + snapping: Vec>, /// The aggregator used to aggregate taxon IDs. aggregator: Box, - /// The taxon list. taxon_list: TaxonList } @@ -43,6 +42,33 @@ pub enum AggregationMethod { } impl TaxonAggregator { + /// Creates a new `TaxonAggregator` with the given taxa and aggregation method. + /// + /// # Arguments + /// + /// * `taxa` - A vector of `Taxon` objects representing the taxa. + /// * `method` - An `AggregationMethod` enum specifying the aggregation method to use. + /// + /// # Returns + /// + /// Returns a new `TaxonAggregator` instance. + pub fn new(taxa: Vec, method: AggregationMethod) -> Self { + let taxon_tree = TaxonTree::new(&taxa); + let taxon_list = TaxonList::new(taxa); + let snapping = taxon_tree.snapping(&taxon_list, true); + + let aggregator: Box = match method { + AggregationMethod::Lca => Box::new(MixCalculator::new(taxon_tree, 1.0)), + AggregationMethod::LcaStar => Box::new(LCACalculator::new(taxon_tree)) + }; + + Self { + snapping, + aggregator, + taxon_list + } + } + /// Creates a new `TaxonAggregator` from a taxonomy file and an aggregation method. /// /// # Arguments @@ -62,20 +88,7 @@ impl TaxonAggregator { method: AggregationMethod ) -> Result> { let taxons = read_taxa_file(file)?; - let taxon_tree = TaxonTree::new(&taxons); - let taxon_list = TaxonList::new(taxons); - let snapping = taxon_tree.snapping(&taxon_list, true); - - let aggregator: Box = match method { - AggregationMethod::Lca => Box::new(MixCalculator::new(taxon_tree, 1.0)), - AggregationMethod::LcaStar => Box::new(LCACalculator::new(taxon_tree)) - }; - - Ok(Self { - snapping, - aggregator, - taxon_list - }) + Ok(Self::new(taxons, method)) } /// Checks if a taxon exists in the taxon list. 
@@ -157,6 +170,7 @@ mod tests { }; use tempdir::TempDir; + use umgap::rank::Rank; use super::*; @@ -183,6 +197,30 @@ mod tests { taxonomy_file } + #[test] + fn test_new() { + TaxonAggregator::new( + vec![ + Taxon::new(1, "root".to_string(), Rank::NoRank, 1, true), + Taxon::new(2, "Bacteria".to_string(), Rank::Superkingdom, 1, true), + Taxon::new(6, "Azorhizobium".to_string(), Rank::Genus, 1, true), + Taxon::new(7, "Azorhizobium caulinodans".to_string(), Rank::Species, 6, true), + Taxon::new(9, "Buchnera aphidicola".to_string(), Rank::Species, 6, true), + Taxon::new(10, "Cellvibrio".to_string(), Rank::Genus, 6, true), + Taxon::new(11, "Cellulomonas gilvus".to_string(), Rank::Species, 10, true), + Taxon::new(13, "Dictyoglomus".to_string(), Rank::Genus, 11, true), + Taxon::new(14, "Dictyoglomus thermophilum".to_string(), Rank::Species, 10, true), + Taxon::new(16, "Methylophilus".to_string(), Rank::Genus, 14, true), + Taxon::new(17, "Methylophilus methylotrophus".to_string(), Rank::Species, 16, true), + Taxon::new(18, "Pelobacter".to_string(), Rank::Genus, 17, true), + Taxon::new(19, "Syntrophotalea carbinolica".to_string(), Rank::Species, 17, true), + Taxon::new(20, "Phenylobacterium".to_string(), Rank::Genus, 19, true), + Taxon::new(21, "Invalid".to_string(), Rank::Species, 19, false), + ], + AggregationMethod::Lca + ); + } + #[test] fn test_try_from_taxonomy_file() { // Create a temporary directory for this test diff --git a/sa-server/Cargo.toml b/sa-server/Cargo.toml index c1920f5..935e36b 100644 --- a/sa-server/Cargo.toml +++ b/sa-server/Cargo.toml @@ -12,4 +12,5 @@ tokio = { version = "1.36.0", features = ["rt", "rt-multi-thread", "macros"] } sa-index = { path = "../sa-index" } clap = { version = "4.5.1", features = ["derive"] } sa-builder = { path = "../sa-builder" } -sa-mappings = { path = "../sa-mappings" } \ No newline at end of file +sa-mappings = { path = "../sa-mappings" } +sa-compression = { path = "../sa-compression" } diff --git a/sa-server/src/main.rs b/sa-server/src/main.rs index 98f2981..ef774b2 100644 --- a/sa-server/src/main.rs +++ b/sa-server/src/main.rs @@ -1,5 +1,10 @@ use std::{ error::Error, + fs::File, + io::{ + BufReader, + Read + }, sync::Arc }; @@ -17,8 +22,9 @@ use axum::{ Router }; use clap::Parser; -use sa_builder::binary::load_suffix_array; +use sa_compression::load_compressed_suffix_array; use sa_index::{ + binary::load_suffix_array, peptide_search::{ analyse_all_peptides, search_all_peptides, @@ -27,7 +33,8 @@ use sa_index::{ SearchResultWithAnalysis }, sa_searcher::Searcher, - suffix_to_protein_index::SparseSuffixToProtein + suffix_to_protein_index::SparseSuffixToProtein, + SuffixArray }; use sa_mappings::{ functionality::FunctionAggregator, @@ -168,23 +175,34 @@ async fn start_server(args: Arguments) -> Result<(), Box> { taxonomy } = args; - eprintln!("Loading suffix array..."); - let (sparseness_factor, sa) = load_suffix_array(&index_file)?; + eprintln!(); + eprintln!("📋 Started loading the suffix array..."); + let sa = load_suffix_array_file(&index_file)?; + eprintln!("✅ Successfully loaded the suffix array!"); + eprintln!("\tAmount of items: {}", sa.len()); + eprintln!("\tAmount of bits per item: {}", sa.bits_per_value()); + eprintln!("\tSample rate: {}", sa.sample_rate()); - eprintln!("Loading taxon file..."); + eprintln!(); + eprintln!("📋 Started loading the taxon file..."); let taxon_id_calculator = TaxonAggregator::try_from_taxonomy_file(&taxonomy, AggregationMethod::LcaStar)?; + eprintln!("✅ Successfully loaded the taxon file!"); + 
eprintln!("\tAggregation method: LCA*"); + eprintln!(); + eprintln!("📋 Started creating the function aggregator..."); let function_aggregator = FunctionAggregator {}; + eprintln!("✅ Successfully created the function aggregator!"); - eprintln!("Loading proteins..."); + eprintln!(); + eprintln!("📋 Started loading the proteins..."); let proteins = Proteins::try_from_database_file(&database_file, &taxon_id_calculator)?; let suffix_index_to_protein = Box::new(SparseSuffixToProtein::new(&proteins.input_string)); + eprintln!("✅ Successfully loaded the proteins!"); - eprintln!("Creating searcher..."); let searcher = Arc::new(Searcher::new( sa, - sparseness_factor, suffix_index_to_protein, proteins, taxon_id_calculator, @@ -205,8 +223,31 @@ async fn start_server(args: Arguments) -> Result<(), Box> { .with_state(searcher); let listener = tokio::net::TcpListener::bind("0.0.0.0:3000").await?; - println!("server is ready..."); + + eprintln!(); + eprintln!("🚀 Server is ready..."); axum::serve(listener, app).await?; Ok(()) } + +fn load_suffix_array_file(file: &str) -> Result> { + // Open the suffix array file + let mut sa_file = File::open(file)?; + + // Create a buffer reader for the file + let mut reader = BufReader::new(&mut sa_file); + + // Read the bits per value from the binary file (1 byte) + let mut bits_per_value_buffer = [0_u8; 1]; + reader + .read_exact(&mut bits_per_value_buffer) + .map_err(|_| "Could not read the flags from the binary file")?; + let bits_per_value = bits_per_value_buffer[0]; + + if bits_per_value == 64 { + load_suffix_array(&mut reader) + } else { + load_compressed_suffix_array(&mut reader, bits_per_value as usize) + } +}