From 44d409017a0305716fb1ba0589bbabb03449ff85 Mon Sep 17 00:00:00 2001
From: David BICKEL <david.bickel@vub.be>
Date: Thu, 5 Oct 2023 15:18:26 +0000
Subject: [PATCH] Merge development into main (version update: 1.0.0)
 Approved-by: Jose Gavalda Garcia

---
 MANIFEST.in                  |   3 +-
 README.md                    | 145 +++++++++++++++++------------------
 constava/__main__.py         |  11 ++-
 constava/calc/subsampling.py |  94 +++++++++++++++++++----
 constava/utils/ensembles.py  |   9 ++-
 constava/wrapper/params.py   |   5 ++
 constava/wrapper/wrapper.py  |  15 +++-
 requirements.txt             |   4 +
 setup.py                     |  12 ++-
 9 files changed, 193 insertions(+), 105 deletions(-)
diff --git a/MANIFEST.in b/MANIFEST.in
index b80c04d..5cf8a75 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -1 +1,2 @@
-include constava/data/*
\ No newline at end of file
+include constava/data/*
+include requirements.txt
\ No newline at end of file
diff --git a/README.md b/README.md
index d28f0e4..dad7b24 100644
--- a/README.md
+++ b/README.md
@@ -99,14 +99,10 @@ directly.
   source constava/bin/activate
   ```
 
-3. Install the project dependencies:
-  ```sh
-  pip install -r requirements.txt
-  ```
-
-  Then, from the package's root directory run:
+3. Build and install the package:
 
   ```sh
+  # In the package's root directory do:
   # Build package from source
   make build
   # Install locally
@@ -122,7 +118,7 @@ from the package's root directory.
 
 The software provides two modes of interaction. Shell user may use the software
 from the command line, while users skilled in Python can import it as a module.
-We provide a couple of usage examples in a [Jupyter notebook]().
+We provide a couple of usage examples in a [Colab notebook](https://colab.research.google.com/github/Bio2Byte/public_notebooks/blob/main/constava_examples.ipynb).
 
 [<Go to top>](#constava)
 
@@ -158,8 +154,7 @@ To extract dihedral angles from a trajectory the `constava dihedrals` submodule
 is used.
 
 ```
-usage: constava dihedrals [-h] [-s <file.pdb>] [-f <file.xtc> [<file.xtc> ...]] [-o OUTPUT] [--selection SELECTION] [--precision PRECISION] [--degrees]
-                          [-O]
+usage: constava dihedrals [-h] [-s <file.pdb>] [-f <file.xtc> [<file.xtc> ...]] [-o OUTPUT] [--selection SELECTION] [--precision PRECISION] [--degrees] [-O]
 
 The `constava dihedrals` submodule is used to extract the backbone dihedrals
 needed for the analysis from confromational ensembles. By default the results
@@ -170,7 +165,7 @@ Note: For the first and last residue in a protein only one backbone dihedral
 can be extracted. Thus, those residues are omitted by default.
 
 optional arguments:
-  -h, --help            show this help message and exit
+  -h, --help            Show this help message and exit
 
 Input & output options:
   -s <file.pdb>, --structure <file.pdb>
@@ -204,27 +199,26 @@ To analyze the backbone dihedral angles extracted from a confromational ensemble
 the `constava analyze` submodule is used.
 
 ```
-usage: constava analyze [-h] [-i <file.csv> [<file.csv> ...]] [--input-format {auto,xvg,csv}] [-o <file.csv>] [--output-format {auto,csv,json,tsv}]
-                        [-m <file.pkl>] [--window <int> [<int> ...]] [--window-series <int> [<int> ...]] [--bootstrap <int> [<int> ...]]
-                        [--bootstrap-samples <int>] [--degrees] [--precision PRECISION] [--seed <int>] [-v]
+usage: constava analyze [-h] [-i <file.csv> [<file.csv> ...]] [--input-format {auto,xvg,csv}] [-o <file.csv>] [--output-format {auto,csv,json,tsv}] [-m <file.pkl>] [--window <int> [<int> ...]]
+                        [--window-series <int> [<int> ...]] [--bootstrap <int> [<int> ...]] [--bootstrap-series <int> [<int> ...]] [--bootstrap-samples <int>] [--degrees] [--precision <int>] [--seed <int>] [-v]
 
 The `constava analyze` submodule analyzes the provided backbone dihedral angles
-and infers the propensities for each residue to reside in a given
-conformational state.
+and infers the propensities for each residue to reside in a given 
+conformational state. 
 
-Each conformational state is a statistical model of based on the backbone
+Each conformational state is a statistical model of based on the backbone 
 dihedrals (phi, psi). The default models were derived from an analysis of NMR
-ensembles and chemical shifts. To analyze a conformational ensemble, the phi-
-and psi-angles for each conformational state in the ensemble need to be
-provided.
+ensembles and chemical shifts. To analyze a conformational ensemble, the phi- 
+and psi-angles for each conformational state in the ensemble need to be 
+provided. 
 
-As input data the backbone dihedral angles extracted from the conformational
-ensemble need to be provided. Those can be generated using the
+As input data the backbone dihedral angles extracted from the conformational 
+ensemble need to be provided. Those can be generated using the 
 `constava dihedrals` submodule (`--input-format csv`) or GROMACS'
 `gmx chi` module (`--input-format xvg`).
 
 optional arguments:
-  -h, --help            show this help message and exit
+  -h, --help            Show this help message and exit
 
 Input & output options:
   -i <file.csv> [<file.csv> ...], --input <file.csv> [<file.csv> ...]
@@ -238,35 +232,39 @@ Input & output options:
 
 Conformational state model options:
   -m <file.pkl>, --load-model <file.pkl>
-                        Load a conformational state model from the given pickled
+                        Load a conformational state model from the given pickled 
                         file. If not provided, the default model will be used.
 
 Subsampling options:
   --window <int> [<int> ...]
-                        Do inference using a moving reading-frame. Each reading
-                        frame consists of <int> consecutive samples. Multiple
+                        Do inference using a moving reading-frame. Each reading 
+                        frame consists of <int> consecutive samples. Multiple 
                         values can be provided.
   --window-series <int> [<int> ...]
-                        Do inference using a moving reading-frame. Each reading
-                        frame consists of <int> consecutive samples. Return the
+                        Do inference using a moving reading-frame. Each reading 
+                        frame consists of <int> consecutive samples. Return the 
                         results for every window rather than the average. This can
-                        result in very large output files. Multiple values can be
+                        result in very large output files. Multiple values can be 
                         provided.
   --bootstrap <int> [<int> ...]
-                        Do inference using <Int> samples obtained through
+                        Do inference using <Int> samples obtained through 
                         bootstrapping. Multiple values can be provided.
+  --bootstrap-series <int> [<int> ...]
+                        Do inference using <Int> samples obtained through 
+                        bootstrapping. Return the results for every subsample
+                        rather than the average. This can result in very 
+                        large output files. Multiple values can be provided.
   --bootstrap-samples <int>
                         When bootstrapping, sample <Int> times from the input data.
                         (default: 500)
 
 Miscellaneous options:
-  --degrees             Set this flag, if dihedrals in the input files are in
+  --degrees             Set this flag, if dihedrals in the input files are in 
                         degrees.
-  --precision PRECISION
-                        Sets the number of decimals in the output files.
+  --precision <int>     Sets the number of decimals in the output files.
   --seed <int>          Set random seed for bootstrap sampling
-  -v, --verbose         Set verbosity level of screen output. Flag can be given
-                        multiple times (up to 2) to gradually increase output to
+  -v, --verbose         Set verbosity level of screen output. Flag can be given 
+                        multiple times (up to 2) to gradually increase output to 
                         debugging mode.
 ```
 
@@ -293,27 +291,27 @@ usage: constava fit-model [-h] [-i <file.json>] -o <file.pkl> [--model-type {kde
 
 The `constava fit-model` submodule is used to generate the probabilistic
 conformational state models used in the analysis. By default, when running
-`constava analyze` these models are generated on-the-fly. In selected cases
+`constava analyze` these models are generated on-the-fly. In selected cases 
 generating a model beforehand and loading it can be useful, though.
 
 We provide two model types. kde-Models are the default. They are fast to fit
-but may be slow in the inference in large conformational ensembles (e.g.,
+but may be slow in the inference in large conformational ensembles (e.g., 
 long-timescale MD simulations). The idea of grid-Models is, to replace
 the continuous probability density function of the kde-Model by a fixed set
-of grid-points. The PDF for any sample is then estimated by linear
+of grid-points. The PDF for any sample is then estimated by linear 
 interpolation between the nearest grid points. This is slightly less
-accurate then the kde-Model but speeds up inference significantly.
+accurate than the kde-Model but speeds up inference significantly.
 
 optional arguments:
-  -h, --help            show this help message and exit
+  -h, --help            Show this help message and exit
 
 Input and output options:
   -i <file.json>, --input <file.json>
                         The data to which the new conformational state models will
-                        be fitted. It should be provided as a JSON file. The
-                        top-most key should indicate the names of the
+                        be fitted. It should be provided as a JSON file. The 
+                        top-most key should indicate the names of the 
                         conformational states. On the level below, lists of phi-/
-                        psi pairs for each stat should be provided. If not provided
+                        psi pairs for each stat should be provided. If not provided 
                         the default data from the publication will be used.
   -o <file.pkl>, --output <file.pkl>
                         Write the generated model to a pickled file, that can be
@@ -321,22 +319,22 @@ Input and output options:
 
 Conformational state model options:
   --model-type {kde,grid}
-                        The probabilistic conformational state model used. The
+                        The probabilistic conformational state model used. The 
                         default is `kde`. The alternative `grid` runs significantly
                         faster while slightly sacrificing accuracy: {'kde', 'grid'}
                         (default: 'kde')
   --kde-bandwidth <float>
-                        This flag controls the bandwidth of the Gaussian kernel
+                        This flag controls the bandwidth of the Gaussian kernel 
                         density estimator. (default: 0.13)
-  --grid-points <int>   This flag controls how many grid points are used to
+  --grid-points <int>   This flag controls how many grid points are used to 
                         describe the probability density function. Only applies if
                         `--model-type` is set to `grid`. (default: 10000)
 
 Miscellaneous options:
-  --degrees             Set this flag, if dihedrals in `model-data` are in degrees
+  --degrees             Set this flag, if dihedrals in `model-data` are in degrees 
                         instead of radians.
-  -v, --verbose         Set verbosity level of screen output. Flag can be given
-                        multiple times (up to 2) to gradually increase output to
+  -v, --verbose         Set verbosity level of screen output. Flag can be given 
+                        multiple times (up to 2) to gradually increase output to 
                         debugging mode.
 ```
 
@@ -355,7 +353,7 @@ constava fit-model -v \
 
 ### Execution as a python library
 
-The module provides the `Constava` class a general interface to softwares 
+The module provides the `Constava` class as a general interface to the software's
 features. The only notable exception is the extraction of dihedrals,
 which is done through a separate function.
 
@@ -458,30 +456,31 @@ In the following table, all available parameters of the Python interface (`Const
 class) and their corresponding command line arguments are listed. The defaults for 
 parameters in Python and command line are the same.
 
-| Python parameter | Command line argument | Description |
-|---|---|---|
-| `input_files : List[str] or str` | `constava analyze --input <file> [<file> ...]`  | Input file(s) that contain the dihedral angles. |
-| `input_format : str` | `constava analyze --input-format <enum>` | Format of the input file: `{'auto', 'csv', 'xvg'}` |
-| `output_file : str` | `constava analyze --output <file>` | The file to write the output to. |
-| `output_format : str` | `constava analyze --output-format <enum>` | Format of output file: `{'auto', 'csv', 'json', 'tsv'}` |
-| | | |
-| `model_type : str` | `constava fit-model --model-type <enum>` | The probabilistic conformational state model used. Default is `kde`. The alternative `grid` runs significantly faster while slightly sacrificing accuracy: `{'kde', 'grid'}` |
-| `model_load : str` | `constava analyze --load-model <file>` | Load a conformational state model from the given pickled file. |
-| `model_data : str` | `constava fit-model --input <file>` | Fit conformational state models to data provided in the given file. |
-| `model_dump : str` | `constava fit-model --output <file>` | Write the generated model to a pickled file, that can be loaded again using `model_load`. |
-| | | |
-| `window : List[int] or int` | `constava analyze --window <Int>  [<Int> ...]` | Do inference using a moving reading-frame of <int> consecutive samples. Multiple values can be given as a list. |
-| `window_series : List[int] or int` | `constava analyze --window-series <Int>  [<Int> ...]` | Do inference using a moving reading-frame of <int> consecutive samples. Return the results for every window rather than the average. Multiple values can be given as a list. |
-| `bootstrap : List[int] or int` | `constava analyze --bootstrap <Int> [<Int>  ...]` | Do inference using <Int> samples obtained through bootstrapping. Multiple values can be given as a list. |
-| `bootstrap_samples : int` | `constava analyze --bootstrap-samples <Int> ` | When bootstrapping, sample <Int> times from the input data. |
-| | | |
-| `input_degrees : bool` | `constava analyze --degrees` | Set `True` if input files are in degrees. |
-| `model_data_degrees : bool` | `constava fit-model --degrees` | Set `True` if the data given under `model_data` to is given in degrees. |
-| `precision : int` | `constava analyze --precision <int> ` | Sets the number of decimals in the output files. By default, 4 decimals. |
-| `kde_bandwidth : float` | `constava fit-model --kde-bandwidth <float>` | This controls the bandwidth of the Gaussian kernel density estimator. |
-| `grid_points : int` | `constava analyze --grid-points <int>` | When `model_type` equals 'grid', this controls how many grid points are used to describe the probability density function. |
-| `seed : int` | `constava analyze --seed <int>` | Set the random seed especially for bootstrapping. |
-| `verbose : int` | `constava <...> -v [-v] ` | Set verbosity level of screen output. |
+| Python parameter                      | Command line argument                                    | Description                                                                                                                                                                  |
+|---------------------------------------|----------------------------------------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| `input_files : List[str] or str`      | `constava analyze --input <file> [<file> ...]`           | Input file(s) that contain the dihedral angles.                                                                                                                              |
+| `input_format : str`                  | `constava analyze --input-format <enum>`                 | Format of the input file: `{'auto', 'csv', 'xvg'}`                                                                                                                           |
+| `output_file : str`                   | `constava analyze --output <file>`                       | The file to write the output to.                                                                                                                                             |
+| `output_format : str`                 | `constava analyze --output-format <enum>`                | Format of output file: `{'auto', 'csv', 'json', 'tsv'}`                                                                                                                      |
+|                                       |                                                          |                                                                                                                                                                              |
+| `model_type : str`                    | `constava fit-model --model-type <enum>`                 | The probabilistic conformational state model used. Default is `kde`. The alternative `grid` runs significantly faster while slightly sacrificing accuracy: `{'kde', 'grid'}` |
+| `model_load : str`                    | `constava analyze --load-model <file>`                   | Load a conformational state model from the given pickled file.                                                                                                               |
+| `model_data : str`                    | `constava fit-model --input <file>`                      | Fit conformational state models to data provided in the given file.                                                                                                          |
+| `model_dump : str`                    | `constava fit-model --output <file>`                     | Write the generated model to a pickled file, that can be loaded again using `model_load`.                                                                                    |
+|                                       |                                                          |                                                                                                                                                                              |
+| `window : List[int] or int`           | `constava analyze --window <Int>  [<Int> ...]`           | Do inference using a moving reading-frame of <int> consecutive samples. Multiple values can be given as a list.                                                              |
+| `window_series : List[int] or int`    | `constava analyze --window-series <Int>  [<Int> ...]`    | Do inference using a moving reading-frame of <int> consecutive samples. Return the results for every window rather than the average. Multiple values can be given as a list. |
+| `bootstrap : List[int] or int`        | `constava analyze --bootstrap <Int> [<Int>  ...]`        | Do inference using <Int> samples obtained through bootstrapping. Multiple values can be given as a list.                                                                     |
+| `bootstrap_series : List[int] or int` | `constava analyze --bootstrap-series <Int>  [<Int> ...]` | Do inference using <Int> samples obtained through bootstrapping. Return the results for every bootstrap rather than the average. Multiple values can be given as a list.     |
+| `bootstrap_samples : int`             | `constava analyze --bootstrap-samples <Int> `            | When bootstrapping, sample <Int> times from the input data.                                                                                                                  |
+|                                       |                                                          |                                                                                                                                                                              |
+| `input_degrees : bool`                | `constava analyze --degrees`                             | Set `True` if input files are in degrees.                                                                                                                                    |
+| `model_data_degrees : bool`           | `constava fit-model --degrees`                           | Set `True` if the data given under `model_data` is given in degrees.                                                                                                         |
+| `precision : int`                     | `constava analyze --precision <int> `                    | Sets the number of decimals in the output files. By default, 4 decimals.                                                                                                     |
+| `kde_bandwidth : float`               | `constava fit-model --kde-bandwidth <float>`             | This controls the bandwidth of the Gaussian kernel density estimator.                                                                                                        |
+| `grid_points : int`                   | `constava fit-model --grid-points <int>`                 | When `model_type` equals 'grid', this controls how many grid points are used to describe the probability density function.                                                   |
+| `seed : int`                          | `constava analyze --seed <int>`                          | Set the random seed especially for bootstrapping.                                                                                                                            |
+| `verbose : int`                       | `constava <...> -v [-v] `                                | Set verbosity level of screen output.                                                                                                                                        |
 
 [<Go to top>](#constava)
 
diff --git a/constava/__main__.py b/constava/__main__.py
index 86ccf74..8156519 100644
--- a/constava/__main__.py
+++ b/constava/__main__.py
@@ -48,7 +48,7 @@ def parse_parameters(cmdline_arguments):
     genOpt = parser.add_argument_group("Generic options")
     genOpt.add_argument("-h", "--help", action="help", help=tw.dedent(
         """\
-        Show this help message and exit. For detailled 
+        Show this help message and exit. For detailed 
         information on the subcommands, run: 
         `%(prog)s SUBCOMMAND -h`"""))
     genOpt.add_argument("--version", action="version", version=f"%(prog)s {__version__}", 
@@ -82,7 +82,7 @@ def parse_parameters(cmdline_arguments):
         the continuous probability density function of the kde-Model by a fixed set
         of grid-points. The PDF for any sample is then estimated by linear 
         interpolation between the nearest grid points. This is slightly less
-        accurate then the kde-Model but speeds up inference significantly."""),
+        accurate than the kde-Model but speeds up inference significantly."""),
         formatter_class=argparse.RawTextHelpFormatter)
     
     fitIO = parser_fit_model.add_argument_group("Input and output options")
@@ -181,6 +181,12 @@ def parse_parameters(cmdline_arguments):
         """\
         Do inference using <Int> samples obtained through 
         bootstrapping. Multiple values can be provided."""))
+    anaSmpl.add_argument("--bootstrap-series", metavar="<int>", type=int, nargs='+',  help=tw.dedent(
+        """\
+        Do inference using <Int> samples obtained through 
+        bootstrapping. Return the results for every subsample
+        rather than the average. This can result in very 
+        large output files. Multiple values can be provided."""))
     anaSmpl.add_argument("--bootstrap-samples", metavar="<int>", type=int, default=500, help=tw.dedent(
         """\
         When bootstrapping, sample <Int> times from the input data.
@@ -261,6 +267,7 @@ def run_analyze(args):
     params.window = args.window
     params.window_series = args.window_series
     params.bootstrap = args.bootstrap
+    params.bootstrap_series = args.bootstrap_series
     params.bootstrap_samples = args.bootstrap_samples
     params.input_degrees = args.degrees
     params.precision = args.precision
diff --git a/constava/calc/subsampling.py b/constava/calc/subsampling.py
index 0f911d9..a2009a2 100644
--- a/constava/calc/subsampling.py
+++ b/constava/calc/subsampling.py
@@ -18,8 +18,8 @@ class SubsamplingABC(metaclass=abc.ABCMeta):
     Methods:
     --------
         calculate(state_logpdfs)
-            Calculates the coformational state likelihoods and conformational
-            state variablility.
+            Calculates the conformational state likelihoods and conformational
+            state variability.
         calculateStatePropensities(state_likelihoods)
             Calculates the average conformational state likelihood.
         calculateStateVariability(state_likelihoods)
@@ -30,8 +30,8 @@ class SubsamplingABC(metaclass=abc.ABCMeta):
             Subsamples from the distribution of original data points.
     """
     def calculate(self, state_logpdfs):
-        """Calculates the coformational state likelihoods and conformational
-        state variablility from the sampled state logPDFs.
+        """Calculates the conformational state likelihoods and conformational
+        state variability from the sampled state logPDFs.
 
         Parameters:
         -----------
@@ -44,7 +44,7 @@ def calculate(self, state_logpdfs):
                                 Average likelihood for samples to fall in any 
                                 of the M states
             state_variability : float
-                                Variablility fo the state propensities 
+                                Variability of the state propensities
                                 throughout the sampling
             
         """
@@ -115,8 +115,8 @@ class SubsamplingWindow(SubsamplingABC):
     Methods:
     --------
         calculate(state_logpdfs)
-            Calculates the coformational state likelihoods and conformational
-            state variablility.
+            Calculates the conformational state likelihoods and conformational
+            state variability.
         calculateStatePropensities(state_likelihoods)
             Calculates the average conformational state likelihood.
         calculateStateVariability(state_likelihoods)
@@ -177,7 +177,7 @@ class SubsamplingBootstrap(SubsamplingABC):
     Attributes:
     -----------
         sample_size : int
-            Number of originial data points in each bootstrapped sample.
+            Number of original data points in each bootstrapped sample.
         n_samples : int
             Number of samples to bootstrap.
         seed: int
@@ -186,8 +186,8 @@ class SubsamplingBootstrap(SubsamplingABC):
     Methods:
     --------
         calculate(state_logpdfs)
-            Calculates the coformational state likelihoods and conformational
-            state variablility.
+            Calculates the conformational state likelihoods and conformational
+            state variability.
         calculateStatePropensities(state_likelihoods)
             Calculates the average conformational state likelihood.
         calculateStateVariability(state_likelihoods)
@@ -205,7 +205,7 @@ def __init__(self, sample_size: int, n_samples = 500, seed: Optional[int] = None
         Parameters:
         -----------
             sample_size : int
-                Number of originial data points in each bootstrapped sample.
+                Number of original data points in each bootstrapped sample.
             n_samples : int
                 Number of samples to bootstrap.
             seed: int
@@ -270,12 +270,12 @@ class SubsamplingWindowSeries(SubsamplingWindow):
     Methods:
     --------
         calculate(state_logpdfs)
-            Calculates the coformational state likelihoods and conformational
-            state variablility.
+            Calculates the conformational state likelihoods and conformational
+            state variability.
         calculateStatePropensities(state_likelihoods)
-            Calculates the average conformational state likelihood.
+            Calculates the samples' conformational state likelihood.
         calculateStateVariability(state_likelihoods)
-            Calculates the  conformational state variability.
+            Calculates the conformational state variability.
         getShortName()
             Name of the method for reference in the output.
         _subsampling(state_logpdfs)
@@ -292,13 +292,73 @@ def calculateStatePropensities(self, state_likelihoods):
 
     def calculateStateVariability(self, state_likelihoods):
         """Calculates distance of the conformational states of each sample to
-        the average conformational state. 
+        the average conformational state.
 
         Parameters:
         -----------
             state_likelihoods : Array[M,N]
                 Likelihoods for each of the M states along N samples.
-        
+
+        Returns:
+        --------
+            state_var : Array[N]
+                Conformational state distances from the average
+        """
+        mean_likelihoods = np.mean(state_likelihoods, axis=1)
+        squard_dev = np.sum((state_likelihoods.T - mean_likelihoods) ** 2, axis=1)
+        state_var = np.sqrt(squard_dev)
+        return state_var
+
+
+class SubsamplingBootstrapSeries(SubsamplingBootstrap):
+    """Class to subsample the logPDF values obtained from the probabilistic 
+    conformational state models and calculate the conformational state 
+    propensities and conformational state variability. Subsampling is done 
+    using bootstrapping. For each bootstrapped subsample the results are 
+    returned.
+
+    Attributes:
+    -----------
+        sample_size : int
+            Number of original data points in each bootstrapped sample.
+        n_samples : int
+            Number of samples to bootstrap.
+        seed: int
+            Random seed used during bootstrapping
+
+    Methods:
+    --------
+        calculate(state_logpdfs)
+            Calculates the conformational state likelihoods and conformational
+            state variability.
+        calculateStatePropensities(state_likelihoods)
+            Calculates the samples' conformational state likelihood.
+        calculateStateVariability(state_likelihoods)
+            Calculates the conformational state variability.
+        getShortName()
+            Name of the method for reference in the output.
+        _subsampling(state_logpdfs)
+            Subsamples from the distribution of original data points.
+    """
+
+    def getShortName(self) -> str:
+        """Name of the method for reference in the output."""
+        return "bootstrap_series/{0:d}/{1:d}/{2}/".format(
+            self.sample_size, self.n_samples, self.seed or "")
+
+    def calculateStatePropensities(self, state_likelihoods):
+        """Calculates the conformational state likelihoods for the given sample."""
+        return state_likelihoods
+
+    def calculateStateVariability(self, state_likelihoods):
+        """Calculates distance of the conformational states of each sample to
+        the average conformational state.
+
+        Parameters:
+        -----------
+            state_likelihoods : Array[M,N]
+                Likelihoods for each of the M states along N samples.
+
         Returns:
         --------
             state_var : Array[N]
diff --git a/constava/utils/ensembles.py b/constava/utils/ensembles.py
index 42cf104..bde9abc 100644
--- a/constava/utils/ensembles.py
+++ b/constava/utils/ensembles.py
@@ -81,8 +81,13 @@ def __repr__(self):
     
     @property
     def n_residues(self):
-        """ Returns the number of residues form the first to last residue 
-        this might include gaps (residues without data) """
+        """Returns the number of residues in the ensemble"""
+        return len(self._residues)
+
+    @property
+    def resrange(self):
+        """Returns the number of residue positions spanned from the first to
+        the last residue. This might include gaps (residues without data)."""
         return 1 + self._residues[-1].respos - self._residues[0].respos
 
     @property
diff --git a/constava/wrapper/params.py b/constava/wrapper/params.py
index 33b2ac0..1c0fecc 100644
--- a/constava/wrapper/params.py
+++ b/constava/wrapper/params.py
@@ -68,6 +68,10 @@ class ConstavaParameters:
         bootstrap : List[int] or int
             Do inference using <Int> samples obtained through bootstrapping.
             Multiple values can be given as a list.
+        bootstrap_series : List[int] or int
+            Do inference using <Int> samples obtained through bootstrapping.
+            Return the results for every subsample rather than the average. Multiple 
+            values can be given as a list.
         bootstrap_samples : int
             When bootstrapping, sample <Int> times from the input data.
 
@@ -101,6 +105,7 @@ class ConstavaParameters:
     window : typing.List[int] = field(default_factory=list)
     bootstrap : typing.List[int] = field(default_factory=list)
     window_series : typing.List[int] = field(default_factory=list)
+    bootstrap_series : typing.List[int] = field(default_factory=list)
     bootstrap_samples : int = 500
 
     # Miscellaneous Options
diff --git a/constava/wrapper/wrapper.py b/constava/wrapper/wrapper.py
index 32576a2..503f533 100644
--- a/constava/wrapper/wrapper.py
+++ b/constava/wrapper/wrapper.py
@@ -6,7 +6,7 @@
 from .params import ConstavaParameters
 from ..io import ResultsWriter, EnsembleReader
 from ..calc.calculator import ConfStateCalculator
-from ..calc.subsampling import SubsamplingBootstrap, SubsamplingWindow, SubsamplingWindowSeries
+from ..calc.subsampling import SubsamplingBootstrap, SubsamplingBootstrapSeries, SubsamplingWindow, SubsamplingWindowSeries
 from ..calc.csmodels import ConfStateModelABC, ConfStateModelKDE, ConfStateModelGrid
 
 # The logger for the wrapper
@@ -128,6 +128,7 @@ def run(self) -> None:
                 window = self.get_param("window"),
                 window_series = self.get_param("window_series"),
                 bootstrap = self.get_param("bootstrap"),
+                bootstrap_series = self.get_param("bootstrap_series"),
                 bootstrap_samples = self.get_param("bootstrap_samples"),
                 bootstrap_seed  = self.get_param("seed"))
 
@@ -252,8 +253,8 @@ def load_csmodel(self, pickled_csmodel: str) -> ConfStateModelABC:
 
     def initialize_calculator(self, csmodel: ConfStateModelABC = None, 
             window: List[int] = None, window_series: List[int] = None, 
-            bootstrap: List[int] = None, bootstrap_samples: int = 500, 
-            bootstrap_seed: int = None) -> ConfStateCalculator:
+            bootstrap: List[int] = None, bootstrap_series: List[int] = None,
+            bootstrap_samples: int = 500, bootstrap_seed: int = None) -> ConfStateCalculator:
         """Initializes a ConfStateCalculator.
 
         Parameters:
@@ -272,6 +273,10 @@ def initialize_calculator(self, csmodel: ConfStateModelABC = None,
             bootstrap : List[int]
                 Subsampling using by bootstrapping <int> datapoints. Multiple
                 values can be given as a list.
+            bootstrap_series : List[int]
+                Subsampling by bootstrapping <int> datapoints. Returns the
+                results for every subsample rather than the average. Multiple 
+                values can be given as a list.
             bootstrap_samples : int
                 When bootstrapping, sample <int> times from the input data.
             bootstrap_seed : int
@@ -301,4 +306,8 @@ def initialize_calculator(self, csmodel: ConfStateModelABC = None,
             new_method = SubsamplingWindowSeries(window_size)
             logger.info(f"... adding subsampling method: {new_method.getShortName()}")
             calculator.add_method(new_method)
+        for sample_size in (bootstrap_series or []):
+            new_method = SubsamplingBootstrapSeries(sample_size, bootstrap_samples, seed=bootstrap_seed)
+            logger.info(f"... adding subsampling method: {new_method.getShortName()}")
+            calculator.add_method(new_method)
         return calculator
diff --git a/requirements.txt b/requirements.txt
index e69de29..a43d5e1 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -0,0 +1,4 @@
+MDAnalysis
+numpy
+pandas
+scikit-learn
\ No newline at end of file
diff --git a/setup.py b/setup.py
index 0d8871f..4a22b15 100644
--- a/setup.py
+++ b/setup.py
@@ -3,9 +3,12 @@
 with open("README.md", "r", encoding="utf-8") as fh:
     long_description = fh.read()
 
+with open("requirements.txt", "r", encoding="utf-8") as f:
+    requirements = f.read().splitlines()
+
 setup(
     name="constava",
-    version="1.0.0b4",
+    version="1.0.0",
     author="Wim Vranken",
     author_email="wim.vranken@vub.be",
     description="This software is used to calculate conformational states probability & conformational state "
@@ -34,12 +37,7 @@
         "Development Status :: 5 - Production/Stable"
     ],
     python_requires=">=3.8",
-    install_requires=[
-        "MDAnalysis",
-        "numpy",
-        "pandas",
-        "scikit-learn",
-    ],
+    install_requires=requirements,
     entry_points={
         "console_scripts": [
             "constava = constava.__main__:main",