From 44d409017a0305716fb1ba0589bbabb03449ff85 Mon Sep 17 00:00:00 2001 From: David BICKEL Date: Thu, 5 Oct 2023 15:18:26 +0000 Subject: [PATCH] Merge development into main (version update: 1.0.0) Approved-by: Jose Gavalda Garcia --- MANIFEST.in | 3 +- README.md | 145 +++++++++++++++++------------------ constava/__main__.py | 11 ++- constava/calc/subsampling.py | 94 +++++++++++++++++++---- constava/utils/ensembles.py | 9 ++- constava/wrapper/params.py | 5 ++ constava/wrapper/wrapper.py | 15 +++- requirements.txt | 4 + setup.py | 12 ++- 9 files changed, 193 insertions(+), 105 deletions(-) diff --git a/MANIFEST.in b/MANIFEST.in index b80c04d..5cf8a75 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1 +1,2 @@ -include constava/data/* \ No newline at end of file +include constava/data/* +include requirements.txt \ No newline at end of file diff --git a/README.md b/README.md index d28f0e4..dad7b24 100644 --- a/README.md +++ b/README.md @@ -99,14 +99,10 @@ directly. source constava/bin/activate ``` -3. Install the project dependencies: - ```sh - pip install -r requirements.txt - ``` - - Then, from the package's root directory run: +3. Build and install the package: ```sh + # In the packages root directory do: # Build package from source make build # Install locally @@ -122,7 +118,7 @@ from the package's root directory. The software provides two modes of interaction. Shell user may use the software from the command line, while users skilled in Python can import it as a module. -We provide a couple of usage examples in a [Jupyter notebook](). +We provide a couple of usage examples in a [Colab notebook](https://colab.research.google.com/github/Bio2Byte/public_notebooks/blob/main/constava_examples.ipynb). [](#constava) @@ -158,8 +154,7 @@ To extract dihedral angles from a trajectory the `constava dihedrals` submodule is used. 
``` -usage: constava dihedrals [-h] [-s ] [-f [ ...]] [-o OUTPUT] [--selection SELECTION] [--precision PRECISION] [--degrees] - [-O] +usage: constava dihedrals [-h] [-s ] [-f [ ...]] [-o OUTPUT] [--selection SELECTION] [--precision PRECISION] [--degrees] [-O] The `constava dihedrals` submodule is used to extract the backbone dihedrals needed for the analysis from confromational ensembles. By default the results @@ -170,7 +165,7 @@ Note: For the first and last residue in a protein only one backbone dihedral can be extracted. Thus, those residues are omitted by default. optional arguments: - -h, --help show this help message and exit + -h, --help Show this help message and exit Input & output options: -s , --structure @@ -204,27 +199,26 @@ To analyze the backbone dihedral angles extracted from a confromational ensemble the `constava analyze` submodule is used. ``` -usage: constava analyze [-h] [-i [ ...]] [--input-format {auto,xvg,csv}] [-o ] [--output-format {auto,csv,json,tsv}] - [-m ] [--window [ ...]] [--window-series [ ...]] [--bootstrap [ ...]] - [--bootstrap-samples ] [--degrees] [--precision PRECISION] [--seed ] [-v] +usage: constava analyze [-h] [-i [ ...]] [--input-format {auto,xvg,csv}] [-o ] [--output-format {auto,csv,json,tsv}] [-m ] [--window [ ...]] + [--window-series [ ...]] [--bootstrap [ ...]] [--bootstrap-series [ ...]] [--bootstrap-samples ] [--degrees] [--precision ] [--seed ] [-v] The `constava analyze` submodule analyzes the provided backbone dihedral angles -and infers the propensities for each residue to reside in a given -conformational state. +and infers the propensities for each residue to reside in a given +conformational state. -Each conformational state is a statistical model of based on the backbone +Each conformational state is a statistical model of based on the backbone dihedrals (phi, psi). The default models were derived from an analysis of NMR -ensembles and chemical shifts. 
To analyze a conformational ensemble, the phi- -and psi-angles for each conformational state in the ensemble need to be -provided. +ensembles and chemical shifts. To analyze a conformational ensemble, the phi- +and psi-angles for each conformational state in the ensemble need to be +provided. -As input data the backbone dihedral angles extracted from the conformational -ensemble need to be provided. Those can be generated using the +As input data the backbone dihedral angles extracted from the conformational +ensemble need to be provided. Those can be generated using the `constava dihedrals` submodule (`--input-format csv`) or GROMACS' `gmx chi` module (`--input-format xvg`). optional arguments: - -h, --help show this help message and exit + -h, --help Show this help message and exit Input & output options: -i [ ...], --input [ ...] @@ -238,35 +232,39 @@ Input & output options: Conformational state model options: -m , --load-model - Load a conformational state model from the given pickled + Load a conformational state model from the given pickled file. If not provided, the default model will be used. Subsampling options: --window [ ...] - Do inference using a moving reading-frame. Each reading - frame consists of consecutive samples. Multiple + Do inference using a moving reading-frame. Each reading + frame consists of consecutive samples. Multiple values can be provided. --window-series [ ...] - Do inference using a moving reading-frame. Each reading - frame consists of consecutive samples. Return the + Do inference using a moving reading-frame. Each reading + frame consists of consecutive samples. Return the results for every window rather than the average. This can - result in very large output files. Multiple values can be + result in very large output files. Multiple values can be provided. --bootstrap [ ...] - Do inference using samples obtained through + Do inference using samples obtained through bootstrapping. Multiple values can be provided. 
+ --bootstrap-series [ ...] + Do inference using samples obtained through + bootstrapping. Return the results for every subsample + rather than the average. This can result in very + large output files. Multiple values can be provided. --bootstrap-samples When bootstrapping, sample times from the input data. (default: 500) Miscellaneous options: - --degrees Set this flag, if dihedrals in the input files are in + --degrees Set this flag, if dihedrals in the input files are in degrees. - --precision PRECISION - Sets the number of decimals in the output files. + --precision Sets the number of decimals in the output files. --seed Set random seed for bootstrap sampling - -v, --verbose Set verbosity level of screen output. Flag can be given - multiple times (up to 2) to gradually increase output to + -v, --verbose Set verbosity level of screen output. Flag can be given + multiple times (up to 2) to gradually increase output to debugging mode. ``` @@ -293,27 +291,27 @@ usage: constava fit-model [-h] [-i ] -o [--model-type {kde The `constava fit-model` submodule is used to generate the probabilistic conformational state models used in the analysis. By default, when running -`constava analyze` these models are generated on-the-fly. In selected cases +`constava analyze` these models are generated on-the-fly. In selected cases generating a model beforehand and loading it can be useful, though. We provide two model types. kde-Models are the default. They are fast to fit -but may be slow in the inference in large conformational ensembles (e.g., +but may be slow in the inference in large conformational ensembles (e.g., long-timescale MD simulations). The idea of grid-Models is, to replace the continuous probability density function of the kde-Model by a fixed set -of grid-points. The PDF for any sample is then estimated by linear +of grid-points. The PDF for any sample is then estimated by linear interpolation between the nearest grid points. 
This is slightly less -accurate then the kde-Model but speeds up inference significantly. +accurate than the kde-Model but speeds up inference significantly. optional arguments: - -h, --help show this help message and exit + -h, --help Show this help message and exit Input and output options: -i , --input The data to which the new conformational state models will - be fitted. It should be provided as a JSON file. The - top-most key should indicate the names of the + be fitted. It should be provided as a JSON file. The + top-most key should indicate the names of the conformational states. On the level below, lists of phi-/ - psi pairs for each stat should be provided. If not provided + psi pairs for each stat should be provided. If not provided the default data from the publication will be used. -o , --output Write the generated model to a pickled file, that can be @@ -321,22 +319,22 @@ Input and output options: Conformational state model options: --model-type {kde,grid} - The probabilistic conformational state model used. The + The probabilistic conformational state model used. The default is `kde`. The alternative `grid` runs significantly faster while slightly sacrificing accuracy: {'kde', 'grid'} (default: 'kde') --kde-bandwidth - This flag controls the bandwidth of the Gaussian kernel + This flag controls the bandwidth of the Gaussian kernel density estimator. (default: 0.13) - --grid-points This flag controls how many grid points are used to + --grid-points This flag controls how many grid points are used to describe the probability density function. Only applies if `--model-type` is set to `grid`. (default: 10000) Miscellaneous options: - --degrees Set this flag, if dihedrals in `model-data` are in degrees + --degrees Set this flag, if dihedrals in `model-data` are in degrees instead of radians. - -v, --verbose Set verbosity level of screen output. 
Flag can be given - multiple times (up to 2) to gradually increase output to + -v, --verbose Set verbosity level of screen output. Flag can be given + multiple times (up to 2) to gradually increase output to debugging mode. ``` @@ -355,7 +353,7 @@ constava fit-model -v \ ### Execution as a python library -The module provides the `Constava` class a general interface to softwares +The module provides the `Constava` class a general interface to software's features. The only notable exception is the extraction of dihedrals, which is done through a separate function. @@ -458,30 +456,31 @@ In the following table, all available parameters of the Python interface (`Const class) and their corresponding command line arguments are listed. The defaults for parameters in Python and command line are the same. -| Python parameter | Command line argument | Description | -|---|---|---| -| `input_files : List[str] or str` | `constava analyze --input [ ...]` | Input file(s) that contain the dihedral angles. | -| `input_format : str` | `constava analyze --input-format ` | Format of the input file: `{'auto', 'csv', 'xvg'}` | -| `output_file : str` | `constava analyze --output ` | The file to write the output to. | -| `output_format : str` | `constava analyze --output-format ` | Format of output file: `{'auto', 'csv', 'json', 'tsv'}` | -| | | | -| `model_type : str` | `constava fit-model --model-type ` | The probabilistic conformational state model used. Default is `kde`. The alternative `grid` runs significantly faster while slightly sacrificing accuracy: `{'kde', 'grid'}` | -| `model_load : str` | `constava analyze --load-model ` | Load a conformational state model from the given pickled file. | -| `model_data : str` | `constava fit-model --input ` | Fit conformational state models to data provided in the given file. | -| `model_dump : str` | `constava fit-model --output ` | Write the generated model to a pickled file, that can be loaded again using `model_load`. 
| -| | | | -| `window : List[int] or int` | `constava analyze --window [ ...]` | Do inference using a moving reading-frame of consecutive samples. Multiple values can be given as a list. | -| `window_series : List[int] or int` | `constava analyze --window-series [ ...]` | Do inference using a moving reading-frame of consecutive samples. Return the results for every window rather than the average. Multiple values can be given as a list. | -| `bootstrap : List[int] or int` | `constava analyze --bootstrap [ ...]` | Do inference using samples obtained through bootstrapping. Multiple values can be given as a list. | -| `bootstrap_samples : int` | `constava analyze --bootstrap-samples ` | When bootstrapping, sample times from the input data. | -| | | | -| `input_degrees : bool` | `constava analyze --degrees` | Set `True` if input files are in degrees. | -| `model_data_degrees : bool` | `constava fit-model --degrees` | Set `True` if the data given under `model_data` to is given in degrees. | -| `precision : int` | `constava analyze --precision ` | Sets the number of decimals in the output files. By default, 4 decimals. | -| `kde_bandwidth : float` | `constava fit-model --kde-bandwidth ` | This controls the bandwidth of the Gaussian kernel density estimator. | -| `grid_points : int` | `constava analyze --grid-points ` | When `model_type` equals 'grid', this controls how many grid points are used to describe the probability density function. | -| `seed : int` | `constava analyze --seed ` | Set the random seed especially for bootstrapping. | -| `verbose : int` | `constava <...> -v [-v] ` | Set verbosity level of screen output. 
| +| Python parameter | Command line argument | Description | +|---------------------------------------|----------------------------------------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `input_files : List[str] or str` | `constava analyze --input [ ...]` | Input file(s) that contain the dihedral angles. | +| `input_format : str` | `constava analyze --input-format ` | Format of the input file: `{'auto', 'csv', 'xvg'}` | +| `output_file : str` | `constava analyze --output ` | The file to write the output to. | +| `output_format : str` | `constava analyze --output-format ` | Format of output file: `{'auto', 'csv', 'json', 'tsv'}` | +| | | | +| `model_type : str` | `constava fit-model --model-type ` | The probabilistic conformational state model used. Default is `kde`. The alternative `grid` runs significantly faster while slightly sacrificing accuracy: `{'kde', 'grid'}` | +| `model_load : str` | `constava analyze --load-model ` | Load a conformational state model from the given pickled file. | +| `model_data : str` | `constava fit-model --input ` | Fit conformational state models to data provided in the given file. | +| `model_dump : str` | `constava fit-model --output ` | Write the generated model to a pickled file, that can be loaded again using `model_load`. | +| | | | +| `window : List[int] or int` | `constava analyze --window [ ...]` | Do inference using a moving reading-frame of consecutive samples. Multiple values can be given as a list. | +| `window_series : List[int] or int` | `constava analyze --window-series [ ...]` | Do inference using a moving reading-frame of consecutive samples. Return the results for every window rather than the average. Multiple values can be given as a list. 
| +| `bootstrap : List[int] or int` | `constava analyze --bootstrap [ ...]` | Do inference using samples obtained through bootstrapping. Multiple values can be given as a list. | +| `bootstrap_series : List[int] or int` | `constava analyze --bootstrap-series [ ...]` | Do inference using samples obtained through bootstrapping. Return the results for every bootstrap rather than the average. Multiple values can be given as a list. | +| `bootstrap_samples : int` | `constava analyze --bootstrap-samples ` | When bootstrapping, sample times from the input data. | +| | | | +| `input_degrees : bool` | `constava analyze --degrees` | Set `True` if input files are in degrees. | +| `model_data_degrees : bool` | `constava fit-model --degrees` | Set `True` if the data given under `model_data` to is given in degrees. | +| `precision : int` | `constava analyze --precision ` | Sets the number of decimals in the output files. By default, 4 decimals. | +| `kde_bandwidth : float` | `constava fit-model --kde-bandwidth ` | This controls the bandwidth of the Gaussian kernel density estimator. | +| `grid_points : int` | `constava analyze --grid-points ` | When `model_type` equals 'grid', this controls how many grid points are used to describe the probability density function. | +| `seed : int` | `constava analyze --seed ` | Set the random seed especially for bootstrapping. | +| `verbose : int` | `constava <...> -v [-v] ` | Set verbosity level of screen output. | [](#constava) diff --git a/constava/__main__.py b/constava/__main__.py index 86ccf74..8156519 100644 --- a/constava/__main__.py +++ b/constava/__main__.py @@ -48,7 +48,7 @@ def parse_parameters(cmdline_arguments): genOpt = parser.add_argument_group("Generic options") genOpt.add_argument("-h", "--help", action="help", help=tw.dedent( """\ - Show this help message and exit. For detailled + Show this help message and exit. 
For detailed information on the subcommands, run: `%(prog)s SUBCOMMAND -h`""")) genOpt.add_argument("--version", action="version", version=f"%(prog)s {__version__}", @@ -82,7 +82,7 @@ def parse_parameters(cmdline_arguments): the continuous probability density function of the kde-Model by a fixed set of grid-points. The PDF for any sample is then estimated by linear interpolation between the nearest grid points. This is slightly less - accurate then the kde-Model but speeds up inference significantly."""), + accurate than the kde-Model but speeds up inference significantly."""), formatter_class=argparse.RawTextHelpFormatter) fitIO = parser_fit_model.add_argument_group("Input and output options") @@ -181,6 +181,12 @@ def parse_parameters(cmdline_arguments): """\ Do inference using samples obtained through bootstrapping. Multiple values can be provided.""")) + anaSmpl.add_argument("--bootstrap-series", metavar="", type=int, nargs='+', help=tw.dedent( + """\ + Do inference using samples obtained through + bootstrapping. Return the results for every subsample + rather than the average. This can result in very + large output files. Multiple values can be provided.""")) anaSmpl.add_argument("--bootstrap-samples", metavar="", type=int, default=500, help=tw.dedent( """\ When bootstrapping, sample times from the input data. 
@@ -261,6 +267,7 @@ def run_analyze(args): params.window = args.window params.window_series = args.window_series params.bootstrap = args.bootstrap + params.bootstrap_series = args.bootstrap_series params.bootstrap_samples = args.bootstrap_samples params.input_degrees = args.degrees params.precision = args.precision diff --git a/constava/calc/subsampling.py b/constava/calc/subsampling.py index 0f911d9..a2009a2 100644 --- a/constava/calc/subsampling.py +++ b/constava/calc/subsampling.py @@ -18,8 +18,8 @@ class SubsamplingABC(metaclass=abc.ABCMeta): Methods: -------- calculate(state_logpdfs) - Calculates the coformational state likelihoods and conformational - state variablility. + Calculates the conformational state likelihoods and conformational + state variability. calculateStatePropensities(state_likelihoods) Calculates the average conformational state likelihood. calculateStateVariability(state_likelihoods) @@ -30,8 +30,8 @@ class SubsamplingABC(metaclass=abc.ABCMeta): Subsamples from the distribution of original data points. """ def calculate(self, state_logpdfs): - """Calculates the coformational state likelihoods and conformational - state variablility from the sampled state logPDFs. + """Calculates the conformational state likelihoods and conformational + state variability from the sampled state logPDFs. Parameters: ----------- @@ -44,7 +44,7 @@ def calculate(self, state_logpdfs): Average likelihood for samples to fall in any of the M states state_variability : float - Variablility fo the state propensities + Variability of the state propensities throughout the sampling """ @@ -115,8 +115,8 @@ class SubsamplingWindow(SubsamplingABC): Methods: -------- calculate(state_logpdfs) - Calculates the coformational state likelihoods and conformational - state variablility. + Calculates the conformational state likelihoods and conformational + state variability. calculateStatePropensities(state_likelihoods) Calculates the average conformational state likelihood. 
calculateStateVariability(state_likelihoods) @@ -177,7 +177,7 @@ class SubsamplingBootstrap(SubsamplingABC): Attributes: ----------- sample_size : int - Number of originial data points in each bootstrapped sample. + Number of original data points in each bootstrapped sample. n_samples : int Number of samples to bootstrap. seed: int @@ -186,8 +186,8 @@ class SubsamplingBootstrap(SubsamplingABC): Methods: -------- calculate(state_logpdfs) - Calculates the coformational state likelihoods and conformational - state variablility. + Calculates the conformational state likelihoods and conformational + state variability. calculateStatePropensities(state_likelihoods) Calculates the average conformational state likelihood. calculateStateVariability(state_likelihoods) @@ -205,7 +205,7 @@ def __init__(self, sample_size: int, n_samples = 500, seed: Optional[int] = None Parameters: ----------- sample_size : int - Number of originial data points in each bootstrapped sample. + Number of original data points in each bootstrapped sample. n_samples : int Number of samples to bootstrap. seed: int @@ -270,12 +270,12 @@ class SubsamplingWindowSeries(SubsamplingWindow): Methods: -------- calculate(state_logpdfs) - Calculates the coformational state likelihoods and conformational - state variablility. + Calculates the conformational state likelihoods and conformational + state variability. calculateStatePropensities(state_likelihoods) - Calculates the average conformational state likelihood. + Calculates the samples' conformational state likelihood. calculateStateVariability(state_likelihoods) - Calculates the conformational state variability. + Calculates the conformational state variability. getShortName() Name of the method for reference in the output. 
_subsampling(state_logpdfs) @@ -292,13 +292,73 @@ def calculateStatePropensities(self, state_likelihoods): def calculateStateVariability(self, state_likelihoods): """Calculates distance of the conformational states of each sample to - the average conformational state. + the average conformational state. Parameters: ----------- state_likelihoods : Array[M,N] Likelihoods for each of the M states along N samples. - + + Returns: + -------- + state_var : Array[N] + Conformational state distances from the average + """ + mean_likelihoods = np.mean(state_likelihoods, axis=1) + squard_dev = np.sum((state_likelihoods.T - mean_likelihoods) ** 2, axis=1) + state_var = np.sqrt(squard_dev) + return state_var + + +class SubsamplingBootstrapSeries(SubsamplingBootstrap): + """Class to subsample the logPDF values obtained from the probabilistic + conformational state models and calculate the conformational state + propensities and conformational state variability. Subsampling is done + using bootstrapping. For each bootstrapped subsample the results are + returned. + + Attributes: + ----------- + sample_size : int + Number of original data points in each bootstrapped sample. + n_samples : int + Number of samples to bootstrap. + seed: int + Random seed used during bootstrapping + + Methods: + -------- + calculate(state_logpdfs) + Calculates the conformational state likelihoods and conformational + state variability. + calculateStatePropensities(state_likelihoods) + Calculates the samples' conformational state likelihood. + calculateStateVariability(state_likelihoods) + Calculates the conformational state variability. + getShortName() + Name of the method for reference in the output. + _subsampling(state_logpdfs) + Subsamples from the distribution of original data points. 
+ """ + + def getShortName(self) -> str: + """Name of the method for reference in the output.""" + return "bootstrap_series/{0:d}/{1:d}/{2}/".format( + self.sample_size, self.n_samples, self.seed or "") + + def calculateStatePropensities(self, state_likelihoods): + """Calculates the conformational state likelihoods for the given sample.""" + return state_likelihoods + + def calculateStateVariability(self, state_likelihoods): + """Calculates distance of the conformational states of each sample to + the average conformational state. + + Parameters: + ----------- + state_likelihoods : Array[M,N] + Likelihoods for each of the M states along N samples. + Returns: -------- state_var : Array[N] diff --git a/constava/utils/ensembles.py b/constava/utils/ensembles.py index 42cf104..bde9abc 100644 --- a/constava/utils/ensembles.py +++ b/constava/utils/ensembles.py @@ -81,8 +81,13 @@ def __repr__(self): @property def n_residues(self): - """ Returns the number of residues form the first to last residue - this might include gaps (residues without data) """ + """Returns the number of residues in the ensemble""" + return len(self._residues) + + @property + def resrange(self): + """Returns the range from the first to last residue. + This might include gaps (residues without data)""" return 1 + self._residues[-1].respos - self._residues[0].respos @property diff --git a/constava/wrapper/params.py b/constava/wrapper/params.py index 33b2ac0..1c0fecc 100644 --- a/constava/wrapper/params.py +++ b/constava/wrapper/params.py @@ -68,6 +68,10 @@ class ConstavaParameters: bootstrap : List[int] or int Do inference using samples obtained through bootstrapping. Multiple values can be given as a list. + bootstrap_series : List[int] or int + Do inference using samples obtained through bootstrapping. + Return the results for every subsample rather than the average. Multiple + values can be given as a list. bootstrap_samples : int When bootstrapping, sample times from the input data. 
@@ -101,6 +105,7 @@ class ConstavaParameters: window : typing.List[int] = field(default_factory=list) bootstrap : typing.List[int] = field(default_factory=list) window_series : typing.List[int] = field(default_factory=list) + bootstrap_series : typing.List[int] = field(default_factory=list) bootstrap_samples : int = 500 # Miscellaneous Options diff --git a/constava/wrapper/wrapper.py b/constava/wrapper/wrapper.py index 32576a2..503f533 100644 --- a/constava/wrapper/wrapper.py +++ b/constava/wrapper/wrapper.py @@ -6,7 +6,7 @@ from .params import ConstavaParameters from ..io import ResultsWriter, EnsembleReader from ..calc.calculator import ConfStateCalculator -from ..calc.subsampling import SubsamplingBootstrap, SubsamplingWindow, SubsamplingWindowSeries +from ..calc.subsampling import SubsamplingBootstrap, SubsamplingBootstrapSeries, SubsamplingWindow, SubsamplingWindowSeries from ..calc.csmodels import ConfStateModelABC, ConfStateModelKDE, ConfStateModelGrid # The logger for the wrapper @@ -128,6 +128,7 @@ def run(self) -> None: window = self.get_param("window"), window_series = self.get_param("window_series"), bootstrap = self.get_param("bootstrap"), + bootstrap_series = self.get_param("bootstrap_series"), bootstrap_samples = self.get_param("bootstrap_samples"), bootstrap_seed = self.get_param("seed")) @@ -252,8 +253,8 @@ def load_csmodel(self, pickled_csmodel: str) -> ConfStateModelABC: def initialize_calculator(self, csmodel: ConfStateModelABC = None, window: List[int] = None, window_series: List[int] = None, - bootstrap: List[int] = None, bootstrap_samples: int = 500, - bootstrap_seed: int = None) -> ConfStateCalculator: + bootstrap: List[int] = None, bootstrap_series: List[int] = None, + bootstrap_samples: int = 500, bootstrap_seed: int = None) -> ConfStateCalculator: """Initializes a ConfStateCalculator. 
Parameters: @@ -272,6 +273,10 @@ def initialize_calculator(self, csmodel: ConfStateModelABC = None, bootstrap : List[int] Subsampling using by bootstrapping datapoints. Multiple values can be given as a list. + bootstrap_series : List[int] or int + Subsampling by bootstrapping datapoints. Returns the + results for every subsample rather than the average. Multiple + values can be given as a list. bootstrap_samples : int When bootstrapping, sample times from the input data. bootstrap_seed : int @@ -301,4 +306,8 @@ def initialize_calculator(self, csmodel: ConfStateModelABC = None, new_method = SubsamplingWindowSeries(window_size) logger.info(f"... adding subsampling method: {new_method.getShortName()}") calculator.add_method(new_method) + for sample_size in (bootstrap_series or []): + new_method = SubsamplingBootstrapSeries(sample_size, bootstrap_samples, seed=bootstrap_seed) + logger.info(f"... adding subsampling method: {new_method.getShortName()}") + calculator.add_method(new_method) return calculator diff --git a/requirements.txt b/requirements.txt index e69de29..a43d5e1 100644 --- a/requirements.txt +++ b/requirements.txt @@ -0,0 +1,4 @@ +MDAnalysis +numpy +pandas +scikit-learn \ No newline at end of file diff --git a/setup.py b/setup.py index 0d8871f..4a22b15 100644 --- a/setup.py +++ b/setup.py @@ -3,9 +3,12 @@ with open("README.md", "r", encoding="utf-8") as fh: long_description = fh.read() +with open("requirements.txt", "r", encoding="utf-8") as f: + requirements = f.read().splitlines() + setup( name="constava", - version="1.0.0b4", + version="1.0.0", author="Wim Vranken", author_email="wim.vranken@vub.be", description="This software is used to calculate conformational states probability & conformational state " @@ -34,12 +37,7 @@ "Development Status :: 5 - Production/Stable" ], python_requires=">=3.8", - install_requires=[ - "MDAnalysis", - "numpy", - "pandas", - "scikit-learn", - ], + install_requires=requirements, entry_points={ "console_scripts": [ 
"constava = constava.__main__:main",