Skip to content

Commit

Permalink
Merge development into main (version update: 1.0.0)
Browse files Browse the repository at this point in the history
Approved-by: Jose Gavalda Garcia
  • Loading branch information
aethertier committed Oct 5, 2023
1 parent 6a3a292 commit 44d4090
Show file tree
Hide file tree
Showing 9 changed files with 193 additions and 105 deletions.
3 changes: 2 additions & 1 deletion MANIFEST.in
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
include constava/data/*
include constava/data/*
include requirements.txt
145 changes: 72 additions & 73 deletions README.md

Large diffs are not rendered by default.

11 changes: 9 additions & 2 deletions constava/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ def parse_parameters(cmdline_arguments):
genOpt = parser.add_argument_group("Generic options")
genOpt.add_argument("-h", "--help", action="help", help=tw.dedent(
"""\
Show this help message and exit. For detailled
Show this help message and exit. For detailed
information on the subcommands, run:
`%(prog)s SUBCOMMAND -h`"""))
genOpt.add_argument("--version", action="version", version=f"%(prog)s {__version__}",
Expand Down Expand Up @@ -82,7 +82,7 @@ def parse_parameters(cmdline_arguments):
the continuous probability density function of the kde-Model by a fixed set
of grid-points. The PDF for any sample is then estimated by linear
interpolation between the nearest grid points. This is slightly less
accurate then the kde-Model but speeds up inference significantly."""),
accurate than the kde-Model but speeds up inference significantly."""),
formatter_class=argparse.RawTextHelpFormatter)

fitIO = parser_fit_model.add_argument_group("Input and output options")
Expand Down Expand Up @@ -181,6 +181,12 @@ def parse_parameters(cmdline_arguments):
"""\
Do inference using <Int> samples obtained through
bootstrapping. Multiple values can be provided."""))
anaSmpl.add_argument("--bootstrap-series", metavar="<int>", type=int, nargs='+', help=tw.dedent(
"""\
Do inference using <Int> samples obtained through
bootstrapping. Return the results for every subsample
rather than the average. This can result in very
large output files. Multiple values can be provided."""))
anaSmpl.add_argument("--bootstrap-samples", metavar="<int>", type=int, default=500, help=tw.dedent(
"""\
When bootstrapping, sample <Int> times from the input data.
Expand Down Expand Up @@ -261,6 +267,7 @@ def run_analyze(args):
params.window = args.window
params.window_series = args.window_series
params.bootstrap = args.bootstrap
params.bootstrap_series = args.bootstrap_series
params.bootstrap_samples = args.bootstrap_samples
params.input_degrees = args.degrees
params.precision = args.precision
Expand Down
94 changes: 77 additions & 17 deletions constava/calc/subsampling.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,8 @@ class SubsamplingABC(metaclass=abc.ABCMeta):
Methods:
--------
calculate(state_logpdfs)
Calculates the coformational state likelihoods and conformational
state variablility.
Calculates the conformational state likelihoods and conformational
state variability.
calculateStatePropensities(state_likelihoods)
Calculates the average conformational state likelihood.
calculateStateVariability(state_likelihoods)
Expand All @@ -30,8 +30,8 @@ class SubsamplingABC(metaclass=abc.ABCMeta):
Subsamples from the distribution of original data points.
"""
def calculate(self, state_logpdfs):
"""Calculates the coformational state likelihoods and conformational
state variablility from the sampled state logPDFs.
"""Calculates the conformational state likelihoods and conformational
state variability from the sampled state logPDFs.
Parameters:
-----------
Expand All @@ -44,7 +44,7 @@ def calculate(self, state_logpdfs):
Average likelihood for samples to fall in any
of the M states
state_variability : float
Variablility fo the state propensities
Variability of the state propensities
throughout the sampling
"""
Expand Down Expand Up @@ -115,8 +115,8 @@ class SubsamplingWindow(SubsamplingABC):
Methods:
--------
calculate(state_logpdfs)
Calculates the coformational state likelihoods and conformational
state variablility.
Calculates the conformational state likelihoods and conformational
state variability.
calculateStatePropensities(state_likelihoods)
Calculates the average conformational state likelihood.
calculateStateVariability(state_likelihoods)
Expand Down Expand Up @@ -177,7 +177,7 @@ class SubsamplingBootstrap(SubsamplingABC):
Attributes:
-----------
sample_size : int
Number of originial data points in each bootstrapped sample.
Number of original data points in each bootstrapped sample.
n_samples : int
Number of samples to bootstrap.
seed: int
Expand All @@ -186,8 +186,8 @@ class SubsamplingBootstrap(SubsamplingABC):
Methods:
--------
calculate(state_logpdfs)
Calculates the coformational state likelihoods and conformational
state variablility.
Calculates the conformational state likelihoods and conformational
state variability.
calculateStatePropensities(state_likelihoods)
Calculates the average conformational state likelihood.
calculateStateVariability(state_likelihoods)
Expand All @@ -205,7 +205,7 @@ def __init__(self, sample_size: int, n_samples = 500, seed: Optional[int] = None
Parameters:
-----------
sample_size : int
Number of originial data points in each bootstrapped sample.
Number of original data points in each bootstrapped sample.
n_samples : int
Number of samples to bootstrap.
seed: int
Expand Down Expand Up @@ -270,12 +270,12 @@ class SubsamplingWindowSeries(SubsamplingWindow):
Methods:
--------
calculate(state_logpdfs)
Calculates the coformational state likelihoods and conformational
state variablility.
Calculates the conformational state likelihoods and conformational
state variability.
calculateStatePropensities(state_likelihoods)
Calculates the average conformational state likelihood.
Calculates the samples' conformational state likelihood.
calculateStateVariability(state_likelihoods)
Calculates the conformational state variability.
Calculates the conformational state variability.
getShortName()
Name of the method for reference in the output.
_subsampling(state_logpdfs)
Expand All @@ -292,13 +292,73 @@ def calculateStatePropensities(self, state_likelihoods):

def calculateStateVariability(self, state_likelihoods):
    """Returns, for every sample, the Euclidean distance between its state
    likelihoods and the state likelihoods averaged over all samples.

    Parameters:
    -----------
        state_likelihoods : Array[M,N]
            Likelihoods for each of the M states along N samples.

    Returns:
    --------
        state_var : Array[N]
            Distance of each sample from the average conformational state.
    """
    # One average per state (row), taken across the samples (columns)
    centroid = np.mean(state_likelihoods, axis=1)
    # Transposing puts samples on the rows so the per-state averages
    # broadcast against every sample
    offsets = state_likelihoods.T - centroid
    return np.sqrt(np.sum(np.square(offsets), axis=1))


class SubsamplingBootstrapSeries(SubsamplingBootstrap):
"""Class to subsample the logPDF values obtained from the probabilistic
conformational state models and calculate the conformational state
propensities and conformational state variability. Subsampling is done
using bootstrapping. For each bootstrapped subsample the results are
returned.
Attributes:
-----------
sample_size : int
Number of original data points in each bootstrapped sample.
n_samples : int
Number of samples to bootstrap.
seed: int
Random seed used during bootstrapping
Methods:
--------
calculate(state_logpdfs)
Calculates the conformational state likelihoods and conformational
state variability.
calculateStatePropensities(state_likelihoods)
Calculates the samples' conformational state likelihood.
calculateStateVariability(state_likelihoods)
Calculates the conformational state variability.
getShortName()
Name of the method for reference in the output.
_subsampling(state_logpdfs)
Subsamples from the distribution of original data points.
"""

def getShortName(self) -> str:
    """Name of the method for reference in the output.

    The name has the form ``bootstrap_series/<sample_size>/<n_samples>/<seed>/``.
    The seed field is left empty only when no seed was set (``None``); a seed
    of ``0`` is written as ``0`` so that a seeded run is never labelled like
    an unseeded one.

    Returns:
    --------
        name : str
            Identifier built from sample_size, n_samples and seed.
    """
    # `self.seed or ""` would also blank out the valid seed 0
    seed_label = "" if self.seed is None else self.seed
    return "bootstrap_series/{0:d}/{1:d}/{2}/".format(
        self.sample_size, self.n_samples, seed_label)

def calculateStatePropensities(self, state_likelihoods):
    """Returns the state likelihoods of the samples as they were passed in.

    No averaging is applied here: the given object is handed back unchanged,
    so the caller receives one set of likelihoods per sample.

    Parameters:
    -----------
        state_likelihoods : Array[M,N]
            Likelihoods for each of the M states along N samples.

    Returns:
    --------
        state_likelihoods : Array[M,N]
            The very same object that was passed in.
    """
    per_sample_likelihoods = state_likelihoods
    return per_sample_likelihoods

def calculateStateVariability(self, state_likelihoods):
"""Calculates distance of the conformational states of each sample to
the average conformational state.
Parameters:
-----------
state_likelihoods : Array[M,N]
Likelihoods for each of the M states along N samples.
Returns:
--------
state_var : Array[N]
Expand Down
9 changes: 7 additions & 2 deletions constava/utils/ensembles.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,8 +81,13 @@ def __repr__(self):

@property
def n_residues(self):
    """Number of residue entries held by the ensemble.

    This is the length of the internal residue list; positions between
    residues that carry no data are not counted.
    """
    residue_count = len(self._residues)
    return residue_count

@property
def resrange(self):
    """Span of residue positions covered by the ensemble.

    Computed from the `respos` of the first and the last stored residue,
    both ends included. Positions in between that have no data (gaps)
    are therefore part of the count.
    """
    last_position = self._residues[-1].respos
    first_position = self._residues[0].respos
    return last_position - first_position + 1

@property
Expand Down
5 changes: 5 additions & 0 deletions constava/wrapper/params.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,10 @@ class ConstavaParameters:
bootstrap : List[int] or int
Do inference using <Int> samples obtained through bootstrapping.
Multiple values can be given as a list.
bootstrap_series : List[int] or int
Do inference using <Int> samples obtained through bootstrapping.
Return the results for every subsample rather than the average. Multiple
values can be given as a list.
bootstrap_samples : int
When bootstrapping, sample <Int> times from the input data.
Expand Down Expand Up @@ -101,6 +105,7 @@ class ConstavaParameters:
window : typing.List[int] = field(default_factory=list)
bootstrap : typing.List[int] = field(default_factory=list)
window_series : typing.List[int] = field(default_factory=list)
bootstrap_series : typing.List[int] = field(default_factory=list)
bootstrap_samples : int = 500

# Miscellaneous Options
Expand Down
15 changes: 12 additions & 3 deletions constava/wrapper/wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from .params import ConstavaParameters
from ..io import ResultsWriter, EnsembleReader
from ..calc.calculator import ConfStateCalculator
from ..calc.subsampling import SubsamplingBootstrap, SubsamplingWindow, SubsamplingWindowSeries
from ..calc.subsampling import SubsamplingBootstrap, SubsamplingBootstrapSeries, SubsamplingWindow, SubsamplingWindowSeries
from ..calc.csmodels import ConfStateModelABC, ConfStateModelKDE, ConfStateModelGrid

# The logger for the wrapper
Expand Down Expand Up @@ -128,6 +128,7 @@ def run(self) -> None:
window = self.get_param("window"),
window_series = self.get_param("window_series"),
bootstrap = self.get_param("bootstrap"),
bootstrap_series = self.get_param("bootstrap_series"),
bootstrap_samples = self.get_param("bootstrap_samples"),
bootstrap_seed = self.get_param("seed"))

Expand Down Expand Up @@ -252,8 +253,8 @@ def load_csmodel(self, pickled_csmodel: str) -> ConfStateModelABC:

def initialize_calculator(self, csmodel: ConfStateModelABC = None,
window: List[int] = None, window_series: List[int] = None,
bootstrap: List[int] = None, bootstrap_samples: int = 500,
bootstrap_seed: int = None) -> ConfStateCalculator:
bootstrap: List[int] = None, bootstrap_series: List[int] = None,
bootstrap_samples: int = 500, bootstrap_seed: int = None) -> ConfStateCalculator:
"""Initializes a ConfStateCalculator.
Parameters:
Expand All @@ -272,6 +273,10 @@ def initialize_calculator(self, csmodel: ConfStateModelABC = None,
bootstrap : List[int]
Subsampling by bootstrapping <int> datapoints. Multiple
values can be given as a list.
bootstrap_series : List[int] or int
Subsampling by bootstrapping <int> datapoints. Returns the
results for every subsample rather than the average. Multiple
values can be given as a list.
bootstrap_samples : int
When bootstrapping, sample <int> times from the input data.
bootstrap_seed : int
Expand Down Expand Up @@ -301,4 +306,8 @@ def initialize_calculator(self, csmodel: ConfStateModelABC = None,
new_method = SubsamplingWindowSeries(window_size)
logger.info(f"... adding subsampling method: {new_method.getShortName()}")
calculator.add_method(new_method)
for sample_size in (bootstrap_series or []):
new_method = SubsamplingBootstrapSeries(sample_size, bootstrap_samples, seed=bootstrap_seed)
logger.info(f"... adding subsampling method: {new_method.getShortName()}")
calculator.add_method(new_method)
return calculator
4 changes: 4 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
MDAnalysis
numpy
pandas
scikit-learn
12 changes: 5 additions & 7 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,12 @@
with open("README.md", "r", encoding="utf-8") as fh:
long_description = fh.read()

with open("requirements.txt", "r", encoding="utf-8") as f:
requirements = f.read().splitlines()

setup(
name="constava",
version="1.0.0b4",
version="1.0.0",
author="Wim Vranken",
author_email="[email protected]",
description="This software is used to calculate conformational states probability & conformational state "
Expand Down Expand Up @@ -34,12 +37,7 @@
"Development Status :: 5 - Production/Stable"
],
python_requires=">=3.8",
install_requires=[
"MDAnalysis",
"numpy",
"pandas",
"scikit-learn",
],
install_requires=requirements,
entry_points={
"console_scripts": [
"constava = constava.__main__:main",
Expand Down

0 comments on commit 44d4090

Please sign in to comment.