From 582ffaa71c677a773fcc82e8db2fba25b8bbb16c Mon Sep 17 00:00:00 2001 From: James Krieger Date: Tue, 16 Feb 2021 16:41:56 +0000 Subject: [PATCH 01/34] setup hybrid module --- prody/__init__.py | 5 + prody/dynamics/__init__.py | 7 - prody/hybrid/__init__.py | 19 + prody/{dynamics => hybrid}/clustenm.py | 18 +- prody/hybrid/hybrid.py | 1027 ++++++++++++++++++++++++ 5 files changed, 1061 insertions(+), 15 deletions(-) create mode 100644 prody/hybrid/__init__.py rename prody/{dynamics => hybrid}/clustenm.py (99%) create mode 100644 prody/hybrid/hybrid.py diff --git a/prody/__init__.py b/prody/__init__.py index ba1a38861..f2347a35d 100644 --- a/prody/__init__.py +++ b/prody/__init__.py @@ -130,6 +130,11 @@ def turnonDepracationWarnings(action='always'): __all__.extend(chromatin.__all__) __all__.append('chromatin') +from . import hybrid +from .hybrid import * +__all__.extend(hybrid.__all__) +__all__.append('hybrid') + from . import domain_decomposition from .domain_decomposition import * __all__.extend(domain_decomposition.__all__) diff --git a/prody/dynamics/__init__.py b/prody/dynamics/__init__.py index eb3267d81..c7359e5de 100644 --- a/prody/dynamics/__init__.py +++ b/prody/dynamics/__init__.py @@ -343,15 +343,8 @@ from .adaptive import * __all__.extend(adaptive.__all__) -from . import clustenm -from .clustenm import * -__all__.extend(clustenm.__all__) - from . import essa from .essa import * __all__.extend(essa.__all__) -# workaround for circular dependency to accommodate original design style -from prody.ensemble import functions -functions.ClustENM = ClustENM diff --git a/prody/hybrid/__init__.py b/prody/hybrid/__init__.py new file mode 100644 index 000000000..b3ffb11fc --- /dev/null +++ b/prody/hybrid/__init__.py @@ -0,0 +1,19 @@ +# -*- coding: utf-8 -*- +"""This module defines classes and functions for hybrid simulations. + +""" + +__all__ = [] + +from . import hybrid +from .hybrid import * +__all__.extend(hybrid.__all__) + +from . import clustenm +from .clustenm import * +__all__.extend(clustenm.__all__) + +# workaround for circular dependency to accommodate original design style +from prody.ensemble import functions +functions.ClustENM = ClustENM + diff --git a/prody/dynamics/clustenm.py b/prody/hybrid/clustenm.py similarity index 99% rename from prody/dynamics/clustenm.py rename to prody/hybrid/clustenm.py index 5fe1a18ac..f8ae1b4b5 100644 --- a/prody/dynamics/clustenm.py +++ b/prody/hybrid/clustenm.py @@ -36,25 +36,27 @@ from scipy.cluster.hierarchy import fcluster, linkage from prody import LOGGER -from .anm import ANM -from .gnm import GNM, ZERO -from .rtb import RTB -from .imanm import imANM -from .exanm import exANM -from .editing import extendModel -from .sampling import sampleModes +from prody.dynamics.anm import ANM +from prody.dynamics.gnm import GNM, ZERO +from prody.dynamics.rtb import RTB +from prody.dynamics.imanm import imANM +from prody.dynamics.exanm import exANM +from prody.dynamics.editing import extendModel +from prody.dynamics.sampling import sampleModes from prody.atomic import AtomGroup from prody.measure import calcTransformation, applyTransformation, calcRMSD from prody.ensemble import Ensemble from prody.proteins import writePDB, parsePDB, writePDBStream, parsePDBStream from prody.utilities import createStringIO, importLA, mad +from .hybrid import Hybrid + la = importLA() norm = la.norm __all__ = ['ClustENM', 'ClustRTB', 'ClustImANM', 'ClustExANM'] -class ClustENM(Ensemble): +class ClustENM(Hybrid): ''' ClustENMv2 is the new version of ClustENM(v1) conformation sampling algorithm [KZ16]_. diff --git a/prody/hybrid/hybrid.py b/prody/hybrid/hybrid.py new file mode 100644 index 000000000..0ade19c56 --- /dev/null +++ b/prody/hybrid/hybrid.py @@ -0,0 +1,1027 @@ +''' +Copyright (c) 2020 Burak Kaynak, Pemra Doruker. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. +''' + +__author__ = 'Burak Kaynak' +__credits__ = ['Pemra Doruker', 'She Zhang'] +__email__ = ['burak.kaynak@pitt.edu', 'doruker@pitt.edu', 'shz66@pitt.edu'] + +from itertools import product +from multiprocessing import cpu_count, Pool +from collections import OrderedDict +from os import chdir, mkdir +from os.path import isdir +from sys import stdout + +import numpy as np +from scipy.spatial.distance import pdist, squareform +from scipy.cluster.hierarchy import fcluster, linkage + +from prody import LOGGER +from prody.dynamics.anm import ANM +from prody.dynamics.gnm import GNM, ZERO +from prody.dynamics.rtb import RTB +from prody.dynamics.imanm import imANM +from prody.dynamics.exanm import exANM +from prody.dynamics.editing import extendModel +from prody.dynamics.sampling import sampleModes +from prody.atomic import AtomGroup +from prody.measure import calcTransformation, applyTransformation, calcRMSD +from prody.ensemble import Ensemble +from prody.proteins import writePDB, parsePDB, writePDBStream, parsePDBStream +from prody.utilities import createStringIO, importLA, mad + +la = importLA() +norm = la.norm + +__all__ = ['Hybrid'] + +class Hybrid(Ensemble): + + ''' + Base class for hybrid simulations, based on ClustENM class. + ''' + + def __init__(self, title=None): + + self._atoms = None + self._nuc = None + self._ph = 7.0 + + self._cutoff = 15. + self._gamma = 1. + self._n_modes = 3 + self._n_confs = 50 + self._rmsd = (0.,) + self._n_gens = 5 + + self._maxclust = None + self._threshold = None + + self._sol = 'imp' + self._padding = None + self._ionicStrength = 0.0 + self._force_field = None + self._tolerance = 10.0 + self._maxIterations = 0 + self._sim = True + self._temp = 303.15 + self._t_steps = None + + self._outlier = True + self._mzscore = 3.5 + self._v1 = False + self._platform = None + self._parallel = False + + self._topology = None + self._positions = None + self._idx_cg = None + self._n_cg = None + self._cycle = 0 + self._time = 0 + self._indexer = None + self._targeted = False + self._tmdk = 10. + + super(Hybrid, self).__init__('Unknown') # dummy title; will be replaced in the next line + self._title = title + + def __getitem__(self, index): + + if isinstance(index, tuple): + I = self._slice(index) + if I is None: + raise IndexError('index out of range %s' % str(index)) + index = I + + return super(Hybrid, self).__getitem__(index) + + def getAtoms(self): + + 'Returns atoms.' + + return self._atoms + + def _isBuilt(self): + + return self._confs is not None + + def setAtoms(self, atoms, pH=7.0): + + ''' + Sets atoms. + + :arg atoms: *atoms* parsed by parsePDB + + :arg pH: pH based on which to select protonation states for adding missing hydrogens, default is 7.0. + :type pH: float + ''' + + def getTitle(self): + + 'Returns the title.' + + title = 'Unknown' + if self._title is None: + atoms = self.getAtoms() + if atoms is not None: + title = atoms.getTitle() + '_clustenm' + else: + title = self._title + + return title + + def setTitle(self, title): + + ''' + Set title. + + :arg title: Title of the Hybrid object. + :type title: str + ''' + + if not isinstance(title, str) and title is not None: + raise TypeError('title must be either str or None') + self._title = title + + def _fix(self, atoms): + '''''' + + def _prep_sim(self, coords, external_forces=[]): + + try: + from simtk.openmm import Platform, LangevinIntegrator, Vec3 + from simtk.openmm.app import Modeller, ForceField, \ + CutoffNonPeriodic, PME, Simulation, HBonds + from simtk.unit import angstrom, nanometers, picosecond, \ + kelvin, Quantity, molar + except ImportError: + raise ImportError('Please install PDBFixer and OpenMM in order to use ClustENM.') + + positions = Quantity([Vec3(*xyz) for xyz in coords], angstrom) + modeller = Modeller(self._topology, positions) + + if self._sol == 'imp': + forcefield = ForceField(*self._force_field) + + system = forcefield.createSystem(modeller.topology, + nonbondedMethod=CutoffNonPeriodic, + nonbondedCutoff=1.0*nanometers, + constraints=HBonds) + + if self._sol == 'exp': + forcefield = ForceField(*self._force_field) + + modeller.addSolvent(forcefield, + padding=self._padding*nanometers, + ionicStrength=self._ionicStrength*molar) + + system = forcefield.createSystem(modeller.topology, + nonbondedMethod=PME, + nonbondedCutoff=1.0*nanometers, + constraints=HBonds) + + for force in external_forces: + system.addForce(force) + + integrator = LangevinIntegrator(self._temp*kelvin, + 1/picosecond, + 0.002*picosecond) + + # precision could be mixed, but single is okay. + platform = self._platform if self._platform is None else Platform.getPlatformByName(self._platform) + properties = None + + if self._platform is None: + properties = {'Precision': 'single'} + elif self._platform in ['CUDA', 'OpenCL']: + properties = {'Precision': 'single'} + + simulation = Simulation(modeller.topology, system, integrator, + platform, properties) + + simulation.context.setPositions(modeller.positions) + + return simulation + + def _min_sim(self, coords): + + # coords: coordset (numAtoms, 3) in Angstrom, which should be converted into nanometer + + try: + from simtk.openmm.app import StateDataReporter + from simtk.unit import kelvin, angstrom, kilojoule_per_mole, MOLAR_GAS_CONSTANT_R + except ImportError: + raise ImportError('Please install PDBFixer and OpenMM in order to use Hybrid.') + + simulation = self._prep_sim(coords=coords) + + # automatic conversion into nanometer will be carried out. + # simulation.context.setPositions(coords * angstrom) + + try: + simulation.minimizeEnergy(tolerance=self._tolerance*kilojoule_per_mole, maxIterations=self._maxIterations) + if self._sim: + # heating-up the system incrementally + sdr = StateDataReporter(stdout, 1, step=True, temperature=True) + sdr._initializeConstants(simulation) + temp = 0.0 + + # instantaneous temperature could be obtained by openmmtools module + # but its installation using conda may lead to problem due to repository freezing, + # therefore, we are here evaluating it by hand. + + while temp < self._temp: + simulation.step(1) + ke = simulation.context.getState(getEnergy=True).getKineticEnergy() + temp = (2 * ke / (sdr._dof * MOLAR_GAS_CONSTANT_R)).value_in_unit(kelvin) + + simulation.step(self._t_steps[self._cycle]) + + pos = simulation.context.getState(getPositions=True).getPositions(asNumpy=True).value_in_unit(angstrom)[:self._topology.getNumAtoms()] + pot = simulation.context.getState(getEnergy=True).getPotentialEnergy().value_in_unit(kilojoule_per_mole) + + return pot, pos + + except BaseException as be: + LOGGER.warning('OpenMM exception: ' + be.__str__() + ' so the corresponding conformer will be discarded!') + + return np.nan, np.full_like(coords, np.nan) + + def _targeted_sim(self, coords0, coords1, tmdk=15., d_steps=100, n_max_steps=10000, ddtol=1e-3, n_conv=5): + + try: + from simtk.openmm import CustomExternalForce + from simtk.openmm.app import StateDataReporter + from simtk.unit import nanometer, kelvin, angstrom, kilojoule_per_mole, MOLAR_GAS_CONSTANT_R + except ImportError: + raise ImportError('Please install PDBFixer and OpenMM in order to use Hybrid.') + + tmdk *= kilojoule_per_mole/angstrom**2 + tmdk = tmdk.value_in_unit(kilojoule_per_mole/nanometer**2) + + # coords1_ca = coords1[self._idx_cg, :] + pos1 = coords1 * angstrom + # pos1_ca = pos1[self._idx_cg, :] + + force = CustomExternalForce('tmdk*((x-x0)^2+(y-y0)^2+(z-z0)^2)') + force.addGlobalParameter('tmdk', 0.) + force.addPerParticleParameter('x0') + force.addPerParticleParameter('y0') + force.addPerParticleParameter('z0') + force.setForceGroup(1) + # for i, atm_idx in enumerate(self._idx_cg): + # pars = pos1_ca[i, :].value_in_unit(nanometer) + # force.addParticle(int(atm_idx), pars) + + n_atoms = coords0.shape[0] + atom_indices = np.arange(n_atoms) + for i, atm_idx in enumerate(atom_indices): + pars = pos1[i, :].value_in_unit(nanometer) + force.addParticle(int(atm_idx), pars) + + simulation = self._prep_sim([force]) + + # automatic conversion into nanometer will be carried out. + simulation.context.setPositions(coords0 * angstrom) + + dist = dist0 = calcRMSD(coords0, coords1) + m_conv = 0 + n_steps = 0 + try: + simulation.minimizeEnergy(tolerance=self._tolerance*kilojoule_per_mole, + maxIterations=self._maxIterations) + + # update parameters + while n_steps < n_max_steps: + simulation.context.setParameter('tmdk', tmdk) + force.updateParametersInContext(simulation.context) + + simulation.step(d_steps) + n_steps += d_steps + + # evaluate distance to destination + pos = simulation.context.getState(getPositions=True).getPositions(asNumpy=True).value_in_unit(angstrom) + d = calcRMSD(pos, coords1) + dd = np.abs(dist - d) + + if dd < ddtol: + m_conv += 1 + + if m_conv >= n_conv: + break + + dist = d + + LOGGER.debug('RMSD: %4.2f -> %4.2f' % (dist0, dist)) + + simulation.context.setParameter('tmdk', 0.0) + simulation.minimizeEnergy(tolerance=self._tolerance*kilojoule_per_mole, + maxIterations=self._maxIterations) + + pos = simulation.context.getState(getPositions=True).getPositions(asNumpy=True).value_in_unit(angstrom) + pot = simulation.context.getState(getEnergy=True).getPotentialEnergy().value_in_unit(kilojoule_per_mole) + + return pot, pos + + except BaseException as be: + LOGGER.warning('OpenMM exception: ' + be.__str__() + ' so the corresponding conformer will be discarded!') + + return np.nan, np.full_like(coords0, np.nan) + + def _checkANM(self, anm): + + # use prody's ZERO parameter + + H = anm.getHessian() + rank = np.linalg.matrix_rank(anm.getHessian(), tol=ZERO, hermitian=True) + rank_diff = H.shape[0] - 6 - rank + + good = rank_diff <= 0 # '<' needed due to RTB + + if not good: + # taking care cases with more than 6 zeros + LOGGER.warn('Abnormal number of zero modes detected (%d detected, 6 expected), so this conformer is discarded!' % (6 + rank_diff)) + + return good + + def _sample_v1(self, conf): + + tmp = self._atoms.copy() + tmp.setCoords(conf) + cg = tmp[self._idx_cg] + + anm_cg = self._buildANM(cg) + + if not self._checkANM(anm_cg): + return None + + anm_cg.calcModes(self._n_modes) + + anm_ex = self._extendModel(anm_cg, cg, tmp) + a = np.array(list(product([-1, 0, 1], repeat=self._n_modes))) + + nv = (anm_ex.getEigvecs() / np.sqrt(anm_ex.getEigvals())).__matmul__(a.T) + + nvn = nv / norm(nv, axis=0).max() + + d = (self._rmsd[self._cycle] * np.sqrt(tmp.numAtoms()) * nvn).T + d = d.reshape(d.shape[0], -1, 3) + + r0 = tmp.getCoords() + r = r0 + d + + return r + + def _multi_targeted_sim(self, args): + + conf = args[0] + coords = args[1] + + return self._targeted_sim(conf, coords, tmdk=self._tmdk) + + def _buildANM(self, cg): + + anm = ANM() + anm.buildHessian(cg, cutoff=self._cutoff, gamma=self._gamma) + + return anm + + def _extendModel(self, model, nodes, atoms): + + if self._nuc is None: + pass + else: + _, idx_n3, cnt = np.unique(nodes.nucleotide.getResindices(), + return_index=True, return_counts=True) + idx_c4p = np.where(nodes.getNames() == "C4'")[0] + + vpn3 = model.getEigvecs() + + for i, n, j in zip(idx_n3, cnt, idx_c4p): + vpn3[3*i:3*(i+n)] = np.tile(vpn3[3*j:3*(j + 1), :], (n, 1)) + + model.setEigens(vpn3, model.getEigvals()) + + ext, _ = extendModel(model, nodes, atoms, norm=True) + + return ext + + def _sample(self, conf): + '''''' + + def _rmsds(self, coords): + + # as long as there is no need for superposing conformations + + # coords: (n_conf, n_cg, 3) + + tmp = coords.reshape(-1, 3 * self._n_cg) + + return pdist(tmp) / np.sqrt(self._n_cg) + + def _hc(self, arg): + + # arg: coords (n_conf, n_cg, 3) + + rmsds = self._rmsds(arg) + # optimal_ordering=True can be slow, particularly on large datasets. + link = linkage(rmsds, method='average') + + # fcluster gives cluster labels starting from 1 + + if self._threshold is not None: + hcl = fcluster(link, t=self._threshold[self._cycle], + criterion='distance') - 1 + + if self._maxclust is not None: + hcl = fcluster(link, t=self._maxclust[self._cycle], + criterion='maxclust') - 1 + + return hcl + + def _centroid(self, arg): + + # arg: coords (n_conf_clust, n_cg, 3) + + if arg.shape[0] > 2: + rmsds = self._rmsds(arg) + sim = np.exp(- squareform(rmsds) / rmsds.std()) + idx = sim.sum(1).argmax() + return idx + else: + return 0 # or np.random.randint(low=0, high=arg.shape[0]) + + def _centers(self, *args): + + # args[0]: coords (n_conf, n_cg, 3) + # args[1]: labels + + nl = np.unique(args[1]) + idx = OrderedDict() + for i in nl: + idx[i] = np.where(args[1] == i)[0] + + # Dictionary order is guaranteed to be insertion order by Python 3.7! + wei = [idx[k].size for k in idx.keys()] + centers = np.empty(nl.size, dtype=int) + for i in nl: + tmp = self._centroid(args[0][idx[i]]) + centers[i] = idx[i][tmp] + + return centers, wei + + def _generate(self, confs): + + LOGGER.info('Sampling conformers in generation %d ...' % self._cycle) + LOGGER.timeit('_clustenm_gen') + + sample_method = self._sample_v1 if self._v1 else self._sample + + if self._parallel: + with Pool(cpu_count()) as p: + tmp = p.map(sample_method, [conf for conf in confs]) + else: + tmp = [sample_method(conf) for conf in confs] + + tmp = [r for r in tmp if r is not None] + + confs_ex = np.concatenate(tmp) + + confs_cg = confs_ex[:, self._idx_cg] + + LOGGER.info('Clustering in generation %d ...' % self._cycle) + label_cg = self._hc(confs_cg) + centers, wei = self._centers(confs_cg, label_cg) + LOGGER.report('Centroids were generated in %.2fs.', + label='_clustenm_gen') + + return confs_ex[centers], wei + + def _outliers(self, arg): + + # arg : potential energies + # outliers are detected by modified z_score. + + tmp = 0.6745 * (arg - np.median(arg)) / mad(arg) + # here the assumption is that there is not just one conformer. + + return tmp > 3.5 + + def _superpose_cg(self, confs): + tmp0 = self._getCoords() + n = confs.shape[0] + tmp1 = [] + for i in range(n): + tmp2 = calcTransformation(confs[i, self._idx_cg], + tmp0[self._idx_cg]) + tmp1.append(applyTransformation(tmp2, confs[i])) + + return np.array(tmp1) + + def _build(self, conformers, keys, potentials, sizes): + + self.addCoordset(conformers) + self.setData('size', sizes) + self.setData('key', keys) + self.setData('potential', potentials) + + def addCoordset(self, coords): + + ''' + Add coordinate set(s) to the ensemble. + + :arg coords: coordinate set(s) + :type coords: :class:`~numpy.ndarray` + ''' + + self._indexer = None + super(Hybrid, self).addCoordset(coords) + + def getData(self, key, gen=None): + + ''' + Returns data. + + :arg key: Key + :type key: str + + :arg gen: Generation + :type gen: int + ''' + + keys = super(Hybrid, self)._getData('key') + data = super(Hybrid, self).getData(key) + + if gen is not None: + data_ = [] + for k, d in zip(keys, data): + g, _ = k + if g == gen: + data_.append(d) + data = np.array(data_) + return data + + def getKeys(self, gen=None): + + ''' + Returns keys. + + :arg gen: Generation number. + :type gen: int + ''' + + return self.getData('key', gen) + + def getLabels(self, gen=None): + + ''' + Returns labels. + + :arg gen: Generation number. + :type gen: int + ''' + + keys = self.getKeys(gen) + labels = ['%d_%d' % tuple(k) for k in keys] + + return labels + + def getPotentials(self, gen=None): + + ''' + Returns potentials. + + :arg gen: Generation number. + :type gen: int + ''' + + return self.getData('potential', gen) + + def getSizes(self, gen=None): + + ''' + Returns the number of unminimized conformers represented by a cluster centroid. + + :arg gen: Generation number. + :type gen: int + ''' + + return self.getData('size', gen) + + def numGenerations(self): + + 'Returns the number of generations.' + + return self._n_gens + + def numConfs(self, gen=None): + + ''' + Returns the number of conformers. + + :arg gen: Generation number. + :type gen: int + ''' + + if gen is None: + return super(Hybrid, self).numConfs() + + keys = self._getData('key') + n_confs = 0 + for g, _ in keys: + if g == gen: + n_confs += 1 + + return n_confs + + def _slice(self, indices): + + if len(indices) == 0: + raise ValueError('indices (tuple) cannot be empty') + + if self._indexer is None: + keys = self._getData('key') + entries = [[] for _ in range(self.numGenerations() + 1)] + for i, (gen, _) in enumerate(keys): + entries[gen].append(i) + + n_conf_per_gen = np.max([len(entry) for entry in entries]) + for entry in entries: + for i in range(len(entry), n_conf_per_gen): + entry.append(-1) + + indexer = self._indexer = np.array(entries) + else: + indexer = self._indexer + + full_serials = indexer[indices] + + if np.isscalar(full_serials): + index = full_serials + indices = None if index == -1 else index + else: + full_serials = full_serials.flatten() + indices = [] + for s in full_serials: + if s != -1: + indices.append(s) + indices = np.array(indices) if indices else None + + return indices + + def _getCoordsets(self, indices=None, selected=True): + + ''' + Returns the coordinate set(s) at given *indices*, which may be + an integer, a list of integers, a tuple of (generation, index), or **None**. + **None** returns all coordinate sets. For reference coordinates, use :meth:`getCoords` + method. + ''' + + if isinstance(indices, tuple): + I = self._slice(indices) + if I is None: + raise IndexError('index out of range %s' % str(indices)) + else: + I = indices + + return super(Hybrid, self)._getCoordsets(I, selected) + + def writePDBFixed(self): + + 'Write the fixed (initial) structure to a pdb file.' + + from simtk.openmm.app import PDBFile + + PDBFile.writeFile(self._topology, + self._positions, + open(self.getTitle()[:-8] + 'fixed.pdb', 'w'), + keepIds=True) + + def writePDB(self, filename=None, single=True, **kwargs): + + ''' + Write conformers in PDB format to a file. + + :arg filename: The name of the file. If it is None (default), the title of the Hybrid will be used. + :type filename: str + + :arg single: If it is True (default), then the conformers will be saved into a single PDB file with + each conformer as a model. Otherwise, a directory will be created with the filename, + and each conformer will be saved as a separate PDB fle. + :type single: bool + ''' + + if filename is None: + filename = self.getTitle() + + if single: + filename = writePDB(filename, self) + LOGGER.info('PDB file saved as %s' % filename) + else: + direc = filename + if isdir(direc): + LOGGER.warn('%s is not empty; will be flooded' % direc) + else: + mkdir(direc) + + LOGGER.info('Saving files ...') + for i, lab in enumerate(self.getLabels()): + filename = '%s/%s'%(direc, lab) + writePDB(filename, self, csets=i) + LOGGER.info('PDB files saved in %s ...'%direc) + + def run(self, cutoff=15., n_modes=3, gamma=1., n_confs=50, rmsd=1.0, + n_gens=5, solvent='imp', sim=True, force_field=None, temp=303.15, + t_steps_i=1000, t_steps_g=7500, + outlier=True, mzscore=3.5, **kwargs): + + ''' + Performs a ClustENM-like run. + + :arg cutoff: Cutoff distance (A) for pairwise interactions used in ANM + computations, default is 15.0 A. + :type cutoff: float + + :arg gamma: Spring constant of ANM, default is 1.0. + :type gamma: float + + :arg n_modes: Number of non-zero eigenvalues/vectors to calculate. + :type n_modes: int + + :arg n_confs: Number of new conformers to be generated based on any conformer + from the previous generation, default is 50. + :type n_confs: int + + :arg rmsd: Average RMSD of the new conformers with respect to the conformer + from which they are generated, default is 1.0 A. + A tuple of floats can be given, e.g. (1.0, 1.5, 1.5) for subsequent generations. + Note: In the case of ClustENMv1, this value is the maximum rmsd, not the average. + :type rmsd: float, tuple of floats + + :arg n_gens: Number of generations. + :type n_gens: int + + :arg solvent: Solvent model to be used. If it is set to 'imp' (default), + implicit solvent model will be used, whereas 'exp' stands for explicit solvent model. + Warning: In the case of nucleotide chains, explicit solvent model is automatically set. + :type solvent: str + + :arg padding: Padding distance to use for solvation. Default is 1.0 nm. + :type padding: float + + :arg ionicStrength: Total concentration of ions (both positive and negative) to add. + This does not include ions that are added to neutralize the system. + Default concentration is 0.0 molar. + :type ionicStrength: float + + :arg force_field: Implicit solvent force field is ('amber99sbildn.xml', 'amber99_obc.xml'). + Explicit solvent force field is ('amber14-all.xml', 'amber14/tip3pfb.xml'). + Experimental feature: Forcefields already implemented in OpenMM can be used. + :type force_field: a tuple of str + + :arg tolerance: Energy tolerance to which the system should be minimized, default is 10.0 kJ/mole. + :type tolerance: float + + :arg maxIterations: Maximum number of iterations to perform during energy minimization. + If this is 0 (default), minimization is continued until the results converge without + regard to how many iterations it takes. + :type maxIterations: int + + :arg sim: If it is True (default), a short MD simulation is performed after energy minimization. + Note: There is also a heating-up phase until the desired temperature is reached. + :type sim: bool + + :arg temp: Temperature at which the simulations are conducted, default is 303.15 K. + :type temp: float + + :arg t_steps_i: Duration of MD simulation (number of time steps) for the starting structure + following the heating-up phase, default is 1000. Each time step is 2.0 fs. + Note: Default value reduces possible drift from the starting structure. + :type t_steps_i : int + + :arg t_steps_g: Duration of MD simulations (number of time steps) to run for each conformer + following the heating-up phase, default is 7500. Each time step is 2.0 fs. + A tuple of int's can be given, e.g. (3000, 5000, 7000) for subsequent generations. + :type t_steps_g: int or tuple of int's + + :arg outlier: Exclusion of conformers detected as outliers in each generation. + Default is True for implicit solvent. Outliers, if any, are detected by + the modified z-scores of the conformers' potential energies over a generation. + Note: It is automatically set to False when explicit solvent model is being used. + :type outlier: bool + + :arg mzscore: Modified z-score threshold to label conformers as outliers. Default is 3.5. + :type mzscore: float + + :arg v1: Original sampling method with complete enumeration of desired ANM modes is used. + Default is False. Maximum number of modes should not exceed 5 for efficiency. + :type v1: bool + + :arg platform: Architecture on which the OpenMM part runs, default is None. + It can be chosen as 'CUDA', 'OpenCL' or 'CPU'. + For efficiency, 'CUDA' or 'OpenCL' is recommended. + :type platform: str + + :arg parallel: If it is True (default is False), conformer generation will be parallelized. + :type parallel: bool + ''' + + if self._isBuilt(): + raise ValueError('ClustENM ensemble has been built; please start a new instance') + + # set up parameters + self._cutoff = cutoff + self._n_modes = n_modes + self._gamma = gamma + self._n_confs = n_confs + self._rmsd = (0.,) + rmsd if isinstance(rmsd, tuple) else (0.,) + (rmsd,) * n_gens + self._n_gens = n_gens + self._platform = kwargs.pop('platform', None) + self._parallel = kwargs.pop('parallel', False) + self._targeted = kwargs.pop('targeted', False) + self._tmdk = kwargs.pop('tmdk', 15.) + + self._sol = solvent if self._nuc is None else 'exp' + self._padding = kwargs.pop('padding', 1.0) + self._ionicStrength = kwargs.pop('ionicStrength', 0.0) + if self._sol == 'imp': + self._force_field = ('amber99sbildn.xml', 'amber99_obc.xml') if force_field is None else force_field + if self._sol == 'exp': + self._force_field = ('amber14-all.xml', 'amber14/tip3pfb.xml') if force_field is None else force_field + self._tolerance = kwargs.pop('tolerance', 10.0) + self._maxIterations = kwargs.pop('maxIterations', 0) + self._sim = sim + self._temp = temp + + if self._sim: + if isinstance(t_steps_g, tuple): + self._t_steps = (t_steps_i,) + t_steps_g + else: + self._t_steps = (t_steps_i,) + (t_steps_g,) * n_gens + + self._outlier = False if self._sol == 'exp' else outlier + self._mzscore = mzscore + self._v1 = kwargs.pop('v1', False) + + self._cycle = 0 + + # check for discontinuity in the structure + gnm = GNM() + gnm.buildKirchhoff(self._atoms[self._idx_cg], cutoff=self._cutoff) + K = gnm.getKirchhoff() + rank_diff = (len(K) - 1 + - np.linalg.matrix_rank(K, tol=ZERO, hermitian=True)) + if rank_diff != 0: + raise ValueError('atoms has disconnected parts; please check the structure') + + LOGGER.timeit('_clustenm_overall') + + LOGGER.info('Generation 0 ...') + + if self._sim: + if self._t_steps[0] != 0: + LOGGER.info('Minimization, heating-up & simulation in generation 0 ...') + else: + LOGGER.info('Minimization & heating-up in generation 0 ...') + else: + LOGGER.info('Minimization in generation 0 ...') + LOGGER.timeit('_clustenm_min') + potential, conformer = self._min_sim(self._atoms.getCoords()) + if np.isnan(potential): + raise ValueError('Initial structure could not be minimized. Try again and/or check your structure.') + + LOGGER.report(label='_clustenm_min') + + LOGGER.info('#' + '-' * 19 + '/*\\' + '-' * 19 + '#') + + self.setCoords(conformer) + + potentials = [potential] + sizes = [1] + new_shape = [1] + for s in conformer.shape: + new_shape.append(s) + conf = conformer.reshape(new_shape) + conformers = start_confs = conf + keys = [(0, 0)] + + for i in range(1, self._n_gens+1): + self._cycle += 1 + LOGGER.info('Generation %d ...' % i) + confs, weights = self._generate(start_confs) + if self._sim: + if self._t_steps[i] != 0: + LOGGER.info('Minimization, heating-up & simulation in generation %d ...' % i) + else: + LOGGER.info('Minimization & heating-up in generation %d ...' % i) + else: + LOGGER.info('Minimization in generation %d ...' % i) + LOGGER.timeit('_clustenm_min_sim') + + pot_conf = [self._min_sim(conf) for conf in confs] + + LOGGER.report('Structures were sampled in %.2fs.', + label='_clustenm_min_sim') + LOGGER.info('#' + '-' * 19 + '/*\\' + '-' * 19 + '#') + + pots, confs = list(zip(*pot_conf)) + idx = np.logical_not(np.isnan(pots)) + weights = np.array(weights)[idx] + pots = np.array(pots)[idx] + confs = np.array(confs)[idx] + + if self._outlier: + idx = np.logical_not(self._outliers(pots)) + else: + idx = np.full(pots.size, True, dtype=bool) + + sizes.extend(weights[idx]) + potentials.extend(pots[idx]) + start_confs = self._superpose_cg(confs[idx]) + + for j in range(start_confs.shape[0]): + keys.append((i, j)) + conformers = np.vstack((conformers, start_confs)) + + LOGGER.timeit('_clustenm_ens') + LOGGER.info('Creating an ensemble of conformers ...') + + self._build(conformers, keys, potentials, sizes) + LOGGER.report('Ensemble was created in %.2fs.', label='_clustenm_ens') + + self._time = LOGGER.timing(label='_clustenm_overall') + LOGGER.report('All completed in %.2fs.', label='_clustenm_overall') + + def writeParameters(self, filename=None): + + ''' + Write the parameters defined to a text file. + + :arg filename: The name of the file. If it is None (default), the title of the Hybrid will be used. + :type filename: str + ''' + + title = self.getTitle() + if filename is None: + filename = '%s_parameters.txt' % title + + with open(filename, 'w') as f: + f.write('title = %s\n' % title) + f.write('pH = %4.2f\n' % self._ph) + f.write('cutoff = %4.2f A\n' % self._cutoff) + f.write('n_modes = %d\n' % self._n_modes) + if not self._v1: + f.write('n_confs = %d\n' % self._n_confs) + f.write('rmsd = (%s)\n' % ', '.join([str(i) + ' A' for i in self._rmsd[1:]])) + f.write('n_gens = %d\n' % self._n_gens) + if self._threshold is not None: + f.write('threshold = %s\n' % str(self._threshold[1:])) + if self._maxclust is not None: + f.write('maxclust = %s\n' % str(self._maxclust[1:])) + f.write('solvent = %slicit\n' % self._sol) + if self._sol == 'exp': + f.write('padding = %4.2f nm\n' % self._padding) + if self._ionicStrength != 0.0: + f.write('ionicStrength = %4.2f molar\n' % self._ionicStrength) + f.write('force_field = (%s, %s)\n' % self._force_field) + f.write('tolerance = %4.2f kJ/mole\n' % self._tolerance) + if self._maxIterations != 0: + f.write('maxIteration = %d\n' % self._maxIterations) + if self._sim: + f.write('temp = %4.2f K\n' % self._temp) + f.write('t_steps = %s\n' % str(self._t_steps)) + if self._outlier: + f.write('outlier = %s\n' % self._outlier) + f.write('mzscore = %4.2f\n' % self._mzscore) + if self._v1: + f.write('v1 = %s\n' % self._v1) + if self._platform is not None: + f.write('platform = %s\n' % self._platform) + else: + f.write('platform = Default\n') + if self._parallel: + f.write('parallel = %s\n' % self._parallel) + + f.write('total time = %4.2f s' % self._time) + From 830d84e7bc913dec66f6c10a074056946dbc8879 Mon Sep 17 00:00:00 2001 From: James Krieger Date: Tue, 16 Feb 2021 16:43:21 +0000 Subject: [PATCH 02/34] adaptive ANM hybrid method --- prody/dynamics/__init__.py | 4 - prody/hybrid/__init__.py | 3 + prody/{dynamics => hybrid}/adaptive.py | 220 ++++++++++++++++++++++++- 3 files changed, 216 insertions(+), 11 deletions(-) rename prody/{dynamics => hybrid}/adaptive.py (67%) diff --git a/prody/dynamics/__init__.py b/prody/dynamics/__init__.py index c7359e5de..77fe83969 100644 --- a/prody/dynamics/__init__.py +++ b/prody/dynamics/__init__.py @@ -339,10 +339,6 @@ from .signature import * __all__.extend(signature.__all__) -from . import adaptive -from .adaptive import * -__all__.extend(adaptive.__all__) - from . import essa from .essa import * __all__.extend(essa.__all__) diff --git a/prody/hybrid/__init__.py b/prody/hybrid/__init__.py index b3ffb11fc..4f049cf93 100644 --- a/prody/hybrid/__init__.py +++ b/prody/hybrid/__init__.py @@ -17,3 +17,6 @@ from prody.ensemble import functions functions.ClustENM = ClustENM +from . import adaptive +from .adaptive import * +__all__.extend(adaptive.__all__) diff --git a/prody/dynamics/adaptive.py b/prody/hybrid/adaptive.py similarity index 67% rename from prody/dynamics/adaptive.py rename to prody/hybrid/adaptive.py index 801355495..4e7bd45a2 100644 --- a/prody/dynamics/adaptive.py +++ b/prody/hybrid/adaptive.py @@ -1,20 +1,39 @@ # -*- coding: utf-8 -*- """This module defines functions for performing adaptive ANM.""" -from prody.atomic import Atomic, AtomMap +__author__ = 'James Krieger' +__credits__ = ['Hongchun Li', 'She Zhang', 'Burak Kaynak'] +__email__ = ['jamesmkrieger@gmail.com', 'hongchun28@gmail.com', 'shz66@pitt.edu', 'burak.kaynak@pitt.edu'] + +from itertools import product +from multiprocessing import cpu_count, Pool +from collections import OrderedDict +from os import chdir, mkdir +from os.path import isdir +from prody.measure.transform import calcTransformation +from prody.measure.measure import calcDeformVector +from sys import stdout + import time from numbers import Integral, Number import numpy as np from prody import LOGGER -from prody.utilities import getCoords, importLA -from prody.measure import calcRMSD, calcDistance, superpose +from prody.atomic import Atomic, AtomMap +from prody.utilities import getCoords, createStringIO, importLA, mad + +from prody.measure import calcRMSD, calcDistance, superpose, applyTransformation +from prody.proteins import writePDB, parsePDB, writePDBStream, parsePDBStream from prody.ensemble import Ensemble -from .functions import calcENM -from .modeset import ModeSet +from prody.dynamics.functions import calcENM +from prody.dynamics.modeset import ModeSet +from prody.dynamics.nma import NMA -__all__ = ['calcAdaptiveANM', 'AANM_ONEWAY', 'AANM_ALTERNATING', 'AANM_BOTHWAYS', 'AANM_DEFAULT'] +from .hybrid import Hybrid + +__all__ = ['calcAdaptiveANM', 'AANM_ONEWAY', 'AANM_ALTERNATING', 'AANM_BOTHWAYS', 'AANM_DEFAULT', + 'AdaptiveHybrid'] AANM_ALTERNATING = 0 AANM_ONEWAY = 1 @@ -88,7 +107,7 @@ def calcStep(initial, target, n_modes, ensemble, defvecs, rmsds, mask=None, call weights = ensemble.getWeights() if weights is not None: weights = weights.flatten() - #coords_init, _ = superpose(initial, target, weights) # we should keep this off otherwise RMSD calculations are off + coords_init = initial coords_tar = target @@ -464,3 +483,190 @@ def calcBothWaysAdaptiveANM(a, b, n_steps, **kwargs): return ensemble +class AdaptiveHybrid(Hybrid): + ''' + This is a new version of the Adaptive ANM transition sampling algorithm [ZY09]_, + that is an ANM-based hybrid algorithm. It requires PDBFixer and OpenMM for performing + energy minimization and MD simulations in implicit/explicit solvent. + + Instantiate a ClustENM object. + ''' + def __init__(self, title): + super().__init__(title=title) + self._atomsB = None + self._defvecs = [] + self._resetFmin = True + self._rmsds = [] + self._cg_ens = Ensemble(title=title) + + def _sample(self, conf, **kwargs): + + tmp = self._atoms.copy() + tmp.setCoords(conf) + cg = tmp[self._idx_cg] + + anm_cg = self._buildANM(cg) + + if not self._checkANM(anm_cg): + return None + + tmpB = self._atomsB.copy() + cgB = tmpB[self._idx_cg] + + coordsA, coordsB, title, atoms, weights, maskA, maskB, rmsd = checkInput(cg, cgB, **kwargs) + coordsA = coordsA.copy() + + anm_cg = [] + self._n_modes = calcStep(coordsA, coordsB, self._n_modes, self._cg_ens, self._defvecs, self._rmsds, mask=maskA, + resetFmin=self._resetFmin, **kwargs) + self._resetFmin = False + + defvec = calcDeformVector(cg, self._cg_ens.getCoordsets()[-1]) + model = NMA() + model.setEigens(defvec.getArray().reshape((defvec.getArray().shape[0], 1))) + model_ex = self._extendModel(model, cg, tmp) + coordsets = [tmp.getCoords() + model_ex.getEigvecs()[0]] + + if self._targeted: + if self._parallel: + with Pool(cpu_count()) as p: + pot_conf = p.map(self._multi_targeted_sim, + [(conf, coords) for coords in coordsets]) + else: + pot_conf = [self._multi_targeted_sim((conf, coords)) for coords in coordsets] + + pots, poses = list(zip(*pot_conf)) + + idx = np.logical_not(np.isnan(pots)) + coordsets = np.array(poses)[idx] + + LOGGER.debug('%d/%d sets of coordinates were moved to the target' % (len(poses), len(coordsets))) + + return coordsets + + def setAtoms(self, atomsA, atomsB, pH=7.0, **kwargs): + aligned = kwargs.get('aligned', False) + if not aligned: + T = calcTransformation(atomsA.ca, atomsB.ca, weights=atomsA.ca.getFlags("mapped")) + _ = applyTransformation(T, atomsA) + + if self._isBuilt(): + super(Hybrid, self).setAtoms(atomsA) + self._atomsB = atomsB + else: + for i, atoms in enumerate([atomsA, atomsB]): + atoms = atoms.select('not hetatm') + + self._nuc = atoms.select('nucleotide') + + if self._nuc is not None: + + idx_p = [] + for c in self._nuc.getChids(): + tmp = self._nuc[c].iterAtoms() + for a in tmp: + if a.getName() in ['P', 'OP1', 'OP2', 'OP3']: + idx_p.append(a.getIndex()) + + if idx_p: + nsel = 'not index ' + ' '.join([str(i) for i in idx_p]) + atoms = atoms.select(nsel) + + LOGGER.info('Fixing structure {0}...'.format(i)) + LOGGER.timeit('_clustenm_fix') + self._ph = pH + self._fix(atoms, i) + LOGGER.report('The structure was fixed in %.2fs.', + label='_clustenm_fix') + + if self._nuc is None: + self._idx_cg = self._atoms.ca.getIndices() + self._n_cg = self._atoms.ca.numAtoms() + else: + self._idx_cg = self._atoms.select("name CA C2 C4' P").getIndices() + self._n_cg = self._atoms.select("name CA C2 C4' P").numAtoms() + + self._n_atoms = self._atoms.numAtoms() + self._indices = None + + self._cg_ens.setAtoms(self._atoms[self._idx_cg]) + + def _fix(self, atoms, i): + try: + from pdbfixer import PDBFixer + from simtk.openmm.app import PDBFile + except ImportError: + raise ImportError('Please install PDBFixer and OpenMM in order to use ClustENM.') + + stream = createStringIO() + title = atoms.getTitle() + writePDBStream(stream, atoms) + stream.seek(0) + fixed = PDBFixer(pdbfile=stream) + stream.close() + + fixed.missingResidues = {} + fixed.findNonstandardResidues() + fixed.replaceNonstandardResidues() + fixed.removeHeterogens(False) + fixed.findMissingAtoms() + fixed.addMissingAtoms() + fixed.addMissingHydrogens(self._ph) + + stream = createStringIO() + PDBFile.writeFile(fixed.topology, fixed.positions, + stream, keepIds=True) + stream.seek(0) + if i == 0: + self._atoms = parsePDBStream(stream) + self._atoms.setTitle(title) + else: + self._atomsB = parsePDBStream(stream) + self._atomsB.setTitle(title) + stream.close() + + self._topology = fixed.topology + self._positions = fixed.positions + + def getAtomsA(self): + + 'Returns atoms for structure A (main atoms).' + + return self._atoms + + def getAtomsB(self): + + 'Returns atoms for structure B.' + + return self._atomsB + + def getRMSDsB(self): + if self._confs is None or self._coords is None: + return None + + indices = self._indices + if indices is None: + indices = np.arange(self._confs.shape[1]) + + weights = self._weights[indices] if self._weights is not None else None + + return calcRMSD(self._atomsB, self._confs[:, indices], weights) + + def _generate(self, confs): + + LOGGER.info('Sampling conformers in generation %d ...' % self._cycle) + LOGGER.timeit('_clustenm_gen') + + sample_method = self._sample + + if self._parallel: + with Pool(cpu_count()) as p: + tmp = p.map(sample_method, [conf for conf in confs]) + else: + tmp = [sample_method(conf) for conf in confs] + + tmp = [r for r in tmp if r is not None] + + confs_ex = np.concatenate(tmp) + + return confs_ex, [1] \ No newline at end of file From 21b005e62d375a86cb80b5aeb1cafec2aefe74ab Mon Sep 17 00:00:00 2001 From: James Krieger Date: Tue, 16 Feb 2021 18:13:43 +0000 Subject: [PATCH 03/34] added filename to writePDBFixed --- prody/hybrid/hybrid.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/prody/hybrid/hybrid.py b/prody/hybrid/hybrid.py index 0ade19c56..4959432c3 100644 --- a/prody/hybrid/hybrid.py +++ b/prody/hybrid/hybrid.py @@ -707,15 +707,18 @@ def _getCoordsets(self, indices=None, selected=True): return super(Hybrid, self)._getCoordsets(I, selected) - def writePDBFixed(self): + def writePDBFixed(self, filename=None): 'Write the fixed (initial) structure to a pdb file.' from simtk.openmm.app import PDBFile + if filename is None: + filename = self.getTitle()[:-8] + 'fixed.pdb' + PDBFile.writeFile(self._topology, self._positions, - open(self.getTitle()[:-8] + 'fixed.pdb', 'w'), + open(filename, 'w'), keepIds=True) def writePDB(self, filename=None, single=True, **kwargs): From d925e9b8261a61aae3a631a57319c87504dfb0c7 Mon Sep 17 00:00:00 2001 From: James Krieger Date: Wed, 17 Feb 2021 10:43:57 +0000 Subject: [PATCH 04/34] saveEnsemble works for AdaptiveHybrid --- prody/ensemble/functions.py | 42 ++++++++++++++++++++++++++++++++++--- prody/hybrid/__init__.py | 10 +++++---- 2 files changed, 45 insertions(+), 7 deletions(-) diff --git a/prody/ensemble/functions.py b/prody/ensemble/functions.py index 8edad3742..bc823945c 100644 --- a/prody/ensemble/functions.py +++ b/prody/ensemble/functions.py @@ -35,17 +35,21 @@ def saveEnsemble(ensemble, filename=None, **kwargs): raise ValueError('ensemble instance does not contain data') dict_ = ensemble.__dict__ + atomsB = None attr_list = ['_title', '_confs', '_weights', '_coords', '_indices'] if isinstance(ensemble, PDBEnsemble): attr_list.append('_labels') attr_list.append('_trans') - elif isinstance(ensemble, ClustENM): + elif isinstance(ensemble, Hybrid): attr_list.extend(['_ph', '_cutoff', '_gamma', '_n_modes', '_n_confs', '_rmsd', '_n_gens', '_maxclust', '_threshold', '_sol', '_padding', '_ionicStrength', '_force_field', '_tolerance', '_maxIterations', '_sim', '_temp', '_t_steps', '_outlier', '_mzscore', '_v1', '_parallel', '_idx_cg', '_n_cg', '_cycle', '_time', '_targeted', '_tmdk']) + if isinstance(ensemble, AdaptiveHybrid): + attr_list.extend(['_atomsB', '_defvecs', '_resetFmin', '_rmsds']) + atomsB = dict_['_atomsB'] if filename is None: filename = ensemble.getTitle().replace(' ', '_') @@ -59,6 +63,9 @@ def saveEnsemble(ensemble, filename=None, **kwargs): if atoms is not None: attr_dict['_atoms'] = np.array([atoms, None], dtype=object) + if atomsB is not None: + attr_dict['_atomsB'] = np.array([atomsB, None], + dtype=object) data = dict_['_data'] if len(data): @@ -129,6 +136,10 @@ def loadEnsemble(filename, **kwargs): ensemble = PDBEnsemble(title) elif type_ == 'ClustENM': ensemble = ClustENM(title) + elif type_ == 'AdaptiveHybrid': + ensemble = AdaptiveHybrid(title) + elif type_ == 'Hybrid': + ensemble = Hybrid(title) else: ensemble = Ensemble(title) @@ -152,7 +163,7 @@ def loadEnsemble(filename, **kwargs): if '_msa' in attr_dict.files: ensemble._msa = attr_dict['_msa'][0] else: - if type_ == 'ClustENM': + if type_ in ['ClustENM', 'Hybrid', 'AdaptiveHybrid']: attrs = ['_ph', '_cutoff', '_gamma', '_n_modes', '_n_confs', '_rmsd', '_n_gens', '_maxclust', '_threshold', '_sol', '_sim', '_temp', '_t_steps', '_outlier', '_mzscore', '_v1', @@ -162,6 +173,13 @@ def loadEnsemble(filename, **kwargs): for attr in attrs: if attr in attr_dict.files: setattr(ensemble, attr, attr_dict[attr]) + + if type_ == 'AdaptiveHybrid': + attrs = ['_atomsB', '_defvecs', '_resetFmin', '_rmsds'] + + for attr in attrs: + if attr in attr_dict.files: + setattr(ensemble, attr, attr_dict[attr]) ensemble.addCoordset(confs) if weights is not None: @@ -183,7 +201,25 @@ def loadEnsemble(filename, **kwargs): data[key] = arr else: atoms = None - ensemble.setAtoms(atoms) + + if type_ == 'AdaptiveHybrid': + atomsB = attr_dict['_atomsB'][0] + + if isinstance(atomsB, AtomGroup): + dataB = atomsB._data + else: + dataB = atomsB._ag._data + + for key in dataB: + arrB = dataB[key] + char = arrB.dtype.char + if char in 'SU' and char != DTYPE: + arrB = arrB.astype(str) + dataB[key] = arrB + + ensemble.setAtoms(atoms, atomsB) + else: + ensemble.setAtoms(atoms) if '_indices' in attr_dict: indices = attr_dict['_indices'] diff --git a/prody/hybrid/__init__.py b/prody/hybrid/__init__.py index 4f049cf93..f4da8dd3a 100644 --- a/prody/hybrid/__init__.py +++ b/prody/hybrid/__init__.py @@ -13,10 +13,12 @@ from .clustenm import * __all__.extend(clustenm.__all__) -# workaround for circular dependency to accommodate original design style -from prody.ensemble import functions -functions.ClustENM = ClustENM - from . import adaptive from .adaptive import * __all__.extend(adaptive.__all__) + +# workaround for circular dependency to accommodate original design style +from prody.ensemble import functions +functions.Hybrid = Hybrid +functions.ClustENM = ClustENM +functions.AdaptiveHybrid = AdaptiveHybrid From a83133a1e015ed5d1b4a3dbd1ff24b784560da6a Mon Sep 17 00:00:00 2001 From: James Krieger Date: Wed, 17 Feb 2021 12:33:26 +0000 Subject: [PATCH 05/34] setAtoms fixes --- prody/ensemble/functions.py | 4 +-- prody/hybrid/adaptive.py | 23 +++++++++-------- prody/hybrid/clustenm.py | 6 ++--- prody/hybrid/hybrid.py | 50 ++++++++++++++++++++++++++++++------- 4 files changed, 58 insertions(+), 25 deletions(-) diff --git a/prody/ensemble/functions.py b/prody/ensemble/functions.py index bc823945c..282c45abf 100644 --- a/prody/ensemble/functions.py +++ b/prody/ensemble/functions.py @@ -46,7 +46,7 @@ def saveEnsemble(ensemble, filename=None, **kwargs): '_padding', '_ionicStrength', '_force_field', '_tolerance', '_maxIterations', '_sim', '_temp', '_t_steps', '_outlier', '_mzscore', '_v1', '_parallel', '_idx_cg', '_n_cg', '_cycle', - '_time', '_targeted', '_tmdk']) + '_time', '_targeted', '_tmdk', '_topology', '_positions']) if isinstance(ensemble, AdaptiveHybrid): attr_list.extend(['_atomsB', '_defvecs', '_resetFmin', '_rmsds']) atomsB = dict_['_atomsB'] @@ -168,7 +168,7 @@ def loadEnsemble(filename, **kwargs): '_rmsd', '_n_gens', '_maxclust', '_threshold', '_sol', '_sim', '_temp', '_t_steps', '_outlier', '_mzscore', '_v1', '_parallel', '_idx_ca', '_n_ca', '_cycle', '_time', '_targeted', - '_tmdk'] + '_tmdk', '_topology', '_positions'] for attr in attrs: if attr in attr_dict.files: diff --git a/prody/hybrid/adaptive.py b/prody/hybrid/adaptive.py index 4e7bd45a2..21fc386eb 100644 --- a/prody/hybrid/adaptive.py +++ b/prody/hybrid/adaptive.py @@ -544,9 +544,9 @@ def _sample(self, conf, **kwargs): return coordsets - def setAtoms(self, atomsA, atomsB, pH=7.0, **kwargs): + def setAtoms(self, atomsA, atomsB=None, pH=7.0, **kwargs): aligned = kwargs.get('aligned', False) - if not aligned: + if not aligned and atomsB is not None: T = calcTransformation(atomsA.ca, atomsB.ca, weights=atomsA.ca.getFlags("mapped")) _ = applyTransformation(T, atomsA) @@ -555,6 +555,9 @@ def setAtoms(self, atomsA, atomsB, pH=7.0, **kwargs): self._atomsB = atomsB else: for i, atoms in enumerate([atomsA, atomsB]): + if i == 1 and atomsB is None: + break + atoms = atoms.select('not hetatm') self._nuc = atoms.select('nucleotide') @@ -628,17 +631,17 @@ def _fix(self, atoms, i): self._topology = fixed.topology self._positions = fixed.positions - def getAtomsA(self): - + def getAtomsA(self, selected=True): 'Returns atoms for structure A (main atoms).' + return super(AdaptiveHybrid, self).getAtoms(selected) - return self._atoms - - def getAtomsB(self): - + def getAtomsB(self, selected=True): 'Returns atoms for structure B.' - - return self._atomsB + if self._atomsB is None: + return None + if self._indices is None or not selected: + return self._atomsB + return self._atomsB[self._indices] def getRMSDsB(self): if self._confs is None or self._coords is None: diff --git a/prody/hybrid/clustenm.py b/prody/hybrid/clustenm.py index f8ae1b4b5..4b4ba3f3b 100644 --- a/prody/hybrid/clustenm.py +++ b/prody/hybrid/clustenm.py @@ -125,11 +125,9 @@ def __getitem__(self, index): return super(ClustENM, self).__getitem__(index) - def getAtoms(self): - + def getAtoms(self, selected=True): 'Returns atoms.' - - return self._atoms + return super(ClustENM, self).getAtoms(selected) def _isBuilt(self): diff --git a/prody/hybrid/hybrid.py b/prody/hybrid/hybrid.py index 4959432c3..b3244dd05 100644 --- a/prody/hybrid/hybrid.py +++ b/prody/hybrid/hybrid.py @@ -36,6 +36,7 @@ from scipy.cluster.hierarchy import fcluster, linkage from prody import LOGGER + from prody.dynamics.anm import ANM from prody.dynamics.gnm import GNM, ZERO from prody.dynamics.rtb import RTB @@ -43,11 +44,12 @@ from prody.dynamics.exanm import exANM from prody.dynamics.editing import extendModel from prody.dynamics.sampling import sampleModes -from prody.atomic import AtomGroup + +from prody.atomic import AtomGroup, Atomic from prody.measure import calcTransformation, applyTransformation, calcRMSD from prody.ensemble import Ensemble -from prody.proteins import writePDB, parsePDB, writePDBStream, parsePDBStream -from prody.utilities import createStringIO, importLA, mad +from prody.proteins import writePDB, writePDBStream, parsePDBStream, EMDMAP +from prody.utilities import createStringIO, importLA, mad, getCoords la = importLA() norm = la.norm @@ -115,17 +117,15 @@ def __getitem__(self, index): return super(Hybrid, self).__getitem__(index) - def getAtoms(self): - + def getAtoms(self, selected=True): 'Returns atoms.' - - return self._atoms + return super(Hybrid, self).getAtoms(selected) def _isBuilt(self): return self._confs is not None - def setAtoms(self, atoms, pH=7.0): + def setAtoms(self, atoms): ''' Sets atoms. @@ -135,6 +135,7 @@ def setAtoms(self, atoms, pH=7.0): :arg pH: pH based on which to select protonation states for adding missing hydrogens, default is 7.0. :type pH: float ''' + super(Hybrid, self).setAtoms(atoms) def getTitle(self): @@ -164,7 +165,38 @@ def setTitle(self, title): self._title = title def _fix(self, atoms): - '''''' + + try: + from pdbfixer import PDBFixer + from simtk.openmm.app import PDBFile + except ImportError: + raise ImportError('Please install PDBFixer and OpenMM in order to use ClustENM.') + + stream = createStringIO() + title = atoms.getTitle() + writePDBStream(stream, atoms) + stream.seek(0) + fixed = PDBFixer(pdbfile=stream) + stream.close() + + fixed.missingResidues = {} + fixed.findNonstandardResidues() + fixed.replaceNonstandardResidues() + fixed.removeHeterogens(False) + fixed.findMissingAtoms() + fixed.addMissingAtoms() + fixed.addMissingHydrogens(self._ph) + + stream = createStringIO() + PDBFile.writeFile(fixed.topology, fixed.positions, + stream, keepIds=True) + stream.seek(0) + self._atoms = parsePDBStream(stream) + self._atoms.setTitle(title) + stream.close() + + self._topology = fixed.topology + self._positions = fixed.positions def _prep_sim(self, coords, external_forces=[]): From fac7df20e9958b21ecb8fbdca97451cd92f66c3d Mon Sep 17 00:00:00 2001 From: James Krieger Date: Wed, 17 Feb 2021 13:36:44 +0000 Subject: [PATCH 06/34] added ensemble filtering --- prody/ensemble/ensemble.py | 98 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 98 insertions(+) diff --git a/prody/ensemble/ensemble.py b/prody/ensemble/ensemble.py index 5f1426566..06e0ab8e2 100644 --- a/prody/ensemble/ensemble.py +++ b/prody/ensemble/ensemble.py @@ -11,6 +11,7 @@ from prody.atomic import Atomic, sliceAtoms from prody.atomic.atomgroup import checkLabel from prody.measure import getRMSD, calcDeformVector +from prody.proteins import EMDMAP from prody.utilities import importLA, checkCoords, checkWeights, copy, isListLike from .conformation import * @@ -899,3 +900,100 @@ def getDataType(self, label): return self._data[label].dtype except KeyError: return None + + def filterConformers(self, ref=None, target=None, **kwargs): + """Filter conformers to only include those towards *target*. + + If target is an atomic object, then RMSD is used. + If it's an EM density map, then deltaCCC is used.""" + if ref is None: + ref = self.getAtoms() + if ref is None: + raise ValueError('Please set a ref for filtering.') + else: + LOGGER.info('Using structure A as ref') + + if target is None: + if hasattr(self, 'getAtomsB'): + LOGGER.info('Using structure B as target') + target = self.getAtomsB() + if target is None: + raise ValueError('Please set a target for filtering.') + + if self._confs is None or self._coords is None: + return None + + n = 0 + n_csets = self.numCoordsets() + + if isinstance(target, Atomic): + LOGGER.info('Using Atomic target and RMSD for filtering') + + indices = self._indices + if indices is None: + indices = np.arange(self._confs.shape[1]) + + weights = self._weights[indices] if self._weights is not None else None + rmsds = calcRMSD(target, self._confs[:, indices], weights) + rmsd_ref = calcRMSD(target, ref) + + LOGGER.progress('Filtering the ensemble...', n_csets, '_prody_FilterEnsemble') + for i, rmsd in enumerate(reversed(rmsds)): + curr_conf = n_csets - i - 1 + LOGGER.update(i + 1, label='_prody_FilterEnsemble') + if rmsd > rmsd_ref: + self.delCoordset(curr_conf) + n += 1 + else: + try: + from TEMPy.maps.em_map import Map + from TEMPy.protein.structure_blurrer import StructureBlurrer + from TEMPy.protein.scoring_functions import ScoringFunctions + except: + raise ImportError("TEMPy is needed to filter by a non-Atomic target") + + if isinstance(target, EMDMAP): + target = target.toTEMPyMap() + + if isinstance(target, Map): + LOGGER.info('Using EM map target and CCC for filtering') + + resolution = kwargs.get('resolution', 5) + blurrer = StructureBlurrer() + scorer = ScoringFunctions() + + if not isinstance(ref, Atomic): + coords = getCoords(ref) + ref = self.atoms.copy() + ref.setCoords(coords) + + ref_tempy = ref.toTEMPyStructure() + ref_blurred = blurrer.gaussian_blur(ref_tempy, resolution, target, + filename=ref.getTitle()+'.mrc') + ref_ccc = scorer.CCC(ref_blurred, target) + + conf = self.getAtoms().copy() + + LOGGER.progress('Filtering the ensemble...', n_csets, '_prody_FilterEnsemble') + for i, coordset in enumerate(reversed(self.getCoordsets())): + curr_conf = n_csets - i - 1 + LOGGER.update(i + 1, label='_prody_FilterEnsemble') + conf.setCoords(coordset) + blurred_conf = blurrer.gaussian_blur(conf.toTEMPyStructure(), + resolution, target, + filename='{0}_{1}'.format(conf.getTitle(), + curr_conf)) + ccc = scorer.CCC(blurred_conf, target) + if ccc < ref_ccc: + self.delCoordset(curr_conf) + n += 1 + else: + raise TypeError('target should be an Atomic or EM density map object') + + LOGGER.finish() + if n == 1: + LOGGER.info('{0} conformer removed, leaving {1} conformers remaining'.format(n, self.numConfs())) + elif self.numConfs() == 1: + LOGGER.info('{0} conformers removed, leaving {1} conformer remaining'.format(n, self.numConfs())) + else: + LOGGER.info('{0} conformers removed, leaving {1} conformers remaining'.format(n, self.numConfs())) From c7cab3886f60e0b8d72eea17c6d4ded73b3f7f07 Mon Sep 17 00:00:00 2001 From: James Krieger Date: Wed, 17 Feb 2021 14:12:20 +0000 Subject: [PATCH 07/34] import hybrid before ensemble --- prody/__init__.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/prody/__init__.py b/prody/__init__.py index f2347a35d..cf6ff2b10 100644 --- a/prody/__init__.py +++ b/prody/__init__.py @@ -115,6 +115,11 @@ def turnonDepracationWarnings(action='always'): __all__.extend(dynamics.__all__) __all__.append('dynamics') +from . import hybrid +from .hybrid import * +__all__.extend(hybrid.__all__) +__all__.append('hybrid') + from . import ensemble from .ensemble import * __all__.extend(ensemble.__all__) @@ -130,11 +135,6 @@ def turnonDepracationWarnings(action='always'): __all__.extend(chromatin.__all__) __all__.append('chromatin') -from . import hybrid -from .hybrid import * -__all__.extend(hybrid.__all__) -__all__.append('hybrid') - from . import domain_decomposition from .domain_decomposition import * __all__.extend(domain_decomposition.__all__) From 003d486e140d7c8ee1dfe73fa1f7fff5702905b0 Mon Sep 17 00:00:00 2001 From: James Krieger Date: Wed, 17 Feb 2021 14:25:59 +0000 Subject: [PATCH 08/34] tightened imports --- prody/hybrid/adaptive.py | 8 ++++---- prody/hybrid/clustenm.py | 8 +++----- prody/hybrid/hybrid.py | 14 +++++--------- 3 files changed, 12 insertions(+), 18 deletions(-) diff --git a/prody/hybrid/adaptive.py b/prody/hybrid/adaptive.py index 21fc386eb..da267d040 100644 --- a/prody/hybrid/adaptive.py +++ b/prody/hybrid/adaptive.py @@ -20,11 +20,11 @@ from prody import LOGGER from prody.atomic import Atomic, AtomMap -from prody.utilities import getCoords, createStringIO, importLA, mad +from prody.utilities import getCoords, createStringIO, importLA -from prody.measure import calcRMSD, calcDistance, superpose, applyTransformation -from prody.proteins import writePDB, parsePDB, writePDBStream, parsePDBStream -from prody.ensemble import Ensemble +from prody.measure.transform import calcTransformation, applyTransformation, calcRMSD +from prody.ensemble.ensemble import Ensemble +from prody.proteins.pdbfile import writePDBStream, parsePDBStream from prody.dynamics.functions import calcENM from prody.dynamics.modeset import ModeSet diff --git a/prody/hybrid/clustenm.py b/prody/hybrid/clustenm.py index 4b4ba3f3b..1d0d54074 100644 --- a/prody/hybrid/clustenm.py +++ b/prody/hybrid/clustenm.py @@ -43,11 +43,9 @@ from prody.dynamics.exanm import exANM from prody.dynamics.editing import extendModel from prody.dynamics.sampling import sampleModes -from prody.atomic import AtomGroup -from prody.measure import calcTransformation, applyTransformation, calcRMSD -from prody.ensemble import Ensemble -from prody.proteins import writePDB, parsePDB, writePDBStream, parsePDBStream -from prody.utilities import createStringIO, importLA, mad + +from prody.measure.transform import calcTransformation, applyTransformation, calcRMSD +from prody.proteins.pdbfile import writePDB, writePDBStream, parsePDBStream from .hybrid import Hybrid diff --git a/prody/hybrid/hybrid.py b/prody/hybrid/hybrid.py index b3244dd05..2809e6d51 100644 --- a/prody/hybrid/hybrid.py +++ b/prody/hybrid/hybrid.py @@ -39,17 +39,13 @@ from prody.dynamics.anm import ANM from prody.dynamics.gnm import GNM, ZERO -from prody.dynamics.rtb import RTB -from prody.dynamics.imanm import imANM -from prody.dynamics.exanm import exANM from prody.dynamics.editing import extendModel -from prody.dynamics.sampling import sampleModes -from prody.atomic import AtomGroup, Atomic -from prody.measure import calcTransformation, applyTransformation, calcRMSD -from prody.ensemble import Ensemble -from prody.proteins import writePDB, writePDBStream, parsePDBStream, EMDMAP -from prody.utilities import createStringIO, importLA, mad, getCoords +from prody.measure.transform import calcTransformation, applyTransformation, calcRMSD +from prody.ensemble.ensemble import Ensemble +from prody.proteins.pdbfile import writePDB, writePDBStream, parsePDBStream + +from prody.utilities import createStringIO, importLA, mad la = importLA() norm = la.norm From d59c9f2b95beecf5d561cf6c6a1a3d6347731a93 Mon Sep 17 00:00:00 2001 From: James Krieger Date: Thu, 18 Feb 2021 09:21:11 +0000 Subject: [PATCH 09/34] added direction mode --- prody/hybrid/__init__.py | 4 ++ prody/hybrid/adaptive.py | 110 ++++++++++++++++++++++++++++----------- prody/hybrid/clustenm.py | 2 + prody/hybrid/hybrid.py | 48 ++++++++++++----- 4 files changed, 119 insertions(+), 45 deletions(-) diff --git a/prody/hybrid/__init__.py b/prody/hybrid/__init__.py index f4da8dd3a..bd642413d 100644 --- a/prody/hybrid/__init__.py +++ b/prody/hybrid/__init__.py @@ -17,6 +17,10 @@ from .adaptive import * __all__.extend(adaptive.__all__) +from . import comd +from .comd import * +__all__.extend(comd.__all__) + # workaround for circular dependency to accommodate original design style from prody.ensemble import functions functions.Hybrid = Hybrid diff --git a/prody/hybrid/adaptive.py b/prody/hybrid/adaptive.py index da267d040..cfae88223 100644 --- a/prody/hybrid/adaptive.py +++ b/prody/hybrid/adaptive.py @@ -10,8 +10,6 @@ from collections import OrderedDict from os import chdir, mkdir from os.path import isdir -from prody.measure.transform import calcTransformation -from prody.measure.measure import calcDeformVector from sys import stdout import time @@ -22,7 +20,9 @@ from prody.atomic import Atomic, AtomMap from prody.utilities import getCoords, createStringIO, importLA -from prody.measure.transform import calcTransformation, applyTransformation, calcRMSD +from prody.measure.transform import calcTransformation, superpose, applyTransformation, calcRMSD +from prody.measure.measure import calcDeformVector, calcDistance + from prody.ensemble.ensemble import Ensemble from prody.proteins.pdbfile import writePDBStream, parsePDBStream @@ -32,14 +32,14 @@ from .hybrid import Hybrid -__all__ = ['calcAdaptiveANM', 'AANM_ONEWAY', 'AANM_ALTERNATING', 'AANM_BOTHWAYS', 'AANM_DEFAULT', +__all__ = ['calcAdaptiveANM', 'ONEWAY', 'ALTERNATING', 'SERIAL', 'DEFAULT', 'AdaptiveHybrid'] -AANM_ALTERNATING = 0 -AANM_ONEWAY = 1 -AANM_BOTHWAYS = 2 +ALTERNATING = 0 +ONEWAY = 1 +SERIAL = 2 -AANM_DEFAULT = AANM_ALTERNATING +DEFAULT = ALTERNATING norm = importLA().norm @@ -261,18 +261,18 @@ def checkDisconnection(coords, cutoff): return False -def calcAdaptiveANM(a, b, n_steps, mode=AANM_DEFAULT, **kwargs): +def calcAdaptiveANM(a, b, n_steps, mode=DEFAULT, **kwargs): """Runs adaptive ANM analysis of proteins ([ZY09]_) that creates a path that connects two conformations using normal modes. This function can be run in three modes: - 1. *AANM_ONEWAY*: all steps are run in one direction: from *a* to *b*. + 1. *ONEWAY*: all steps are run in one direction: from *a* to *b*. - 2. *AANM_ALTERNATING*: steps are run in alternating directions: from *a* to *b*, + 2. *ALTERNATING*: steps are run in alternating directions: from *a* to *b*, then *b* to *a*, then back again, and so on. - 3. *AANM_BOTHWAYS*: steps are run in one direction (from *a* to + 3. *SERIAL*: steps are run in one direction (from *a* to *b*) until convergence is reached and then the other way. This also implementation differs from the original one in that it sorts the @@ -288,12 +288,12 @@ def calcAdaptiveANM(a, b, n_steps, mode=AANM_DEFAULT, **kwargs): :arg b: structure B for the transition :type b: :class:`.Atomic`, :class:`~numpy.ndarray` - :arg n_steps: the maximum number of steps to be calculated. For *AANM_BOTHWAYS*, + :arg n_steps: the maximum number of steps to be calculated. For *SERIAL*, this means the maximum number of steps from each direction :type n_steps: int - :arg mode: the way of the calculation to be performed, which can be either *AANM_ONEWAY*, - *AANM_ALTERNATING*, or *AANM_BOTHWAYS*. Default is *AANM_ALTERNATING* + :arg mode: the way of the calculation to be performed, which can be either *ONEWAY*, + *ALTERNATING*, or *SERIAL*. Default is *ALTERNATING* :type mode: int :kwarg f: step size. Default is 0.2 @@ -335,11 +335,11 @@ def calcAdaptiveANM(a, b, n_steps, mode=AANM_DEFAULT, **kwargs): Please see keyword arguments for calculating the modes in :func:`.calcENM`. """ - if mode == AANM_ONEWAY: + if mode == ONEWAY: return calcOneWayAdaptiveANM(a, b, n_steps, **kwargs) - elif mode == AANM_ALTERNATING: + elif mode == ALTERNATING: return calcAlternatingAdaptiveANM(a, b, n_steps, **kwargs) - elif mode == AANM_BOTHWAYS: + elif mode == SERIAL: return calcBothWaysAdaptiveANM(a, b, n_steps, **kwargs) else: raise ValueError('unknown aANM mode: %d'%mode) @@ -491,13 +491,14 @@ class AdaptiveHybrid(Hybrid): Instantiate a ClustENM object. ''' - def __init__(self, title): + def __init__(self, title, **kwargs): super().__init__(title=title) self._atomsB = None self._defvecs = [] - self._resetFmin = True self._rmsds = [] - self._cg_ens = Ensemble(title=title) + self._cg_ensA = Ensemble(title=title) + self._cg_ensB = Ensemble(title=title) + self._n_modes0 = self._n_modes = kwargs.pop('n_modes', 20) def _sample(self, conf, **kwargs): @@ -516,12 +517,55 @@ def _sample(self, conf, **kwargs): coordsA, coordsB, title, atoms, weights, maskA, maskB, rmsd = checkInput(cg, cgB, **kwargs) coordsA = coordsA.copy() - anm_cg = [] - self._n_modes = calcStep(coordsA, coordsB, self._n_modes, self._cg_ens, self._defvecs, self._rmsds, mask=maskA, - resetFmin=self._resetFmin, **kwargs) - self._resetFmin = False + self._direction_mode = kwargs.get('mode', DEFAULT) + + if self._direction_mode == ONEWAY: + LOGGER.info('\nStarting cycle with structure A') + self._n_modes = calcStep(coordsA, coordsB, self._n_modes, self._cg_ensA, + self._defvecs, self._rmsds, mask=maskA, + resetFmin=self._resetFmin, **kwargs) + self._resetFmin = False + cg_ens = self._cg_ensA + + elif self._direction_mode == ALTERNATING: + if self._direction == 1: + LOGGER.info('\nStarting cycle with structure A') + self._n_modes = calcStep(coordsA, coordsB, self._n_modes, self._cg_ensA, + self._defvecs, self._rmsds, mask=maskA, + resetFmin=self._resetFmin, **kwargs) + self._resetFmin = False + cg_ens = self._cg_ensA + self._direction = 2 + + else: + LOGGER.info('\nStarting cycle with structure B') + self._n_modes = calcStep(coordsB, coordsA, self._n_modes, self._cg_ensB, + self._defvecs, self._rmsds, mask=maskB, + resetFmin=self._resetFmin, **kwargs) + cg_ens = self._cg_ensB + self._direction = 1 + + elif self._direction_mode == SERIAL: + if self._direction == 1: + LOGGER.info('\nStarting cycle with structure A') + self._n_modes = calcStep(coordsA, coordsB, self._n_modes, self._cg_ensA, + self._defvecs, self._rmsds, mask=maskA, + resetFmin=self._resetFmin, **kwargs) + self._resetFmin = False + cg_ens = self._cg_ensA + + else: + LOGGER.info('\nStarting cycle with structure B') + self._n_modes = calcStep(coordsB, coordsA, self._n_modes, self._cg_ensB, + self._defvecs, self._rmsds, mask=maskB, + resetFmin=self._resetFmin, **kwargs) + self._resetFmin = False + cg_ens = self._cg_ensB + + else: + raise ValueError('unknown aANM mode: %d' % self._direction_mode) - defvec = calcDeformVector(cg, self._cg_ens.getCoordsets()[-1]) + defvec = calcDeformVector(cg, cg_ens.getCoordsets()[-1]) model = NMA() model.setEigens(defvec.getArray().reshape((defvec.getArray().shape[0], 1))) model_ex = self._extendModel(model, cg, tmp) @@ -554,6 +598,7 @@ def setAtoms(self, atomsA, atomsB=None, pH=7.0, **kwargs): super(Hybrid, self).setAtoms(atomsA) self._atomsB = atomsB else: + A_B_dict = {0: 'A', 1: 'B'} for i, atoms in enumerate([atomsA, atomsB]): if i == 1 and atomsB is None: break @@ -575,7 +620,7 @@ def setAtoms(self, atomsA, atomsB=None, pH=7.0, **kwargs): nsel = 'not index ' + ' '.join([str(i) for i in idx_p]) atoms = atoms.select(nsel) - LOGGER.info('Fixing structure {0}...'.format(i)) + LOGGER.info('Fixing structure {0}...'.format(A_B_dict[i])) LOGGER.timeit('_clustenm_fix') self._ph = pH self._fix(atoms, i) @@ -592,7 +637,10 @@ def setAtoms(self, atomsA, atomsB=None, pH=7.0, **kwargs): self._n_atoms = self._atoms.numAtoms() self._indices = None - self._cg_ens.setAtoms(self._atoms[self._idx_cg]) + if i == 1: + self._cg_ensA.setAtoms(self._atoms[self._idx_cg]) + else: + self._cg_ensB.setAtoms(self._atomsB[self._idx_cg]) def _fix(self, atoms, i): try: @@ -655,7 +703,7 @@ def getRMSDsB(self): return calcRMSD(self._atomsB, self._confs[:, indices], weights) - def _generate(self, confs): + def _generate(self, confs, **kwargs): LOGGER.info('Sampling conformers in generation %d ...' % self._cycle) LOGGER.timeit('_clustenm_gen') @@ -666,10 +714,10 @@ def _generate(self, confs): with Pool(cpu_count()) as p: tmp = p.map(sample_method, [conf for conf in confs]) else: - tmp = [sample_method(conf) for conf in confs] + tmp = [sample_method(conf, **kwargs) for conf in confs] tmp = [r for r in tmp if r is not None] confs_ex = np.concatenate(tmp) - return confs_ex, [1] \ No newline at end of file + return confs_ex, [1] diff --git a/prody/hybrid/clustenm.py b/prody/hybrid/clustenm.py index 1d0d54074..02f5b472b 100644 --- a/prody/hybrid/clustenm.py +++ b/prody/hybrid/clustenm.py @@ -47,6 +47,8 @@ from prody.measure.transform import calcTransformation, applyTransformation, calcRMSD from prody.proteins.pdbfile import writePDB, writePDBStream, parsePDBStream +from prody.utilities import createStringIO, importLA, mad + from .hybrid import Hybrid la = importLA() diff --git a/prody/hybrid/hybrid.py b/prody/hybrid/hybrid.py index 2809e6d51..b75163a40 100644 --- a/prody/hybrid/hybrid.py +++ b/prody/hybrid/hybrid.py @@ -518,7 +518,7 @@ def _centers(self, *args): return centers, wei - def _generate(self, confs): + def _generate(self, confs, **kwargs): LOGGER.info('Sampling conformers in generation %d ...' % self._cycle) LOGGER.timeit('_clustenm_gen') @@ -529,7 +529,7 @@ def _generate(self, confs): with Pool(cpu_count()) as p: tmp = p.map(sample_method, [conf for conf in confs]) else: - tmp = [sample_method(conf) for conf in confs] + tmp = [sample_method(conf, **kwargs) for conf in confs] tmp = [r for r in tmp if r is not None] @@ -567,11 +567,18 @@ def _superpose_cg(self, confs): return np.array(tmp1) def _build(self, conformers, keys, potentials, sizes): - - self.addCoordset(conformers) - self.setData('size', sizes) - self.setData('key', keys) - self.setData('potential', potentials) + if self._direction_mode == 0: + # Split alternating values and use forwards As then backwards Bs + self.addCoordset(conformers[::2]) + self.addCoordset(conformers[-1::-2]) + self.setData('size', sizes[::2] + sizes[-1::-2]) + self.setData('key', keys[::2] + keys[-1::-2]) + self.setData('potential', potentials[::2] + potentials[-1::-2]) + else: + self.addCoordset(conformers) + self.setData('size', sizes) + self.setData('key', keys) + self.setData('potential', potentials) def addCoordset(self, coords): @@ -883,7 +890,7 @@ def run(self, cutoff=15., n_modes=3, gamma=1., n_confs=50, rmsd=1.0, # set up parameters self._cutoff = cutoff - self._n_modes = n_modes + self._n_modes0 = self._n_modes = n_modes self._gamma = gamma self._n_confs = n_confs self._rmsd = (0.,) + rmsd if isinstance(rmsd, tuple) else (0.,) + (rmsd,) * n_gens @@ -893,6 +900,10 @@ def run(self, cutoff=15., n_modes=3, gamma=1., n_confs=50, rmsd=1.0, self._targeted = kwargs.pop('targeted', False) self._tmdk = kwargs.pop('tmdk', 15.) + self._direction_mode = kwargs.get('mode', None) + self._direction = 1 + self._resetFmin = True + self._sol = solvent if self._nuc is None else 'exp' self._padding = kwargs.pop('padding', 1.0) self._ionicStrength = kwargs.pop('ionicStrength', 0.0) @@ -926,7 +937,7 @@ def run(self, cutoff=15., n_modes=3, gamma=1., n_confs=50, rmsd=1.0, if rank_diff != 0: raise ValueError('atoms has disconnected parts; please check the structure') - LOGGER.timeit('_clustenm_overall') + LOGGER.timeit('_hybrid_overall') LOGGER.info('Generation 0 ...') @@ -960,7 +971,7 @@ def run(self, cutoff=15., n_modes=3, gamma=1., n_confs=50, rmsd=1.0, for i in range(1, self._n_gens+1): self._cycle += 1 LOGGER.info('Generation %d ...' % i) - confs, weights = self._generate(start_confs) + confs, weights = self._generate(start_confs, **kwargs) if self._sim: if self._t_steps[i] != 0: LOGGER.info('Minimization, heating-up & simulation in generation %d ...' % i) @@ -995,14 +1006,23 @@ def run(self, cutoff=15., n_modes=3, gamma=1., n_confs=50, rmsd=1.0, keys.append((i, j)) conformers = np.vstack((conformers, start_confs)) - LOGGER.timeit('_clustenm_ens') + if self._n_modes == 0 and self._direction_mode is not None: + if self._direction_mode == 2 and self._direction == 1: + self._direction = 2 + self._n_modes = self._n_modes0 + self._resetFmin = True + else: + LOGGER.report('Transition converged in %.2fs.', '_hybrid_overall') + break + + LOGGER.timeit('_hybrid_ens') LOGGER.info('Creating an ensemble of conformers ...') self._build(conformers, keys, potentials, sizes) - LOGGER.report('Ensemble was created in %.2fs.', label='_clustenm_ens') + LOGGER.report('Ensemble was created in %.2fs.', label='_hybrid_ens') - self._time = LOGGER.timing(label='_clustenm_overall') - LOGGER.report('All completed in %.2fs.', label='_clustenm_overall') + self._time = LOGGER.timing(label='_hybrid_overall') + LOGGER.report('All completed in %.2fs.', label='_hybrid_overall') def writeParameters(self, filename=None): From ff2fd5ed8f85c1ba8c970a6aa3e611660437ded5 Mon Sep 17 00:00:00 2001 From: James Krieger Date: Thu, 18 Feb 2021 10:32:30 +0000 Subject: [PATCH 10/34] aANM convergence docs fix --- prody/hybrid/adaptive.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/prody/hybrid/adaptive.py b/prody/hybrid/adaptive.py index cfae88223..1c96e356d 100644 --- a/prody/hybrid/adaptive.py +++ b/prody/hybrid/adaptive.py @@ -219,7 +219,8 @@ def checkConvergence(rmsds, coords, **kwargs): Convergence is reached if one of three conditions is met: 1. Difference between *rmsds* from previous step to current < *min_rmsd_diff* - 2. Current rmsd < *target_rmsd* for the last five runs + for the last five runs + 2. Current rmsd < *target_rmsd* 3. A node in *coords* gets disconnected from another by > *cutoff* """ min_rmsd_diff = kwargs.get('min_rmsd_diff', 0.05) @@ -231,7 +232,7 @@ def checkConvergence(rmsds, coords, **kwargs): if np.all(drmsd[-4:] < min_rmsd_diff): LOGGER.warn( - 'The RMSD decrease fell below {0}'.format(min_rmsd_diff)) + 'The RMSD decrease stayed below {0} for five cycles'.format(min_rmsd_diff)) return True if rmsds[-1] < target_rmsd: From 7ca495d50d56c4784adf1687723b4fab1305c41f Mon Sep 17 00:00:00 2001 From: James Krieger Date: Thu, 18 Feb 2021 12:25:28 +0000 Subject: [PATCH 11/34] moved hybrid methods back to dynamics --- prody/__init__.py | 5 ----- prody/dynamics/__init__.py | 17 ++++++++++++++++ prody/{hybrid => dynamics}/adaptive.py | 6 +++--- prody/{hybrid => dynamics}/clustenm.py | 14 ++++++------- prody/{hybrid => dynamics}/hybrid.py | 6 +++--- prody/hybrid/__init__.py | 28 -------------------------- 6 files changed, 30 insertions(+), 46 deletions(-) rename prody/{hybrid => dynamics}/adaptive.py (99%) rename prody/{hybrid => dynamics}/clustenm.py (99%) rename prody/{hybrid => dynamics}/hybrid.py (99%) delete mode 100644 prody/hybrid/__init__.py diff --git a/prody/__init__.py b/prody/__init__.py index cf6ff2b10..ba1a38861 100644 --- a/prody/__init__.py +++ b/prody/__init__.py @@ -115,11 +115,6 @@ def turnonDepracationWarnings(action='always'): __all__.extend(dynamics.__all__) __all__.append('dynamics') -from . import hybrid -from .hybrid import * -__all__.extend(hybrid.__all__) -__all__.append('hybrid') - from . import ensemble from .ensemble import * __all__.extend(ensemble.__all__) diff --git a/prody/dynamics/__init__.py b/prody/dynamics/__init__.py index 77fe83969..c28d1eb38 100644 --- a/prody/dynamics/__init__.py +++ b/prody/dynamics/__init__.py @@ -339,8 +339,25 @@ from .signature import * __all__.extend(signature.__all__) +from . import adaptive +from .adaptive import * +__all__.extend(adaptive.__all__) + +from . import hybrid +from .hybrid import * +__all__.extend(hybrid.__all__) + +from . import clustenm +from .clustenm import * +__all__.extend(clustenm.__all__) + from . import essa from .essa import * __all__.extend(essa.__all__) +# workaround for circular dependency to accommodate original design style +from prody.ensemble import functions +functions.Hybrid = Hybrid +functions.ClustENM = ClustENM +functions.AdaptiveHybrid = AdaptiveHybrid diff --git a/prody/hybrid/adaptive.py b/prody/dynamics/adaptive.py similarity index 99% rename from prody/hybrid/adaptive.py rename to prody/dynamics/adaptive.py index 1c96e356d..327f537a3 100644 --- a/prody/hybrid/adaptive.py +++ b/prody/dynamics/adaptive.py @@ -26,9 +26,9 @@ from prody.ensemble.ensemble import Ensemble from prody.proteins.pdbfile import writePDBStream, parsePDBStream -from prody.dynamics.functions import calcENM -from prody.dynamics.modeset import ModeSet -from prody.dynamics.nma import NMA +from .functions import calcENM +from .modeset import ModeSet +from .nma import NMA from .hybrid import Hybrid diff --git a/prody/hybrid/clustenm.py b/prody/dynamics/clustenm.py similarity index 99% rename from prody/hybrid/clustenm.py rename to prody/dynamics/clustenm.py index 02f5b472b..cee7f9d60 100644 --- a/prody/hybrid/clustenm.py +++ b/prody/dynamics/clustenm.py @@ -36,13 +36,13 @@ from scipy.cluster.hierarchy import fcluster, linkage from prody import LOGGER -from prody.dynamics.anm import ANM -from prody.dynamics.gnm import GNM, ZERO -from prody.dynamics.rtb import RTB -from prody.dynamics.imanm import imANM -from prody.dynamics.exanm import exANM -from prody.dynamics.editing import extendModel -from prody.dynamics.sampling import sampleModes +from .anm import ANM +from .gnm import GNM, ZERO +from .rtb import RTB +from .imanm import imANM +from .exanm import exANM +from .editing import extendModel +from .sampling import sampleModes from prody.measure.transform import calcTransformation, applyTransformation, calcRMSD from prody.proteins.pdbfile import writePDB, writePDBStream, parsePDBStream diff --git a/prody/hybrid/hybrid.py b/prody/dynamics/hybrid.py similarity index 99% rename from prody/hybrid/hybrid.py rename to prody/dynamics/hybrid.py index b75163a40..e31010fb3 100644 --- a/prody/hybrid/hybrid.py +++ b/prody/dynamics/hybrid.py @@ -37,9 +37,9 @@ from prody import LOGGER -from prody.dynamics.anm import ANM -from prody.dynamics.gnm import GNM, ZERO -from prody.dynamics.editing import extendModel +from .anm import ANM +from .gnm import GNM, ZERO +from .editing import extendModel from prody.measure.transform import calcTransformation, applyTransformation, calcRMSD from prody.ensemble.ensemble import Ensemble diff --git a/prody/hybrid/__init__.py b/prody/hybrid/__init__.py deleted file mode 100644 index bd642413d..000000000 --- a/prody/hybrid/__init__.py +++ /dev/null @@ -1,28 +0,0 @@ -# -*- coding: utf-8 -*- -"""This module defines classes and functions for hybrid simulations. - -""" - -__all__ = [] - -from . import hybrid -from .hybrid import * -__all__.extend(hybrid.__all__) - -from . import clustenm -from .clustenm import * -__all__.extend(clustenm.__all__) - -from . import adaptive -from .adaptive import * -__all__.extend(adaptive.__all__) - -from . import comd -from .comd import * -__all__.extend(comd.__all__) - -# workaround for circular dependency to accommodate original design style -from prody.ensemble import functions -functions.Hybrid = Hybrid -functions.ClustENM = ClustENM -functions.AdaptiveHybrid = AdaptiveHybrid From 901e607b7860f463ed2bf1bb70d06df6c120d709 Mon Sep 17 00:00:00 2001 From: James Krieger Date: Fri, 19 Feb 2021 15:16:34 +0000 Subject: [PATCH 12/34] clustenm docs fix --- prody/dynamics/clustenm.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/prody/dynamics/clustenm.py b/prody/dynamics/clustenm.py index cee7f9d60..0f907e590 100644 --- a/prody/dynamics/clustenm.py +++ b/prody/dynamics/clustenm.py @@ -893,17 +893,17 @@ def run(self, cutoff=15., n_modes=3, gamma=1., n_confs=50, rmsd=1.0, :type n_gens: int :arg maxclust: Maximum number of clusters for each generation, default in None. - A tuple of int's can be given, e.g. (10, 30, 50) for subsequent generations. + A tuple of ints can be given, e.g. (10, 30, 50) for subsequent generations. Warning: Either maxclust or RMSD threshold should be given! For large number of generations and/or structures, specifying maxclust is more efficient. - :type maxclust: int or a tuple of int's + :type maxclust: int, tuple :arg threshold: RMSD threshold to apply when forming clusters, default is None. This parameter has been used in ClustENMv1, setting it to 75% of the maximum RMSD value used for sampling. A tuple of floats can be given, e.g. (1.5, 2.0, 2.5) for subsequent generations. Warning: This threshold should be chosen carefully in ClustENMv2 for efficiency. - :type threshold: float or tuple of floats. + :type threshold: float, tuple :arg solvent: Solvent model to be used. If it is set to 'imp' (default), implicit solvent model will be used, whereas 'exp' stands for explicit solvent model. @@ -921,7 +921,7 @@ def run(self, cutoff=15., n_modes=3, gamma=1., n_confs=50, rmsd=1.0, :arg force_field: Implicit solvent force field is ('amber99sbildn.xml', 'amber99_obc.xml'). Explicit solvent force field is ('amber14-all.xml', 'amber14/tip3pfb.xml'). Experimental feature: Forcefields already implemented in OpenMM can be used. - :type force_field: a tuple of str + :type force_field: tuple :arg tolerance: Energy tolerance to which the system should be minimized, default is 10.0 kJ/mole. :type tolerance: float @@ -941,12 +941,12 @@ def run(self, cutoff=15., n_modes=3, gamma=1., n_confs=50, rmsd=1.0, :arg t_steps_i: Duration of MD simulation (number of time steps) for the starting structure following the heating-up phase, default is 1000. Each time step is 2.0 fs. Note: Default value reduces possible drift from the starting structure. - :type t_steps_i : int + :type t_steps_i: int :arg t_steps_g: Duration of MD simulations (number of time steps) to run for each conformer following the heating-up phase, default is 7500. Each time step is 2.0 fs. - A tuple of int's can be given, e.g. (3000, 5000, 7000) for subsequent generations. - :type t_steps_g: int or tuple of int's + A tuple of ints can be given, e.g. (3000, 5000, 7000) for subsequent generations. + :type t_steps_g: int, tuple :arg outlier: Exclusion of conformers detected as outliers in each generation. Default is True for implicit solvent. Outliers, if any, are detected by From 9ae1cdfdb474e91ea4cd862ac28bb6d15f744a8b Mon Sep 17 00:00:00 2001 From: James Krieger Date: Mon, 22 Feb 2021 18:25:19 +0000 Subject: [PATCH 13/34] AdaptiveHybrid_docs_fix --- prody/dynamics/adaptive.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/prody/dynamics/adaptive.py b/prody/dynamics/adaptive.py index 327f537a3..5d725c8eb 100644 --- a/prody/dynamics/adaptive.py +++ b/prody/dynamics/adaptive.py @@ -490,7 +490,7 @@ class AdaptiveHybrid(Hybrid): that is an ANM-based hybrid algorithm. It requires PDBFixer and OpenMM for performing energy minimization and MD simulations in implicit/explicit solvent. - Instantiate a ClustENM object. + Instantiate a ClustENM-like hybrid method object for AdaptiveANM. ''' def __init__(self, title, **kwargs): super().__init__(title=title) From b386c991a2dab4a1f34717e9250095b41650ba0b Mon Sep 17 00:00:00 2001 From: James Krieger Date: Mon, 22 Feb 2021 18:29:59 +0000 Subject: [PATCH 14/34] AdaptiveHybrid warning fix --- prody/dynamics/adaptive.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/prody/dynamics/adaptive.py b/prody/dynamics/adaptive.py index 5d725c8eb..30594a779 100644 --- a/prody/dynamics/adaptive.py +++ b/prody/dynamics/adaptive.py @@ -648,7 +648,7 @@ def _fix(self, atoms, i): from pdbfixer import PDBFixer from simtk.openmm.app import PDBFile except ImportError: - raise ImportError('Please install PDBFixer and OpenMM in order to use ClustENM.') + raise ImportError('Please install PDBFixer and OpenMM in order to use ClustENM and related hybrid methods.') stream = createStringIO() title = atoms.getTitle() From 593b003dc2c02b846222dc2e651cd3e3c14b98c7 Mon Sep 17 00:00:00 2001 From: James Krieger Date: Mon, 22 Feb 2021 18:28:50 +0000 Subject: [PATCH 15/34] remove changes to clustenm --- prody/dynamics/clustenm.py | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/prody/dynamics/clustenm.py b/prody/dynamics/clustenm.py index 0f907e590..5fe1a18ac 100644 --- a/prody/dynamics/clustenm.py +++ b/prody/dynamics/clustenm.py @@ -43,20 +43,18 @@ from .exanm import exANM from .editing import extendModel from .sampling import sampleModes - -from prody.measure.transform import calcTransformation, applyTransformation, calcRMSD -from prody.proteins.pdbfile import writePDB, writePDBStream, parsePDBStream - +from prody.atomic import AtomGroup +from prody.measure import calcTransformation, applyTransformation, calcRMSD +from prody.ensemble import Ensemble +from prody.proteins import writePDB, parsePDB, writePDBStream, parsePDBStream from prody.utilities import createStringIO, importLA, mad -from .hybrid import Hybrid - la = importLA() norm = la.norm __all__ = ['ClustENM', 'ClustRTB', 'ClustImANM', 'ClustExANM'] -class ClustENM(Hybrid): +class ClustENM(Ensemble): ''' ClustENMv2 is the new version of ClustENM(v1) conformation sampling algorithm [KZ16]_. @@ -125,9 +123,11 @@ def __getitem__(self, index): return super(ClustENM, self).__getitem__(index) - def getAtoms(self, selected=True): + def getAtoms(self): + 'Returns atoms.' - return super(ClustENM, self).getAtoms(selected) + + return self._atoms def _isBuilt(self): @@ -893,17 +893,17 @@ def run(self, cutoff=15., n_modes=3, gamma=1., n_confs=50, rmsd=1.0, :type n_gens: int :arg maxclust: Maximum number of clusters for each generation, default in None. - A tuple of ints can be given, e.g. (10, 30, 50) for subsequent generations. + A tuple of int's can be given, e.g. (10, 30, 50) for subsequent generations. Warning: Either maxclust or RMSD threshold should be given! For large number of generations and/or structures, specifying maxclust is more efficient. - :type maxclust: int, tuple + :type maxclust: int or a tuple of int's :arg threshold: RMSD threshold to apply when forming clusters, default is None. This parameter has been used in ClustENMv1, setting it to 75% of the maximum RMSD value used for sampling. A tuple of floats can be given, e.g. (1.5, 2.0, 2.5) for subsequent generations. Warning: This threshold should be chosen carefully in ClustENMv2 for efficiency. - :type threshold: float, tuple + :type threshold: float or tuple of floats. :arg solvent: Solvent model to be used. If it is set to 'imp' (default), implicit solvent model will be used, whereas 'exp' stands for explicit solvent model. @@ -921,7 +921,7 @@ def run(self, cutoff=15., n_modes=3, gamma=1., n_confs=50, rmsd=1.0, :arg force_field: Implicit solvent force field is ('amber99sbildn.xml', 'amber99_obc.xml'). Explicit solvent force field is ('amber14-all.xml', 'amber14/tip3pfb.xml'). Experimental feature: Forcefields already implemented in OpenMM can be used. - :type force_field: tuple + :type force_field: a tuple of str :arg tolerance: Energy tolerance to which the system should be minimized, default is 10.0 kJ/mole. :type tolerance: float @@ -941,12 +941,12 @@ def run(self, cutoff=15., n_modes=3, gamma=1., n_confs=50, rmsd=1.0, :arg t_steps_i: Duration of MD simulation (number of time steps) for the starting structure following the heating-up phase, default is 1000. Each time step is 2.0 fs. Note: Default value reduces possible drift from the starting structure. - :type t_steps_i: int + :type t_steps_i : int :arg t_steps_g: Duration of MD simulations (number of time steps) to run for each conformer following the heating-up phase, default is 7500. Each time step is 2.0 fs. - A tuple of ints can be given, e.g. (3000, 5000, 7000) for subsequent generations. - :type t_steps_g: int, tuple + A tuple of int's can be given, e.g. (3000, 5000, 7000) for subsequent generations. + :type t_steps_g: int or tuple of int's :arg outlier: Exclusion of conformers detected as outliers in each generation. Default is True for implicit solvent. Outliers, if any, are detected by From f5304e719fb5407b01a08c43b9eaf93dccf50070 Mon Sep 17 00:00:00 2001 From: James Krieger Date: Wed, 24 Feb 2021 13:56:57 +0000 Subject: [PATCH 16/34] added comd --- prody/dynamics/__init__.py | 4 + prody/dynamics/comd.py | 254 +++++++++++++++++++++++++++++++++++++ 2 files changed, 258 insertions(+) create mode 100644 prody/dynamics/comd.py diff --git a/prody/dynamics/__init__.py b/prody/dynamics/__init__.py index c28d1eb38..5058bf458 100644 --- a/prody/dynamics/__init__.py +++ b/prody/dynamics/__init__.py @@ -347,6 +347,10 @@ from .hybrid import * __all__.extend(hybrid.__all__) +from . import comd +from .comd import * +__all__.extend(comd.__all__) + from . import clustenm from .clustenm import * __all__.extend(clustenm.__all__) diff --git a/prody/dynamics/comd.py b/prody/dynamics/comd.py new file mode 100644 index 000000000..0bbe35f5e --- /dev/null +++ b/prody/dynamics/comd.py @@ -0,0 +1,254 @@ +from prody.proteins.pdbfile import parsePDB +from prody.dynamics.anm import ANM + +from prody.measure.transform import calcRMSD +from prody.measure.measure import buildDistMatrix +from prody.ensemble.ensemble import Ensemble + +from numpy import * +from random import random +import os.path +import sys + +__all__ = ['calcANMMC'] + +def calcANMMC(initial, final, **kwargs): + """Perform ANM-MC calculations from CoMD. + """ + devi = kwargs.get('devi', 0.5) + stepcutoff = kwargs.get('stepcutoff', 2.) + acceptance_ratio = kwargs.get('acceptance_ratio', 0.9) + + cutoff = kwargs.get('cutoff', 15.) + anm_cut = kwargs.get('anm_cut', cutoff) + + N = kwargs.get('N', 10000) + usePseudoatoms = kwargs.get('usePseudoatoms', False) + + initial_pdb_id = kwargs.get('initial_pdb_id', initial.getTitle()) + original_initial_pdb = kwargs.get('original_initial_pdb', initial.getTitle()) + original_final_pdb = kwargs.get('original_final_pdb', final.getTitle()) + + if usePseudoatoms: + initial_ca = initial + final_ca = final + else: + initial_ca = initial.select('name CA or name BB') + final_ca = final.select('name CA or name BB') + + # ANM calculation based on current + pdb_anm = ANM('pdb ca') + pdb_anm.buildHessian(initial_ca, cutoff=anm_cut) + pdb_anm.calcModes() + + # Cumulative sum vector preparation for metropolis sampling + eigs = 1/sqrt(pdb_anm.getEigvals()) + eigs_n = zeros(eigs.shape) + eigs_n = eigs / sum(eigs) + eigscumsum = eigs_n.cumsum() + U = pdb_anm.getEigvecs() + + # Take a step along mode 1 (ID 0) to calculate the scale factor + pdb_ca = initial_ca + pdb_ca_temp = pdb_ca.copy() + ID = 0 + direction = 1. + coords_temp = pdb_ca_temp.getCoords() + coords_temp[0:,0] = coords_temp[0:,0] + direction * U[range(0,len(U),3),ID] * eigs[ID] + coords_temp[0:,1] = coords_temp[0:,1] + direction * U[range(1,len(U),3),ID] * eigs[ID] + coords_temp[0:,2] = coords_temp[0:,2] + direction * U[range(2,len(U),3),ID] * eigs[ID] + pdb_ca_temp.setCoords(coords_temp) + pdb_ca = pdb_ca_temp.copy() + biggest_rmsd = calcRMSD(pdb_ca.getCoords(), initial_ca.getCoords()) + scale_factor = devi/biggest_rmsd # This means that devi is the maximum deviation in RMSD for any step + + # counts for metropolis sampling + count1 = 0 # Up-hill moves + count2 = 0 # Accepted up-hill moves + count3 = 0 # Down-hill moves + + # read MC parameter from file + if os.path.isfile(initial_pdb_id + '_ratio.dat') and os.stat(initial_pdb_id + '_ratio.dat').st_size != 0: + MCpara = loadtxt(initial_pdb_id + '_ratio.dat') + accept_para = MCpara[4] + if MCpara[1] > acceptance_ratio + 0.05: + accept_para *= 1.5 + elif MCpara[1] < acceptance_ratio - 0.05: + accept_para /= 1.5 + else: + savetxt(initial_pdb_id + '_status.dat',[1]) + else: + accept_para = 0.1 + + # MC parameter 1 is the acceptance ratio, which should converge on + # the selected value with a tolerance of 0.05 either side + # and accept_para is adjusted to help bring it within these limits. + # This also happens every 5 steps during the run (lines 173 to 181). + + if original_initial_pdb != original_final_pdb: + # difference from the target structure is defined as the energy and the minimum is zero. + + native_dist = buildDistMatrix(final_ca) + dist = buildDistMatrix(initial_ca) + Ep = sum((native_dist - dist)**2) + + # Reset pdb_ca (the current structure whole the steps back to the original) + pdb_ca = initial_ca + + step_count = 0 + check_step_counts = [0] + + sys.stdout.write(' '*2 + 'rmsd' + ' '*2 + 'rand' + ' '*2 + 'ID' + ' '*3 + 'step' \ + + ' '*2 + 'accept_para' + ' '*5 + 'f' + '\n') + + # MC Loop + for k in range(N): + pdb_ca_temp = pdb_ca.copy() + rand = random() + ID = argmax(rand0.5)-1 + + coords_temp = pdb_ca_temp.getCoords() + coords_temp[0:,0] = coords_temp[0:,0] + direction * U[range(0,len(U),3),ID] * eigs[ID] * scale_factor + coords_temp[0:,1] = coords_temp[0:,1] + direction * U[range(1,len(U),3),ID] * eigs[ID] * scale_factor + coords_temp[0:,2] = coords_temp[0:,2] + direction * U[range(2,len(U),3),ID] * eigs[ID] * scale_factor + pdb_ca_temp.setCoords(coords_temp) + + if original_initial_pdb != original_final_pdb: + dist = buildDistMatrix(pdb_ca_temp) + En = sum((native_dist - dist)**2) + + # Check whether you are heading the right way and accept uphill moves + # depending on the Metropolis criterion. Classically this depends on RT + # but this is subsumed by the unknown units from having a uniform + # spring constant that is set to 1. + if Ep > En: + count3 += 1 + pdb_ca = pdb_ca_temp.copy() + Ep = En + accepted = 1 + + elif exp(-(En-Ep) * accept_para) > random(): + pdb_ca = pdb_ca_temp.copy() + count1 += 1 + count2 += 1 + Ep = En + accepted = 1 + + else: + count1 += 1 + accepted = 0 + + if count1 == 0: + f = 1. + else: + f = float(count2)/float(count1) + + if (mod(k,5)==0 and not(k==0)): + # Update of the accept_para to keep the MC para reasonable + # See comment lines 82 to 85. + if f > acceptance_ratio + 0.05: + accept_para /= 1.5 + elif f < acceptance_ratio - 0.05: + accept_para *= 1.5 + + if accept_para < 0.001: accept_para = 0.001 + + else: + # for exploration based on one structure + # all moves are uphill but will be accepted anyway + pdb_ca = pdb_ca_temp.copy() + count3 += 1 + accepted = 1 + f = 1. + + rmsd = calcRMSD(pdb_ca.getCoords(), initial_ca.getCoords()) + sys.stdout.write('{:6.2f}'.format(rmsd) + ' ' + '{:5.2f}'.format(rand) + \ + '{:4d}'.format(ID) + '{:7d}'.format(k) + ' '*2 + str(accepted) + ' '*2 + \ + '{:5.4f}'.format(accept_para) + ' '*2 + '{:5.4f}'.format(f) + '\n') + + if rmsd > stepcutoff: + break + + # Build an ensemble for writing the final structure to a dcd file + ensemble_final = Ensemble() + ensemble_final.setAtoms(initial_ca) + ensemble_final.setCoords(initial_ca) + ensemble_final.addCoordset(pdb_ca.getCoords()) + + return ensemble_final, count1, count2, count3, k, accept_para + +if __name__ == '__main__': + + from prody import * + from numpy import * + import time + + time.sleep(10) + ar = [] + for arg in sys.argv: + ar.append(arg) + + initial_pdbn=ar[1] + final_pdbn=ar[2] + initial_pdb_id = initial_pdbn[:initial_pdbn.rfind('.')] + final_pdb_id = final_pdbn[:final_pdbn.rfind('.')] + + original_initial_pdb = ar[3] + original_final_pdb = ar[4] + + comd_cycle_number = ar[5] + + if len(ar) > 6 and ar[6].strip() != '0': + devi = float(ar[6]) + else: + devi = 0.5 + + if len(ar) > 7 and ar[7].strip() != '0': + stepcutoff=float(ar[7]) + else: + stepcutoff=2. + + if len(ar) > 8 and ar[8].strip() != '0': + acceptance_ratio = float(ar[8]) + else: + acceptance_ratio = 0.9 + + if len(ar) > 9 and ar[9].strip() != '0': + anm_cut=float(ar[9]) + else: + anm_cut=15 + + if len(ar) > 10 and ar[10].strip() != '0': + N=int(ar[10]) + else: + N=10000 + + if len(ar) > 11 and ar[11].strip() != '0': + final_structure_dcd_name = ar[11] + else: + final_structure_dcd_name = 'cycle_{0}_'.format(int(comd_cycle_number)) + \ + initial_pdb_id + '_' + final_pdb_id + '_final_structure.dcd' + + if len(ar) > 12 and ar[12].strip() != '0': + usePseudoatoms = int(ar[12]) + else: + usePseudoatoms = 0 + + initial_pdb = parsePDB(initial_pdbn) + final_pdb = parsePDB(final_pdbn) + + ensemble_final, count1, count2, count3, k, accept_para = calcANMMC(initial_pdb, final_pdb, + initial_pdb_id=initial_pdb_id, + original_initial_pdb=original_initial_pdb, + original_final_pdb=original_final_pdb, + comd_cycle_number=comd_cycle_number, + devi=devi, stepcutoff=stepcutoff, + acceptance_ratio=acceptance_ratio, + anm_cut=anm_cut, N=N, + usePseudoatoms=usePseudoatoms) + writeDCD(final_structure_dcd_name, ensemble_final) + + ratios = [count2/N, count2/count1 if count1 != 0 else 0, count2, k, accept_para ] + savetxt(initial_pdb_id + '_ratio.dat', ratios, fmt='%.2e') + From e37de5ea0b00e97cc8e1932874949af13b22b499 Mon Sep 17 00:00:00 2001 From: James Krieger Date: Wed, 24 Feb 2021 14:17:03 +0000 Subject: [PATCH 17/34] comd checks input arguments --- prody/dynamics/comd.py | 28 +++++++++++++++++++++++----- 1 file changed, 23 insertions(+), 5 deletions(-) diff --git a/prody/dynamics/comd.py b/prody/dynamics/comd.py index 0bbe35f5e..cb16a0e97 100644 --- a/prody/dynamics/comd.py +++ b/prody/dynamics/comd.py @@ -189,15 +189,33 @@ def calcANMMC(initial, final, **kwargs): for arg in sys.argv: ar.append(arg) - initial_pdbn=ar[1] - final_pdbn=ar[2] + if len(ar) > 1: + initial_pdbn=ar[1] + else: + raise ValueError('Please provide at least 1 argument (a PDB filename)') + + if len(ar) > 2: + final_pdbn=ar[2] + else: + final_pdbn = initial_pdbn + initial_pdb_id = initial_pdbn[:initial_pdbn.rfind('.')] final_pdb_id = final_pdbn[:final_pdbn.rfind('.')] - original_initial_pdb = ar[3] - original_final_pdb = ar[4] + if len(ar) > 3 and ar[3].strip() != '0': + original_initial_pdb = ar[3] + else: + original_initial_pdb = initial_pdb_id + + if len(ar) > 4 and ar[4].strip() != '0': + original_final_pdb = ar[4] + else: + original_final_pdb = final_pdb_id - comd_cycle_number = ar[5] + if len(ar) > 5 and ar[5].strip() != '0': + comd_cycle_number = ar[5] + else: + comd_cycle_number = 1 if len(ar) > 6 and ar[6].strip() != '0': devi = float(ar[6]) From 43398ed670fdcf58ad8d69d7d4792852bd481cc2 Mon Sep 17 00:00:00 2001 From: James Krieger Date: Wed, 24 Feb 2021 18:41:11 +0000 Subject: [PATCH 18/34] CoMD as hybrid method --- prody/dynamics/comd.py | 739 +++++++++++++++++++++++++++++++++++++-- prody/dynamics/hybrid.py | 11 +- 2 files changed, 714 insertions(+), 36 deletions(-) diff --git a/prody/dynamics/comd.py b/prody/dynamics/comd.py index cb16a0e97..dd751f62d 100644 --- a/prody/dynamics/comd.py +++ b/prody/dynamics/comd.py @@ -1,33 +1,46 @@ -from prody.proteins.pdbfile import parsePDB -from prody.dynamics.anm import ANM - -from prody.measure.transform import calcRMSD -from prody.measure.measure import buildDistMatrix +from prody.proteins.pdbfile import parsePDB, writePDBStream, parsePDBStream +from prody.measure.transform import calcRMSD, calcTransformation, superpose, applyTransformation +from prody.measure.measure import buildDistMatrix, calcDeformVector, calcDistance from prody.ensemble.ensemble import Ensemble +from prody.utilities import createStringIO, importLA, checkCoords, copy + +norm = importLA().norm + +from prody import LOGGER from numpy import * +import numpy as np +from multiprocessing import cpu_count, Pool from random import random import os.path import sys -__all__ = ['calcANMMC'] +from .adaptive import ONEWAY, ALTERNATING, SERIAL, DEFAULT +from .hybrid import Hybrid +from .nma import NMA +from .gnm import GNM, ZERO +from .anm import ANM +from .sampling import traverseMode + +__all__ = ['calcANMMC', 'CoMD'] def calcANMMC(initial, final, **kwargs): """Perform ANM-MC calculations from CoMD. """ - devi = kwargs.get('devi', 0.5) - stepcutoff = kwargs.get('stepcutoff', 2.) - acceptance_ratio = kwargs.get('acceptance_ratio', 0.9) + devi = kwargs.pop('devi', 0.5) + stepcutoff = kwargs.pop('stepcutoff', 2.) + acceptance_ratio = kwargs.pop('acceptance_ratio', 0.9) - cutoff = kwargs.get('cutoff', 15.) - anm_cut = kwargs.get('anm_cut', cutoff) + cutoff = kwargs.pop('cutoff', 15.) + anm_cut = kwargs.pop('anm_cut', cutoff) - N = kwargs.get('N', 10000) - usePseudoatoms = kwargs.get('usePseudoatoms', False) + N = kwargs.pop('N', 10000) + usePseudoatoms = kwargs.pop('usePseudoatoms', False) - initial_pdb_id = kwargs.get('initial_pdb_id', initial.getTitle()) - original_initial_pdb = kwargs.get('original_initial_pdb', initial.getTitle()) - original_final_pdb = kwargs.get('original_final_pdb', final.getTitle()) + initial_pdb_id = kwargs.pop('initial_pdb_id', initial.getTitle()) + original_initial_pdb = kwargs.pop('original_initial_pdb', initial_pdb_id) + + original_final_pdb = kwargs.pop('original_final_pdb', final.getTitle()) if usePseudoatoms: initial_ca = initial @@ -38,8 +51,8 @@ def calcANMMC(initial, final, **kwargs): # ANM calculation based on current pdb_anm = ANM('pdb ca') - pdb_anm.buildHessian(initial_ca, cutoff=anm_cut) - pdb_anm.calcModes() + pdb_anm.buildHessian(initial_ca, cutoff=anm_cut, **kwargs) + pdb_anm.calcModes(**kwargs) # Cumulative sum vector preparation for metropolis sampling eigs = 1/sqrt(pdb_anm.getEigvals()) @@ -68,7 +81,8 @@ def calcANMMC(initial, final, **kwargs): count3 = 0 # Down-hill moves # read MC parameter from file - if os.path.isfile(initial_pdb_id + '_ratio.dat') and os.stat(initial_pdb_id + '_ratio.dat').st_size != 0: + if (os.path.isfile(initial_pdb_id + '_ratio.dat') and + os.stat(initial_pdb_id + '_ratio.dat').st_size != 0): MCpara = loadtxt(initial_pdb_id + '_ratio.dat') accept_para = MCpara[4] if MCpara[1] > acceptance_ratio + 0.05: @@ -83,7 +97,7 @@ def calcANMMC(initial, final, **kwargs): # MC parameter 1 is the acceptance ratio, which should converge on # the selected value with a tolerance of 0.05 either side # and accept_para is adjusted to help bring it within these limits. - # This also happens every 5 steps during the run (lines 173 to 181). + # This also happens every 5 steps during the run (lines 150 to 156). if original_initial_pdb != original_final_pdb: # difference from the target structure is defined as the energy and the minimum is zero. @@ -146,7 +160,7 @@ def calcANMMC(initial, final, **kwargs): if (mod(k,5)==0 and not(k==0)): # Update of the accept_para to keep the MC para reasonable - # See comment lines 82 to 85. + # See comment lines 86 to 89. if f > acceptance_ratio + 0.05: accept_para /= 1.5 elif f < acceptance_ratio - 0.05: @@ -176,7 +190,670 @@ def calcANMMC(initial, final, **kwargs): ensemble_final.setCoords(initial_ca) ensemble_final.addCoordset(pdb_ca.getCoords()) - return ensemble_final, count1, count2, count3, k, accept_para + return ensemble_final, count1, count2, count3, k, accept_para, rmsd + + +class CoMD(Hybrid): + ''' + This is a new version of the Collective Molecular Dynamics (CoMD) sampling algorithm [MG13]_. + + It requires PDBFixer and OpenMM for performing energy minimization and MD simulations + in implicit/explicit solvent. + + Instantiate a ClustENM-like hybrid method object for CoMD. + + .. [MG13] Mert Gur, Jeffry D Madura, Ivet Bahar. Global transitions of proteins + explored by a multiscale hybrid methodology: application to adenylate kinase. + *Biophys J* **2013** 105:1643-1652. + ''' + def __init__(self, title, **kwargs): + super().__init__(title=title) + self._atomsB = None + self._defvecs = [] + self._rmsds = [] + self._traj_rmsds = [] + self._cg_ensA = Ensemble(title=title) + self._cg_ensB = Ensemble(title=title) + self._targeted = True + + self._tmdk = kwargs.get('tmdk', 2000) + # 20,000 (like paper) works for forces on CA but leads to unrealistic deformations + # 200 (like NAMD website) is too weak for such small conformational changes + + def _sample(self, **kwargs): + + conf, conf2 = self._conformers[-2], self._conformers[-1] + + tmp = self._atoms.copy() + tmpB = self._atomsB.copy() + + if self._direction == 1: + tmp.setCoords(conf) + tmpB.setCoords(conf2) + else: + tmpB.setCoords(conf) + tmp.setCoords(conf2) + + cg = tmp[self._idx_cg] + cgB = tmpB[self._idx_cg] + + anm_cg = self._buildANM(cg) + if not self._checkANM(anm_cg): + return None + + anm_cgB = self._buildANM(cgB) + if not self._checkANM(anm_cgB): + return None + + self._direction_mode = kwargs.pop('mode', DEFAULT) + rmsd = self._rmsd[self._cycle] + + if self._direction_mode == ONEWAY: + LOGGER.info('\nStarting cycle with structure A') + self._cg_ensA, _, _, _, _, _, rmsd = calcANMMC(cg, cgB, + stepcutoff=rmsd, + n_modes=self._n_modes, + **kwargs) + cg_ens = self._cg_ensA + + elif self._direction_mode == ALTERNATING: + if self._direction == 1: + LOGGER.info('\nStarting cycle with structure A') + self._cg_ensA, _, _, _, _, _, rmsd = calcANMMC(cg, cgB, + stepcutoff=rmsd, + **kwargs) + cg_ens = self._cg_ensA + + else: + LOGGER.info('\nStarting cycle with structure B') + self._cg_ensB, _, _, _, _, _, rmsd = calcANMMC(cgB, cg, + stepcutoff=rmsd, + **kwargs) + cg_ens = self._cg_ensB + + elif self._direction_mode == SERIAL: + if self._direction == 1: + LOGGER.info('\nStarting cycle with structure A') + self._cg_ensA, _, _, _, _, _, rmsd = calcANMMC(cg, cgB, + stepcutoff=rmsd, + **kwargs) + cg_ens = self._cg_ensA + + else: + LOGGER.info('\nStarting cycle with structure B') + self._cg_ensB, _, _, _, _, _, rmsd = calcANMMC(cgB, cg, + stepcutoff=rmsd, + **kwargs) + cg_ens = self._cg_ensB + + else: + raise ValueError('unknown aANM mode: %d' % self._direction_mode) + + if self._direction == 1: + defvec = calcDeformVector(cg, cg_ens.getCoordsets()[-1]) + model = NMA() + model.setEigens(defvec.getArray().reshape((defvec.getArray().shape[0], 1))) + model_ex = self._extendModel(model, cg, tmp) + def_ens = traverseMode(model_ex[0], tmp, 1, rmsd) + coordsets = [def_ens.getCoordsets()[-1]] + + if self._direction_mode == ALTERNATING: + self._direction = 2 + else: + defvec = calcDeformVector(cgB, cg_ens.getCoordsets()[-1]) + model = NMA() + model.setEigens(defvec.getArray().reshape((defvec.getArray().shape[0], 1))) + model_ex = self._extendModel(model, cgB, tmpB) + def_ens = traverseMode(model_ex[0], tmpB, 1, rmsd) + coordsets = [def_ens.getCoordsets()[-1]] + + if self._direction_mode == ALTERNATING: + self._direction = 1 + + if self._targeted: + if self._parallel: + with Pool(cpu_count()) as p: + pot_conf = p.map(self._multi_targeted_sim, + [(conf, coords) for coords in coordsets]) + else: + pot_conf = [self._multi_targeted_sim((conf, coords)) for coords in coordsets] + + pots, poses = list(zip(*pot_conf)) + + idx = np.logical_not(np.isnan(pots)) + coordsets = np.array(poses)[idx] + + LOGGER.debug('%d/%d sets of coordinates were moved to the target' % (len(coordsets), len(poses))) + + return coordsets + + + def _multi_targeted_sim(self, args): + + conf = args[0] + coords = args[1] + + return self._targeted_sim(conf, coords, tmdk=self._tmdk) + + + def _targeted_sim(self, coords0, coords1, tmdk=15., + d_steps=100, n_max_steps=10000, ddtol=1e-3, n_conv=5): + + try: + from simtk.openmm import CustomExternalForce + from simtk.openmm.app import StateDataReporter + from simtk.unit import nanometer, angstrom, kilocalorie_per_mole, kilojoule_per_mole + except ImportError: + raise ImportError('Please install PDBFixer and OpenMM in order to use Hybrid.') + + tmdk *= kilocalorie_per_mole/angstrom**2 + n_atoms = coords0.shape[0] + tmdk /= n_atoms + + pos1 = coords1 * angstrom + + force = CustomExternalForce("tmdk*periodicdistance(x, y, z, x0, y0, z0)^2") + force.addGlobalParameter('tmdk', 0) + force.addPerParticleParameter('x0') + force.addPerParticleParameter('y0') + force.addPerParticleParameter('z0') + force.setForceGroup(1) + + for i, atm_idx in enumerate(np.arange(n_atoms)): + pars = pos1[i, :].value_in_unit(nanometer) + force.addParticle(int(atm_idx), pars) + + simulation = self._prep_sim(coords0, external_forces=[force]) + + # automatic conversion into nanometer will be carried out. + simulation.context.setPositions(coords0 * angstrom) + + dist = dist0 = calcRMSD(coords0, coords1) + m_conv = 0 + n_steps = 0 + try: + simulation.minimizeEnergy(tolerance=self._tolerance*kilojoule_per_mole, + maxIterations=self._maxIterations) + + # update parameters + while n_steps < n_max_steps: + simulation.context.setParameter('tmdk', tmdk) + force.updateParametersInContext(simulation.context) + + simulation.step(d_steps) + n_steps += d_steps + + # evaluate distance to destination + pos = simulation.context.getState(getPositions=True).getPositions(asNumpy=True).value_in_unit(angstrom) + d = calcRMSD(pos, coords1) + dd = np.abs(dist - d) + + dist = d + + if dd < ddtol: + m_conv += 1 + + if m_conv >= n_conv: + break + + LOGGER.debug('RMSD: %4.2f -> %4.2f' % (dist0, dist)) + + simulation.context.setParameter('tmdk', 0.0) + simulation.minimizeEnergy(tolerance=self._tolerance*kilojoule_per_mole, + maxIterations=self._maxIterations) + + pos = simulation.context.getState(getPositions=True).getPositions(asNumpy=True).value_in_unit(angstrom) + pot = simulation.context.getState(getEnergy=True).getPotentialEnergy().value_in_unit(kilojoule_per_mole) + + return pot, pos + + except BaseException as be: + LOGGER.warning('OpenMM exception: ' + be.__str__() + ' so the corresponding conformer will be discarded!') + + return np.nan, np.full_like(coords0, np.nan) + + def setAtoms(self, atomsA, atomsB=None, pH=7.0, **kwargs): + aligned = kwargs.get('aligned', False) + if not aligned and atomsB is not None: + T = calcTransformation(atomsA.ca, atomsB.ca, weights=atomsA.ca.getFlags("mapped")) + _ = applyTransformation(T, atomsA) + + if self._isBuilt(): + super(Hybrid, self).setAtoms(atomsA) + self._atomsB = atomsB + else: + A_B_dict = {0: 'A', 1: 'B'} + for i, atoms in enumerate([atomsA, atomsB]): + if i == 1 and atomsB is None: + break + + atoms = atoms.select('not hetatm') + + self._nuc = atoms.select('nucleotide') + + if self._nuc is not None: + + idx_p = [] + for c in self._nuc.getChids(): + tmp = self._nuc[c].iterAtoms() + for a in tmp: + if a.getName() in ['P', 'OP1', 'OP2', 'OP3']: + idx_p.append(a.getIndex()) + + if idx_p: + nsel = 'not index ' + ' '.join([str(i) for i in idx_p]) + atoms = atoms.select(nsel) + + LOGGER.info('Fixing structure {0}...'.format(A_B_dict[i])) + LOGGER.timeit('_clustenm_fix') + self._ph = pH + self._fix(atoms, i) + LOGGER.report('The structure was fixed in %.2fs.', + label='_clustenm_fix') + + if self._nuc is None: + self._idx_cg = self._atoms.ca.getIndices() + self._n_cg = self._atoms.ca.numAtoms() + else: + self._idx_cg = self._atoms.select("name CA C2 C4' P").getIndices() + self._n_cg = self._atoms.select("name CA C2 C4' P").numAtoms() + + self._n_atoms = self._atoms.numAtoms() + self._indices = None + + if i == 1: + self._cg_ensA.setAtoms(self._atoms[self._idx_cg]) + else: + self._cg_ensB.setAtoms(self._atomsB[self._idx_cg]) + + def _fix(self, atoms, i): + try: + from pdbfixer import PDBFixer + from simtk.openmm.app import PDBFile + except ImportError: + raise ImportError('Please install PDBFixer and OpenMM in order to use ClustENM and related hybrid methods.') + + stream = createStringIO() + title = atoms.getTitle() + writePDBStream(stream, atoms) + stream.seek(0) + fixed = PDBFixer(pdbfile=stream) + stream.close() + + fixed.missingResidues = {} + fixed.findNonstandardResidues() + fixed.replaceNonstandardResidues() + fixed.removeHeterogens(False) + fixed.findMissingAtoms() + fixed.addMissingAtoms() + fixed.addMissingHydrogens(self._ph) + + stream = createStringIO() + PDBFile.writeFile(fixed.topology, fixed.positions, + stream, keepIds=True) + stream.seek(0) + if i == 0: + self._atoms = parsePDBStream(stream) + self._atoms.setTitle(title) + else: + self._atomsB = parsePDBStream(stream) + self._atomsB.setTitle(title) + stream.close() + + self._topology = fixed.topology + self._positions = fixed.positions + + def getAtomsA(self, selected=True): + 'Returns atoms for structure A (main atoms).' + return super(CoMD, self).getAtoms(selected) + + def getAtomsB(self, selected=True): + 'Returns atoms for structure B.' + if self._atomsB is None: + return None + if self._indices is None or not selected: + return self._atomsB + return self._atomsB[self._indices] + + def getRMSDsB(self): + if self._confs is None or self._coords is None: + return None + + indices = self._indices + if indices is None: + indices = np.arange(self._confs.shape[1]) + + weights = self._weights[indices] if self._weights is not None else None + + return calcRMSD(self._atomsB, self._confs[:, indices], weights) + + def run(self, cutoff=15., n_modes=20, gamma=1., n_confs=50, rmsd=1.0, + n_gens=5, solvent='imp', sim=False, force_field=None, temp=303.15, + t_steps_i=1000, t_steps_g=7500, + outlier=True, mzscore=3.5, **kwargs): + + ''' + Performs a ClustENM-like run. + + :arg cutoff: Cutoff distance (A) for pairwise interactions used in ANM + computations, default is 15.0 A. + :type cutoff: float + + :arg gamma: Spring constant of ANM, default is 1.0. + :type gamma: float + + :arg n_modes: Number of non-zero eigenvalues/vectors to calculate. + :type n_modes: int + + :arg n_confs: Number of new conformers to be generated based on any conformer + from the previous generation, default is 50. + :type n_confs: int + + :arg rmsd: Average RMSD of the new conformers with respect to the conformer + from which they are generated, default is 1.0 A. + A tuple of floats can be given, e.g. (1.0, 1.5, 1.5) for subsequent generations. + Note: In the case of ClustENMv1, this value is the maximum rmsd, not the average. + :type rmsd: float, tuple of floats + + :arg n_gens: Number of generations. + :type n_gens: int + + :arg solvent: Solvent model to be used. If it is set to 'imp' (default), + implicit solvent model will be used, whereas 'exp' stands for explicit solvent model. + Warning: In the case of nucleotide chains, explicit solvent model is automatically set. + :type solvent: str + + :arg padding: Padding distance to use for solvation. Default is 1.0 nm. + :type padding: float + + :arg ionicStrength: Total concentration of ions (both positive and negative) to add. + This does not include ions that are added to neutralize the system. + Default concentration is 0.0 molar. + :type ionicStrength: float + + :arg force_field: Implicit solvent force field is ('amber99sbildn.xml', 'amber99_obc.xml'). + Explicit solvent force field is ('amber14-all.xml', 'amber14/tip3pfb.xml'). + Experimental feature: Forcefields already implemented in OpenMM can be used. + :type force_field: a tuple of str + + :arg tolerance: Energy tolerance to which the system should be minimized, default is 10.0 kJ/mole. + :type tolerance: float + + :arg maxIterations: Maximum number of iterations to perform during energy minimization. + If this is 0 (default), minimization is continued until the results converge without + regard to how many iterations it takes. + :type maxIterations: int + + :arg sim: If it is True (default), a short MD simulation is performed after energy minimization. + Note: There is also a heating-up phase until the desired temperature is reached. + :type sim: bool + + :arg temp: Temperature at which the simulations are conducted, default is 303.15 K. + :type temp: float + + :arg t_steps_i: Duration of MD simulation (number of time steps) for the starting structure + following the heating-up phase, default is 1000. Each time step is 2.0 fs. + Note: Default value reduces possible drift from the starting structure. + :type t_steps_i : int + + :arg t_steps_g: Duration of MD simulations (number of time steps) to run for each conformer + following the heating-up phase, default is 7500. Each time step is 2.0 fs. + A tuple of int's can be given, e.g. (3000, 5000, 7000) for subsequent generations. + :type t_steps_g: int or tuple of int's + + :arg outlier: Exclusion of conformers detected as outliers in each generation. + Default is True for implicit solvent. Outliers, if any, are detected by + the modified z-scores of the conformers' potential energies over a generation. + Note: It is automatically set to False when explicit solvent model is being used. + :type outlier: bool + + :arg mzscore: Modified z-score threshold to label conformers as outliers. Default is 3.5. + :type mzscore: float + + :arg v1: Original sampling method with complete enumeration of desired ANM modes is used. + Default is False. Maximum number of modes should not exceed 5 for efficiency. + :type v1: bool + + :arg platform: Architecture on which the OpenMM part runs, default is None. + It can be chosen as 'CUDA', 'OpenCL' or 'CPU'. + For efficiency, 'CUDA' or 'OpenCL' is recommended. + :type platform: str + + :arg parallel: If it is True (default is False), conformer generation will be parallelized. + :type parallel: bool + ''' + + if self._isBuilt(): + raise ValueError('ClustENM ensemble has been built; please start a new instance') + + # set up parameters + self._cutoff = cutoff + self._n_modes0 = self._n_modes = n_modes + self._gamma = gamma + self._n_confs = n_confs + self._rmsd = (0.,) + rmsd if isinstance(rmsd, tuple) else (0.,) + (rmsd,) * n_gens + self._n_gens = n_gens + self._platform = kwargs.pop('platform', None) + self._parallel = kwargs.pop('parallel', False) + self._targeted = kwargs.pop('targeted', self._targeted) + self._tmdk = kwargs.pop('tmdk', self._tmdk) + + self._direction_mode = kwargs.get('mode', DEFAULT) + if self._direction_mode == ALTERNATING and self._n_gens % 2: + raise ValueError('ALTERNATING modes needs even n_gens') + + self._direction = 1 + + self._sol = solvent if self._nuc is None else 'exp' + self._padding = kwargs.pop('padding', 1.0) + self._ionicStrength = kwargs.pop('ionicStrength', 0.0) + if self._sol == 'imp': + self._force_field = ('amber99sbildn.xml', 'amber99_obc.xml') if force_field is None else force_field + if self._sol == 'exp': + self._force_field = ('amber14-all.xml', 'amber14/tip3pfb.xml') if force_field is None else force_field + self._tolerance = kwargs.pop('tolerance', 10.0) + self._maxIterations = kwargs.pop('maxIterations', 0) + self._sim = sim + self._temp = temp + + if self._sim: + if isinstance(t_steps_g, tuple): + self._t_steps = (t_steps_i,) + t_steps_g + else: + self._t_steps = (t_steps_i,) + (t_steps_g,) * n_gens + + self._outlier = False if self._sol == 'exp' else outlier + self._mzscore = mzscore + self._v1 = kwargs.pop('v1', False) + + self._cycle = 0 + + # check for discontinuity in the structure A + gnm = GNM() + gnm.buildKirchhoff(self._atoms[self._idx_cg], cutoff=self._cutoff) + K = gnm.getKirchhoff() + rank_diff = (len(K) - 1 + - np.linalg.matrix_rank(K, tol=ZERO, hermitian=True)) + if rank_diff != 0: + raise ValueError('atoms has disconnected parts; please check the structure') + + LOGGER.timeit('_hybrid_overall') + + LOGGER.info('Generation 0 for structure A ...') + + if self._sim: + if self._t_steps[0] != 0: + LOGGER.info('Minimization, heating-up & simulation in generation 0 for structure A ...') + else: + LOGGER.info('Minimization & heating-up in generation 0 for structure A ...') + else: + LOGGER.info('Minimization in generation 0 for structure A ...') + LOGGER.timeit('_clustenm_min') + potential, conformer = self._min_sim(self._atoms.getCoords()) + if np.isnan(potential): + raise ValueError('Initial structure A could not be minimized. Try again and/or check your structure.') + + LOGGER.report(label='_clustenm_min') + + LOGGER.info('#' + '-' * 19 + '/*\\' + '-' * 19 + '#') + + self.setCoords(conformer) + + potentials = [potential] + sizes = [1] + new_shape = [1] + for s in conformer.shape: + new_shape.append(s) + conf = conformer.reshape(new_shape) + self._conformers = start_confs = conf + keys = [(0, 0)] + + # check for discontinuity in the structure B + gnmB = GNM() + gnmB.buildKirchhoff(self._atomsB[self._idx_cg], cutoff=self._cutoff) + KB = gnmB.getKirchhoff() + rank_diffB = (len(KB) - 1 + - np.linalg.matrix_rank(KB, tol=ZERO, hermitian=True)) + if rank_diffB != 0: + raise ValueError('atoms B has disconnected parts; please check the structure') + + LOGGER.info('Generation 0 for structure B ...') + + if self._sim: + if self._t_steps[0] != 0: + LOGGER.info('Minimization, heating-up & simulation in generation 0 for structure B ...') + else: + LOGGER.info('Minimization & heating-up in generation 0 for structure B ...') + else: + LOGGER.info('Minimization in generation 0 for structure B ...') + LOGGER.timeit('_clustenm_min') + potentialB, conformerB = self._min_sim(self._atomsB.getCoords()) + if np.isnan(potentialB): + raise ValueError('Initial structure B could not be minimized. Try again and/or check your structure.') + + LOGGER.report(label='_clustenm_min') + + LOGGER.info('#' + '-' * 19 + '/*\\' + '-' * 19 + '#') + + self.setCoordsB(conformerB) + + potentials.extend([potentialB]) + sizes.extend([1]) + new_shape = [1] + for s in conformerB.shape: + new_shape.append(s) + confB = conformerB.reshape(new_shape) + start_confsB = confB + self._conformers = np.vstack((self._conformers, start_confsB)) + keys.extend([(0, 0)]) + + curr_rmsd = calcRMSD(self._conformers[-1], self._conformers[-2]) + self._traj_rmsds.append(curr_rmsd) + + for i in range(1, self._n_gens+1): + self._cycle += 1 + LOGGER.info('Generation %d ...' % i) + + confs, weights = self._generate(**kwargs) + + LOGGER.timeit('_clustenm_min_sim') + + pot_conf = [self._min_sim(conf) for conf in confs] + + LOGGER.report('Structures were sampled in %.2fs.', + label='_clustenm_min_sim') + LOGGER.info('#' + '-' * 19 + '/*\\' + '-' * 19 + '#') + + pots, confs = list(zip(*pot_conf)) + idx = np.logical_not(np.isnan(pots)) + weights = np.array(weights)[idx] + pots = np.array(pots)[idx] + confs = np.array(confs)[idx] + + if self._outlier: + idx = np.logical_not(self._outliers(pots)) + else: + idx = np.full(pots.size, True, dtype=bool) + + sizes.extend(weights[idx]) + potentials.extend(pots[idx]) + + start_confs = self._superpose_cg(confs[idx]) + + for j in range(start_confs.shape[0]): + keys.append((i, j)) + self._conformers = np.vstack((self._conformers, start_confs)) + + curr_rmsd = calcRMSD(self._conformers[-1], self._conformers[-2]) + self._traj_rmsds.append(curr_rmsd) + + diff_rmsds = self._traj_rmsds[-2] - self._traj_rmsds[-1] + + if curr_rmsd < 0.6 or diff_rmsds < 0.125: + if self._direction_mode == 2 and self._direction == 1: + self._direction = 2 + else: + LOGGER.report('Transition converged in %.2fs.', '_hybrid_overall') + LOGGER.info('\nRan {:d} generations, RMSD {:5.2f}, change in RMSD {:5.2f}'.format(self._cycle, + curr_rmsd, + diff_rmsds)) + break + + LOGGER.timeit('_hybrid_ens') + LOGGER.info('Creating an ensemble of conformers ...') + + self._build(self._conformers, keys, potentials, sizes) + LOGGER.report('Ensemble was created in %.2fs.', label='_hybrid_ens') + + self._time = LOGGER.timing(label='_hybrid_overall') + LOGGER.report('All completed in %.2fs.', label='_hybrid_overall') + + def _generate(self, **kwargs): + LOGGER.info('Sampling conformers in generation %d ...' % self._cycle) + LOGGER.timeit('_clustenm_gen') + + tmp = [self._sample(**kwargs)] + + tmp = [r for r in tmp if r is not None] + + confs_ex = np.concatenate(tmp) + + return confs_ex, [1] + + def setCoordsB(self, coords): + """Set *coords* as the ensemble reference coordinate set. *coords* + may be an array with suitable data type, shape, and dimensionality, or + an object with :meth:`getCoords` method.""" + + atoms = coords + try: + if isinstance(coords, Ensemble): + coords = copy(coords._coords) + else: + coords = coords.getCoords() + except AttributeError: + pass + finally: + if coords is None: + raise ValueError('coordinates of {0} are not set' + .format(str(atoms))) + + try: + checkCoords(coords, natoms=self._n_atoms) + except TypeError: + raise TypeError('coords must be a numpy array or an object ' + 'with `getCoords` method') + + if coords.shape == self._coords.shape: + self._coordsB = coords + self._n_atomsB = coords.shape[0] + + if isinstance(atoms, Ensemble): + self._indicesB = atoms._indices + self._atomsB = atoms._atoms + else: + raise ValueError('coordsB must have the same shape as main coords') + if __name__ == '__main__': @@ -256,15 +933,15 @@ def calcANMMC(initial, final, **kwargs): initial_pdb = parsePDB(initial_pdbn) final_pdb = parsePDB(final_pdbn) - ensemble_final, count1, count2, count3, k, accept_para = calcANMMC(initial_pdb, final_pdb, - initial_pdb_id=initial_pdb_id, - original_initial_pdb=original_initial_pdb, - original_final_pdb=original_final_pdb, - comd_cycle_number=comd_cycle_number, - devi=devi, stepcutoff=stepcutoff, - acceptance_ratio=acceptance_ratio, - anm_cut=anm_cut, N=N, - usePseudoatoms=usePseudoatoms) + ensemble_final, count1, count2, count3, k, accept_para, rmsd = calcANMMC(initial_pdb, final_pdb, + initial_pdb_id=initial_pdb_id, + original_initial_pdb=original_initial_pdb, + original_final_pdb=original_final_pdb, + comd_cycle_number=comd_cycle_number, + devi=devi, stepcutoff=stepcutoff, + acceptance_ratio=acceptance_ratio, + anm_cut=anm_cut, N=N, + usePseudoatoms=usePseudoatoms) writeDCD(final_structure_dcd_name, ensemble_final) ratios = [count2/N, count2/count1 if count1 != 0 else 0, count2, k, accept_para ] diff --git a/prody/dynamics/hybrid.py b/prody/dynamics/hybrid.py index e31010fb3..187803240 100644 --- a/prody/dynamics/hybrid.py +++ b/prody/dynamics/hybrid.py @@ -295,7 +295,8 @@ def _min_sim(self, coords): return np.nan, np.full_like(coords, np.nan) - def _targeted_sim(self, coords0, coords1, tmdk=15., d_steps=100, n_max_steps=10000, ddtol=1e-3, n_conv=5): + def _targeted_sim(self, coords0, coords1, tmdk=15., + d_steps=100, n_max_steps=10000, ddtol=1e-3, n_conv=5): try: from simtk.openmm import CustomExternalForce @@ -311,7 +312,7 @@ def _targeted_sim(self, coords0, coords1, tmdk=15., d_steps=100, n_max_steps=100 pos1 = coords1 * angstrom # pos1_ca = pos1[self._idx_cg, :] - force = CustomExternalForce('tmdk*((x-x0)^2+(y-y0)^2+(z-z0)^2)') + force = CustomExternalForce("tmdk*periodicdistance(x, y, z, x0, y0, z0)^2") force.addGlobalParameter('tmdk', 0.) force.addPerParticleParameter('x0') force.addPerParticleParameter('y0') @@ -327,7 +328,7 @@ def _targeted_sim(self, coords0, coords1, tmdk=15., d_steps=100, n_max_steps=100 pars = pos1[i, :].value_in_unit(nanometer) force.addParticle(int(atm_idx), pars) - simulation = self._prep_sim([force]) + simulation = self._prep_sim(coords0, external_forces=[force]) # automatic conversion into nanometer will be carried out. simulation.context.setPositions(coords0 * angstrom) @@ -897,8 +898,8 @@ def run(self, cutoff=15., n_modes=3, gamma=1., n_confs=50, rmsd=1.0, self._n_gens = n_gens self._platform = kwargs.pop('platform', None) self._parallel = kwargs.pop('parallel', False) - self._targeted = kwargs.pop('targeted', False) - self._tmdk = kwargs.pop('tmdk', 15.) + self._targeted = kwargs.pop('targeted', self._targeted) + self._tmdk = kwargs.pop('tmdk', self._tmdk) self._direction_mode = kwargs.get('mode', None) self._direction = 1 From 05ef0516da5735747b9e4c11921e466d5472bdc3 Mon Sep 17 00:00:00 2001 From: James Krieger Date: Thu, 25 Feb 2021 21:22:30 +0000 Subject: [PATCH 19/34] comd n_modes --- prody/dynamics/comd.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/prody/dynamics/comd.py b/prody/dynamics/comd.py index dd751f62d..5d5dc9824 100644 --- a/prody/dynamics/comd.py +++ b/prody/dynamics/comd.py @@ -49,10 +49,11 @@ def calcANMMC(initial, final, **kwargs): initial_ca = initial.select('name CA or name BB') final_ca = final.select('name CA or name BB') + n_modes = kwargs.pop('n_modes', 20) # ANM calculation based on current pdb_anm = ANM('pdb ca') pdb_anm.buildHessian(initial_ca, cutoff=anm_cut, **kwargs) - pdb_anm.calcModes(**kwargs) + pdb_anm.calcModes(n_modes=n_modes, **kwargs) # Cumulative sum vector preparation for metropolis sampling eigs = 1/sqrt(pdb_anm.getEigvals()) @@ -261,6 +262,7 @@ def _sample(self, **kwargs): LOGGER.info('\nStarting cycle with structure A') self._cg_ensA, _, _, _, _, _, rmsd = calcANMMC(cg, cgB, stepcutoff=rmsd, + n_modes=self._n_modes, **kwargs) cg_ens = self._cg_ensA @@ -268,6 +270,7 @@ def _sample(self, **kwargs): LOGGER.info('\nStarting cycle with structure B') self._cg_ensB, _, _, _, _, _, rmsd = calcANMMC(cgB, cg, stepcutoff=rmsd, + n_modes=self._n_modes, **kwargs) cg_ens = self._cg_ensB @@ -276,6 +279,7 @@ def _sample(self, **kwargs): LOGGER.info('\nStarting cycle with structure A') self._cg_ensA, _, _, _, _, _, rmsd = calcANMMC(cg, cgB, stepcutoff=rmsd, + n_modes=self._n_modes, **kwargs) cg_ens = self._cg_ensA @@ -283,6 +287,7 @@ def _sample(self, **kwargs): LOGGER.info('\nStarting cycle with structure B') self._cg_ensB, _, _, _, _, _, rmsd = calcANMMC(cgB, cg, stepcutoff=rmsd, + n_modes=self._n_modes, **kwargs) cg_ens = self._cg_ensB From a834ef9645db96144bc9cd5beaf3eaee4fc94f15 Mon Sep 17 00:00:00 2001 From: James Krieger Date: Fri, 26 Feb 2021 11:59:54 +0000 Subject: [PATCH 20/34] adaptive ANM changes --- prody/dynamics/adaptive.py | 65 ++++++++++++++++++++++++++++++++------ 1 file changed, 56 insertions(+), 9 deletions(-) diff --git a/prody/dynamics/adaptive.py b/prody/dynamics/adaptive.py index 30594a779..5d3e4afb6 100644 --- a/prody/dynamics/adaptive.py +++ b/prody/dynamics/adaptive.py @@ -18,7 +18,7 @@ from prody import LOGGER from prody.atomic import Atomic, AtomMap -from prody.utilities import getCoords, createStringIO, importLA +from prody.utilities import getCoords, createStringIO, importLA, checkCoords, copy from prody.measure.transform import calcTransformation, superpose, applyTransformation, calcRMSD from prody.measure.measure import calcDeformVector, calcDistance @@ -29,7 +29,7 @@ from .functions import calcENM from .modeset import ModeSet from .nma import NMA - +from .sampling import traverseMode from .hybrid import Hybrid __all__ = ['calcAdaptiveANM', 'ONEWAY', 'ALTERNATING', 'SERIAL', 'DEFAULT', @@ -536,7 +536,6 @@ def _sample(self, conf, **kwargs): resetFmin=self._resetFmin, **kwargs) self._resetFmin = False cg_ens = self._cg_ensA - self._direction = 2 else: LOGGER.info('\nStarting cycle with structure B') @@ -544,7 +543,6 @@ def _sample(self, conf, **kwargs): self._defvecs, self._rmsds, mask=maskB, resetFmin=self._resetFmin, **kwargs) cg_ens = self._cg_ensB - self._direction = 1 elif self._direction_mode == SERIAL: if self._direction == 1: @@ -566,11 +564,26 @@ def _sample(self, conf, **kwargs): else: raise ValueError('unknown aANM mode: %d' % self._direction_mode) - defvec = calcDeformVector(cg, cg_ens.getCoordsets()[-1]) - model = NMA() - model.setEigens(defvec.getArray().reshape((defvec.getArray().shape[0], 1))) - model_ex = self._extendModel(model, cg, tmp) - coordsets = [tmp.getCoords() + model_ex.getEigvecs()[0]] + if self._direction == 1: + defvec = calcDeformVector(cg, cg_ens.getCoordsets()[-1]) + model = NMA() + model.setEigens(defvec.getArray().reshape((defvec.getArray().shape[0], 1))) + model_ex = self._extendModel(model, cg, tmp) + def_ens = traverseMode(model_ex[0], tmp, 1, rmsd) + coordsets = [def_ens.getCoordsets()[-1]] + + if self._direction_mode == ALTERNATING: + self._direction = 2 + else: + defvec = calcDeformVector(cgB, cg_ens.getCoordsets()[-1]) + model = NMA() + model.setEigens(defvec.getArray().reshape((defvec.getArray().shape[0], 1))) + model_ex = self._extendModel(model, cgB, tmpB) + def_ens = traverseMode(model_ex[0], tmpB, 1, rmsd) + coordsets = [def_ens.getCoordsets()[-1]] + + if self._direction_mode == ALTERNATING: + self._direction = 1 if self._targeted: if self._parallel: @@ -722,3 +735,37 @@ def _generate(self, confs, **kwargs): confs_ex = np.concatenate(tmp) return confs_ex, [1] + + def setCoordsB(self, coords): + """Set *coords* as the ensemble reference coordinate set. *coords* + may be an array with suitable data type, shape, and dimensionality, or + an object with :meth:`getCoords` method.""" + + atoms = coords + try: + if isinstance(coords, Ensemble): + coords = copy(coords._coords) + else: + coords = coords.getCoords() + except AttributeError: + pass + finally: + if coords is None: + raise ValueError('coordinates of {0} are not set' + .format(str(atoms))) + + try: + checkCoords(coords, natoms=self._n_atoms) + except TypeError: + raise TypeError('coords must be a numpy array or an object ' + 'with `getCoords` method') + + if coords.shape == self._coords.shape: + self._coordsB = coords + self._n_atomsB = coords.shape[0] + + if isinstance(atoms, Ensemble): + self._indicesB = atoms._indices + self._atomsB = atoms._atoms + else: + raise ValueError('coordsB must have the same shape as main coords') From 310b931ce153e7dcd040dc74e419f6e196d95fb7 Mon Sep 17 00:00:00 2001 From: James Krieger Date: Fri, 26 Feb 2021 14:08:18 +0000 Subject: [PATCH 21/34] comd bug fix --- prody/dynamics/hybrid.py | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/prody/dynamics/hybrid.py b/prody/dynamics/hybrid.py index 187803240..a949a92be 100644 --- a/prody/dynamics/hybrid.py +++ b/prody/dynamics/hybrid.py @@ -570,11 +570,18 @@ def _superpose_cg(self, confs): def _build(self, conformers, keys, potentials, sizes): if self._direction_mode == 0: # Split alternating values and use forwards As then backwards Bs - self.addCoordset(conformers[::2]) - self.addCoordset(conformers[-1::-2]) - self.setData('size', sizes[::2] + sizes[-1::-2]) - self.setData('key', keys[::2] + keys[-1::-2]) - self.setData('potential', potentials[::2] + potentials[-1::-2]) + if self._cycle % 2: + self.addCoordset(conformers[::2]) + self.addCoordset(conformers[-2::-2]) + self.setData('size', sizes[::2] + sizes[-2::-2]) + self.setData('key', keys[::2] + keys[-2::-2]) + self.setData('potential', potentials[::2] + potentials[-2::-2]) + else: + self.addCoordset(conformers[::2]) + self.addCoordset(conformers[-1::-2]) + self.setData('size', sizes[::2] + sizes[-1::-2]) + self.setData('key', keys[::2] + keys[-1::-2]) + self.setData('potential', potentials[::2] + potentials[-1::-2]) else: self.addCoordset(conformers) self.setData('size', sizes) From 03aad7ddf4b9da6799394a38d4d5590d17e0f78f Mon Sep 17 00:00:00 2001 From: James Krieger Date: Fri, 26 Feb 2021 14:08:35 +0000 Subject: [PATCH 22/34] comd convergence parameters --- prody/dynamics/comd.py | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/prody/dynamics/comd.py b/prody/dynamics/comd.py index 5d5dc9824..5a4fdf03c 100644 --- a/prody/dynamics/comd.py +++ b/prody/dynamics/comd.py @@ -626,6 +626,14 @@ def run(self, cutoff=15., n_modes=20, gamma=1., n_confs=50, rmsd=1.0, :arg parallel: If it is True (default is False), conformer generation will be parallelized. :type parallel: bool + + :arg min_rmsd_diff: Difference between *rmsds* from previous step to current for checking + convergence. Default is 0.6 A + :type min_rmsd_diff: float + + :arg target_rmsd: Target RMSD for stopping. + Default is 0.125 A + :type target_rmsd: float ''' if self._isBuilt(): @@ -643,6 +651,9 @@ def run(self, cutoff=15., n_modes=20, gamma=1., n_confs=50, rmsd=1.0, self._targeted = kwargs.pop('targeted', self._targeted) self._tmdk = kwargs.pop('tmdk', self._tmdk) + self._target_rmsd = kwargs.pop('target_rmsd', 0.6) + self._min_rmsd_diff = kwargs.pop('min_rmsd_diff', 0.125) + self._direction_mode = kwargs.get('mode', DEFAULT) if self._direction_mode == ALTERNATING and self._n_gens % 2: raise ValueError('ALTERNATING modes needs even n_gens') @@ -794,14 +805,20 @@ def run(self, cutoff=15., n_modes=20, gamma=1., n_confs=50, rmsd=1.0, diff_rmsds = self._traj_rmsds[-2] - self._traj_rmsds[-1] - if curr_rmsd < 0.6 or diff_rmsds < 0.125: + if self._cycle == 1: + g = "generation" + else: + g = "generations" + + LOGGER.info('\nRan {:d} {:s}, RMSD {:5.2f}, change in RMSD {:5.2f}\n'.format(self._cycle, g, + curr_rmsd, + diff_rmsds)) + + if curr_rmsd < self._target_rmsd or diff_rmsds < self._min_rmsd_diff: if self._direction_mode == 2 and self._direction == 1: self._direction = 2 else: LOGGER.report('Transition converged in %.2fs.', '_hybrid_overall') - LOGGER.info('\nRan {:d} generations, RMSD {:5.2f}, change in RMSD {:5.2f}'.format(self._cycle, - curr_rmsd, - diff_rmsds)) break LOGGER.timeit('_hybrid_ens') From 11b40c884328d4d6f06c6c94f118caabaed6a0d6 Mon Sep 17 00:00:00 2001 From: James Krieger Date: Fri, 26 Feb 2021 14:09:28 +0000 Subject: [PATCH 23/34] suppressed logs --- prody/dynamics/comd.py | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/prody/dynamics/comd.py b/prody/dynamics/comd.py index 5a4fdf03c..35155592a 100644 --- a/prody/dynamics/comd.py +++ b/prody/dynamics/comd.py @@ -33,6 +33,7 @@ def calcANMMC(initial, final, **kwargs): cutoff = kwargs.pop('cutoff', 15.) anm_cut = kwargs.pop('anm_cut', cutoff) + log = kwargs.pop('log', True) N = kwargs.pop('N', 10000) usePseudoatoms = kwargs.pop('usePseudoatoms', False) @@ -113,8 +114,9 @@ def calcANMMC(initial, final, **kwargs): step_count = 0 check_step_counts = [0] - sys.stdout.write(' '*2 + 'rmsd' + ' '*2 + 'rand' + ' '*2 + 'ID' + ' '*3 + 'step' \ - + ' '*2 + 'accept_para' + ' '*5 + 'f' + '\n') + if log: + sys.stdout.write(' '*2 + 'rmsd' + ' '*2 + 'rand' + ' '*2 + 'ID' + ' '*3 + 'step' + + ' '*2 + 'accept_para' + ' '*5 + 'f' + '\n') # MC Loop for k in range(N): @@ -178,9 +180,11 @@ def calcANMMC(initial, final, **kwargs): f = 1. rmsd = calcRMSD(pdb_ca.getCoords(), initial_ca.getCoords()) - sys.stdout.write('{:6.2f}'.format(rmsd) + ' ' + '{:5.2f}'.format(rand) + \ - '{:4d}'.format(ID) + '{:7d}'.format(k) + ' '*2 + str(accepted) + ' '*2 + \ - '{:5.4f}'.format(accept_para) + ' '*2 + '{:5.4f}'.format(f) + '\n') + + if log: + sys.stdout.write('{:6.2f}'.format(rmsd) + ' ' + '{:5.2f}'.format(rand) + + '{:4d}'.format(ID) + '{:7d}'.format(k) + ' '*2 + str(accepted) + ' '*2 + + '{:5.4e}'.format(accept_para) + ' '*2 + '{:5.4f}'.format(f) + '\n') if rmsd > stepcutoff: break @@ -222,6 +226,7 @@ def __init__(self, title, **kwargs): # 200 (like NAMD website) is too weak for such small conformational changes def _sample(self, **kwargs): + log = kwargs.pop('log', False) conf, conf2 = self._conformers[-2], self._conformers[-1] @@ -251,7 +256,7 @@ def _sample(self, **kwargs): if self._direction_mode == ONEWAY: LOGGER.info('\nStarting cycle with structure A') - self._cg_ensA, _, _, _, _, _, rmsd = calcANMMC(cg, cgB, + self._cg_ensA, _, _, _, _, _, rmsd = calcANMMC(cg, cgB, log=log, stepcutoff=rmsd, n_modes=self._n_modes, **kwargs) @@ -260,7 +265,7 @@ def _sample(self, **kwargs): elif self._direction_mode == ALTERNATING: if self._direction == 1: LOGGER.info('\nStarting cycle with structure A') - self._cg_ensA, _, _, _, _, _, rmsd = calcANMMC(cg, cgB, + self._cg_ensA, _, _, _, _, _, rmsd = calcANMMC(cg, cgB, log=log, stepcutoff=rmsd, n_modes=self._n_modes, **kwargs) @@ -268,7 +273,7 @@ def _sample(self, **kwargs): else: LOGGER.info('\nStarting cycle with structure B') - self._cg_ensB, _, _, _, _, _, rmsd = calcANMMC(cgB, cg, + self._cg_ensB, _, _, _, _, _, rmsd = calcANMMC(cgB, cg, log=log, stepcutoff=rmsd, n_modes=self._n_modes, **kwargs) @@ -277,7 +282,7 @@ def _sample(self, **kwargs): elif self._direction_mode == SERIAL: if self._direction == 1: LOGGER.info('\nStarting cycle with structure A') - self._cg_ensA, _, _, _, _, _, rmsd = calcANMMC(cg, cgB, + self._cg_ensA, _, _, _, _, _, rmsd = calcANMMC(cg, cgB, log=log, stepcutoff=rmsd, n_modes=self._n_modes, **kwargs) @@ -285,7 +290,7 @@ def _sample(self, **kwargs): else: LOGGER.info('\nStarting cycle with structure B') - self._cg_ensB, _, _, _, _, _, rmsd = calcANMMC(cgB, cg, + self._cg_ensB, _, _, _, _, _, rmsd = calcANMMC(cgB, cg, log=log, stepcutoff=rmsd, n_modes=self._n_modes, **kwargs) From df839061e8293c5aa8ede97668ceee9ebfa76544 Mon Sep 17 00:00:00 2001 From: James Krieger Date: Fri, 26 Feb 2021 14:04:06 +0000 Subject: [PATCH 24/34] save CoMD ensemble --- prody/dynamics/__init__.py | 1 + prody/ensemble/functions.py | 5 ++++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/prody/dynamics/__init__.py b/prody/dynamics/__init__.py index 5058bf458..3242fc430 100644 --- a/prody/dynamics/__init__.py +++ b/prody/dynamics/__init__.py @@ -365,3 +365,4 @@ functions.Hybrid = Hybrid functions.ClustENM = ClustENM functions.AdaptiveHybrid = AdaptiveHybrid +functions.CoMD = CoMD diff --git a/prody/ensemble/functions.py b/prody/ensemble/functions.py index 282c45abf..8b113a7bb 100644 --- a/prody/ensemble/functions.py +++ b/prody/ensemble/functions.py @@ -40,7 +40,7 @@ def saveEnsemble(ensemble, filename=None, **kwargs): if isinstance(ensemble, PDBEnsemble): attr_list.append('_labels') attr_list.append('_trans') - elif isinstance(ensemble, Hybrid): + elif isinstance(ensemble, (Hybrid, ClustENM)): attr_list.extend(['_ph', '_cutoff', '_gamma', '_n_modes', '_n_confs', '_rmsd', '_n_gens', '_maxclust', '_threshold', '_sol', '_padding', '_ionicStrength', '_force_field', '_tolerance', @@ -50,6 +50,9 @@ def saveEnsemble(ensemble, filename=None, **kwargs): if isinstance(ensemble, AdaptiveHybrid): attr_list.extend(['_atomsB', '_defvecs', '_resetFmin', '_rmsds']) atomsB = dict_['_atomsB'] + if isinstance(ensemble, CoMD): + attr_list.extend(['_atomsB', '_target_rmsd', '_min_rmsd_diff', '_traj_rmsds']) + atomsB = dict_['_atomsB'] if filename is None: filename = ensemble.getTitle().replace(' ', '_') From 5b027774b325c488e3dcd1d38e3465d464aa143b Mon Sep 17 00:00:00 2001 From: James Krieger Date: Fri, 26 Feb 2021 14:14:32 +0000 Subject: [PATCH 25/34] added getCoordsB --- prody/dynamics/adaptive.py | 9 +++++++++ prody/dynamics/comd.py | 9 +++++++++ 2 files changed, 18 insertions(+) diff --git a/prody/dynamics/adaptive.py b/prody/dynamics/adaptive.py index 5d3e4afb6..1fb44f7bc 100644 --- a/prody/dynamics/adaptive.py +++ b/prody/dynamics/adaptive.py @@ -769,3 +769,12 @@ def setCoordsB(self, coords): self._atomsB = atoms._atoms else: raise ValueError('coordsB must have the same shape as main coords') + + def getCoordsB(self, selected=True): + """Returns a copy of reference coordinates for selected atoms.""" + + if self._coordsB is None: + return None + if self._indicesB is None or not selected: + return self._coordsB.copy() + return self._coordsB[self._indicesB].copy() diff --git a/prody/dynamics/comd.py b/prody/dynamics/comd.py index 35155592a..647cfeb29 100644 --- a/prody/dynamics/comd.py +++ b/prody/dynamics/comd.py @@ -881,6 +881,15 @@ def setCoordsB(self, coords): else: raise ValueError('coordsB must have the same shape as main coords') + def getCoordsB(self, selected=True): + """Returns a copy of reference coordinates for selected atoms.""" + + if self._coordsB is None: + return None + if self._indicesB is None or not selected: + return self._coordsB.copy() + return self._coordsB[self._indicesB].copy() + if __name__ == '__main__': From eed201b68aa43e62a4f4bf1a0836ad9f07d08e56 Mon Sep 17 00:00:00 2001 From: James Krieger Date: Fri, 26 Feb 2021 17:17:24 +0000 Subject: [PATCH 26/34] error fix --- prody/dynamics/comd.py | 2 +- prody/dynamics/hybrid.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/prody/dynamics/comd.py b/prody/dynamics/comd.py index 647cfeb29..733e8f258 100644 --- a/prody/dynamics/comd.py +++ b/prody/dynamics/comd.py @@ -642,7 +642,7 @@ def run(self, cutoff=15., n_modes=20, gamma=1., n_confs=50, rmsd=1.0, ''' if self._isBuilt(): - raise ValueError('ClustENM ensemble has been built; please start a new instance') + raise ValueError('CoMD ensemble has been built; please start a new instance') # set up parameters self._cutoff = cutoff diff --git a/prody/dynamics/hybrid.py b/prody/dynamics/hybrid.py index a949a92be..cc40e0e5e 100644 --- a/prody/dynamics/hybrid.py +++ b/prody/dynamics/hybrid.py @@ -894,7 +894,7 @@ def run(self, cutoff=15., n_modes=3, gamma=1., n_confs=50, rmsd=1.0, ''' if self._isBuilt(): - raise ValueError('ClustENM ensemble has been built; please start a new instance') + raise ValueError('ClustENM-like ensemble has been built; please start a new instance') # set up parameters self._cutoff = cutoff From 90dffc6f95cc55a85fb8b8e47c2e039d95411c7f Mon Sep 17 00:00:00 2001 From: James Krieger Date: Fri, 26 Feb 2021 17:35:11 +0000 Subject: [PATCH 27/34] add calcConvergenceRMSDs --- prody/dynamics/adaptive.py | 33 ++++++++++++++++++++++++++++++++- prody/dynamics/comd.py | 35 +++++++++++++++++++++++++++++++++-- 2 files changed, 65 insertions(+), 3 deletions(-) diff --git a/prody/dynamics/adaptive.py b/prody/dynamics/adaptive.py index 1fb44f7bc..784a706a9 100644 --- a/prody/dynamics/adaptive.py +++ b/prody/dynamics/adaptive.py @@ -14,13 +14,14 @@ import time from numbers import Integral, Number +from decimal import Decimal, ROUND_HALF_UP import numpy as np from prody import LOGGER from prody.atomic import Atomic, AtomMap from prody.utilities import getCoords, createStringIO, importLA, checkCoords, copy -from prody.measure.transform import calcTransformation, superpose, applyTransformation, calcRMSD +from prody.measure.transform import calcTransformation, superpose, applyTransformation, calcRMSD, getRMSD from prody.measure.measure import calcDeformVector, calcDistance from prody.ensemble.ensemble import Ensemble @@ -717,6 +718,36 @@ def getRMSDsB(self): return calcRMSD(self._atomsB, self._confs[:, indices], weights) + def getConvergenceRMSDs(self): + if self._confs is None or self._coords is None: + return None + + indices = self._indices + if indices is None: + indices = np.arange(self._confs.shape[1]) + + weights = self._weights[indices] if self._weights is not None else None + + n_confs = self.numConfs() + n_confsA = int(Decimal(n_confs/2).to_integral(rounding=ROUND_HALF_UP)) + + confsA = self._confs[:n_confsA] + if n_confs % 2: + confsB = self._confs[n_confsA:] + else: + confsB = self._confs[n_confsA:] + + RMSDs = np.zeros((n_confs-9)) + n = 0 + for i in range(n_confsA): + for j in range(2): + RMSDs[n] = getRMSD(confsA[i+j], confsB[n_confsA-(i+1)]) + n += 1 + if i == n_confsA - 1: + break + + return RMSDs + def _generate(self, confs, **kwargs): LOGGER.info('Sampling conformers in generation %d ...' % self._cycle) diff --git a/prody/dynamics/comd.py b/prody/dynamics/comd.py index 733e8f258..38b3d1ace 100644 --- a/prody/dynamics/comd.py +++ b/prody/dynamics/comd.py @@ -1,6 +1,6 @@ from prody.proteins.pdbfile import parsePDB, writePDBStream, parsePDBStream -from prody.measure.transform import calcRMSD, calcTransformation, superpose, applyTransformation -from prody.measure.measure import buildDistMatrix, calcDeformVector, calcDistance +from prody.measure.transform import calcRMSD, calcTransformation, getRMSD, applyTransformation +from prody.measure.measure import buildDistMatrix, calcDeformVector from prody.ensemble.ensemble import Ensemble from prody.utilities import createStringIO, importLA, checkCoords, copy @@ -14,6 +14,7 @@ from random import random import os.path import sys +from decimal import Decimal, ROUND_HALF_UP from .adaptive import ONEWAY, ALTERNATING, SERIAL, DEFAULT from .hybrid import Hybrid @@ -537,6 +538,36 @@ def getRMSDsB(self): return calcRMSD(self._atomsB, self._confs[:, indices], weights) + def getConvergenceRMSDs(self): + if self._confs is None or self._coords is None: + return None + + indices = self._indices + if indices is None: + indices = np.arange(self._confs.shape[1]) + + weights = self._weights[indices] if self._weights is not None else None + + n_confs = self.numConfs() + n_confsA = int(Decimal(n_confs/2).to_integral(rounding=ROUND_HALF_UP)) + + confsA = self._confs[:n_confsA] + if n_confs % 2: + confsB = self._confs[n_confsA:] + else: + confsB = self._confs[n_confsA:] + + RMSDs = np.zeros((n_confs-9)) + n = 0 + for i in range(n_confsA): + for j in range(2): + RMSDs[n] = getRMSD(confsA[i+j], confsB[n_confsA-(i+1)]) + n += 1 + if i == n_confsA - 1: + break + + return RMSDs + def run(self, cutoff=15., n_modes=20, gamma=1., n_confs=50, rmsd=1.0, n_gens=5, solvent='imp', sim=False, force_field=None, temp=303.15, t_steps_i=1000, t_steps_g=7500, From 87101ce526e1ae5bfb999d41a5b540ed2ac78228 Mon Sep 17 00:00:00 2001 From: James Krieger Date: Fri, 26 Feb 2021 17:39:19 +0000 Subject: [PATCH 28/34] init coordsB and indicesB --- prody/dynamics/adaptive.py | 3 +++ prody/dynamics/comd.py | 13 ++++++++----- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/prody/dynamics/adaptive.py b/prody/dynamics/adaptive.py index 784a706a9..4fe3f49e8 100644 --- a/prody/dynamics/adaptive.py +++ b/prody/dynamics/adaptive.py @@ -496,6 +496,9 @@ class AdaptiveHybrid(Hybrid): def __init__(self, title, **kwargs): super().__init__(title=title) self._atomsB = None + self._coordsB = None + self._indicesB = None + self._defvecs = [] self._rmsds = [] self._cg_ensA = Ensemble(title=title) diff --git a/prody/dynamics/comd.py b/prody/dynamics/comd.py index 38b3d1ace..8def54d6a 100644 --- a/prody/dynamics/comd.py +++ b/prody/dynamics/comd.py @@ -215,6 +215,9 @@ class CoMD(Hybrid): def __init__(self, title, **kwargs): super().__init__(title=title) self._atomsB = None + self._coordsB = None + self._indicesB = None + self._defvecs = [] self._rmsds = [] self._traj_rmsds = [] @@ -522,15 +525,15 @@ def getAtomsB(self, selected=True): 'Returns atoms for structure B.' if self._atomsB is None: return None - if self._indices is None or not selected: + if self._indicesB is None or not selected: return self._atomsB - return self._atomsB[self._indices] + return self._atomsB[self._indicesB] def getRMSDsB(self): if self._confs is None or self._coords is None: return None - indices = self._indices + indices = self._indicesB if indices is None: indices = np.arange(self._confs.shape[1]) @@ -917,9 +920,9 @@ def getCoordsB(self, selected=True): if self._coordsB is None: return None - if self._indicesB is None or not selected: + if self._indices is None or not selected: return self._coordsB.copy() - return self._coordsB[self._indicesB].copy() + return self._coordsB[self._indices].copy() if __name__ == '__main__': From 7afcf7e127e7150c193e6d26ecf50f287bbfd73e Mon Sep 17 00:00:00 2001 From: James Krieger Date: Mon, 1 Mar 2021 19:53:36 +0000 Subject: [PATCH 29/34] ConvergenceRMSDs bug fix --- prody/dynamics/adaptive.py | 10 ++++------ prody/dynamics/comd.py | 10 ++++------ 2 files changed, 8 insertions(+), 12 deletions(-) diff --git a/prody/dynamics/adaptive.py b/prody/dynamics/adaptive.py index 4fe3f49e8..b17f6d55c 100644 --- a/prody/dynamics/adaptive.py +++ b/prody/dynamics/adaptive.py @@ -740,16 +740,14 @@ def getConvergenceRMSDs(self): else: confsB = self._confs[n_confsA:] - RMSDs = np.zeros((n_confs-9)) - n = 0 + RMSDs = [] for i in range(n_confsA): for j in range(2): - RMSDs[n] = getRMSD(confsA[i+j], confsB[n_confsA-(i+1)]) - n += 1 - if i == n_confsA - 1: + if i + j > n_confsA - 1: break + RMSDs.append(getRMSD(confsA[i+j], confsB[n_confsA-(i+1)], weights=weights)) - return RMSDs + return np.array(RMSDs) def _generate(self, confs, **kwargs): diff --git a/prody/dynamics/comd.py b/prody/dynamics/comd.py index 8def54d6a..024207ed1 100644 --- a/prody/dynamics/comd.py +++ b/prody/dynamics/comd.py @@ -560,16 +560,14 @@ def getConvergenceRMSDs(self): else: confsB = self._confs[n_confsA:] - RMSDs = np.zeros((n_confs-9)) - n = 0 + RMSDs = [] for i in range(n_confsA): for j in range(2): - RMSDs[n] = getRMSD(confsA[i+j], confsB[n_confsA-(i+1)]) - n += 1 - if i == n_confsA - 1: + if i + j > n_confsA - 1: break + RMSDs.append(getRMSD(confsA[i+j], confsB[n_confsA-(i+1)], weights=weights)) - return RMSDs + return np.array(RMSDs) def run(self, cutoff=15., n_modes=20, gamma=1., n_confs=50, rmsd=1.0, n_gens=5, solvent='imp', sim=False, force_field=None, temp=303.15, From 0ad0f0ac8b20823f2e4dbbbfb5e3a9555098471f Mon Sep 17 00:00:00 2001 From: James Krieger Date: Tue, 2 Mar 2021 09:02:52 +0000 Subject: [PATCH 30/34] comd comment fixes --- prody/dynamics/comd.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/prody/dynamics/comd.py b/prody/dynamics/comd.py index 024207ed1..d5e53db51 100644 --- a/prody/dynamics/comd.py +++ b/prody/dynamics/comd.py @@ -97,10 +97,10 @@ def calcANMMC(initial, final, **kwargs): else: accept_para = 0.1 - # MC parameter 1 is the acceptance ratio, which should converge on + # MC parameter 1 is the acceptance ratio, f, which should converge on # the selected value with a tolerance of 0.05 either side - # and accept_para is adjusted to help bring it within these limits. - # This also happens every 5 steps during the run (lines 150 to 156). + # and accept_para, gamma, is adjusted to help bring it within these limits. + # This also happens every 5 steps during the run. if original_initial_pdb != original_final_pdb: # difference from the target structure is defined as the energy and the minimum is zero. @@ -164,7 +164,7 @@ def calcANMMC(initial, final, **kwargs): if (mod(k,5)==0 and not(k==0)): # Update of the accept_para to keep the MC para reasonable - # See comment lines 86 to 89. + # See comment lines above. if f > acceptance_ratio + 0.05: accept_para /= 1.5 elif f < acceptance_ratio - 0.05: From 05c15812636b3650b33d4f08d9bdc8fb1ad6f1bc Mon Sep 17 00:00:00 2001 From: James Krieger Date: Tue, 2 Mar 2021 13:14:17 +0000 Subject: [PATCH 31/34] fixed import error --- prody/dynamics/hybrid.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/prody/dynamics/hybrid.py b/prody/dynamics/hybrid.py index cc40e0e5e..9ba323ea8 100644 --- a/prody/dynamics/hybrid.py +++ b/prody/dynamics/hybrid.py @@ -166,7 +166,7 @@ def _fix(self, atoms): from pdbfixer import PDBFixer from simtk.openmm.app import PDBFile except ImportError: - raise ImportError('Please install PDBFixer and OpenMM in order to use ClustENM.') + raise ImportError('Please install PDBFixer and OpenMM in order to use hybrid methods.') stream = createStringIO() title = atoms.getTitle() @@ -203,7 +203,7 @@ def _prep_sim(self, coords, external_forces=[]): from simtk.unit import angstrom, nanometers, picosecond, \ kelvin, Quantity, molar except ImportError: - raise ImportError('Please install PDBFixer and OpenMM in order to use ClustENM.') + raise ImportError('Please install PDBFixer and OpenMM in order to use hybrid methods.') positions = Quantity([Vec3(*xyz) for xyz in coords], angstrom) modeller = Modeller(self._topology, positions) @@ -259,7 +259,7 @@ def _min_sim(self, coords): from simtk.openmm.app import StateDataReporter from simtk.unit import kelvin, angstrom, kilojoule_per_mole, MOLAR_GAS_CONSTANT_R except ImportError: - raise ImportError('Please install PDBFixer and OpenMM in order to use Hybrid.') + raise ImportError('Please install PDBFixer and OpenMM in order to use hybrid methods.') simulation = self._prep_sim(coords=coords) @@ -303,7 +303,7 @@ def _targeted_sim(self, coords0, coords1, tmdk=15., from simtk.openmm.app import StateDataReporter from simtk.unit import nanometer, kelvin, angstrom, kilojoule_per_mole, MOLAR_GAS_CONSTANT_R except ImportError: - raise ImportError('Please install PDBFixer and OpenMM in order to use Hybrid.') + raise ImportError('Please install PDBFixer and OpenMM in order to use hybrid methods.') tmdk *= kilojoule_per_mole/angstrom**2 tmdk = tmdk.value_in_unit(kilojoule_per_mole/nanometer**2) From d164095438c8303ac9596ebad387a05515361934 Mon Sep 17 00:00:00 2001 From: James Krieger Date: Tue, 2 Mar 2021 18:06:19 +0000 Subject: [PATCH 32/34] comment acceptance_ratio is f --- prody/dynamics/comd.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/prody/dynamics/comd.py b/prody/dynamics/comd.py index d5e53db51..f8384996a 100644 --- a/prody/dynamics/comd.py +++ b/prody/dynamics/comd.py @@ -30,7 +30,7 @@ def calcANMMC(initial, final, **kwargs): """ devi = kwargs.pop('devi', 0.5) stepcutoff = kwargs.pop('stepcutoff', 2.) - acceptance_ratio = kwargs.pop('acceptance_ratio', 0.9) + acceptance_ratio = kwargs.pop('acceptance_ratio', 0.9) # f in the paper cutoff = kwargs.pop('cutoff', 15.) anm_cut = kwargs.pop('anm_cut', cutoff) From 9bd17823f27d5201ce029673beea1e0bb392716e Mon Sep 17 00:00:00 2001 From: James Krieger Date: Wed, 3 Mar 2021 14:11:31 +0000 Subject: [PATCH 33/34] added comd to saveEnsemble --- prody/ensemble/functions.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/prody/ensemble/functions.py b/prody/ensemble/functions.py index 8b113a7bb..850d42e81 100644 --- a/prody/ensemble/functions.py +++ b/prody/ensemble/functions.py @@ -143,6 +143,8 @@ def loadEnsemble(filename, **kwargs): ensemble = AdaptiveHybrid(title) elif type_ == 'Hybrid': ensemble = Hybrid(title) + elif type_ == 'CoMD': + ensemble = CoMD(title) else: ensemble = Ensemble(title) From 9316df332cf137da7b2efc749345036d0bf20cc5 Mon Sep 17 00:00:00 2001 From: James Krieger Date: Wed, 14 Aug 2024 16:28:25 +0100 Subject: [PATCH 34/34] fix bad merge --- prody/ensemble/functions.py | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/prody/ensemble/functions.py b/prody/ensemble/functions.py index 7d3fa82cd..4b38f4bcb 100644 --- a/prody/ensemble/functions.py +++ b/prody/ensemble/functions.py @@ -183,22 +183,10 @@ def loadEnsemble(filename, **kwargs): '_rmsd', '_n_gens', '_maxclust', '_threshold', '_sol', '_sim', '_temp', '_t_steps', '_outlier', '_mzscore', '_v1', '_parallel', '_idx_ca', '_n_ca', '_cycle', '_time', '_targeted', -<<<<<<< HEAD - '_tmdk', '_topology', '_positions'] - - for attr in attrs: - if attr in attr_dict.files: - setattr(ensemble, attr, attr_dict[attr]) - - if type_ == 'AdaptiveHybrid': - attrs = ['_atomsB', '_defvecs', '_resetFmin', '_rmsds', '_cc'] -======= '_tmdk', '_cc'] if have_openmm: attrs.extend(['_topology', '_position']) ->>>>>>> 34fa7bec368b27d09ade3c7b4f9b3ad428f4e1a6 - for attr in attrs: if attr in attr_dict.files: setattr(ensemble, attr, attr_dict[attr])