Skip to content

Commit

Permalink
Store initial and final topologies for all phases -- small molecule p…
Browse files Browse the repository at this point in the history
…ipeline (#1210)

* Initial and final topologies serialized per phase.

* Using properties instead of private attrs.

* Fix test

* Remove uneeded code/attributes

* bump ci

* Store phase topologies separately

* Fix tests. vacuum topologies expected.

* Better docstring.

---------

Co-authored-by: Mike Henry <[email protected]>
  • Loading branch information
ijpulidos and mikemhenry authored Jul 31, 2023
1 parent 4834ff7 commit b362e4b
Show file tree
Hide file tree
Showing 2 changed files with 118 additions and 54 deletions.
72 changes: 64 additions & 8 deletions perses/app/relative_setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -461,6 +461,9 @@ def __init__(self,
else:
_logger.info("Skipping counterion")

# Store phase topology and positions in PDB file
self._store_phase_topologies(phase="complex")


if 'solvent' in phases:
_logger.info(f"Detected solvent...")
Expand Down Expand Up @@ -521,6 +524,9 @@ def __init__(self,
else:
_logger.info("Skipping counterion")

# Store phase topology and positions in PDB file
self._store_phase_topologies(phase="solvent")

if 'vacuum' in phases:
_logger.info(f"Detected solvent...")
# need to change nonbonded cutoff and remove barostat for vacuum leg
Expand Down Expand Up @@ -586,6 +592,8 @@ def __init__(self,
self._vacuum_forward_neglected_angles = self._geometry_engine.forward_neglected_angle_terms
self._vacuum_reverse_neglected_angles = self._geometry_engine.reverse_neglected_angle_terms
self._vacuum_geometry_engine = copy.deepcopy(self._geometry_engine)
# Store phase topology and positions in PDB file
self._store_phase_topologies(phase="vacuum")

def _setup_complex_phase(self):
"""
Expand Down Expand Up @@ -847,7 +855,6 @@ def _solvate_system(self, topology, positions, model='tip3p',phase='complex', bo
The parameterized system, containing a barostat if one was specified.
"""
# DEBUG: Write PDB file being fed into Modeller to check why MOL isn't being matched
from simtk.openmm.app import PDBFile
modeller = app.Modeller(topology, positions)
# retaining protein protonation from input files
#hs = [atom for atom in modeller.topology.atoms() if atom.element.symbol in ['H'] and atom.residue.name not in ['MOL','OLD','NEW']]
Expand Down Expand Up @@ -881,13 +888,6 @@ def _solvate_system(self, topology, positions, model='tip3p',phase='complex', bo
solvated_system = self._system_generator.create_system(solvated_topology)
_logger.info(f"\tSystem parameterized")

if self._trajectory_directory is not None and self._trajectory_prefix is not None:
pdb_filename = AnyPath(f"{self._trajectory_directory}/{self._trajectory_prefix}-{phase}.pdb")
with open(pdb_filename, 'w') as outfile:
PDBFile.writeFile(solvated_topology, solvated_positions, outfile)
else:
_logger.info('Both trajectory_directory and trajectory_prefix need to be provided to save .pdb')

return solvated_topology, solvated_positions, solvated_system

def _handle_charge_changes(self, topology_proposal, new_positions):
Expand Down Expand Up @@ -922,6 +922,62 @@ def _handle_charge_changes(self, topology_proposal, new_positions):
# modify the topology proposal
modify_atom_classes(new_water_indices_to_ionize, topology_proposal)

def _store_phase_topologies(self, phase: str):
"""
Stores topologies and positions in PDB file for the given the phase, for both the initial (old) and final (new)
states. Stores both solvent and solute whenever possible (only "solute" in vacuum).
Generates two PDB files, one for each state (old/new).
Notes
-----
- The topologies and positions are stored in the specified directory using the global trajectory prefix.
- The directory will be created if it doesn't already exist.
- The topologies and positions are stored as PDB files in the "models" subdirectory of the trajectory directory.
- Both the trajectory directory and trajectory prefix need to be provided in order to store the topologies.
- If any of the directories or files already exist, they will get overwritten.
Usage
-----
To store the topologies and positions, make sure to set the `trajectory_directory` and `trajectory_prefix`
attributes before calling this method.
"""
from openmm.app import PDBFile
if self._trajectory_directory is not None and self._trajectory_prefix is not None:
if phase == "complex":
topology_proposal = self.complex_topology_proposal
old_positions = self.complex_old_positions
new_positions = self.complex_new_positions
elif phase == "solvent":
topology_proposal = self.solvent_topology_proposal
old_positions = self.solvent_old_positions
new_positions = self.solvent_new_positions
elif phase == "vacuum":
topology_proposal = self.vacuum_topology_proposal
old_positions = self.vacuum_old_positions
new_positions = self.vacuum_new_positions

topologies_to_store = {
f"{phase}_old": {"topology": topology_proposal.old_topology,
"positions": old_positions},
f"{phase}_new": {"topology": topology_proposal.new_topology,
"positions": new_positions},
}
# Store in "models" subdir -- create if doesn't exist
models_path = f"{self._trajectory_directory}/models"
if not os.path.exists(models_path):
os.makedirs(models_path)
for phase_key, phase_data in topologies_to_store.items():
pdb_filename = AnyPath(f"{models_path}/{self._trajectory_prefix}_{phase_key}.pdb")
with open(pdb_filename, 'w') as outfile:
PDBFile.writeFile(phase_data["topology"], phase_data["positions"], outfile)
else:
_logger.warning(
'Not storing topologies. Both trajectory_directory and trajectory_prefix arguments need to'
' be provided to store topology .pdb files.')



@property
def complex_topology_proposal(self):
return self._complex_topology_proposal
Expand Down
100 changes: 54 additions & 46 deletions perses/tests/test_relative_setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
from perses.app import setup_relative_calculation
import mdtraj as md
from openmmtools import states, alchemy, testsystems, cache
from unittest import skipIf
import pytest

from perses.tests.utils import enter_temp_directory
Expand All @@ -19,7 +18,6 @@
'lambda_electrostatics' : 'lambda',
}


def generate_example_waterbox_states(temperature=300.0*unit.kelvin, pressure=1.0*unit.atmosphere):
"""
This is a convenience function to generate a CompoundThermodynamicState and SamplerState to use in other tests.
Expand Down Expand Up @@ -260,47 +258,6 @@ def warn(*args, **kwargs):
found_tla = True
assert found_tla == True, 'Spectator TLA not in old topology'


def test_host_guest_deterministic_geometries():
"""
execute the `RelativeFEPSetup` with geometries specified for a host-guest relative free energy pair
"""
from perses.app.relative_setup import RelativeFEPSetup

# Setup directory
ligand_sdf = resource_filename("perses", "data/given-geometries/ligands.sdf")
host_pdb = resource_filename("perses", "data/given-geometries/receptor.pdb")

setup = RelativeFEPSetup(
ligand_input = ligand_sdf,
old_ligand_index=0,
new_ligand_index=1,
forcefield_files = ['amber/ff14SB.xml','amber/tip3p_standard.xml','amber/tip3p_HFE_multivalent.xml'],
phases = ['complex', 'solvent', 'vacuum'],
protein_pdb_filename=host_pdb,
receptor_mol2_filename=None,
pressure=1.0 * unit.atmosphere,
temperature=300.0 * unit.kelvin,
solvent_padding=9.0 * unit.angstroms,
ionic_strength=0.15 * unit.molar,
hmass=3*unit.amus,
neglect_angles=False,
map_strength='default',
atom_expr=None,
bond_expr=None,
anneal_14s=False,
small_molecule_forcefield='gaff-2.11',
small_molecule_parameters_cache=None,
trajectory_directory=None,
trajectory_prefix=None,
spectator_filenames=None,
nonbonded_method = 'PME',
complex_box_dimensions=None,
solvent_box_dimensions=None,
remove_constraints=False,
use_given_geometries = True
)

def test_relative_setup_charge_change():
"""
execute `RelativeFEPSetup` in solvent/complex phase on a charge change and assert that the modified new system and old system charge difference is zero.
Expand Down Expand Up @@ -357,7 +314,6 @@ def test_relative_setup_solvent_padding():
Check that the user inputted solvent_padding argument to `RelativeFEPSetup` actually changes the padding for the solvent phase
"""
from perses.app.relative_setup import RelativeFEPSetup
import numpy as np

input_solvent_padding = 1.7 * unit.nanometers
smiles_filename = resource_filename("perses", os.path.join("data", "test.smi"))
Expand Down Expand Up @@ -409,6 +365,58 @@ def test_relative_setup_list_ligand_input():
assert ligand_input_size == expected_input_size, f"There should be {expected_input_size} ligand input files, " \
f"receiving {ligand_input_size} files."

def test_relative_setup_topologies_storage(tmp_path):
"""
Check that topologies get stored
"""
from perses.app.relative_setup import RelativeFEPSetup

# if __name__=="__main__":
# test_run_cdk2_iterations_repex()
# Setup directory
ligand_sdf = resource_filename("perses", "data/given-geometries/ligands.sdf")
host_pdb = resource_filename("perses", "data/given-geometries/receptor.pdb")

fe_setup = RelativeFEPSetup(
ligand_input=ligand_sdf,
old_ligand_index=0,
new_ligand_index=1,
forcefield_files=['amber/ff14SB.xml', 'amber/tip3p_standard.xml', 'amber/tip3p_HFE_multivalent.xml'],
phases=['complex', 'solvent', 'vacuum'],
protein_pdb_filename=host_pdb,
receptor_mol2_filename=None,
pressure=1.0 * unit.atmosphere,
temperature=300.0 * unit.kelvin,
solvent_padding=9.0 * unit.angstroms,
ionic_strength=0.15 * unit.molar,
hmass=3 * unit.amus,
neglect_angles=False,
map_strength='default',
atom_expr=None,
bond_expr=None,
anneal_14s=False,
small_molecule_forcefield='openff-2.1.0',
small_molecule_parameters_cache=None,
trajectory_directory=tmp_path,
trajectory_prefix="host-guest",
spectator_filenames=None,
nonbonded_method='PME',
complex_box_dimensions=None,
solvent_box_dimensions=None,
remove_constraints=False,
use_given_geometries=True
)

expected_files = [
'host-guest_vacuum_old.pdb',
'host-guest_vacuum_new.pdb',
'host-guest_solvent_old.pdb',
'host-guest_solvent_new.pdb',
'host-guest_complex_old.pdb',
'host-guest_complex_new.pdb',
]
for file in expected_files:
pdb_filename = f"{tmp_path}/models/{file}"
assert os.path.exists(pdb_filename)

# Assert that no extra files are created
files_in_directory = os.listdir(f"{tmp_path}/models")
assert len(files_in_directory) == len(expected_files)

0 comments on commit b362e4b

Please sign in to comment.