From b362e4be9a602e94dd79cb824709dc4584a3a147 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Iv=C3=A1n=20Pulido?= <2949729+ijpulidos@users.noreply.github.com> Date: Mon, 31 Jul 2023 18:05:50 -0400 Subject: [PATCH] Store initial and final topologies for all phases -- small molecule pipeline (#1210) * Initial and final topologies serialized per phase. * Using properties instead of private attrs. * Fix test * Remove uneeded code/attributes * bump ci * Store phase topologies separately * Fix tests. vacuum topologies expected. * Better docstring. --------- Co-authored-by: Mike Henry <11765982+mikemhenry@users.noreply.github.com> --- perses/app/relative_setup.py | 72 +++++++++++++++++--- perses/tests/test_relative_setup.py | 100 +++++++++++++++------------- 2 files changed, 118 insertions(+), 54 deletions(-) diff --git a/perses/app/relative_setup.py b/perses/app/relative_setup.py index 4f8a380ac..e5bba621a 100644 --- a/perses/app/relative_setup.py +++ b/perses/app/relative_setup.py @@ -461,6 +461,9 @@ def __init__(self, else: _logger.info("Skipping counterion") + # Store phase topology and positions in PDB file + self._store_phase_topologies(phase="complex") + if 'solvent' in phases: _logger.info(f"Detected solvent...") @@ -521,6 +524,9 @@ def __init__(self, else: _logger.info("Skipping counterion") + # Store phase topology and positions in PDB file + self._store_phase_topologies(phase="solvent") + if 'vacuum' in phases: _logger.info(f"Detected solvent...") # need to change nonbonded cutoff and remove barostat for vacuum leg @@ -586,6 +592,8 @@ def __init__(self, self._vacuum_forward_neglected_angles = self._geometry_engine.forward_neglected_angle_terms self._vacuum_reverse_neglected_angles = self._geometry_engine.reverse_neglected_angle_terms self._vacuum_geometry_engine = copy.deepcopy(self._geometry_engine) + # Store phase topology and positions in PDB file + self._store_phase_topologies(phase="vacuum") def _setup_complex_phase(self): """ @@ -847,7 +855,6 @@ def _solvate_system(self, topology, positions, model='tip3p',phase='complex', bo The parameterized system, containing a barostat if one was specified. """ # DEBUG: Write PDB file being fed into Modeller to check why MOL isn't being matched - from simtk.openmm.app import PDBFile modeller = app.Modeller(topology, positions) # retaining protein protonation from input files #hs = [atom for atom in modeller.topology.atoms() if atom.element.symbol in ['H'] and atom.residue.name not in ['MOL','OLD','NEW']] @@ -881,13 +888,6 @@ def _solvate_system(self, topology, positions, model='tip3p',phase='complex', bo solvated_system = self._system_generator.create_system(solvated_topology) _logger.info(f"\tSystem parameterized") - if self._trajectory_directory is not None and self._trajectory_prefix is not None: - pdb_filename = AnyPath(f"{self._trajectory_directory}/{self._trajectory_prefix}-{phase}.pdb") - with open(pdb_filename, 'w') as outfile: - PDBFile.writeFile(solvated_topology, solvated_positions, outfile) - else: - _logger.info('Both trajectory_directory and trajectory_prefix need to be provided to save .pdb') - return solvated_topology, solvated_positions, solvated_system def _handle_charge_changes(self, topology_proposal, new_positions): @@ -922,6 +922,62 @@ def _handle_charge_changes(self, topology_proposal, new_positions): # modify the topology proposal modify_atom_classes(new_water_indices_to_ionize, topology_proposal) + def _store_phase_topologies(self, phase: str): + """ + Stores topologies and positions in PDB file for the given the phase, for both the initial (old) and final (new) + states. Stores both solvent and solute whenever possible (only "solute" in vacuum). + + Generates two PDB files, one for each state (old/new). + + Notes + ----- + - The topologies and positions are stored in the specified directory using the global trajectory prefix. + - The directory will be created if it doesn't already exist. + - The topologies and positions are stored as PDB files in the "models" subdirectory of the trajectory directory. + - Both the trajectory directory and trajectory prefix need to be provided in order to store the topologies. + - If any of the directories or files already exist, they will get overwritten. + + Usage + ----- + To store the topologies and positions, make sure to set the `trajectory_directory` and `trajectory_prefix` + attributes before calling this method. + """ + from openmm.app import PDBFile + if self._trajectory_directory is not None and self._trajectory_prefix is not None: + if phase == "complex": + topology_proposal = self.complex_topology_proposal + old_positions = self.complex_old_positions + new_positions = self.complex_new_positions + elif phase == "solvent": + topology_proposal = self.solvent_topology_proposal + old_positions = self.solvent_old_positions + new_positions = self.solvent_new_positions + elif phase == "vacuum": + topology_proposal = self.vacuum_topology_proposal + old_positions = self.vacuum_old_positions + new_positions = self.vacuum_new_positions + + topologies_to_store = { + f"{phase}_old": {"topology": topology_proposal.old_topology, + "positions": old_positions}, + f"{phase}_new": {"topology": topology_proposal.new_topology, + "positions": new_positions}, + } + # Store in "models" subdir -- create if doesn't exist + models_path = f"{self._trajectory_directory}/models" + if not os.path.exists(models_path): + os.makedirs(models_path) + for phase_key, phase_data in topologies_to_store.items(): + pdb_filename = AnyPath(f"{models_path}/{self._trajectory_prefix}_{phase_key}.pdb") + with open(pdb_filename, 'w') as outfile: + PDBFile.writeFile(phase_data["topology"], phase_data["positions"], outfile) + else: + _logger.warning( + 'Not storing topologies. Both trajectory_directory and trajectory_prefix arguments need to' + ' be provided to store topology .pdb files.') + + + @property def complex_topology_proposal(self): return self._complex_topology_proposal diff --git a/perses/tests/test_relative_setup.py b/perses/tests/test_relative_setup.py index b1029bedb..da53fac92 100644 --- a/perses/tests/test_relative_setup.py +++ b/perses/tests/test_relative_setup.py @@ -7,7 +7,6 @@ from perses.app import setup_relative_calculation import mdtraj as md from openmmtools import states, alchemy, testsystems, cache -from unittest import skipIf import pytest from perses.tests.utils import enter_temp_directory @@ -19,7 +18,6 @@ 'lambda_electrostatics' : 'lambda', } - def generate_example_waterbox_states(temperature=300.0*unit.kelvin, pressure=1.0*unit.atmosphere): """ This is a convenience function to generate a CompoundThermodynamicState and SamplerState to use in other tests. @@ -260,47 +258,6 @@ def warn(*args, **kwargs): found_tla = True assert found_tla == True, 'Spectator TLA not in old topology' - -def test_host_guest_deterministic_geometries(): - """ - execute the `RelativeFEPSetup` with geometries specified for a host-guest relative free energy pair - """ - from perses.app.relative_setup import RelativeFEPSetup - - # Setup directory - ligand_sdf = resource_filename("perses", "data/given-geometries/ligands.sdf") - host_pdb = resource_filename("perses", "data/given-geometries/receptor.pdb") - - setup = RelativeFEPSetup( - ligand_input = ligand_sdf, - old_ligand_index=0, - new_ligand_index=1, - forcefield_files = ['amber/ff14SB.xml','amber/tip3p_standard.xml','amber/tip3p_HFE_multivalent.xml'], - phases = ['complex', 'solvent', 'vacuum'], - protein_pdb_filename=host_pdb, - receptor_mol2_filename=None, - pressure=1.0 * unit.atmosphere, - temperature=300.0 * unit.kelvin, - solvent_padding=9.0 * unit.angstroms, - ionic_strength=0.15 * unit.molar, - hmass=3*unit.amus, - neglect_angles=False, - map_strength='default', - atom_expr=None, - bond_expr=None, - anneal_14s=False, - small_molecule_forcefield='gaff-2.11', - small_molecule_parameters_cache=None, - trajectory_directory=None, - trajectory_prefix=None, - spectator_filenames=None, - nonbonded_method = 'PME', - complex_box_dimensions=None, - solvent_box_dimensions=None, - remove_constraints=False, - use_given_geometries = True - ) - def test_relative_setup_charge_change(): """ execute `RelativeFEPSetup` in solvent/complex phase on a charge change and assert that the modified new system and old system charge difference is zero. @@ -357,7 +314,6 @@ def test_relative_setup_solvent_padding(): Check that the user inputted solvent_padding argument to `RelativeFEPSetup` actually changes the padding for the solvent phase """ from perses.app.relative_setup import RelativeFEPSetup - import numpy as np input_solvent_padding = 1.7 * unit.nanometers smiles_filename = resource_filename("perses", os.path.join("data", "test.smi")) @@ -409,6 +365,58 @@ def test_relative_setup_list_ligand_input(): assert ligand_input_size == expected_input_size, f"There should be {expected_input_size} ligand input files, " \ f"receiving {ligand_input_size} files." +def test_relative_setup_topologies_storage(tmp_path): + """ + Check that topologies get stored + """ + from perses.app.relative_setup import RelativeFEPSetup -# if __name__=="__main__": -# test_run_cdk2_iterations_repex() + # Setup directory + ligand_sdf = resource_filename("perses", "data/given-geometries/ligands.sdf") + host_pdb = resource_filename("perses", "data/given-geometries/receptor.pdb") + + fe_setup = RelativeFEPSetup( + ligand_input=ligand_sdf, + old_ligand_index=0, + new_ligand_index=1, + forcefield_files=['amber/ff14SB.xml', 'amber/tip3p_standard.xml', 'amber/tip3p_HFE_multivalent.xml'], + phases=['complex', 'solvent', 'vacuum'], + protein_pdb_filename=host_pdb, + receptor_mol2_filename=None, + pressure=1.0 * unit.atmosphere, + temperature=300.0 * unit.kelvin, + solvent_padding=9.0 * unit.angstroms, + ionic_strength=0.15 * unit.molar, + hmass=3 * unit.amus, + neglect_angles=False, + map_strength='default', + atom_expr=None, + bond_expr=None, + anneal_14s=False, + small_molecule_forcefield='openff-2.1.0', + small_molecule_parameters_cache=None, + trajectory_directory=tmp_path, + trajectory_prefix="host-guest", + spectator_filenames=None, + nonbonded_method='PME', + complex_box_dimensions=None, + solvent_box_dimensions=None, + remove_constraints=False, + use_given_geometries=True + ) + + expected_files = [ + 'host-guest_vacuum_old.pdb', + 'host-guest_vacuum_new.pdb', + 'host-guest_solvent_old.pdb', + 'host-guest_solvent_new.pdb', + 'host-guest_complex_old.pdb', + 'host-guest_complex_new.pdb', + ] + for file in expected_files: + pdb_filename = f"{tmp_path}/models/{file}" + assert os.path.exists(pdb_filename) + + # Assert that no extra files are created + files_in_directory = os.listdir(f"{tmp_path}/models") + assert len(files_in_directory) == len(expected_files)