Skip to content

Commit

Permalink
PDBE-6878: Sort nodes of graph based on residue id and chain (#25)
Browse files Browse the repository at this point in the history
* Improving depiction diagrams for dative bonds to metal (like HEM) (#19)

* Dative bonds to metals that break valency that than
charge adjustment

* ZERO order bond to metals that break valency
rather than charge adjustment.

ZERO order results in dotted line in depictions.

* Alter DATIVE bonds to ZERO order for diagrams as
this results in dotted lines in depictions but
keeps good angles in CPT diagram

DATIVE bonds to metals that break valency
rather than charge adjustment.

* sort nodes before assigning id to boundmolecule

* test for boundmolecule equivalence

* bumped up version

* correct alignment of atom names in PDB files

* formatting the file

* updated to use custom PDB writing instead of RDKit MolToPDBBlock

* removed MAN as its mapping is for soe reason missing from unichem API

* change bond type from dative to single for inchi calculation

* use inchi_from_mol from mol_tools for inchi calcualtion

* removed altering of bond type while exproting svg, as mol2D is is generated using dative bond and it is only used for depiction

* store mol instead of smiles in SubstructureMapping

* updated doc to use refactored SubstructureMapping model

* strip new line only. RDKit checks for the 3D label in column 21-22

* removed formal charge from PDB files

* alters bondtype from dative to zero for depiction

* linting and formatting

* added changes

* updated rdkit version

---------

Co-authored-by: Oliver Smart <[email protected]>
Co-authored-by: roshan <“[email protected]”>
  • Loading branch information
3 people authored May 24, 2024
1 parent 5754952 commit f2469cc
Show file tree
Hide file tree
Showing 14 changed files with 229 additions and 149 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ jobs:
python-version: "3.10"

- run: |
pip install rdkit==2023.3.3
pip install rdkit==2023.09.6
pip install -e ".[tests]"
pip install pre-commit
pre-commit install && pre-commit run --all
Expand Down
13 changes: 13 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,18 @@
# Changelog

## RELEASE 0.8.5 - May 26, 2024

### Features
* Updated for RDKit 2023.09.6
* Removed the formal charge adjustment for valence issues, instead uses Dative bonds
* Uses Zero bond type (dotted lines) for depiction of coordinate bonds
* Changes Dative bonds to single bonds for inchi calculation
* Corrected the format of PDB files
* Corrected the header of SDF file

### Breaking changes
* Removed smile from SubstructureMapping and added mol

## RELEASE 0.8.0 - July 26, 2023

### Features
Expand Down
2 changes: 1 addition & 1 deletion doc/guide/fragments.md
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ fragment_library = FragmentLibrary()
matches = component.library_search(fragment_library)
print(f'Matches found in the fragment library {matches}.')

fragment_mols = [Chem.MolFromSmiles(fragment.smiles) for fragment in component.fragments]
fragment_mols = [fragment.mol for fragment in component.fragments]
img = Draw.MolsToGridImage(fragment_mols, legends = [fragment.name for fragment in component.fragments])
img
```
Expand Down
2 changes: 1 addition & 1 deletion pdbeccdutils/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "0.8.4"
__version__ = "0.8.5"
73 changes: 27 additions & 46 deletions pdbeccdutils/core/ccd_writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@
import rdkit
import gemmi
from gemmi import cif
from pdbeccdutils.helpers import cif_tools
from pdbeccdutils.helpers import cif_tools, mol_tools
from pdbeccdutils.core.component import Component
from pdbeccdutils.core.exceptions import CCDUtilsError
from pdbeccdutils.core.models import ConformerType
Expand Down Expand Up @@ -119,20 +119,6 @@ def to_pdb_str(
component, remove_hs, conf_type
)

info = rdkit.Chem.rdchem.AtomPDBResidueInfo()
info.SetResidueName(f"{component.id:>3}")
info.SetTempFactor(20.0)
info.SetOccupancy(1.0)
info.SetChainId("A")
info.SetResidueNumber(1)
info.SetIsHeteroAtom(True)

for atom in mol_to_save.GetAtoms():
flag = _get_alt_atom_name(atom) if alt_names else _get_atom_name(atom)
atom_name = f"{flag:<4}" # make sure it is 4 characters
info.SetName(atom_name)
atom.SetMonomerInfo(info)

pdb_title = [
f"HEADER {conf_type.name} coordinates",
f" for PDB-CCD {component.id}",
Expand All @@ -141,12 +127,7 @@ def to_pdb_str(
f"AUTHOR RDKit {rdkit.__version__}",
]

try:
pdb_body = rdkit.Chem.MolToPDBBlock(mol_to_save, conf_id)
except Exception:
pdb_body = _to_pdb_str_fallback(
mol_to_save, component.id, conf_id, conf_type.name
)
pdb_body = _to_pdb_str_fallback(mol_to_save, component.id, conf_id, alt_names)

return "\n".join(pdb_title + [pdb_body])

Expand Down Expand Up @@ -186,7 +167,7 @@ def to_sdf_str(

block = [
f"{component.id} - {c.name} conformer",
rdkit.Chem.MolToMolBlock(mol_to_save, confId=conf_id).strip(),
rdkit.Chem.MolToMolBlock(mol_to_save, confId=conf_id).strip("\n"),
"$$$$\n",
]
mol_block += block
Expand Down Expand Up @@ -945,7 +926,7 @@ def _to_sdf_str_fallback(mol, ccd_id, conformers):

content += [
f"{ccd_id} - {c.name} conformer",
" RDKit 3D",
" RDKit 3D",
"\n" f"{atom_count:>3}{bond_count:3} 0 0 0 0 0 0 0 0999 V2000",
]

Expand All @@ -972,42 +953,43 @@ def _to_sdf_str_fallback(mol, ccd_id, conformers):
return content


def _to_pdb_str_fallback(mol, component_id, conf_id, conf_name="Model"):
"""Fallback method to generate PDB file in case the default one in
RDKit fails.
def _to_pdb_str_fallback(mol, component_id, conf_id, alt_names):
"""Method to generate PDB file
Args:
mol (rdkit.Chem.rdchem.Mol): Molecule to be written.
component_id (str): Component id.
conf_id (int): conformer id to be written.
conf_name (str): conformer name to be written.
alt_names (bool): atom names or alternate names.
Returns:
str: String representation the component in the PDB format.
"""
conformer_ids = []
content = [
f"HEADER {conf_name} coordinates for PDB-CCD {component_id}",
f"COMPND {component_id}",
f"AUTHOR pdbccdutils {pdbeccdutils.__version__}",
f"AUTHOR RDKit {rdkit.__version__}",
]

if conf_id == -1:
conformer_ids = [c.GetId() for c in mol.GetConformers()]
else:
conformer_ids = [conf_id]

content = []
for m in conformer_ids:
rdkit_conformer = mol.GetConformer(m)

for i in range(0, mol.GetNumAtoms()):
atom = mol.GetAtomWithIdx(i)

s = "{:<6}{:>5} {:<4} {:>3} {}{:>4}{} {:>8.3f}{:>8.3f}{:>8.3f}{:>6.2f}{:>6.2f} {:>2}{:>2}".format(
atom_name = _get_alt_atom_name(atom) if alt_names else _get_atom_name(atom)
atom_symbol = atom.GetSymbol()
# Atom name spans from column 13-16. For 4 letter atom names and 2 letter atom symbol it starts from column 13
# and for others column starts from 14
col_align = "{:<6}{:>5} {:<3} {:>3} {}{:>4}{} {:>8.3f}{:>8.3f}{:>8.3f}{:>6.2f}{:>6.2f} {:>2}"
if len(atom_name) == 4 or len(atom_symbol) == 2:
col_align = "{:<6}{:>5} {:<4} {:>3} {}{:>4}{} {:>8.3f}{:>8.3f}{:>8.3f}{:>6.2f}{:>6.2f} {:>2}"

s = col_align.format(
"HETATM",
i + 1,
atom.GetProp("name"),
atom_name,
component_id,
"A",
1,
Expand All @@ -1017,8 +999,7 @@ def _to_pdb_str_fallback(mol, component_id, conf_id, conf_name="Model"):
rdkit_conformer.GetAtomPosition(i).z,
1,
20,
atom.GetSymbol(),
atom.GetFormalCharge(),
atom.GetSymbol().upper(),
)
content.append(s)

Expand Down Expand Up @@ -1189,30 +1170,30 @@ def _add_fragments_and_scaffolds_cif(component, cif_block_copy):
)

for i, scaffold in enumerate(component.scaffolds):
mol = rdkit.Chem.MolFromSmiles(scaffold.smiles)
inchi = rdkit.Chem.MolToInchi(mol)
inchikey = rdkit.Chem.MolToInchiKey(mol)
smiles = rdkit.Chem.MolToSmiles(scaffold.mol)
inchi = mol_tools.inchi_from_mol(scaffold.mol).inchi
inchikey = mol_tools.inchikey_from_inchi(inchi)
new_row = [
component.id,
scaffold.name,
f"S{i+1}",
"scaffold",
scaffold.smiles,
smiles,
inchi or None,
inchikey or None,
]
substructure_loop.add_row(cif.quote_list(new_row))

for j, fragment in enumerate(component.fragments):
mol = rdkit.Chem.MolFromSmiles(fragment.smiles)
inchi = rdkit.Chem.MolToInchi(mol)
inchikey = rdkit.Chem.MolToInchiKey(mol)
smiles = rdkit.Chem.MolToSmiles(fragment.mol)
inchi = mol_tools.inchi_from_mol(fragment.mol).inchi
inchikey = mol_tools.inchikey_from_inchi(inchi)
new_row = [
component.id,
fragment.name,
f"F{j+1}",
"fragment",
fragment.smiles,
smiles,
inchi or None,
inchikey or None,
]
Expand Down
44 changes: 12 additions & 32 deletions pdbeccdutils/core/clc_writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,17 +91,6 @@ def to_pdb_str(
(mol_to_save, conf_id, conf_type) = ccd_writer._prepate_structure(
component, remove_hs, conf_type
)
residue_numbers = _get_residue_number(mol_to_save)
for atom in mol_to_save.GetAtoms():
flag = ccd_writer._get_atom_name(atom)
atom_name = f"{flag:<4}" # make sure it is 4 characters
res_info = atom.GetPDBResidueInfo()
res_num = residue_numbers[atom.GetProp("residue_id")]
res_info.SetResidueNumber(res_num)
res_info.SetName(atom_name)
res_info.SetTempFactor(20.0)
res_info.SetOccupancy(1.0)
res_info.SetChainId("A")

pdb_title = [
f"HEADER {conf_type.name} coordinates",
Expand All @@ -111,12 +100,7 @@ def to_pdb_str(
f"AUTHOR RDKit {rdkit.__version__}",
]

try:
pdb_body = rdkit.Chem.MolToPDBBlock(mol_to_save, conf_id)
except Exception:
pdb_body = _to_pdb_str_fallback(
mol_to_save, component.id, conf_id, conf_type.name
)
pdb_body = _to_pdb_str_fallback(mol_to_save, conf_id)

return "\n".join(pdb_title + [pdb_body])

Expand Down Expand Up @@ -233,26 +217,18 @@ def to_xml_xml(component):
return root


def _to_pdb_str_fallback(mol, component_id, conf_id, conf_name="Model"):
"""Fallback method to generate PDB file in case the default one in
RDKit fails.
def _to_pdb_str_fallback(mol, conf_id):
"""Method to generate PDB file
Args:
mol (rdkit.Chem.rdchem.Mol): Molecule to be written.
component_id (str): Component id.
conf_id (int): conformer id to be written.
conf_name (str): conformer name to be written.
Returns:
str: String representation the component in the PDB format.
"""
conformer_ids = []
content = [
f"HEADER {conf_name} coordinates for PDB-CLC {component_id}",
f"COMPND {component_id}",
f"AUTHOR pdbccdutils {pdbeccdutils.__version__}",
f"AUTHOR RDKit {rdkit.__version__}",
]
content = []

if conf_id == -1:
conformer_ids = [c.GetId() for c in mol.GetConformers()]
Expand All @@ -265,11 +241,16 @@ def _to_pdb_str_fallback(mol, component_id, conf_id, conf_name="Model"):
for i in range(0, mol.GetNumAtoms()):
atom = mol.GetAtomWithIdx(i)
res_info = atom.GetPDBResidueInfo()
atom_name = atom.GetProp("name")
atom_symbol = atom.GetSymbol().upper()
col_align = "{:<6}{:>5} {:<3} {:>3} {}{:>4}{} {:>8.3f}{:>8.3f}{:>8.3f}{:>6.2f}{:>6.2f} {:>2}"
if len(atom_name) == 4 or len(atom_symbol) == 2:
col_align = "{:<6}{:>5} {:<4} {:>3} {}{:>4}{} {:>8.3f}{:>8.3f}{:>8.3f}{:>6.2f}{:>6.2f} {:>2}"

s = "{:<6}{:>5} {:<4} {:>3} {}{:>4}{} {:>8.3f}{:>8.3f}{:>8.3f}{:>6.2f}{:>6.2f} {:>2}{:>2}".format(
s = col_align.format(
"HETATM",
i + 1,
atom.GetProp("name"),
atom_name,
res_info.GetResidueName(),
"A",
residue_numbers[atom.GetProp("residue_id")],
Expand All @@ -279,8 +260,7 @@ def _to_pdb_str_fallback(mol, component_id, conf_id, conf_name="Model"):
rdkit_conformer.GetAtomPosition(i).z,
1,
20,
atom.GetSymbol(),
atom.GetFormalCharge(),
atom_symbol,
)
content.append(s)

Expand Down
27 changes: 16 additions & 11 deletions pdbeccdutils/core/component.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@

import rdkit
import gemmi
from rdkit.Chem import BRICS, Draw
from rdkit.Chem import BRICS, Draw, BondType
from rdkit.Chem.rdMolDescriptors import Properties
from rdkit.Chem.Scaffolds import MurckoScaffold

Expand Down Expand Up @@ -227,10 +227,12 @@ def inchi_from_rdkit(self) -> str:
str: the InChI or empty '' if there was an error finding it.
"""
if not self._inchi_from_rdkit:
try:
self._inchi_from_rdkit = rdkit.Chem.inchi.MolToInchi(self.mol)
except ValueError:
inchi_result = mol_tools.inchi_from_mol(self.mol)
if inchi_result.errors:
self._inchi_from_rdkit = ""
else:
self._inchi_from_rdkit = inchi_result.inchi

return self._inchi_from_rdkit

@property
Expand Down Expand Up @@ -512,10 +514,14 @@ def export_2d_svg(
options.atomLabels[i] = atom_name
a.SetProp("molFileAlias", atom_name)

mol_tools.change_bonds_type(self.mol2D, BondType.DATIVE, BondType.ZERO)

drawing.draw_molecule(
self.mol2D, drawer, file_name, wedge_bonds, atom_highlight, bond_highlight
)

mol_tools.change_bonds_type(self.mol2D, BondType.ZERO, BondType.ZERO)

def export_2d_annotation(self, file_name: str, wedge_bonds: bool = True) -> None:
"""Generates 2D depiction in JSON format with annotation of
bonds and atoms to be redrawn in the interactions component.
Expand Down Expand Up @@ -675,9 +681,7 @@ def library_search(

key = f"{fragment_library.name}_{v.name}"
if key not in self._fragments:
temp[key] = SubstructureMapping(
v.name, rdkit.Chem.MolToSmiles(v.mol), v.source, matches
)
temp[key] = SubstructureMapping(v.name, v.mol, v.source, matches)

except Exception:
logging.warning(f"Error mapping fragment {v.name}.")
Expand Down Expand Up @@ -718,7 +722,8 @@ def get_scaffolds(self, scaffolding_method=ScaffoldingMethod.MurckoScaffold):
brics_hits = [self.mol_no_h.GetSubstructMatches(i) for i in brics_mols]

for index, brics_hit in enumerate(brics_hits):
smiles = rdkit.Chem.MolToSmiles(brics_mols[index])
brics_mol = brics_mols[index]
smiles = rdkit.Chem.MolToSmiles(brics_mol)
name = scaffolding_method.name
source = "RDKit scaffolds"
key = f"{name}_{smiles}"
Expand All @@ -729,7 +734,7 @@ def get_scaffolds(self, scaffolding_method=ScaffoldingMethod.MurckoScaffold):

if key not in self._scaffolds:
self._scaffolds[key] = SubstructureMapping(
name, smiles, source, brics_hit
name, brics_mol, source, brics_hit
)

return brics_mols
Expand All @@ -756,7 +761,7 @@ def get_scaffolds(self, scaffolding_method=ScaffoldingMethod.MurckoScaffold):
self._scaffolds[name].mappings.append(mapping)
else:
self._scaffolds[name] = SubstructureMapping(
name, smiles, source, [mapping]
name, s, source, [mapping]
)

return scaffolds
Expand Down Expand Up @@ -802,6 +807,6 @@ def _id_to_name_mapping(self, struct_mapping):
for m in v.mappings:
atom_names = [self.mol.GetAtomWithIdx(idx).GetProp("name") for idx in m]
mappings.append(atom_names)
res.append(SubstructureMapping(v.name, v.smiles, v.source, mappings))
res.append(SubstructureMapping(v.name, v.mol, v.source, mappings))

return res
Loading

0 comments on commit f2469cc

Please sign in to comment.