PDBE-6878: Sort nodes of graph based on residue id and chain (#25)

* Improving depiction diagrams for dative bonds to metal (like HEM) (#19) * Dative bonds to metals that break valency that than charge adjustment * ZERO order bond to metals that break valency rather than charge adjustment. ZERO order results in dotted line in depictions. * Alter DATIVE bonds to ZERO order for diagrams as this results in dotted lines in depictions but keeps good angles in CPT diagram DATIVE bonds to metals that break valency rather than charge adjustment. * sort nodes before assigning id to boundmolecule * test for boundmolecule equivalence * bumped up version * correct alignment of atom names in PDB files * formatting the file * updated to use custom PDB writing instead of RDKit MolToPDBBlock * removed MAN as its mapping is for soe reason missing from unichem API * change bond type from dative to single for inchi calculation * use inchi_from_mol from mol_tools for inchi calcualtion * removed altering of bond type while exproting svg, as mol2D is is generated using dative bond and it is only used for depiction * store mol instead of smiles in SubstructureMapping * updated doc to use refactored SubstructureMapping model * strip new line only. RDKit checks for the 3D label in column 21-22 * removed formal charge from PDB files * alters bondtype from dative to zero for depiction * linting and formatting * added changes * updated rdkit version --------- Co-authored-by: Oliver Smart <[email protected]> Co-authored-by: roshan <“[email protected]”>
PDBeurope · May 24, 2024 · f2469cc · f2469cc
1 parent 5754952
commit f2469cc
Show file tree

Hide file tree

Showing 14 changed files with 229 additions and 149 deletions.
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
@@ -20,7 +20,7 @@ jobs:
           python-version: "3.10"
 
       - run: |
-          pip install rdkit==2023.3.3
+          pip install rdkit==2023.09.6
           pip install -e ".[tests]"
           pip install pre-commit
           pre-commit install && pre-commit run --all

diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,5 +1,18 @@
 # Changelog
 
+## RELEASE 0.8.5 - May 26, 2024
+
+### Features
+* Updated for RDKit 2023.09.6
+* Removed the formal charge adjustment for valence issues, instead uses Dative bonds
+* Uses Zero bond type (dotted lines) for depiction of coordinate bonds
+* Changes Dative bonds to single bonds for inchi calculation
+* Corrected the format of PDB files
+* Corrected the header of SDF file
+
+### Breaking changes
+* Removed smile from SubstructureMapping and added mol
+
 ## RELEASE 0.8.0 - July 26, 2023
 
 ### Features

diff --git a/doc/guide/fragments.md b/doc/guide/fragments.md
@@ -26,7 +26,7 @@ fragment_library = FragmentLibrary()
 matches = component.library_search(fragment_library)
 print(f'Matches found in the fragment library {matches}.')
 
-fragment_mols = [Chem.MolFromSmiles(fragment.smiles) for fragment in component.fragments]
+fragment_mols = [fragment.mol for fragment in component.fragments]
 img = Draw.MolsToGridImage(fragment_mols, legends = [fragment.name for fragment in component.fragments])
 img
 ```

diff --git a/pdbeccdutils/__init__.py b/pdbeccdutils/__init__.py
@@ -1 +1 @@
-__version__ = "0.8.4"
+__version__ = "0.8.5"
diff --git a/pdbeccdutils/core/ccd_writer.py b/pdbeccdutils/core/ccd_writer.py
@@ -35,7 +35,7 @@
 import rdkit
 import gemmi
 from gemmi import cif
-from pdbeccdutils.helpers import cif_tools
+from pdbeccdutils.helpers import cif_tools, mol_tools
 from pdbeccdutils.core.component import Component
 from pdbeccdutils.core.exceptions import CCDUtilsError
 from pdbeccdutils.core.models import ConformerType
@@ -119,20 +119,6 @@ def to_pdb_str(
         component, remove_hs, conf_type
     )
 
-    info = rdkit.Chem.rdchem.AtomPDBResidueInfo()
-    info.SetResidueName(f"{component.id:>3}")
-    info.SetTempFactor(20.0)
-    info.SetOccupancy(1.0)
-    info.SetChainId("A")
-    info.SetResidueNumber(1)
-    info.SetIsHeteroAtom(True)
-
-    for atom in mol_to_save.GetAtoms():
-        flag = _get_alt_atom_name(atom) if alt_names else _get_atom_name(atom)
-        atom_name = f"{flag:<4}"  # make sure it is 4 characters
-        info.SetName(atom_name)
-        atom.SetMonomerInfo(info)
-
     pdb_title = [
         f"HEADER    {conf_type.name} coordinates",
         f" for PDB-CCD {component.id}",
@@ -141,12 +127,7 @@ def to_pdb_str(
         f"AUTHOR    RDKit {rdkit.__version__}",
     ]
 
-    try:
-        pdb_body = rdkit.Chem.MolToPDBBlock(mol_to_save, conf_id)
-    except Exception:
-        pdb_body = _to_pdb_str_fallback(
-            mol_to_save, component.id, conf_id, conf_type.name
-        )
+    pdb_body = _to_pdb_str_fallback(mol_to_save, component.id, conf_id, alt_names)
 
     return "\n".join(pdb_title + [pdb_body])
 
@@ -186,7 +167,7 @@ def to_sdf_str(
 
                 block = [
                     f"{component.id} - {c.name} conformer",
-                    rdkit.Chem.MolToMolBlock(mol_to_save, confId=conf_id).strip(),
+                    rdkit.Chem.MolToMolBlock(mol_to_save, confId=conf_id).strip("\n"),
                     "$$$$\n",
                 ]
                 mol_block += block
@@ -945,7 +926,7 @@ def _to_sdf_str_fallback(mol, ccd_id, conformers):
 
         content += [
             f"{ccd_id} - {c.name} conformer",
-            "    RDKit   3D",
+            "     RDKit          3D",
             "\n" f"{atom_count:>3}{bond_count:3}  0  0  0  0  0  0  0  0999 V2000",
         ]
 
@@ -972,42 +953,43 @@ def _to_sdf_str_fallback(mol, ccd_id, conformers):
     return content
 
 
-def _to_pdb_str_fallback(mol, component_id, conf_id, conf_name="Model"):
-    """Fallback method to generate PDB file in case the default one in
-    RDKit fails.
+def _to_pdb_str_fallback(mol, component_id, conf_id, alt_names):
+    """Method to generate PDB file
 
     Args:
         mol (rdkit.Chem.rdchem.Mol): Molecule to be written.
         component_id (str): Component id.
         conf_id (int): conformer id to be written.
-        conf_name (str): conformer name to be written.
+        alt_names (bool): atom names or alternate names.
 
     Returns:
         str: String representation the component in the PDB format.
     """
     conformer_ids = []
-    content = [
-        f"HEADER    {conf_name} coordinates for PDB-CCD {component_id}",
-        f"COMPND    {component_id}",
-        f"AUTHOR    pdbccdutils {pdbeccdutils.__version__}",
-        f"AUTHOR    RDKit {rdkit.__version__}",
-    ]
 
     if conf_id == -1:
         conformer_ids = [c.GetId() for c in mol.GetConformers()]
     else:
         conformer_ids = [conf_id]
 
+    content = []
     for m in conformer_ids:
         rdkit_conformer = mol.GetConformer(m)
 
         for i in range(0, mol.GetNumAtoms()):
             atom = mol.GetAtomWithIdx(i)
-
-            s = "{:<6}{:>5} {:<4} {:>3} {}{:>4}{}   {:>8.3f}{:>8.3f}{:>8.3f}{:>6.2f}{:>6.2f}          {:>2}{:>2}".format(
+            atom_name = _get_alt_atom_name(atom) if alt_names else _get_atom_name(atom)
+            atom_symbol = atom.GetSymbol()
+            # Atom name spans from column 13-16. For 4 letter atom names and 2 letter atom symbol it starts from column 13
+            # and for others column starts from 14
+            col_align = "{:<6}{:>5}  {:<3} {:>3} {}{:>4}{}   {:>8.3f}{:>8.3f}{:>8.3f}{:>6.2f}{:>6.2f}          {:>2}"
+            if len(atom_name) == 4 or len(atom_symbol) == 2:
+                col_align = "{:<6}{:>5} {:<4} {:>3} {}{:>4}{}   {:>8.3f}{:>8.3f}{:>8.3f}{:>6.2f}{:>6.2f}          {:>2}"
+
+            s = col_align.format(
                 "HETATM",
                 i + 1,
-                atom.GetProp("name"),
+                atom_name,
                 component_id,
                 "A",
                 1,
@@ -1017,8 +999,7 @@ def _to_pdb_str_fallback(mol, component_id, conf_id, conf_name="Model"):
                 rdkit_conformer.GetAtomPosition(i).z,
                 1,
                 20,
-                atom.GetSymbol(),
-                atom.GetFormalCharge(),
+                atom.GetSymbol().upper(),
             )
             content.append(s)
 
@@ -1189,30 +1170,30 @@ def _add_fragments_and_scaffolds_cif(component, cif_block_copy):
     )
 
     for i, scaffold in enumerate(component.scaffolds):
-        mol = rdkit.Chem.MolFromSmiles(scaffold.smiles)
-        inchi = rdkit.Chem.MolToInchi(mol)
-        inchikey = rdkit.Chem.MolToInchiKey(mol)
+        smiles = rdkit.Chem.MolToSmiles(scaffold.mol)
+        inchi = mol_tools.inchi_from_mol(scaffold.mol).inchi
+        inchikey = mol_tools.inchikey_from_inchi(inchi)
         new_row = [
             component.id,
             scaffold.name,
             f"S{i+1}",
             "scaffold",
-            scaffold.smiles,
+            smiles,
             inchi or None,
             inchikey or None,
         ]
         substructure_loop.add_row(cif.quote_list(new_row))
 
     for j, fragment in enumerate(component.fragments):
-        mol = rdkit.Chem.MolFromSmiles(fragment.smiles)
-        inchi = rdkit.Chem.MolToInchi(mol)
-        inchikey = rdkit.Chem.MolToInchiKey(mol)
+        smiles = rdkit.Chem.MolToSmiles(fragment.mol)
+        inchi = mol_tools.inchi_from_mol(fragment.mol).inchi
+        inchikey = mol_tools.inchikey_from_inchi(inchi)
         new_row = [
             component.id,
             fragment.name,
             f"F{j+1}",
             "fragment",
-            fragment.smiles,
+            smiles,
             inchi or None,
             inchikey or None,
         ]

diff --git a/pdbeccdutils/core/clc_writer.py b/pdbeccdutils/core/clc_writer.py
@@ -91,17 +91,6 @@ def to_pdb_str(
     (mol_to_save, conf_id, conf_type) = ccd_writer._prepate_structure(
         component, remove_hs, conf_type
     )
-    residue_numbers = _get_residue_number(mol_to_save)
-    for atom in mol_to_save.GetAtoms():
-        flag = ccd_writer._get_atom_name(atom)
-        atom_name = f"{flag:<4}"  # make sure it is 4 characters
-        res_info = atom.GetPDBResidueInfo()
-        res_num = residue_numbers[atom.GetProp("residue_id")]
-        res_info.SetResidueNumber(res_num)
-        res_info.SetName(atom_name)
-        res_info.SetTempFactor(20.0)
-        res_info.SetOccupancy(1.0)
-        res_info.SetChainId("A")
 
     pdb_title = [
         f"HEADER    {conf_type.name} coordinates",
@@ -111,12 +100,7 @@ def to_pdb_str(
         f"AUTHOR    RDKit {rdkit.__version__}",
     ]
 
-    try:
-        pdb_body = rdkit.Chem.MolToPDBBlock(mol_to_save, conf_id)
-    except Exception:
-        pdb_body = _to_pdb_str_fallback(
-            mol_to_save, component.id, conf_id, conf_type.name
-        )
+    pdb_body = _to_pdb_str_fallback(mol_to_save, conf_id)
 
     return "\n".join(pdb_title + [pdb_body])
 
@@ -233,26 +217,18 @@ def to_xml_xml(component):
     return root
 
 
-def _to_pdb_str_fallback(mol, component_id, conf_id, conf_name="Model"):
-    """Fallback method to generate PDB file in case the default one in
-    RDKit fails.
+def _to_pdb_str_fallback(mol, conf_id):
+    """Method to generate PDB file
 
     Args:
         mol (rdkit.Chem.rdchem.Mol): Molecule to be written.
-        component_id (str): Component id.
         conf_id (int): conformer id to be written.
-        conf_name (str): conformer name to be written.
 
     Returns:
         str: String representation the component in the PDB format.
     """
     conformer_ids = []
-    content = [
-        f"HEADER    {conf_name} coordinates for PDB-CLC {component_id}",
-        f"COMPND    {component_id}",
-        f"AUTHOR    pdbccdutils {pdbeccdutils.__version__}",
-        f"AUTHOR    RDKit {rdkit.__version__}",
-    ]
+    content = []
 
     if conf_id == -1:
         conformer_ids = [c.GetId() for c in mol.GetConformers()]
@@ -265,11 +241,16 @@ def _to_pdb_str_fallback(mol, component_id, conf_id, conf_name="Model"):
         for i in range(0, mol.GetNumAtoms()):
             atom = mol.GetAtomWithIdx(i)
             res_info = atom.GetPDBResidueInfo()
+            atom_name = atom.GetProp("name")
+            atom_symbol = atom.GetSymbol().upper()
+            col_align = "{:<6}{:>5}  {:<3} {:>3} {}{:>4}{}   {:>8.3f}{:>8.3f}{:>8.3f}{:>6.2f}{:>6.2f}          {:>2}"
+            if len(atom_name) == 4 or len(atom_symbol) == 2:
+                col_align = "{:<6}{:>5} {:<4} {:>3} {}{:>4}{}   {:>8.3f}{:>8.3f}{:>8.3f}{:>6.2f}{:>6.2f}          {:>2}"
 
-            s = "{:<6}{:>5} {:<4} {:>3} {}{:>4}{}   {:>8.3f}{:>8.3f}{:>8.3f}{:>6.2f}{:>6.2f}          {:>2}{:>2}".format(
+            s = col_align.format(
                 "HETATM",
                 i + 1,
-                atom.GetProp("name"),
+                atom_name,
                 res_info.GetResidueName(),
                 "A",
                 residue_numbers[atom.GetProp("residue_id")],
@@ -279,8 +260,7 @@ def _to_pdb_str_fallback(mol, component_id, conf_id, conf_name="Model"):
                 rdkit_conformer.GetAtomPosition(i).z,
                 1,
                 20,
-                atom.GetSymbol(),
-                atom.GetFormalCharge(),
+                atom_symbol,
             )
             content.append(s)
 

diff --git a/pdbeccdutils/core/component.py b/pdbeccdutils/core/component.py
@@ -23,7 +23,7 @@
 
 import rdkit
 import gemmi
-from rdkit.Chem import BRICS, Draw
+from rdkit.Chem import BRICS, Draw, BondType
 from rdkit.Chem.rdMolDescriptors import Properties
 from rdkit.Chem.Scaffolds import MurckoScaffold
 
@@ -227,10 +227,12 @@ def inchi_from_rdkit(self) -> str:
             str: the InChI or empty '' if there was an error finding it.
         """
         if not self._inchi_from_rdkit:
-            try:
-                self._inchi_from_rdkit = rdkit.Chem.inchi.MolToInchi(self.mol)
-            except ValueError:
+            inchi_result = mol_tools.inchi_from_mol(self.mol)
+            if inchi_result.errors:
                 self._inchi_from_rdkit = ""
+            else:
+                self._inchi_from_rdkit = inchi_result.inchi
+
         return self._inchi_from_rdkit
 
     @property
@@ -512,10 +514,14 @@ def export_2d_svg(
                 options.atomLabels[i] = atom_name
                 a.SetProp("molFileAlias", atom_name)
 
+        mol_tools.change_bonds_type(self.mol2D, BondType.DATIVE, BondType.ZERO)
+
         drawing.draw_molecule(
             self.mol2D, drawer, file_name, wedge_bonds, atom_highlight, bond_highlight
         )
 
+        mol_tools.change_bonds_type(self.mol2D, BondType.ZERO, BondType.ZERO)
+
     def export_2d_annotation(self, file_name: str, wedge_bonds: bool = True) -> None:
         """Generates 2D depiction in JSON format with annotation of
         bonds and atoms to be redrawn in the interactions component.
@@ -675,9 +681,7 @@ def library_search(
 
                 key = f"{fragment_library.name}_{v.name}"
                 if key not in self._fragments:
-                    temp[key] = SubstructureMapping(
-                        v.name, rdkit.Chem.MolToSmiles(v.mol), v.source, matches
-                    )
+                    temp[key] = SubstructureMapping(v.name, v.mol, v.source, matches)
 
             except Exception:
                 logging.warning(f"Error mapping fragment {v.name}.")
@@ -718,7 +722,8 @@ def get_scaffolds(self, scaffolding_method=ScaffoldingMethod.MurckoScaffold):
                 brics_hits = [self.mol_no_h.GetSubstructMatches(i) for i in brics_mols]
 
                 for index, brics_hit in enumerate(brics_hits):
-                    smiles = rdkit.Chem.MolToSmiles(brics_mols[index])
+                    brics_mol = brics_mols[index]
+                    smiles = rdkit.Chem.MolToSmiles(brics_mol)
                     name = scaffolding_method.name
                     source = "RDKit scaffolds"
                     key = f"{name}_{smiles}"
@@ -729,7 +734,7 @@ def get_scaffolds(self, scaffolding_method=ScaffoldingMethod.MurckoScaffold):
 
                     if key not in self._scaffolds:
                         self._scaffolds[key] = SubstructureMapping(
-                            name, smiles, source, brics_hit
+                            name, brics_mol, source, brics_hit
                         )
 
                 return brics_mols
@@ -756,7 +761,7 @@ def get_scaffolds(self, scaffolding_method=ScaffoldingMethod.MurckoScaffold):
                     self._scaffolds[name].mappings.append(mapping)
                 else:
                     self._scaffolds[name] = SubstructureMapping(
-                        name, smiles, source, [mapping]
+                        name, s, source, [mapping]
                     )
 
             return scaffolds
@@ -802,6 +807,6 @@ def _id_to_name_mapping(self, struct_mapping):
             for m in v.mappings:
                 atom_names = [self.mol.GetAtomWithIdx(idx).GetProp("name") for idx in m]
                 mappings.append(atom_names)
-            res.append(SubstructureMapping(v.name, v.smiles, v.source, mappings))
+            res.append(SubstructureMapping(v.name, v.mol, v.source, mappings))
 
         return res