Skip to content

Commit

Permalink
Return non sanitised mol if the sanitisation fails (#24)
Browse files Browse the repository at this point in the history
* added example for scaffold identification

* added examples of fragments

* added image of codeine

* added SanitisationResult model

* return original mol object if sanitisation fails

* use SanitisationResult object

* updated fragments and scaffolds examples

* bumped version

* added sanitisation cleanup flag

* linting and formatting

* fixed the version of rdkit

* corrected typo

---------

Co-authored-by: “roshan” <“[email protected]”>
  • Loading branch information
roshkjr and “roshan” authored Oct 30, 2023
1 parent 3c15c4e commit 5754952
Show file tree
Hide file tree
Showing 12 changed files with 714 additions and 23 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ jobs:
python-version: "3.10"

- run: |
pip install rdkit
pip install rdkit==2023.3.3
pip install -e ".[tests]"
pip install pre-commit
pre-commit install && pre-commit run --all
Expand Down
388 changes: 388 additions & 0 deletions doc/_static/codeine.svg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
75 changes: 75 additions & 0 deletions doc/_static/fragment_example.svg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
172 changes: 172 additions & 0 deletions doc/_static/scaffold_example.svg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
34 changes: 26 additions & 8 deletions doc/guide/fragments.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,26 +14,44 @@ Alternativelly fragments can be supplied in an external library (*.tsv) provided
| phenanthrene | SMARTS | [#6]1:[#6]:[#6]:[#6]2:[#6](:[#6]:1):[#6]:[#6]:[#6]1:[#6]:2:[#6]:[#6]:[#6]:[#6]:1 | | unchecked | | PDBe |


## Basic use case
## Identifying fragments of a chemical component

```python
from pdbeccdutils.core import ccd_reader
from pdbeccdutils.core.fragment_library import FragmentLibrary

component = ccd_reader.read_pdb_cif_file('HEM.cif').component
component = ccd_reader.read_pdb_cif_file('HEL.cif').component
fragment_library = FragmentLibrary()

matches = component.library_search(library)
matches = component.library_search(fragment_library)
print(f'Matches found in the fragment library {matches}.')

for fragment in component.fragments:
print(f'Fragment name {fragment.name} from source {fragment.source}')
fragment_mols = [Chem.MolFromSmiles(fragment.smiles) for fragment in component.fragments]
img = Draw.MolsToGridImage(fragment_mols, legends = [fragment.name for fragment in component.fragments])
img
```
<img src='../_static/fragment_example.svg' style="display:block; margin-bottom:5px" />


## Identifying all chemical components with a penicillin fragment

```python
fragment_library = FragmentLibrary()
ccd_dict = ccd_reader.read_pdb_components_file('components.cif')
ccd_with_penicillin_fragment = []
for ccd_id in ccd_dict.keys():
component = ccd_dict[ccd_id].component
frag_matches = component.library_search(fragment_library)
for fragment in component.fragments:
if fragment.name == 'penicillin':
ccd_with_penicillin_fragment.append(ccd_id)

ccd_with_penicillin_fragment

['0RN', 'AIC', 'APV', 'CXN', 'HEL', 'IP1', 'MII', 'NFN', 'PN1', 'PNN', 'PNV', 'SOX', 'TAZ', 'WPP', 'X1E']
```
## PDBe supplied fragments

Below you can find the actual fragment structures that come with the pdbeccdutils `FragmentLibrary` from the PDBe resource:

<div align='center'>
<img src='../_static/pdbe_fragments.svg' />
</div>
<img src='../_static/pdbe_fragments.svg' style="display:block"/>
14 changes: 12 additions & 2 deletions doc/guide/scaffolds.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,17 @@

```python
from pdbeccdutils.core import ccd_reader
from pdbeccdutils.core.models import ScaffoldingMethod
from rdkit.Chem import Draw

component = ccd_reader.read_pdb_cif_file('HEM.cif').component
component.get_scaffolds()
component = ccd_reader.read_pdb_cif_file('CVV.cif').component
scaffolds = component.get_scaffolds(scaffolding_method=ScaffoldingMethod.Brics)
img = Draw.MolsToGridImage(scaffolds, legends = [f"scaffold {i}" for i in range(1, len(scaffolds)+1)])
img
```
<p float="left">
<img src="../_static/scaffold_example.svg" width="49%" />
<img src="../_static/codeine.svg" width="49%" />
</p>

The figure shows the scaffolds identified by pdbeccdutils using the BRICS fragmentation rule for the chemical component CVV when bound to the human kappa opioid receptor (PDB entry 6b73). Interestingly, scaffold 3 is an exact match to the scaffold of Codeine (ChEMBL485), a known analgesic that targets various opioid receptors, and its biological activity is well-documented in ChEMBL. Although the PDB does not contain the structure of Codeine, the shared scaffold between Codeine and CCD component CVV suggests that Codeine may interact with the human kappa opioid receptor in a similar manner to CVV.
2 changes: 1 addition & 1 deletion pdbeccdutils/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "0.8.3"
__version__ = "0.8.4"
3 changes: 2 additions & 1 deletion pdbeccdutils/core/ccd_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -169,7 +169,8 @@ def _parse_pdb_mmcif(cif_block, sanitize=True):
_handle_implicit_hydrogens(mol)

if sanitize:
sanitized = mol_tools.sanitize(mol)
sanitized_result = mol_tools.sanitize(mol)
mol, sanitized = sanitized_result.mol, sanitized_result.status

descriptors = _parse_pdb_descriptors(
cif_block, "_pdbx_chem_comp_descriptor.", "descriptor"
Expand Down
3 changes: 2 additions & 1 deletion pdbeccdutils/core/clc_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,8 @@ def infer_multiple_chem_comp(path_to_cif, bm, bm_id, sanitize=True):
(mol, warnings, errors) = _parse_pdb_mmcif(cif_block, bm.graph)
sanitized = False
if sanitize:
sanitized = mol_tools.sanitize(mol)
sanitized_result = mol_tools.sanitize(mol)
mol, sanitized = sanitized_result.mol, sanitized_result.status

inchi_result = mol_tools.inchi_from_mol(mol)
if inchi_result.warnings:
Expand Down
13 changes: 13 additions & 0 deletions pdbeccdutils/core/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,19 @@ class DepictionResult(NamedTuple):
score: float


class SanitisationResult(NamedTuple):
    """
    Sanitisation result details.

    Args:
        mol: rdkit.Chem.rdchem.Mol (an RWMol in practice) returned by
            the sanitisation step. When sanitisation fails this is the
            original, non-sanitised molecule rather than a fixed copy.
        status: True if the sanitisation process succeeded,
            False otherwise.
    """

    mol: Chem.rdchem.Mol
    # Boolean success flag; callers unpack it as the `sanitized` value.
    status: bool


class Descriptor(NamedTuple):
"""
Descriptor obtained from the cif file. This is essentially
Expand Down
3 changes: 2 additions & 1 deletion pdbeccdutils/core/prd_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,8 @@ def _parse_pdb_mmcif(cif_block, sanitize=True):
ccd_reader._handle_implicit_hydrogens(mol)

if sanitize:
sanitized = mol_tools.sanitize(mol)
sanitized_result = mol_tools.sanitize(mol)
mol, sanitized = sanitized_result.mol, sanitized_result.status

descriptors = ccd_reader._parse_pdb_descriptors(
cif_block, "_pdbx_chem_comp_descriptor.", "descriptor"
Expand Down
28 changes: 20 additions & 8 deletions pdbeccdutils/helpers/mol_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,12 @@
import re
import sys
from io import StringIO
from pdbeccdutils.core.models import InChIFromRDKit, MolFromRDKit, ConformerType
from pdbeccdutils.core.models import (
InChIFromRDKit,
MolFromRDKit,
ConformerType,
SanitisationResult,
)
from contextlib import redirect_stderr

import numpy as np
Expand Down Expand Up @@ -79,12 +84,16 @@ def sanitize(rwmol):
success = False

try:
success = fix_molecule(rwmol)
mol_copy = rdkit.Chem.RWMol(rwmol)
success = fix_molecule(mol_copy)

if not success:
return False
rdkit.Chem.SanitizeMol(
rwmol, sanitizeOps=rdkit.Chem.SanitizeFlags.SANITIZE_CLEANUP
)
return SanitisationResult(mol=rwmol, status=False)

rdkit.Chem.Kekulize(rwmol)
rdkit.Chem.Kekulize(mol_copy)
# rdkit.Chem.rdmolops.AssignAtomChiralTagsFromStructure(rwmol, confId=0)

# find correct conformer to assign stereochemistry
Expand All @@ -93,7 +102,7 @@ def sanitize(rwmol):
conformer_id = -1
conformer_types = [ConformerType.Ideal, ConformerType.Model]
for conf_type in conformer_types:
conformer = get_conformer(rwmol, conf_type)
conformer = get_conformer(mol_copy, conf_type)
if not is_degenerate_conformer(conformer):
conformer_id = conformer.GetId()

Expand All @@ -103,13 +112,16 @@ def sanitize(rwmol):
# else:
# conformer_id = conformers[0].GetId()

rdkit.Chem.rdmolops.AssignStereochemistryFrom3D(rwmol, conformer_id)
rdkit.Chem.rdmolops.AssignStereochemistryFrom3D(mol_copy, conformer_id)

except Exception as e:
print(e, file=sys.stderr)
return False
rdkit.Chem.SanitizeMol(
rwmol, sanitizeOps=rdkit.Chem.SanitizeFlags.SANITIZE_CLEANUP
)
return SanitisationResult(mol=rwmol, status=False)

return success
return SanitisationResult(mol=mol_copy, status=success)


def get_conformer(rwmol, c_type):
Expand Down

0 comments on commit 5754952

Please sign in to comment.