diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 520e8ba8..2b2cdac0 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -20,7 +20,7 @@ jobs: python-version: "3.10" - run: | - pip install rdkit + pip install rdkit==2023.3.3 pip install -e ".[tests]" pip install pre-commit pre-commit install && pre-commit run --all diff --git a/doc/_static/codeine.svg b/doc/_static/codeine.svg new file mode 100644 index 00000000..adcd7173 --- /dev/null +++ b/doc/_static/codeine.svg @@ -0,0 +1,388 @@ + + diff --git a/doc/_static/fragment_example.svg b/doc/_static/fragment_example.svg new file mode 100644 index 00000000..a712fe60 --- /dev/null +++ b/doc/_static/fragment_example.svg @@ -0,0 +1,75 @@ + diff --git a/doc/_static/scaffold_example.svg b/doc/_static/scaffold_example.svg new file mode 100644 index 00000000..cefcac62 --- /dev/null +++ b/doc/_static/scaffold_example.svg @@ -0,0 +1,172 @@ + diff --git a/doc/guide/fragments.md b/doc/guide/fragments.md index d15e92e2..3764c173 100644 --- a/doc/guide/fragments.md +++ b/doc/guide/fragments.md @@ -14,26 +14,44 @@ Alternativelly fragments can be supplied in an external library (*.tsv) provided | phenanthrene | SMARTS | [#6]1:[#6]:[#6]:[#6]2:[#6](:[#6]:1):[#6]:[#6]:[#6]1:[#6]:2:[#6]:[#6]:[#6]:[#6]:1 | | unchecked | | PDBe | -## Basic use case +## Identifying fragments of a chemical component ```python from pdbeccdutils.core import ccd_reader from pdbeccdutils.core.fragment_library import FragmentLibrary -component = ccd_reader.read_pdb_cif_file('HEM.cif').component +component = ccd_reader.read_pdb_cif_file('HEL.cif').component fragment_library = FragmentLibrary() -matches = component.library_search(library) +matches = component.library_search(fragment_library) print(f'Matches found in the fragment library {matches}.') -for fragment in component.fragments: - print(f'Fragment name {fragment.name} from source {fragment.source}') +fragment_mols = [Chem.MolFromSmiles(fragment.smiles) for 
fragment in component.fragments] +img = Draw.MolsToGridImage(fragment_mols, legends = [fragment.name for fragment in component.fragments]) +img ``` + + +## Identifying all chemical components with penicillin fragment + +```python +fragment_library = FragmentLibrary() +ccd_dict = ccd_reader.read_pdb_components_file('components.cif') +ccd_with_penicillin_fragment = [] +for ccd_id in ccd_dict.keys(): + component = ccd_dict[ccd_id].component + frag_matches = component.library_search(fragment_library) + for fragment in component.fragments: + if fragment.name == 'penicillin': + ccd_with_penicillin_fragment.append(ccd_id) + +ccd_with_penicillin_fragment + +['0RN', 'AIC', 'APV', 'CXN', 'HEL', 'IP1', 'MII', 'NFN', 'PN1', 'PNN', 'PNV', 'SOX', 'TAZ', 'WPP', 'X1E'] +``` ## PDBe supplied fragments Below you can find actual fragment structures comming with the pdbeccdutil's `FragmentsLibrary` from the PDBe resource: -
+ + +
+ +The figure shows the scaffolds identified by pdbeccdutils using the BRICS fragmentation rule for the chemical component CVV when bound to the human kappa opioid receptor (PDB entry 6b73). Interestingly, scaffold 3 is an exact match to the scaffold of Codeine (ChEMBL485), a known analgesic that targets various opioid receptors, and its biological activity is well documented in ChEMBL. Although the PDB does not contain the structure of Codeine, the shared scaffold between Codeine and CCD component CVV suggests that Codeine may interact with the human kappa opioid receptor in a similar manner to CVV. diff --git a/pdbeccdutils/__init__.py b/pdbeccdutils/__init__.py index 732155f8..fa3ddd8c 100644 --- a/pdbeccdutils/__init__.py +++ b/pdbeccdutils/__init__.py @@ -1 +1 @@ -__version__ = "0.8.3" +__version__ = "0.8.4" diff --git a/pdbeccdutils/core/ccd_reader.py b/pdbeccdutils/core/ccd_reader.py index 09dff033..2c18e0b2 100644 --- a/pdbeccdutils/core/ccd_reader.py +++ b/pdbeccdutils/core/ccd_reader.py @@ -169,7 +169,8 @@ def _parse_pdb_mmcif(cif_block, sanitize=True): _handle_implicit_hydrogens(mol) if sanitize: - sanitized = mol_tools.sanitize(mol) + sanitized_result = mol_tools.sanitize(mol) + mol, sanitized = sanitized_result.mol, sanitized_result.status descriptors = _parse_pdb_descriptors( cif_block, "_pdbx_chem_comp_descriptor.", "descriptor" diff --git a/pdbeccdutils/core/clc_reader.py b/pdbeccdutils/core/clc_reader.py index 16d855a8..63e5585a 100644 --- a/pdbeccdutils/core/clc_reader.py +++ b/pdbeccdutils/core/clc_reader.py @@ -112,7 +112,8 @@ def infer_multiple_chem_comp(path_to_cif, bm, bm_id, sanitize=True): (mol, warnings, errors) = _parse_pdb_mmcif(cif_block, bm.graph) sanitized = False if sanitize: - sanitized = mol_tools.sanitize(mol) + sanitized_result = mol_tools.sanitize(mol) + mol, sanitized = sanitized_result.mol, sanitized_result.status inchi_result = mol_tools.inchi_from_mol(mol) if inchi_result.warnings: diff --git a/pdbeccdutils/core/models.py
b/pdbeccdutils/core/models.py index a90978c5..e03ce209 100755 --- a/pdbeccdutils/core/models.py +++ b/pdbeccdutils/core/models.py @@ -149,6 +149,19 @@ class DepictionResult(NamedTuple): score: float +class SanitisationResult(NamedTuple): + """ + Result of a molecule sanitisation attempt. + + Args: + mol: Molecule returned by the sanitisation routine (rdkit.Chem.rdchem.RWMol). + status: True if the sanitisation succeeded, False otherwise. + """ + + mol: Chem.rdchem.Mol + status: bool + + class Descriptor(NamedTuple): """ Descriptor obtained from the cif file. This is essentially diff --git a/pdbeccdutils/core/prd_reader.py b/pdbeccdutils/core/prd_reader.py index 757a109e..d4088898 100644 --- a/pdbeccdutils/core/prd_reader.py +++ b/pdbeccdutils/core/prd_reader.py @@ -137,7 +137,8 @@ def _parse_pdb_mmcif(cif_block, sanitize=True): ccd_reader._handle_implicit_hydrogens(mol) if sanitize: - sanitized = mol_tools.sanitize(mol) + sanitized_result = mol_tools.sanitize(mol) + mol, sanitized = sanitized_result.mol, sanitized_result.status descriptors = ccd_reader._parse_pdb_descriptors( cif_block, "_pdbx_chem_comp_descriptor.", "descriptor" diff --git a/pdbeccdutils/helpers/mol_tools.py b/pdbeccdutils/helpers/mol_tools.py index f5a3e384..ba62b1d2 100644 --- a/pdbeccdutils/helpers/mol_tools.py +++ b/pdbeccdutils/helpers/mol_tools.py @@ -22,7 +22,12 @@ import re import sys from io import StringIO -from pdbeccdutils.core.models import InChIFromRDKit, MolFromRDKit, ConformerType +from pdbeccdutils.core.models import ( + InChIFromRDKit, + MolFromRDKit, + ConformerType, + SanitisationResult, +) from contextlib import redirect_stderr import numpy as np @@ -79,12 +84,16 @@ def sanitize(rwmol): success = False try: - success = fix_molecule(rwmol) + mol_copy = rdkit.Chem.RWMol(rwmol) + success = fix_molecule(mol_copy) if not success: - return False + rdkit.Chem.SanitizeMol( + rwmol, sanitizeOps=rdkit.Chem.SanitizeFlags.SANITIZE_CLEANUP + ) + return SanitisationResult(mol=rwmol, status=False) - rdkit.Chem.Kekulize(rwmol) + rdkit.Chem.Kekulize(mol_copy) #
rdkit.Chem.rdmolops.AssignAtomChiralTagsFromStructure(rwmol, confId=0) # find correct conformer to assign stereochemistry @@ -93,7 +102,7 @@ def sanitize(rwmol): conformer_id = -1 conformer_types = [ConformerType.Ideal, ConformerType.Model] for conf_type in conformer_types: - conformer = get_conformer(rwmol, conf_type) + conformer = get_conformer(mol_copy, conf_type) if not is_degenerate_conformer(conformer): conformer_id = conformer.GetId() @@ -103,13 +112,16 @@ def sanitize(rwmol): # else: # conformer_id = conformers[0].GetId() - rdkit.Chem.rdmolops.AssignStereochemistryFrom3D(rwmol, conformer_id) + rdkit.Chem.rdmolops.AssignStereochemistryFrom3D(mol_copy, conformer_id) except Exception as e: print(e, file=sys.stderr) - return False + rdkit.Chem.SanitizeMol( + rwmol, sanitizeOps=rdkit.Chem.SanitizeFlags.SANITIZE_CLEANUP + ) + return SanitisationResult(mol=rwmol, status=False) - return success + return SanitisationResult(mol=mol_copy, status=success) def get_conformer(rwmol, c_type):