Skip to content

Commit

Permalink
Inchitomol (#86)
Browse files Browse the repository at this point in the history
* fix molsanitize exception error catching

* linting

* isort on other stuff

* add inchitomol element
  • Loading branch information
frederik-sandfort1 authored Sep 10, 2024
1 parent 8040ebe commit 69a4577
Show file tree
Hide file tree
Showing 6 changed files with 128 additions and 33 deletions.
64 changes: 62 additions & 2 deletions molpipeline/abstract_pipeline_elements/any2mol/string2mol.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,11 @@

import abc

from molpipeline.abstract_pipeline_elements.core import AnyToMolPipelineElement
from molpipeline.utils.molpipeline_types import OptionalMol
from molpipeline.abstract_pipeline_elements.core import (
AnyToMolPipelineElement,
InvalidInstance,
)
from molpipeline.utils.molpipeline_types import OptionalMol, RDKitMol


class StringToMolPipelineElement(AnyToMolPipelineElement, abc.ABC):
Expand Down Expand Up @@ -43,3 +46,60 @@ def pretransform_single(self, value: str) -> OptionalMol:
OptionalMol
RDKit molecule if representation was valid, else InvalidInstance.
"""


class SimpleStringToMolElement(StringToMolPipelineElement, abc.ABC):
    """Base class for elements that parse one string into an RDKit Mol.

    Subclasses only implement ``string_to_mol``; input validation and the
    wrapping of failures in ``InvalidInstance`` are handled here.
    """

    def pretransform_single(self, value: str) -> OptionalMol:
        """Transform string to molecule.

        Parameters
        ----------
        value: str
            string representation.

        Returns
        -------
        OptionalMol
            RDKit molecule if the string representation is valid, else an
            InvalidInstance describing why the input was rejected.
        """
        if value is None:
            # None gets the "invalid representation" message rather than
            # the "not a string" one, so it is checked first.
            failure_reason = f"Invalid representation: {value}"
        elif not isinstance(value, str):
            failure_reason = f"Not a string: {value}"
        else:
            parsed: RDKitMol = self.string_to_mol(value)
            if parsed:
                # Keep the original input string on the molecule.
                parsed.SetProp("identifier", value)
                return parsed
            failure_reason = f"Invalid representation: {value}"

        return InvalidInstance(self.uuid, failure_reason, self.name)

    @abc.abstractmethod
    def string_to_mol(self, value: str) -> RDKitMol:
        """Transform string representation to molecule.

        Parameters
        ----------
        value: str
            string representation

        Returns
        -------
        RDKitMol
            RDKit molecule if valid representation, else None.
        """
2 changes: 2 additions & 0 deletions molpipeline/any2mol/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,14 @@

from molpipeline.any2mol.auto2mol import AutoToMol
from molpipeline.any2mol.bin2mol import BinaryToMol
from molpipeline.any2mol.inchi2mol import InchiToMol
from molpipeline.any2mol.sdf2mol import SDFToMol
from molpipeline.any2mol.smiles2mol import SmilesToMol

# Names exported by ``from molpipeline.any2mol import *``.
__all__ = [
"AutoToMol",
"BinaryToMol",
"SmilesToMol",
"InchiToMol",
"SDFToMol",
]
4 changes: 3 additions & 1 deletion molpipeline/any2mol/auto2mol.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
InvalidInstance,
)
from molpipeline.any2mol.bin2mol import BinaryToMol
from molpipeline.any2mol.inchi2mol import InchiToMol
from molpipeline.any2mol.sdf2mol import SDFToMol
from molpipeline.any2mol.smiles2mol import SmilesToMol
from molpipeline.utils.molpipeline_types import OptionalMol, RDKitMol
Expand All @@ -29,6 +30,7 @@ def __init__(
uuid: Optional[str] = None,
elements: tuple[AnyToMolPipelineElement, ...] = (
SmilesToMol(),
InchiToMol(),
BinaryToMol(),
SDFToMol(),
),
Expand All @@ -44,7 +46,7 @@ def __init__(
uuid: str, optional (default=None)
Unique identifier of PipelineElement.
elements: tuple[AnyToMol, ...], optional (default=(SmilesToMol(),
BinaryToMol(), SDFToMol()))
InchiToMol(), BinaryToMol(), SDFToMol()))
Elements to try to transform the input to a molecule.
"""
super().__init__(name=name, n_jobs=n_jobs, uuid=uuid)
Expand Down
27 changes: 27 additions & 0 deletions molpipeline/any2mol/inchi2mol.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
"""Classes ment to transform given inchi to a RDKit molecule."""

from rdkit import Chem

from molpipeline.abstract_pipeline_elements.any2mol.string2mol import (
SimpleStringToMolElement,
)
from molpipeline.utils.molpipeline_types import RDKitMol


class InchiToMol(SimpleStringToMolElement):
    """Pipeline element that parses InChI strings into RDKit Mol objects."""

    def string_to_mol(self, value: str) -> RDKitMol:
        """Transform Inchi string to molecule.

        Parameters
        ----------
        value: str
            Inchi string.

        Returns
        -------
        RDKitMol
            Result of ``Chem.MolFromInchi``: an RDKit molecule if the
            Inchi is valid, else None.
        """
        parsed_mol = Chem.MolFromInchi(value)
        return parsed_mol
36 changes: 6 additions & 30 deletions molpipeline/any2mol/smiles2mol.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,16 +5,15 @@
from rdkit import Chem

from molpipeline.abstract_pipeline_elements.any2mol.string2mol import (
StringToMolPipelineElement as _StringToMolPipelineElement,
SimpleStringToMolElement,
)
from molpipeline.abstract_pipeline_elements.core import InvalidInstance
from molpipeline.utils.molpipeline_types import OptionalMol, RDKitMol
from molpipeline.utils.molpipeline_types import RDKitMol


class SmilesToMol(_StringToMolPipelineElement):
class SmilesToMol(SimpleStringToMolElement):
"""Transforms Smiles to RDKit Mol objects."""

def pretransform_single(self, value: str) -> OptionalMol:
def string_to_mol(self, value: str) -> RDKitMol:
"""Transform Smiles string to molecule.
Parameters
Expand All @@ -24,30 +23,7 @@ def pretransform_single(self, value: str) -> OptionalMol:
Returns
-------
OptionalMol
RDKitMol
Rdkit molecule if valid SMILES, else None.
"""
if value is None:
return InvalidInstance(
self.uuid,
f"Invalid SMILES: {value}",
self.name,
)

if not isinstance(value, str):
return InvalidInstance(
self.uuid,
f"Not a string: {value}",
self.name,
)

mol: RDKitMol = Chem.MolFromSmiles(value)

if not mol:
return InvalidInstance(
self.uuid,
f"Invalid SMILES: {value}",
self.name,
)
mol.SetProp("identifier", value)
return mol
return Chem.MolFromSmiles(value)
28 changes: 28 additions & 0 deletions tests/test_elements/test_any2mol/test_auto2mol.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,9 @@
SMILES_CL_BR = "NC(Cl)(Br)C(=O)O"
SMILES_METAL_AU = "OC[C@H]1OC(S[Au])[C@H](O)[C@@H](O)[C@@H]1O"

INCHI_BENZENE = "InChI=1S/C6H6/c1-2-4-6-5-3-1/h1-6H"
INCHI_CHLOROBENZENE = "InChI=1S/C6H5Cl/c7-6-4-2-1-3-5-6/h1-5H"

# SDF
with gzip.open(TEST_DATA_DIR / "P86_B_400.sdf.gz") as file:
SDF_P86_B_400 = file.read()
Expand Down Expand Up @@ -82,6 +85,31 @@ def test_auto2mol_for_smiles(self) -> None:
)
del log_block

def test_auto2mol_for_inchi(self) -> None:
    """Test molecules can be read from inchi automatically."""
    inchi_inputs = [INCHI_BENZENE, INCHI_CHLOROBENZENE]
    reference_mols = [MOL_BENZENE, MOL_CHLOROBENZENE]

    pipeline = Pipeline([("Auto2Mol", AutoToMol())])

    # Keep the BlockLogs object alive while the pipeline runs; it is
    # released explicitly at the end of the test.
    log_block = rdBase.BlockLogs()
    parsed_mols = pipeline.fit_transform(inchi_inputs)

    self.assertEqual(len(inchi_inputs), len(parsed_mols))
    # Compare molecules via their InChI rather than by object identity.
    self.assertTrue(
        all(
            Chem.MolToInchi(parsed_mol) == Chem.MolToInchi(reference_mol)
            for parsed_mol, reference_mol in zip(parsed_mols, reference_mols)
        )
    )
    del log_block

def test_auto2mol_for_sdf(self) -> None:
"""Test molecules can be read from sdf automatically."""

Expand Down

0 comments on commit 69a4577

Please sign in to comment.