diff --git a/molpipeline/abstract_pipeline_elements/any2mol/string2mol.py b/molpipeline/abstract_pipeline_elements/any2mol/string2mol.py index cc1f5c53..9bd6ac75 100644 --- a/molpipeline/abstract_pipeline_elements/any2mol/string2mol.py +++ b/molpipeline/abstract_pipeline_elements/any2mol/string2mol.py @@ -4,8 +4,11 @@ import abc -from molpipeline.abstract_pipeline_elements.core import AnyToMolPipelineElement -from molpipeline.utils.molpipeline_types import OptionalMol +from molpipeline.abstract_pipeline_elements.core import ( + AnyToMolPipelineElement, + InvalidInstance, +) +from molpipeline.utils.molpipeline_types import OptionalMol, RDKitMol class StringToMolPipelineElement(AnyToMolPipelineElement, abc.ABC): @@ -43,3 +46,60 @@ def pretransform_single(self, value: str) -> OptionalMol: OptionalMol RDKit molecule if representation was valid, else InvalidInstance. """ + + +class SimpleStringToMolElement(StringToMolPipelineElement, abc.ABC): + """Transforms string representation to RDKit Mol objects.""" + + def pretransform_single(self, value: str) -> OptionalMol: + """Transform string to molecule. + + Parameters + ---------- + value: str + string representation. + + Returns + ------- + OptionalMol + Rdkit molecule if valid string representation, else InvalidInstance. + """ + if value is None: + return InvalidInstance( + self.uuid, + f"Invalid representation: {value}", + self.name, + ) + + if not isinstance(value, str): + return InvalidInstance( + self.uuid, + f"Not a string: {value}", + self.name, + ) + + mol: RDKitMol = self.string_to_mol(value) + + if not mol: + return InvalidInstance( + self.uuid, + f"Invalid representation: {value}", + self.name, + ) + mol.SetProp("identifier", value) + return mol + + @abc.abstractmethod + def string_to_mol(self, value: str) -> RDKitMol: + """Transform string representation to molecule. + + Parameters + ---------- + value: str + string representation + + Returns + ------- + RDKitMol + Rdkit molecule if valid representation, else None. 
+ """ diff --git a/molpipeline/any2mol/__init__.py b/molpipeline/any2mol/__init__.py index 5b8b2da3..c4dabadd 100644 --- a/molpipeline/any2mol/__init__.py +++ b/molpipeline/any2mol/__init__.py @@ -2,6 +2,7 @@ from molpipeline.any2mol.auto2mol import AutoToMol from molpipeline.any2mol.bin2mol import BinaryToMol +from molpipeline.any2mol.inchi2mol import InchiToMol from molpipeline.any2mol.sdf2mol import SDFToMol from molpipeline.any2mol.smiles2mol import SmilesToMol @@ -9,5 +10,6 @@ "AutoToMol", "BinaryToMol", "SmilesToMol", + "InchiToMol", "SDFToMol", ] diff --git a/molpipeline/any2mol/auto2mol.py b/molpipeline/any2mol/auto2mol.py index b33ee2d8..925b7c95 100644 --- a/molpipeline/any2mol/auto2mol.py +++ b/molpipeline/any2mol/auto2mol.py @@ -9,6 +9,7 @@ InvalidInstance, ) from molpipeline.any2mol.bin2mol import BinaryToMol +from molpipeline.any2mol.inchi2mol import InchiToMol from molpipeline.any2mol.sdf2mol import SDFToMol from molpipeline.any2mol.smiles2mol import SmilesToMol from molpipeline.utils.molpipeline_types import OptionalMol, RDKitMol @@ -29,6 +30,7 @@ def __init__( uuid: Optional[str] = None, elements: tuple[AnyToMolPipelineElement, ...] = ( SmilesToMol(), + InchiToMol(), BinaryToMol(), SDFToMol(), ), @@ -44,7 +46,7 @@ def __init__( uuid: str, optional (default=None) Unique identifier of PipelineElement. elements: tuple[AnyToMol, ...], optional (default=(SmilesToMol(), - BinaryToMol(), SDFToMol())) + InchiToMol(), BinaryToMol(), SDFToMol())) Elements to try to transform the input to a molecule. 
""" super().__init__(name=name, n_jobs=n_jobs, uuid=uuid) diff --git a/molpipeline/any2mol/inchi2mol.py b/molpipeline/any2mol/inchi2mol.py new file mode 100644 index 00000000..4c881843 --- /dev/null +++ b/molpipeline/any2mol/inchi2mol.py @@ -0,0 +1,27 @@ +"""Classes meant to transform a given InChI string to an RDKit molecule.""" + +from rdkit import Chem + +from molpipeline.abstract_pipeline_elements.any2mol.string2mol import ( + SimpleStringToMolElement, +) +from molpipeline.utils.molpipeline_types import RDKitMol + + +class InchiToMol(SimpleStringToMolElement): + """Transforms Inchi to RDKit Mol objects.""" + + def string_to_mol(self, value: str) -> RDKitMol: + """Transform Inchi string to molecule. + + Parameters + ---------- + value: str + Inchi string. + + Returns + ------- + RDKitMol + Rdkit molecule if valid Inchi, else None. + """ + return Chem.MolFromInchi(value) diff --git a/molpipeline/any2mol/smiles2mol.py b/molpipeline/any2mol/smiles2mol.py index 79db23bd..0d7c45e6 100644 --- a/molpipeline/any2mol/smiles2mol.py +++ b/molpipeline/any2mol/smiles2mol.py @@ -5,16 +5,15 @@ from rdkit import Chem from molpipeline.abstract_pipeline_elements.any2mol.string2mol import ( - StringToMolPipelineElement as _StringToMolPipelineElement, + SimpleStringToMolElement, ) -from molpipeline.abstract_pipeline_elements.core import InvalidInstance -from molpipeline.utils.molpipeline_types import OptionalMol, RDKitMol +from molpipeline.utils.molpipeline_types import RDKitMol -class SmilesToMol(_StringToMolPipelineElement): +class SmilesToMol(SimpleStringToMolElement): """Transforms Smiles to RDKit Mol objects.""" - def pretransform_single(self, value: str) -> OptionalMol: + def string_to_mol(self, value: str) -> RDKitMol: """Transform Smiles string to molecule. Parameters @@ -24,30 +23,7 @@ def pretransform_single(self, value: str) -> OptionalMol: Returns ------- - OptionalMol + RDKitMol Rdkit molecule if valid SMILES, else None. 
""" - if value is None: - return InvalidInstance( - self.uuid, - f"Invalid SMILES: {value}", - self.name, - ) - - if not isinstance(value, str): - return InvalidInstance( - self.uuid, - f"Not a string: {value}", - self.name, - ) - - mol: RDKitMol = Chem.MolFromSmiles(value) - - if not mol: - return InvalidInstance( - self.uuid, - f"Invalid SMILES: {value}", - self.name, - ) - mol.SetProp("identifier", value) - return mol + return Chem.MolFromSmiles(value) diff --git a/tests/test_elements/test_any2mol/test_auto2mol.py b/tests/test_elements/test_any2mol/test_auto2mol.py index 06726a24..9cbad60b 100644 --- a/tests/test_elements/test_any2mol/test_auto2mol.py +++ b/tests/test_elements/test_any2mol/test_auto2mol.py @@ -17,6 +17,9 @@ SMILES_CL_BR = "NC(Cl)(Br)C(=O)O" SMILES_METAL_AU = "OC[C@H]1OC(S[Au])[C@H](O)[C@@H](O)[C@@H]1O" +INCHI_BENZENE = "InChI=1S/C6H6/c1-2-4-6-5-3-1/h1-6H" +INCHI_CHLOROBENZENE = "InChI=1S/C6H5Cl/c7-6-4-2-1-3-5-6/h1-5H" + # SDF with gzip.open(TEST_DATA_DIR / "P86_B_400.sdf.gz") as file: SDF_P86_B_400 = file.read() @@ -82,6 +85,31 @@ def test_auto2mol_for_smiles(self) -> None: ) del log_block + def test_auto2mol_for_inchi(self) -> None: + """Test molecules can be read from inchi automatically.""" + + test_inchis = [INCHI_BENZENE, INCHI_CHLOROBENZENE] + expected_mols = [MOL_BENZENE, MOL_CHLOROBENZENE] + + pipeline = Pipeline( + [ + ( + "Auto2Mol", + AutoToMol(), + ), + ] + ) + log_block = rdBase.BlockLogs() + actual_mols = pipeline.fit_transform(test_inchis) + self.assertEqual(len(test_inchis), len(actual_mols)) + self.assertTrue( + all( + Chem.MolToInchi(smiles_mol) == Chem.MolToInchi(original_mol) + for smiles_mol, original_mol in zip(actual_mols, expected_mols) + ) + ) + del log_block + def test_auto2mol_for_sdf(self) -> None: """Test molecules can be read from sdf automatically."""