Source code for scm.plams.interfaces.molecule.rdkit

from importlib.util import find_spec
from typing import List, Literal, Optional, overload

if find_spec("rdkit"):
    __all__ = [
        "add_Hs",
        "apply_reaction_smarts",
        "apply_template",
        "gen_coords_rdmol",
        "get_backbone_atoms",
        "modify_atom",
        "to_rdmol",
        "from_rdmol",
        "from_sequence",
        "from_smiles",
        "from_smarts",
        "to_smiles",
        "partition_protein",
        "readpdb",
        "writepdb",
        "get_substructure",
        "get_conformations",
        "yield_coords",
        "canonicalize_mol",
    ]
else:
    __all__ = []

"""
@author: Lars Ridder
@description: A set of functions to manipulate molecules based on RDKit

This is a series of functions that apply RDKit functionality on PLAMS molecules
"""

import random
import sys
from warnings import warn

from scm.plams.core.errors import PlamsError
from scm.plams.core.functions import add_to_class, log
from scm.plams.mol.atom import Atom
from scm.plams.mol.bond import Bond
from scm.plams.mol.molecule import Molecule


[docs]def from_rdmol(rdkit_mol, confid=-1, properties=True): """ Translate an RDKit molecule into a PLAMS molecule type. RDKit properties will be unpickled if their name ends with '_pickled'. :parameter rdkit_mol: RDKit molecule :type rdkit_mol: rdkit.Chem.Mol :parameter int confid: conformer identifier from which to take coordinates :parameter bool properties: If all Chem.Mol, Chem.Atom and Chem.Bond properties should be converted from RDKit to PLAMS format. :return: a PLAMS molecule :rtype: |Molecule| """ from rdkit import Chem if isinstance(rdkit_mol, Molecule): return rdkit_mol # Create PLAMS molecule plams_mol = Molecule() total_charge = 0 try: Chem.Kekulize(rdkit_mol) except Exception: pass conf = rdkit_mol.GetConformer(id=confid) # Add atoms and assign properties to the PLAMS atom if *properties* = True for rd_atom in rdkit_mol.GetAtoms(): pos = conf.GetAtomPosition(rd_atom.GetIdx()) ch = rd_atom.GetFormalCharge() pl_atom = Atom(rd_atom.GetAtomicNum(), coords=(pos.x, pos.y, pos.z), rdkit={"charge": ch}) if properties and rd_atom.GetPDBResidueInfo(): pl_atom.properties.rdkit.pdb_info = get_PDBResidueInfo(rd_atom) plams_mol.add_atom(pl_atom) total_charge += ch # Check for R/S information stereo = str(rd_atom.GetChiralTag()) if stereo == "CHI_TETRAHEDRAL_CCW": pl_atom.properties.rdkit.stereo = "counter-clockwise" elif stereo == "CHI_TETRAHEDRAL_CW": pl_atom.properties.rdkit.stereo = "clockwise" # Add bonds to the PLAMS molecule for bond in rdkit_mol.GetBonds(): at1 = plams_mol.atoms[bond.GetBeginAtomIdx()] at2 = plams_mol.atoms[bond.GetEndAtomIdx()] plams_mol.add_bond(Bond(at1, at2, bond.GetBondTypeAsDouble())) # Check for cis/trans information stereo, bond_dir = str(bond.GetStereo()), str(bond.GetBondDir()) if stereo == "STEREOZ" or stereo == "STEREOCIS": plams_mol.bonds[-1].properties.rdkit.stereo = "Z" elif stereo == "STEREOE" or stereo == "STEREOTRANS": plams_mol.bonds[-1].properties.rdkit.stereo = "E" elif bond_dir == "ENDUPRIGHT": plams_mol.bonds[-1].properties.rdkit.stereo = "up" elif bond_dir == "ENDDOWNRIGHT": plams_mol.bonds[-1].properties.rdkit.stereo = "down" # Set charge and assign properties to PLAMS molecule and bonds if *properties* = True plams_mol.properties.charge = total_charge if properties: prop_from_rdmol(plams_mol, rdkit_mol) for rd_atom, plams_atom in zip(rdkit_mol.GetAtoms(), plams_mol): prop_from_rdmol(plams_atom, rd_atom) for rd_bond, plams_bond in zip(rdkit_mol.GetBonds(), plams_mol.bonds): prop_from_rdmol(plams_bond, rd_bond) return plams_mol
[docs]def to_rdmol(plams_mol, sanitize=True, properties=True, assignChirality=False): """ Translate a PLAMS molecule into an RDKit molecule type. PLAMS |Molecule|, |Atom| or |Bond| properties are pickled if they are neither booleans, floats, integers, floats nor strings, the resulting property names are appended with '_pickled'. :parameter plams_mol: A PLAMS molecule :parameter bool sanitize: Kekulize, check valencies, set aromaticity, conjugation and hybridization :parameter bool properties: If all |Molecule|, |Atom| and |Bond| properties should be converted from PLAMS to RDKit format. :parameter bool assignChirality: Assign R/S and cis/trans information, insofar as this was not yet present in the PLAMS molecule. :type plams_mol: |Molecule| :return: an RDKit molecule :rtype: rdkit.Chem.Mol """ from rdkit import Chem, Geometry if isinstance(plams_mol, Chem.Mol): return plams_mol # Create rdkit molecule e = Chem.EditableMol(Chem.Mol()) # Add atoms and assign properties to the RDKit atom if *properties* = True for pl_atom in plams_mol.atoms: rd_atom = Chem.Atom(int(pl_atom.atnum)) if "rdkit" in pl_atom.properties: if "charge" in pl_atom.properties.rdkit: rd_atom.SetFormalCharge(pl_atom.properties.rdkit.charge) if properties: if "rdkit" in pl_atom.properties: if "pdb_info" in pl_atom.properties.rdkit: set_PDBresidueInfo(rd_atom, pl_atom.properties.rdkit.pdb_info) for prop in pl_atom.properties.rdkit: if prop not in ("charge", "pdb_info", "stereo"): prop_to_rdmol(rd_atom, prop, pl_atom.properties.rdkit.get(prop)) prop_dic = {} for prop in pl_atom.properties: if prop != "rdkit": prop_dic[prop] = pl_atom.properties.get(prop) if len(prop_dic) > 0: prop_to_rdmol(rd_atom, "plams", prop_dic) # Check for R/S information if pl_atom.properties.rdkit.stereo: stereo = pl_atom.properties.rdkit.stereo.lower() if stereo == "counter-clockwise": rd_atom.SetChiralTag(Chem.rdchem.ChiralType.CHI_TETRAHEDRAL_CCW) elif stereo == "clockwise": rd_atom.SetChiralTag(Chem.rdchem.ChiralType.CHI_TETRAHEDRAL_CW) e.AddAtom(rd_atom) # Mapping of PLAMS bond orders to RDKit bond types: def plams_to_rd_bonds(bo): if 1.4 < bo < 1.6: return 12 # bond type for aromatic bond else: return int(bo) # Add bonds to the RDKit molecule for bond in plams_mol.bonds: a1 = plams_mol.atoms.index(bond.atom1) a2 = plams_mol.atoms.index(bond.atom2) e.AddBond(a1, a2, Chem.BondType(plams_to_rd_bonds(bond.order))) rdmol = e.GetMol() # Check for cis/trans information for pl_bond, rd_bond in zip(plams_mol.bonds, rdmol.GetBonds()): if pl_bond.properties.rdkit.stereo: stereo = pl_bond.properties.rdkit.stereo.lower() if stereo == "e" or stereo == "trans": rd_bond.SetStereo(Chem.rdchem.BondStereo.STEREOE) elif stereo == "z" or stereo == "cis": rd_bond.SetStereo(Chem.rdchem.BondStereo.STEREOZ) elif stereo == "up": rd_bond.SetBondDir(Chem.rdchem.BondDir.ENDUPRIGHT) elif stereo == "down": rd_bond.SetBondDir(Chem.rdchem.BondDir.ENDDOWNRIGHT) # Assign properties to RDKit molecule and bonds if *properties* = True # All properties will be taken from 'rdkit' subsettings, except the molecular charge if properties: prop_dic = {} for prop in plams_mol.properties: if prop == "rdkit": for rdprop in plams_mol.properties.rdkit: prop_to_rdmol(rdmol, rdprop, plams_mol.properties.rdkit.get(rdprop)) else: # prop_dic[prop] = {'plams':plams_mol.properties.get(prop)} prop_dic[prop] = plams_mol.properties.get(prop) if len(prop_dic) > 0: prop_to_rdmol(rdmol, "plams", prop_dic) prop_dic = {} for pl_bond, rd_bond in zip(plams_mol.bonds, rdmol.GetBonds()): for prop in pl_bond.properties: if prop == "rdkit": for rdprop in pl_bond.properties.rdkit: if rdprop != "stereo": prop_to_rdmol(rd_bond, rdprop, pl_bond.properties.rdkit.get(rdprop)) else: prop_dic[prop] = pl_bond.properties.get(prop) if len(prop_dic) > 0: prop_to_rdmol(rd_bond, "plams", prop_dic) if sanitize: try: Chem.SanitizeMol(rdmol) except ValueError as exc: # rdkit_flag = Chem.SanitizeMol(rdmol,catchErrors=True) # log ('RDKit Sanitization Error. Failed Operation Flag = %s'%(rdkit_flag)) log("RDKit Sanitization Error.") text = "Most likely this is a problem with the assigned bond orders: " text += "Use chemical insight to adjust them." log(text) log("Note that the atom indices below start at zero, while the AMS-GUI indices start at 1.") raise exc conf = Chem.Conformer() for i, atom in enumerate(plams_mol.atoms): xyz = Geometry.Point3D(atom._getx(), atom._gety(), atom._getz()) conf.SetAtomPosition(i, xyz) rdmol.AddConformer(conf) # REB: Assign all stereochemistry, if it wasn't already there if assignChirality: Chem.rdmolops.AssignAtomChiralTagsFromStructure(rdmol, confId=conf.GetId(), replaceExistingTags=False) try: Chem.AssignStereochemistryFrom3D(rdmol, confId=conf.GetId(), replaceExistingTags=False) except AttributeError: pass return rdmol
[docs]def to_smiles(plams_mol, short_smiles=True, **kwargs): """ Returns the RDKit-generated SMILES string of a PLAMS molecule. Note: SMILES strings are generated based on the molecule's connectivity. If the input PLAMS molecule does not contain any bonds, "guessed bonds" will be used. :parameter plams_mol: A PLAMS |Molecule| :parameter bool short_smiles: whether or not to use some RDKit sanitization to get shorter smiles (e.g. for a water molecule, short_smiles=True -> "O", short_smiles=False -> [H]O[H]) :parameter \**kwargs: With 'kwargs' you can provide extra optional parameters to the rdkit.Chem method 'MolToSmiles'. See the rdkit documentation for more info. :return: the SMILES string """ from rdkit import Chem if len(plams_mol.bonds) > 0: mol_with_bonds = plams_mol else: mol_with_bonds = plams_mol.copy() mol_with_bonds.guess_bonds() rd_mol = to_rdmol(mol_with_bonds, sanitize=False) # This sanitization black magic is needed for getting the "short, nice and clean" SMILES string. # Without this, the SMILES string for water would be "[H]O[H]". With this is just "O" if short_smiles: s = Chem.rdmolops.SanitizeFlags rdkitSanitizeOptions = ( s.SANITIZE_ADJUSTHS or s.SANITIZE_CLEANUP or s.SANITIZE_CLEANUPCHIRALITY or s.SANITIZE_FINDRADICALS or s.SANITIZE_PROPERTIES or s.SANITIZE_SETAROMATICITY or s.SANITIZE_SETCONJUGATION or s.SANITIZE_SETHYBRIDIZATION or s.SANITIZE_SYMMRINGS ) Chem.rdmolops.AssignRadicals(rd_mol) rd_mol = Chem.rdmolops.RemoveHs(rd_mol, updateExplicitCount=True, sanitize=False) Chem.rdmolops.SanitizeMol(rd_mol, rdkitSanitizeOptions) smiles = Chem.MolToSmiles(rd_mol, **kwargs) return smiles
pdb_residue_info_items = [ "AltLoc", "ChainId", "InsertionCode", "IsHeteroAtom", "Name", "Occupancy", "ResidueName", "ResidueNumber", "SecondaryStructure", "SegmentNumber", "SerialNumber", "TempFactor", ] # 'MonomerType' was excluded because it is an rdkit type that cannot easilty be serialized def get_PDBResidueInfo(rdkit_atom): pdb_info = {} for item in pdb_residue_info_items: get_function = "Get" + item pdb_info[item] = rdkit_atom.GetPDBResidueInfo().__getattribute__(get_function)() return pdb_info def set_PDBresidueInfo(rdkit_atom, pdb_info): from rdkit import Chem atom_pdb_residue_info = Chem.AtomPDBResidueInfo() for item, value in pdb_info.items(): set_function = "Set" + item atom_pdb_residue_info.__getattribute__(set_function)(value) rdkit_atom.SetMonomerInfo(atom_pdb_residue_info) def prop_to_rdmol(rd_obj, propkey, propvalue): """ Convert a single PLAMS property into an RDKit property. :paramter pl_obj: A PLAMS object. :type pl_obj: |Molecule|, |Atom| or |Bond|. :parameter rd_obj: An RDKit object. :type rd_obj: rdkit.Chem.Mol, rdkit.Chem.Atom or rdkit.Chem.Bond :parameter str propkey: The |Settings| key of the PLAMS property. """ try: import dill as pickle except ImportError: import pickle obj = type(propvalue) obj_dict = {bool: rd_obj.SetBoolProp, float: rd_obj.SetDoubleProp, int: rd_obj.SetIntProp, str: rd_obj.SetProp} if obj_dict.get(obj): obj_dict[obj](propkey, propvalue) else: name = propkey + "_pickled" try: rd_obj.SetProp(name, pickle.dumps(propvalue, 0).decode()) except (Exception, pickle.PicklingError): pass def prop_from_rdmol(pl_obj, rd_obj): """ Convert one or more RDKit properties into PLAMS properties. :paramter pl_obj: A PLAMS object. :type pl_obj: |Molecule|, |Atom| or |Bond|. :parameter rd_obj: An RDKit object. :type rd_obj: rdkit.Chem.Mol, rdkit.Chem.Atom or rdkit.Chem.Bond """ try: import dill as pickle except ImportError: import pickle prop_dict = rd_obj.GetPropsAsDict() for propname in prop_dict.keys(): if propname == "plams_pickled": plams_props = pickle.loads(prop_dict[propname].encode()) if not isinstance(plams_props, dict): raise Exception("PLAMS property not properly stored in RDKit") for key, value in plams_props.items(): pl_obj.properties[key] = value else: if propname == "__computedProps": continue if "_pickled" not in propname: pl_obj.properties.rdkit[propname] = prop_dict[propname] else: prop = prop_dict[propname] propname = propname.rsplit("_pickled", 1)[0] propvalue = pickle.loads(prop.encode()) pl_obj.properties.rdkit[propname] = propvalue @overload def from_smiles( smiles: str, nconfs: Literal[1] = ..., name: Optional[str] = ..., forcefield: Optional[str] = ..., rms: float = ... ) -> Molecule: ... @overload def from_smiles( smiles: str, nconfs: int = ..., name: Optional[str] = ..., forcefield: Optional[str] = ..., rms: float = ... ) -> List[Molecule]: ...
[docs]def from_smiles( smiles: str, nconfs: int = 1, name: Optional[str] = None, forcefield: Optional[str] = None, rms: float = 0.1 ): """ Generates PLAMS molecule(s) from a smiles strings. :parameter str smiles: A smiles string :parameter int nconfs: Number of conformers to be generated :parameter str name: A name for the molecule :parameter str forcefield: Choose 'uff' or 'mmff' forcefield for geometry optimization and ranking of comformations. The default value None results in skipping of the geometry optimization step. :parameter float rms: Root Mean Square deviation threshold for removing similar/equivalent conformations :return: A molecule with hydrogens and 3D coordinates or a list of molecules if nconfs > 1 :rtype: |Molecule| or list of PLAMS Molecules """ from rdkit import Chem smiles = str(smiles.split()[0]) smiles = Chem.CanonSmiles(smiles) rdkit_mol = Chem.AddHs(Chem.MolFromSmiles(smiles)) rdkit_mol.SetProp("smiles", smiles) return get_conformations(rdkit_mol, nconfs, name, forcefield, rms)
[docs]def from_smarts(smarts, nconfs=1, name=None, forcefield=None, rms=0.1): """ Generates PLAMS molecule(s) from a smarts strings. This allows for example to define hydrogens explicitly. However it is less suitable for aromatic molecules (use from_smiles in that case). :parameter str smarts: A smarts string :parameter int nconfs: Number of conformers to be generated :parameter str name: A name for the molecule :parameter str forcefield: Choose 'uff' or 'mmff' forcefield for geometry optimization and ranking of comformations. The default value None results in skipping of the geometry optimization step. :parameter float rms: Root Mean Square deviation threshold for removing similar/equivalent conformations. :return: A molecule with hydrogens and 3D coordinates or a list of molecules if nconfs > 1 :rtype: |Molecule| or list of PLAMS Molecules """ from rdkit import Chem smiles = str(smarts.split()[0]) mol = Chem.MolFromSmarts(smiles) Chem.SanitizeMol(mol) molecule = Chem.AddHs(mol) molecule.SetProp("smiles", smiles) return get_conformations(molecule, nconfs, name, forcefield, rms)
[docs]def get_conformations( mol, nconfs=1, name=None, forcefield=None, rms=-1, enforceChirality=False, useExpTorsionAnglePrefs="default", constraint_ats=None, EmbedParameters="EmbedParameters", randomSeed=1, best_rms=-1, ): """ Generates 3D conformation(s) for an rdkit_mol or a PLAMS Molecule :parameter mol: RDKit or PLAMS Molecule :type mol: rdkit.Chem.Mol or |Molecule| :parameter int nconfs: Number of conformers to be generated :parameter str name: A name for the molecule :parameter str forcefield: Choose 'uff' or 'mmff' forcefield for geometry optimization and ranking of comformations. The default value None results in skipping of the geometry optimization step :parameter float rms: Root Mean Square deviation threshold for removing similar/equivalent conformations. :parameter float best_rms: Root Mean Square deviation of best atomic permutation for removing similar/equivalent conformations. :parameter bool enforceChirality: Enforce the correct chirality if chiral centers are present :parameter str useExpTorsionAnglePrefs: Use experimental torsion angles preferences for the conformer generation by rdkit :parameter list constraint_ats: List of atom indices to be constrained :parameter str EmbedParameters: Name of RDKit EmbedParameters class ('EmbedParameters', 'ETKDG') :parameter int randomSeed: The seed for the random number generator. If set to None the generated conformers will be non-deterministic. :return: A molecule with hydrogens and 3D coordinates or a list of molecules if nconfs > 1 :rtype: |Molecule| or list of PLAMS Molecules """ from rdkit import Chem from rdkit.Chem import AllChem if isinstance(mol, Molecule): if not mol.bonds: mol.guess_bonds() rdkit_mol = to_rdmol(mol, assignChirality=enforceChirality) else: rdkit_mol = mol def MMFFenergy(cid): ff = AllChem.MMFFGetMoleculeForceField(rdkit_mol, AllChem.MMFFGetMoleculeProperties(rdkit_mol), confId=cid) try: energy = ff.CalcEnergy() except: msg = ( "MMFF energy calculation failed for molecule: " + Chem.MolToSmiles(rdkit_mol) + "\nNo geometry optimization was performed." ) warn(msg) energy = 1e9 return energy def UFFenergy(cid): ff = AllChem.UFFGetMoleculeForceField(rdkit_mol, confId=cid) try: energy = ff.CalcEnergy() except: msg = ( "MMFF energy calculation failed for molecule: " + Chem.MolToSmiles(rdkit_mol) + "\nNo geometry optimization was performed." ) warn(msg) energy = 1e9 return energy def remove_some_Hs(m): res = Chem.RWMol(m) c_hs = [x[0] for x in m.GetSubstructMatches(Chem.MolFromSmarts("[#1;$([#1]-[#6])]"))] c_hs.sort(reverse=True) for aid in c_hs: res.RemoveAtom(aid) return res.GetMol() if name: rdkit_mol.SetProp("name", name) if best_rms > 0: if rms > 0: raise PlamsError("Cannot set both rms and best_rms") rms = best_rms # if enforceChirality : # # This is how chirality is enforced in the GUI. The argument is not passed to AllChem.EmbedMultipleConfs # Chem.AssignAtomChiralTagsFromStructure(rdkit_mol) # param_obj = AllChem.ETKDG() param_obj = getattr(AllChem, EmbedParameters)() param_obj.pruneRmsThresh = rms param_obj.randomSeed = randomSeed if randomSeed is not None else random.getrandbits(31) param_obj.enforceChirality = enforceChirality if useExpTorsionAnglePrefs != "default": # The default (True of False) changes with rdkit versions param_obj.useExpTorsionAnglePrefs = True if constraint_ats is not None: coordMap = {} for i, iat in enumerate(constraint_ats): coordMap[iat] = rdkit_mol.GetConformer(0).GetAtomPosition(iat) param_obj.coordMap = coordMap try: cids = list(AllChem.EmbedMultipleConfs(rdkit_mol, nconfs, param_obj)) except Exception: # ``useRandomCoords = True`` prevents (poorly documented) crash for large systems param_obj.useRandomCoords = True cids = list(AllChem.EmbedMultipleConfs(rdkit_mol, nconfs, param_obj)) if len(cids) == 0: # Sometimes rdkit does not crash (for large systems), but simply doe snot create conformers param_obj.useRandomCoords = True cids = list(AllChem.EmbedMultipleConfs(rdkit_mol, nconfs, param_obj)) if forcefield: # Select the forcefield (UFF or MMFF) optimize_molecule, energy = { "uff": [AllChem.UFFOptimizeMolecule, UFFenergy], "mmff": [AllChem.MMFFOptimizeMolecule, MMFFenergy], }[forcefield] # Optimize and sort conformations for cid in cids: optimize_molecule(rdkit_mol, confId=cid) cids.sort(key=energy) # Remove duplicate conformations based on RMS if best_rms > 0 or forcefield: rdmol_local = rdkit_mol rms_function = AllChem.AlignMol if best_rms > 0: # Remove the H atoms, and prepare to use the more expensive RDKit function rdmol_local = remove_some_Hs(rdkit_mol) rms_function = AllChem.GetBestRMS keep = [cids[0]] for cid in cids[1:]: for idx in keep: try: # r = AllChem.AlignMol(rdkit_mol, rdkit_mol, cid, idx) r = rms_function(rdmol_local, rdmol_local, cid, idx) except Exception: r = rms + 1 message = "Alignment failed in multiple conformation generation: " message += Chem.MolToSmiles(rdkit_mol) message += "\nAssuming different conformations." warn(message) if r < rms: break else: keep.append(cid) cids = keep if nconfs == 1: return from_rdmol(rdkit_mol) else: return [from_rdmol(rdkit_mol, cid) for cid in cids]
[docs]def from_sequence(sequence, nconfs=1, name=None, forcefield=None, rms=0.1): """ Generates PLAMS molecule from a peptide sequence. Includes explicit hydrogens and 3D coordinates. :parameter str sequence: A peptide sequence, e.g. 'HAG' :parameter int nconfs: Number of conformers to be generated :parameter str name: A name for the molecule :parameter str forcefield: Choose 'uff' or 'mmff' forcefield for geometry optimization and ranking of comformations. The default value None results in skipping of the geometry optimization step. :parameter float rms: Root Mean Square deviation threshold for removing similar/equivalent conformations. :return: A peptide molecule with hydrogens and 3D coordinates or a list of molecules if nconfs > 1 :rtype: |Molecule| or list of PLAMS Molecules """ from rdkit import Chem rdkit_mol = Chem.AddHs(Chem.MolFromSequence(sequence)) rdkit_mol.SetProp("sequence", sequence) return get_conformations(rdkit_mol, nconfs, name, forcefield, rms)
def calc_rmsd(mol1, mol2): """ Superimpose two molecules and calculate the root-mean-squared deviations of the atomic positions. The molecules should be identical, but the ordering of the atoms may differ. :param mol1: Molecule 1 :param mol2: Molecule 2 :return: The rmsd after superposition :rtype: float """ from rdkit.Chem import AllChem rdkit_mol1 = to_rdmol(mol1) rdkit_mol2 = to_rdmol(mol2) try: return AllChem.GetBestRMS(rdkit_mol1, rdkit_mol2) except: return -999
[docs]def modify_atom(mol, idx, element): """ Change atom "idx" in molecule "mol" to "element" and add or remove hydrogens accordingly :parameter mol: molecule to be modified :type mol: |Molecule| or rdkit.Chem.Mol :parameter int idx: index of the atom to be modified :parameter str element: :return: Molecule with new element and possibly added or removed hydrogens :rtype: |Molecule| """ from rdkit import Chem rdmol = to_rdmol(mol) if rdmol.GetAtomWithIdx(idx).GetSymbol() == element: return mol else: e = Chem.EditableMol(rdmol) for neighbor in reversed(rdmol.GetAtomWithIdx(idx - 1).GetNeighbors()): if neighbor.GetSymbol() == "H": e.RemoveAtom(neighbor.GetIdx()) e.ReplaceAtom(idx - 1, Chem.Atom(element)) newmol = e.GetMol() Chem.SanitizeMol(newmol) newmol = Chem.AddHs(newmol, addCoords=True) return from_rdmol(newmol)
[docs]def apply_template(mol, template): """ Modifies bond orders in PLAMS molecule according template smiles structure. :parameter mol: molecule to be modified :type mol: |Molecule| or rdkit.Chem.Mol :parameter str template: smiles string defining the correct chemical structure :return: Molecule with correct chemical structure and provided 3D coordinates :rtype: |Molecule| """ from rdkit import Chem rdmol = to_rdmol(mol, sanitize=False) template_mol = Chem.AddHs(Chem.MolFromSmiles(template)) newmol = Chem.AllChem.AssignBondOrdersFromTemplate(template_mol, rdmol) return from_rdmol(newmol)
[docs]def apply_reaction_smarts(mol, reaction_smarts, complete=False, forcefield=None, return_rdmol=False): """ Applies reaction smirks and returns product. If returned as a PLAMS molecule, thismolecule.properties.orig_atoms is a list of indices of atoms that have not been changed (which can for example be used partially optimize new atoms only with the freeze keyword) :parameter mol: molecule to be modified :type mol: |Molecule| or rdkit.Chem.Mol :parameter str reactions_smarts: Reactions smarts to be applied to molecule :parameter complete: Apply reaction until no further changes occur or given fraction of reaction centers have been modified :type complete: bool or float (value between 0 and 1) :parameter forcefield: Specify 'uff' or 'mmff' to apply forcefield based geometry optimization of product structures. :type forcefield: str :param bool return_rdmol: return a RDKit molecule if true, otherwise a PLAMS molecule :return: (product molecule, list of unchanged atoms) :rtype: (|Molecule|, list of int) """ from rdkit import Chem from rdkit.Chem import AllChem def react(reactant, reaction): """Apply reaction to reactant and return products""" ps = reaction.RunReactants([reactant]) # if reaction doesn't apply, return the reactant if len(ps) == 0: return [(reactant, range(reactant.GetNumAtoms()))] full = len(ps) while complete: # when complete is True # apply reaction until no further changes r = random.randint(0, len(ps) - 1) reactant = ps[r][0] ps = reaction.RunReactants([reactant]) if len(ps) == 0 or len(ps) / full < (1 - complete): ps = [[reactant]] break # add hydrogens and generate coordinates for new atoms products = [] for p in ps[0]: Chem.SanitizeMol(p) q = Chem.AddHs(p) Chem.SanitizeMol(q) u = gen_coords_rdmol(q) # These are the atoms that have not changed products.append((q, u)) return products mol = to_rdmol(mol) reaction = AllChem.ReactionFromSmarts(reaction_smarts) # RDKit removes fragments that are disconnected from the reaction center # In order to keep these, the molecule is first split in separate fragments # and the results, including non-reacting parts, are re-combined afterwards frags = Chem.GetMolFrags(mol, asMols=True) product = Chem.Mol() unchanged = [] # List of atoms that have not changed for frag in frags: for p, u in react(frag, reaction): unchanged += [product.GetNumAtoms() + i for i in u] product = Chem.CombineMols(product, p) if forcefield: optimize_coordinates(product, forcefield, fixed=unchanged) # The molecule is returned together with a list of atom indices of the atoms # that are identical to those # in the reactants. This list can be used in subsequent partial optimization of the molecule if not return_rdmol: product = from_rdmol(product) product.properties.orig_atoms = [a + 1 for a in unchanged] return product
def gen_coords(plamsmol): """Calculate 3D positions only for atoms without coordinates""" rdmol = to_rdmol(plamsmol) unchanged = gen_coords_rdmol(rdmol) conf = rdmol.GetConformer() for a in range(len(plamsmol.atoms)): pos = conf.GetAtomPosition(a) atom = plamsmol.atoms[a] atom._setx(pos.x) atom._sety(pos.y) atom._setz(pos.z) return [a + 1 for a in unchanged] def gen_coords_rdmol(rdmol): from rdkit.Chem import AllChem ref = rdmol.__copy__() conf = rdmol.GetConformer() coordDict = {} unchanged = [] maps = [] # Put known coordinates in coordDict for i in range(rdmol.GetNumAtoms()): pos = conf.GetAtomPosition(i) if (-0.0001 < pos.x < 0.0001) and (-0.0001 < pos.y < 0.0001) and (-0.0001 < pos.z < 0.0001): continue # atom without coordinates coordDict[i] = pos unchanged.append(i) maps.append((i, i)) # compute coordinates for new atoms, keeping known coordinates rms = 1 rs = 1 # repeat embedding and alignment until the rms of mapped atoms is sufficiently small if rdmol.GetNumAtoms() > len(maps): while rms > 0.1: AllChem.EmbedMolecule(rdmol, coordMap=coordDict, randomSeed=rs, useBasicKnowledge=True) # align new molecule to original coordinates rms = AllChem.AlignMol(rdmol, ref, atomMap=maps) rs += 1 return unchanged def optimize_coordinates(rdkit_mol, forcefield, fixed=[]): from rdkit import Chem from rdkit.Chem import AllChem def MMFFminimize(): ff = AllChem.MMFFGetMoleculeForceField(rdkit_mol, AllChem.MMFFGetMoleculeProperties(rdkit_mol)) for f in fixed: ff.AddFixedPoint(f) try: ff.Minimize() except: warn("MMFF geometry optimization failed for molecule: " + Chem.MolToSmiles(rdkit_mol)) def UFFminimize(): ff = AllChem.UFFGetMoleculeForceField(rdkit_mol, ignoreInterfragInteractions=True) for f in fixed: ff.AddFixedPoint(f) try: ff.Minimize() except: warn("UFF geometry optimization failed for molecule: " + Chem.MolToSmiles(rdkit_mol)) optimize_molecule = {"uff": UFFminimize, "mmff": MMFFminimize}[forcefield] Chem.SanitizeMol(rdkit_mol) optimize_molecule() return def write_molblock(plams_mol, file=sys.stdout): from rdkit import Chem file.write(Chem.MolToMolBlock(to_rdmol(plams_mol)))
[docs]def readpdb(pdb_file, sanitize=True, removeHs=False, proximityBonding=False, return_rdmol=False): """ Generate a molecule from a PDB file :param pdb_file: The PDB file to read :type pdb_file: path- or file-like :param bool sanitize: :param bool removeHs: Hydrogens are removed if True :param bool return_rdmol: return a RDKit molecule if true, otherwise a PLAMS molecule :return: The molecule :rtype: |Molecule| or rdkit.Chem.Mol """ from rdkit import Chem try: pdb_file = open(pdb_file, "r") except TypeError: pass # pdb_file is a file-like object... hopefully pdb_mol = Chem.MolFromPDBBlock(pdb_file.read(), sanitize=sanitize, removeHs=removeHs) return pdb_mol if return_rdmol else from_rdmol(pdb_mol)
[docs]def writepdb(mol, pdb_file=sys.stdout): """ Write a PDB file from a molecule :parameter mol: molecule to be exported to PDB :type mol: |Molecule| or rdkit.Chem.Mol :param pdb_file: The PDB file to write to, or a filename :type pdb_file: path- or file-like """ from rdkit import Chem try: pdb_file = open(pdb_file, "w") except TypeError: pass # pdb_file is a file-like object... hopefully mol = to_rdmol(mol, sanitize=False) pdb_file.write(Chem.MolToPDBBlock(mol))
[docs]def add_Hs(mol, forcefield=None, return_rdmol=False): """ Add hydrogens to protein molecules read from PDB. Makes sure that the hydrogens get the correct PDBResidue info. :param mol: Molecule to be protonated :type mol: |Molecule| or rdkit.Chem.Mol :param str forcefield: Specify 'uff' or 'mmff' to apply forcefield based geometry optimization on new atoms. :param bool return_rdmol: return a RDKit molecule if true, otherwise a PLAMS molecule :return: A molecule with explicit hydrogens added :rtype: |Molecule| or rdkit.Chem.Mol """ from rdkit import Chem mol = to_rdmol(mol) retmol = Chem.AddHs(mol) for atom in retmol.GetAtoms(): if atom.GetPDBResidueInfo() is None and atom.GetSymbol() == "H": bond = atom.GetBonds()[0] if bond.GetBeginAtom().GetIdx() == atom.GetIdx: connected_atom = bond.GetEndAtom() else: connected_atom = bond.GetBeginAtom() try: ResInfo = connected_atom.GetPDBResidueInfo() if ResInfo is None: continue # Segmentation faults are raised if ResInfo is None atom.SetMonomerInfo(ResInfo) atomname = "H" + atom.GetPDBResidueInfo().GetName()[1:] atom.GetPDBResidueInfo().SetName(atomname) except: pass unchanged = gen_coords_rdmol(retmol) if forcefield: optimize_coordinates(retmol, forcefield, fixed=unchanged) return retmol if return_rdmol else from_rdmol(retmol)
def add_fragment(rwmol, frag, rwmol_atom_idx=None, frag_atom_idx=None, bond_order=None): from rdkit import Chem molconf = rwmol.GetConformer() fragconf = frag.GetConformer() new_indices = [] for a in frag.GetAtoms(): new_index = rwmol.AddAtom(a) new_indices.append(new_index) molconf.SetAtomPosition(new_index, fragconf.GetAtomPosition(a.GetIdx())) for b in frag.GetBonds(): ba = b.GetBeginAtomIdx() ea = b.GetEndAtomIdx() rwmol.AddBond(new_indices[ba], new_indices[ea], b.GetBondType()) if bond_order: rwmol.AddBond(rwmol_atom_idx, new_indices[frag_atom_idx], Chem.BondType.values[bond_order]) rwmol.GetAtomWithIdx(new_indices[frag_atom_idx]).SetNumRadicalElectrons(0) def get_fragment(mol, indices, incl_expl_Hs=True, neutralize=True): from rdkit import Chem molconf = mol.GetConformer() fragment = Chem.RWMol(Chem.Mol()) fragconf = Chem.Conformer() # Put atoms in fragment for i in indices: atom = mol.GetAtomWithIdx(i) new_index = fragment.AddAtom(atom) pos = molconf.GetAtomPosition(i) fragconf.SetAtomPosition(new_index, pos) # Put bonds in fragment for b in mol.GetBonds(): ba = b.GetBeginAtomIdx() ea = b.GetEndAtomIdx() if ba in indices and ea in indices: fragment.AddBond(indices.index(ba), indices.index(ea), b.GetBondType()) continue if not incl_expl_Hs: continue if ba in indices and mol.GetAtomWithIdx(ea).GetSymbol() == "H": hi = fragment.AddAtom(mol.GetAtomWithIdx(ea)) fragconf.SetAtomPosition(hi, molconf.GetAtomPosition(ea)) fragment.AddBond(indices.index(ba), hi, Chem.BondType.SINGLE) continue if ea in indices and mol.GetAtomWithIdx(ba).GetSymbol() == "H": hi = fragment.AddAtom(mol.GetAtomWithIdx(ba)) fragconf.SetAtomPosition(hi, molconf.GetAtomPosition(ba)) fragment.AddBond(indices.index(ea), hi, Chem.BondType.SINGLE) ret_frag = fragment.GetMol() Chem.SanitizeMol(ret_frag) if neutralize: for atom in ret_frag.GetAtoms(): nrad = atom.GetNumRadicalElectrons() if nrad > 0: atom.SetNumExplicitHs(atom.GetNumExplicitHs() + nrad) atom.SetNumRadicalElectrons(0) Chem.SanitizeMol(ret_frag) ret_frag.AddConformer(fragconf) return ret_frag
[docs]def partition_protein(mol, residue_bonds=None, split_heteroatoms=True, return_rdmol=False): """ Splits a protein molecule into capped amino acid fragments and caps. :param mol: A protein molecule :type mol: |Molecule| or rdkit.Chem.Mol :param tuple residue_bonds: a tuple of pairs of residue number indicating which peptide bonds to split. If none, split all peptide bonds. :param bool split_heteroatoms: if True, all bonds between a heteroatom and a non-heteroatom across residues are removed :return: list of fragments, list of caps """ from rdkit import Chem mol = to_rdmol(mol) caps = [] em = Chem.RWMol(mol) if split_heteroatoms: for bond in mol.GetBonds(): resinfa = bond.GetBeginAtom().GetPDBResidueInfo() resinfb = bond.GetEndAtom().GetPDBResidueInfo() if resinfa.GetIsHeteroAtom() is not resinfb.GetIsHeteroAtom(): if resinfa.GetResidueNumber() != resinfb.GetResidueNumber(): em.RemoveBond(bond.GetBeginAtomIdx(), bond.GetEndAtomIdx()) # Split peptide bonds pept_bond = Chem.MolFromSmarts("[C;X4;H1,H2][CX3](=O)[NX3][C;X4;H1,H2][CX3](=O)") for match in mol.GetSubstructMatches(pept_bond): if residue_bonds: resa = mol.GetAtomWithIdx(match[1]).GetPDBResidueInfo().GetResidueNumber() resb = mol.GetAtomWithIdx(match[3]).GetPDBResidueInfo().GetResidueNumber() if (resa, resb) not in residue_bonds and (resb, resa) not in residue_bonds: continue cap = get_fragment(mol, match[0:5]) cap = add_Hs(cap, return_rdmol=True) caps.append(cap if return_rdmol else from_rdmol(cap)) cap_o_ind = cap.GetSubstructMatch(Chem.MolFromSmarts("[C;X4][CX3]=O")) cap_o = get_fragment(cap, cap_o_ind, neutralize=False) cap_n_ind = cap.GetSubstructMatch(Chem.MolFromSmarts("O=[CX3][NX3][C;X4]"))[2:] cap_n = get_fragment(cap, cap_n_ind, neutralize=False) em.RemoveBond(match[1], match[3]) add_fragment(em, cap_o, match[3], 1, 1) add_fragment(em, cap_n, match[1], 0, 1) # Split disulfide bonds ss_bond = Chem.MolFromSmarts("[C;X4;H1,H2]SS[C;X4;H1,H2]") for match in mol.GetSubstructMatches(ss_bond): cap = get_fragment(mol, match[0:5]) cap = add_Hs(cap, return_rdmol=True) caps.append(cap if return_rdmol else from_rdmol(cap)) cap_s_ind = cap.GetSubstructMatch(Chem.MolFromSmarts("[C;X4]SS[C;X4]")) cap_s1 = get_fragment(cap, cap_s_ind[0:2], neutralize=False) cap_s2 = get_fragment(cap, cap_s_ind[2:4], neutralize=False) em.RemoveBond(match[1], match[2]) add_fragment(em, cap_s1, match[2], 1, 1) add_fragment(em, cap_s2, match[1], 0, 1) frags = Chem.GetMolFrags(em.GetMol(), asMols=True, sanitizeFrags=False) if not return_rdmol: frags = [from_rdmol(frag) for frag in frags] return frags, caps
def charge_AAs(mol, return_rdmol=False): from rdkit import Chem ionizations = {"ARG_NH2": 1, "LYS_NZ": 1, "GLU_OE2": -1, "ASP_OD2": -1} mol = to_rdmol(mol) for atom in mol.GetAtoms(): resinfo = atom.GetPDBResidueInfo() res_atom = resinfo.GetResidueName() + "_" + resinfo.GetName().strip() try: atom.SetFormalCharge(ionizations[res_atom]) Chem.SanitizeMol(mol) except KeyError: pass Chem.SanitizeMol(mol) return mol if return_rdmol else from_rdmol(mol)
[docs]def get_backbone_atoms(mol): """ Return a list of atom indices corresponding to the backbone atoms in a peptide molecule. This function assumes PDB information in properties.pdb_info of each atom, which is the case if the molecule is generated with the "readpdb" or "from_sequence" functions. :parameter mol: a peptide molecule :type mol: |Molecule| or rdkit.Chem.Mol :return: a list of atom indices :rtype: list """ mol = from_rdmol(mol) backbone = ["N", "CA", "C", "O"] return [a for a in range(1, len(mol) + 1) if str(mol[a].properties.pdb_info.Name).strip() in backbone]
[docs]def get_substructure(mol, func_list): """ Search for functional groups within a molecule based on a list of reference functional groups. SMILES strings, PLAMS and/or RDKit molecules can be used interchangeably in "func_list". Example: .. code:: python >>> mol = from_smiles('OCCO') # Ethylene glycol >>> func_list = ['[H]O', 'C[N+]', 'O=PO'] >>> get_substructure(mol, func_list) {'[H]O': [(<scm.plams.mol.atom.Atom at 0x125183518>, <scm.plams.mol.atom.Atom at 0x1251836a0>), (<scm.plams.mol.atom.Atom at 0x125183550>, <scm.plams.mol.atom.Atom at 0x125183240>)]} :parameter mol: A PLAMS molecule. :type mol: |Molecule| :parameter list func_list: A list of functional groups. Functional groups can be represented by SMILES strings, PLAMS and/or RDKit molecules. :return: A dictionary with functional groups from "func_list" as keys and a list of n-tuples with matching PLAMS |Atom| as values. """ from rdkit import Chem def _to_rdmol(functional_group): """Turn a SMILES strings, RDKit or PLAMS molecules into an RDKit molecule.""" if isinstance(functional_group, str): # RDKit tends to remove explicit hydrogens if SANITIZE_ADJUSTHS is enabled sanitize = Chem.SanitizeFlags.SANITIZE_ALL ^ Chem.SanitizeFlags.SANITIZE_ADJUSTHS ret = Chem.MolFromSmiles(functional_group, sanitize=False) Chem.rdmolops.SanitizeMol(ret, sanitizeOps=sanitize) return ret elif isinstance(functional_group, Molecule): return to_rdmol(functional_group) elif isinstance(functional_group, Chem.Mol): return functional_group raise TypeError( "get_substructure: " + str(type(functional_group)) + " is not a supported \ object type" ) def _get_match(mol, rdmol, functional_group): """Perform a substructure match on "mol". If a match is found, return a list of n-tuples consisting PLAMS |Atom|. Otherwise return False.""" matches = rdmol.GetSubstructMatches(functional_group) if matches: return [tuple(mol[j + 1] for j in idx_tup) for idx_tup in matches] return False rdmol = to_rdmol(mol) rdmol_func_list = [_to_rdmol(i) for i in func_list] gen = (_get_match(mol, rdmol, i) for i in rdmol_func_list) return {key: value for key, value in zip(func_list, gen) if value}
[docs]def yield_coords(rdmol, id=-1): """Take an rdkit molecule and yield its coordinates as 3-tuples. .. code-block:: python >>> from scm.plams import yield_coords >>> from rdkit import Chem >>> rdmol = Chem.Mol(...) # e.g. Methane >>> for xyz in yield_coords(rdmol): ... print(xyz) (-0.0, -0.0, -0.0) (0.6405, 0.6405, -0.6405) (0.6405, -0.6405, 0.6405) (-0.6405, 0.6405, 0.6405) (-0.6405, -0.6405, -0.6405) The iterator produced by this function can, for example, be passed to :meth:`Molecule.from_array()<scm.plams.mol.molecule.Molecule.from_array>` the update the coordinates of a PLAMS Molecule in-place. .. code-block:: python >>> from scm.plams import Molecule >>> mol = Molecule(...) >>> xyz_iterator = yield_coords(rdmol) >>> mol.from_array(xyz_iterator) :parameter rdmol: An RDKit mol. :type rdmol: rdkit.Chem.Mol :parameter int id: The ID of the desired conformer. :return: An iterator yielding 3-tuples with *rdmol*'s Cartesian coordinates. :rtype: iterator """ conf = rdmol.GetConformer(id=id) for atom in rdmol.GetAtoms(): pos = conf.GetAtomPosition(atom.GetIdx()) yield (pos.x, pos.y, pos.z)
@add_to_class(Molecule) def assign_chirality(self): """ Assigns stereo-info to PLAMS molecule by invoking RDKIT """ rd_mol = to_rdmol(self, assignChirality=True) pl_mol = from_rdmol(rd_mol) # Add R/S info to self for iat, pl_atom in enumerate(pl_mol.atoms): # Check for R/S information if pl_atom.properties.rdkit.stereo: self.atoms[iat].properties.rdkit.stereo = pl_atom.properties.rdkit.stereo # Add cis/trans information to self for ibond, pl_bond in enumerate(pl_mol.bonds): if pl_bond.properties.rdkit.stereo: self.bonds[ibond] = pl_bond.properties.rdkit.stereo @add_to_class(Molecule) def get_chirality(self): """ Returns the chirality of the atoms """ from rdkit import Chem rd_mol = to_rdmol(self, assignChirality=True) return Chem.FindMolChiralCenters(rd_mol, force=True, includeUnassigned=True)
[docs]def canonicalize_mol(mol, inplace=False, **kwargs): r"""Take a PLAMS molecule and sort its atoms based on their canonical rank. Example: .. code:: python >>> from scm.plams import Molecule, canonicalize_mol # Methane >>> mol: Molecule = ... >>> print(mol) Atoms: 1 H 0.640510 0.640510 -0.640510 2 H 0.640510 -0.640510 0.640510 3 C 0.000000 0.000000 0.000000 4 H -0.640510 0.640510 0.640510 5 H -0.640510 -0.640510 -0.640510 >>> print(canonicalize_mol(mol)) Atoms: 1 C 0.000000 0.000000 0.000000 2 H -0.640510 -0.640510 -0.640510 3 H -0.640510 0.640510 0.640510 4 H 0.640510 -0.640510 0.640510 5 H 0.640510 0.640510 -0.640510 :parameter mol: The to-be canonicalized molecule. :type mol: |Molecule| :parameter bool inplace: Whether to sort the atoms inplace or to return a new molecule. :parameter \**kwargs: Further keyword arguments for rdkit.Chem.CanonicalRankAtoms_. :return: Either ``None`` or a newly sorted molecule, depending on the value of ``inplace``. :rtype: None or |Molecule| .. _rdkit.Chem.CanonicalRankAtoms: https://www.rdkit.org/docs/source/rdkit.Chem.rdmolfiles.html#rdkit.Chem.rdmolfiles.CanonicalRankAtoms """ from rdkit import Chem if not isinstance(mol, Molecule): raise TypeError("`mol` expected a plams Molecule") rdmol = to_rdmol(mol) idx_rank = Chem.CanonicalRankAtoms(rdmol, **kwargs) if inplace: mol.atoms = [at for _, at in sorted(zip(idx_rank, mol.atoms), reverse=True)] return None else: ret = mol.copy() ret.atoms = [at for _, at in sorted(zip(idx_rank, ret.atoms), reverse=True)] return ret