from importlib.util import find_spec
from typing import List, Literal, Optional, overload
if find_spec("rdkit"):
__all__ = [
"add_Hs",
"apply_reaction_smarts",
"apply_template",
"gen_coords_rdmol",
"get_backbone_atoms",
"modify_atom",
"to_rdmol",
"from_rdmol",
"from_sequence",
"from_smiles",
"from_smarts",
"to_smiles",
"partition_protein",
"readpdb",
"writepdb",
"get_substructure",
"get_conformations",
"yield_coords",
"canonicalize_mol",
]
else:
__all__ = []
"""
@author: Lars Ridder
@description: A set of functions to manipulate molecules based on RDKit
This is a series of functions that apply RDKit functionality on PLAMS molecules
"""
import random
import sys
from warnings import warn
from scm.plams.core.errors import PlamsError
from scm.plams.core.functions import add_to_class, log
from scm.plams.mol.atom import Atom
from scm.plams.mol.bond import Bond
from scm.plams.mol.molecule import Molecule
[docs]def from_rdmol(rdkit_mol, confid=-1, properties=True):
"""
Translate an RDKit molecule into a PLAMS molecule type.
RDKit properties will be unpickled if their name ends with '_pickled'.
:parameter rdkit_mol: RDKit molecule
:type rdkit_mol: rdkit.Chem.Mol
:parameter int confid: conformer identifier from which to take coordinates
:parameter bool properties: If all Chem.Mol, Chem.Atom and Chem.Bond properties should be converted from RDKit to PLAMS format.
:return: a PLAMS molecule
:rtype: |Molecule|
"""
from rdkit import Chem
if isinstance(rdkit_mol, Molecule):
return rdkit_mol
# Create PLAMS molecule
plams_mol = Molecule()
total_charge = 0
try:
Chem.Kekulize(rdkit_mol)
except Exception:
pass
conf = rdkit_mol.GetConformer(id=confid)
# Add atoms and assign properties to the PLAMS atom if *properties* = True
for rd_atom in rdkit_mol.GetAtoms():
pos = conf.GetAtomPosition(rd_atom.GetIdx())
ch = rd_atom.GetFormalCharge()
pl_atom = Atom(rd_atom.GetAtomicNum(), coords=(pos.x, pos.y, pos.z), rdkit={"charge": ch})
if properties and rd_atom.GetPDBResidueInfo():
pl_atom.properties.rdkit.pdb_info = get_PDBResidueInfo(rd_atom)
plams_mol.add_atom(pl_atom)
total_charge += ch
# Check for R/S information
stereo = str(rd_atom.GetChiralTag())
if stereo == "CHI_TETRAHEDRAL_CCW":
pl_atom.properties.rdkit.stereo = "counter-clockwise"
elif stereo == "CHI_TETRAHEDRAL_CW":
pl_atom.properties.rdkit.stereo = "clockwise"
# Add bonds to the PLAMS molecule
for bond in rdkit_mol.GetBonds():
at1 = plams_mol.atoms[bond.GetBeginAtomIdx()]
at2 = plams_mol.atoms[bond.GetEndAtomIdx()]
plams_mol.add_bond(Bond(at1, at2, bond.GetBondTypeAsDouble()))
# Check for cis/trans information
stereo, bond_dir = str(bond.GetStereo()), str(bond.GetBondDir())
if stereo == "STEREOZ" or stereo == "STEREOCIS":
plams_mol.bonds[-1].properties.rdkit.stereo = "Z"
elif stereo == "STEREOE" or stereo == "STEREOTRANS":
plams_mol.bonds[-1].properties.rdkit.stereo = "E"
elif bond_dir == "ENDUPRIGHT":
plams_mol.bonds[-1].properties.rdkit.stereo = "up"
elif bond_dir == "ENDDOWNRIGHT":
plams_mol.bonds[-1].properties.rdkit.stereo = "down"
# Set charge and assign properties to PLAMS molecule and bonds if *properties* = True
plams_mol.properties.charge = total_charge
if properties:
prop_from_rdmol(plams_mol, rdkit_mol)
for rd_atom, plams_atom in zip(rdkit_mol.GetAtoms(), plams_mol):
prop_from_rdmol(plams_atom, rd_atom)
for rd_bond, plams_bond in zip(rdkit_mol.GetBonds(), plams_mol.bonds):
prop_from_rdmol(plams_bond, rd_bond)
return plams_mol
[docs]def to_rdmol(plams_mol, sanitize=True, properties=True, assignChirality=False):
"""
Translate a PLAMS molecule into an RDKit molecule type.
PLAMS |Molecule|, |Atom| or |Bond| properties are pickled if they are neither booleans, floats,
integers, floats nor strings, the resulting property names are appended with '_pickled'.
:parameter plams_mol: A PLAMS molecule
:parameter bool sanitize: Kekulize, check valencies, set aromaticity, conjugation and hybridization
:parameter bool properties: If all |Molecule|, |Atom| and |Bond| properties should be converted from PLAMS to RDKit format.
:parameter bool assignChirality: Assign R/S and cis/trans information, insofar as this was not yet present in the PLAMS molecule.
:type plams_mol: |Molecule|
:return: an RDKit molecule
:rtype: rdkit.Chem.Mol
"""
from rdkit import Chem, Geometry
if isinstance(plams_mol, Chem.Mol):
return plams_mol
# Create rdkit molecule
e = Chem.EditableMol(Chem.Mol())
# Add atoms and assign properties to the RDKit atom if *properties* = True
for pl_atom in plams_mol.atoms:
rd_atom = Chem.Atom(int(pl_atom.atnum))
if "rdkit" in pl_atom.properties:
if "charge" in pl_atom.properties.rdkit:
rd_atom.SetFormalCharge(pl_atom.properties.rdkit.charge)
if properties:
if "rdkit" in pl_atom.properties:
if "pdb_info" in pl_atom.properties.rdkit:
set_PDBresidueInfo(rd_atom, pl_atom.properties.rdkit.pdb_info)
for prop in pl_atom.properties.rdkit:
if prop not in ("charge", "pdb_info", "stereo"):
prop_to_rdmol(rd_atom, prop, pl_atom.properties.rdkit.get(prop))
prop_dic = {}
for prop in pl_atom.properties:
if prop != "rdkit":
prop_dic[prop] = pl_atom.properties.get(prop)
if len(prop_dic) > 0:
prop_to_rdmol(rd_atom, "plams", prop_dic)
# Check for R/S information
if pl_atom.properties.rdkit.stereo:
stereo = pl_atom.properties.rdkit.stereo.lower()
if stereo == "counter-clockwise":
rd_atom.SetChiralTag(Chem.rdchem.ChiralType.CHI_TETRAHEDRAL_CCW)
elif stereo == "clockwise":
rd_atom.SetChiralTag(Chem.rdchem.ChiralType.CHI_TETRAHEDRAL_CW)
e.AddAtom(rd_atom)
# Mapping of PLAMS bond orders to RDKit bond types:
def plams_to_rd_bonds(bo):
if 1.4 < bo < 1.6:
return 12 # bond type for aromatic bond
else:
return int(bo)
# Add bonds to the RDKit molecule
for bond in plams_mol.bonds:
a1 = plams_mol.atoms.index(bond.atom1)
a2 = plams_mol.atoms.index(bond.atom2)
e.AddBond(a1, a2, Chem.BondType(plams_to_rd_bonds(bond.order)))
rdmol = e.GetMol()
# Check for cis/trans information
for pl_bond, rd_bond in zip(plams_mol.bonds, rdmol.GetBonds()):
if pl_bond.properties.rdkit.stereo:
stereo = pl_bond.properties.rdkit.stereo.lower()
if stereo == "e" or stereo == "trans":
rd_bond.SetStereo(Chem.rdchem.BondStereo.STEREOE)
elif stereo == "z" or stereo == "cis":
rd_bond.SetStereo(Chem.rdchem.BondStereo.STEREOZ)
elif stereo == "up":
rd_bond.SetBondDir(Chem.rdchem.BondDir.ENDUPRIGHT)
elif stereo == "down":
rd_bond.SetBondDir(Chem.rdchem.BondDir.ENDDOWNRIGHT)
# Assign properties to RDKit molecule and bonds if *properties* = True
# All properties will be taken from 'rdkit' subsettings, except the molecular charge
if properties:
prop_dic = {}
for prop in plams_mol.properties:
if prop == "rdkit":
for rdprop in plams_mol.properties.rdkit:
prop_to_rdmol(rdmol, rdprop, plams_mol.properties.rdkit.get(rdprop))
else:
# prop_dic[prop] = {'plams':plams_mol.properties.get(prop)}
prop_dic[prop] = plams_mol.properties.get(prop)
if len(prop_dic) > 0:
prop_to_rdmol(rdmol, "plams", prop_dic)
prop_dic = {}
for pl_bond, rd_bond in zip(plams_mol.bonds, rdmol.GetBonds()):
for prop in pl_bond.properties:
if prop == "rdkit":
for rdprop in pl_bond.properties.rdkit:
if rdprop != "stereo":
prop_to_rdmol(rd_bond, rdprop, pl_bond.properties.rdkit.get(rdprop))
else:
prop_dic[prop] = pl_bond.properties.get(prop)
if len(prop_dic) > 0:
prop_to_rdmol(rd_bond, "plams", prop_dic)
if sanitize:
try:
Chem.SanitizeMol(rdmol)
except ValueError as exc:
# rdkit_flag = Chem.SanitizeMol(rdmol,catchErrors=True)
# log ('RDKit Sanitization Error. Failed Operation Flag = %s'%(rdkit_flag))
log("RDKit Sanitization Error.")
text = "Most likely this is a problem with the assigned bond orders: "
text += "Use chemical insight to adjust them."
log(text)
log("Note that the atom indices below start at zero, while the AMS-GUI indices start at 1.")
raise exc
conf = Chem.Conformer()
for i, atom in enumerate(plams_mol.atoms):
xyz = Geometry.Point3D(atom._getx(), atom._gety(), atom._getz())
conf.SetAtomPosition(i, xyz)
rdmol.AddConformer(conf)
# REB: Assign all stereochemistry, if it wasn't already there
if assignChirality:
Chem.rdmolops.AssignAtomChiralTagsFromStructure(rdmol, confId=conf.GetId(), replaceExistingTags=False)
try:
Chem.AssignStereochemistryFrom3D(rdmol, confId=conf.GetId(), replaceExistingTags=False)
except AttributeError:
pass
return rdmol
[docs]def to_smiles(plams_mol, short_smiles=True, **kwargs):
"""
Returns the RDKit-generated SMILES string of a PLAMS molecule.
Note: SMILES strings are generated based on the molecule's connectivity. If the input PLAMS molecule does not contain any bonds, "guessed bonds" will be used.
:parameter plams_mol: A PLAMS |Molecule|
:parameter bool short_smiles: whether or not to use some RDKit sanitization to get shorter smiles (e.g. for a water molecule, short_smiles=True -> "O", short_smiles=False -> [H]O[H])
:parameter \**kwargs: With 'kwargs' you can provide extra optional parameters to the rdkit.Chem method 'MolToSmiles'. See the rdkit documentation for more info.
:return: the SMILES string
"""
from rdkit import Chem
if len(plams_mol.bonds) > 0:
mol_with_bonds = plams_mol
else:
mol_with_bonds = plams_mol.copy()
mol_with_bonds.guess_bonds()
rd_mol = to_rdmol(mol_with_bonds, sanitize=False)
# This sanitization black magic is needed for getting the "short, nice and clean" SMILES string.
# Without this, the SMILES string for water would be "[H]O[H]". With this is just "O"
if short_smiles:
s = Chem.rdmolops.SanitizeFlags
rdkitSanitizeOptions = (
s.SANITIZE_ADJUSTHS
or s.SANITIZE_CLEANUP
or s.SANITIZE_CLEANUPCHIRALITY
or s.SANITIZE_FINDRADICALS
or s.SANITIZE_PROPERTIES
or s.SANITIZE_SETAROMATICITY
or s.SANITIZE_SETCONJUGATION
or s.SANITIZE_SETHYBRIDIZATION
or s.SANITIZE_SYMMRINGS
)
Chem.rdmolops.AssignRadicals(rd_mol)
rd_mol = Chem.rdmolops.RemoveHs(rd_mol, updateExplicitCount=True, sanitize=False)
Chem.rdmolops.SanitizeMol(rd_mol, rdkitSanitizeOptions)
smiles = Chem.MolToSmiles(rd_mol, **kwargs)
return smiles
pdb_residue_info_items = [
"AltLoc",
"ChainId",
"InsertionCode",
"IsHeteroAtom",
"Name",
"Occupancy",
"ResidueName",
"ResidueNumber",
"SecondaryStructure",
"SegmentNumber",
"SerialNumber",
"TempFactor",
]
# 'MonomerType' was excluded because it is an rdkit type that cannot easilty be serialized
def get_PDBResidueInfo(rdkit_atom):
pdb_info = {}
for item in pdb_residue_info_items:
get_function = "Get" + item
pdb_info[item] = rdkit_atom.GetPDBResidueInfo().__getattribute__(get_function)()
return pdb_info
def set_PDBresidueInfo(rdkit_atom, pdb_info):
from rdkit import Chem
atom_pdb_residue_info = Chem.AtomPDBResidueInfo()
for item, value in pdb_info.items():
set_function = "Set" + item
atom_pdb_residue_info.__getattribute__(set_function)(value)
rdkit_atom.SetMonomerInfo(atom_pdb_residue_info)
def prop_to_rdmol(rd_obj, propkey, propvalue):
"""
Convert a single PLAMS property into an RDKit property.
:paramter pl_obj: A PLAMS object.
:type pl_obj: |Molecule|, |Atom| or |Bond|.
:parameter rd_obj: An RDKit object.
:type rd_obj: rdkit.Chem.Mol, rdkit.Chem.Atom or rdkit.Chem.Bond
:parameter str propkey: The |Settings| key of the PLAMS property.
"""
try:
import dill as pickle
except ImportError:
import pickle
obj = type(propvalue)
obj_dict = {bool: rd_obj.SetBoolProp, float: rd_obj.SetDoubleProp, int: rd_obj.SetIntProp, str: rd_obj.SetProp}
if obj_dict.get(obj):
obj_dict[obj](propkey, propvalue)
else:
name = propkey + "_pickled"
try:
rd_obj.SetProp(name, pickle.dumps(propvalue, 0).decode())
except (Exception, pickle.PicklingError):
pass
def prop_from_rdmol(pl_obj, rd_obj):
"""
Convert one or more RDKit properties into PLAMS properties.
:paramter pl_obj: A PLAMS object.
:type pl_obj: |Molecule|, |Atom| or |Bond|.
:parameter rd_obj: An RDKit object.
:type rd_obj: rdkit.Chem.Mol, rdkit.Chem.Atom or rdkit.Chem.Bond
"""
try:
import dill as pickle
except ImportError:
import pickle
prop_dict = rd_obj.GetPropsAsDict()
for propname in prop_dict.keys():
if propname == "plams_pickled":
plams_props = pickle.loads(prop_dict[propname].encode())
if not isinstance(plams_props, dict):
raise Exception("PLAMS property not properly stored in RDKit")
for key, value in plams_props.items():
pl_obj.properties[key] = value
else:
if propname == "__computedProps":
continue
if "_pickled" not in propname:
pl_obj.properties.rdkit[propname] = prop_dict[propname]
else:
prop = prop_dict[propname]
propname = propname.rsplit("_pickled", 1)[0]
propvalue = pickle.loads(prop.encode())
pl_obj.properties.rdkit[propname] = propvalue
@overload
def from_smiles(
smiles: str, nconfs: Literal[1] = ..., name: Optional[str] = ..., forcefield: Optional[str] = ..., rms: float = ...
) -> Molecule: ...
@overload
def from_smiles(
smiles: str, nconfs: int = ..., name: Optional[str] = ..., forcefield: Optional[str] = ..., rms: float = ...
) -> List[Molecule]: ...
[docs]def from_smiles(
smiles: str, nconfs: int = 1, name: Optional[str] = None, forcefield: Optional[str] = None, rms: float = 0.1
):
"""
Generates PLAMS molecule(s) from a smiles strings.
:parameter str smiles: A smiles string
:parameter int nconfs: Number of conformers to be generated
:parameter str name: A name for the molecule
:parameter str forcefield: Choose 'uff' or 'mmff' forcefield for geometry optimization
and ranking of comformations. The default value None results in skipping of the
geometry optimization step.
:parameter float rms: Root Mean Square deviation threshold for
removing similar/equivalent conformations
:return: A molecule with hydrogens and 3D coordinates or a list of molecules if nconfs > 1
:rtype: |Molecule| or list of PLAMS Molecules
"""
from rdkit import Chem
smiles = str(smiles.split()[0])
smiles = Chem.CanonSmiles(smiles)
rdkit_mol = Chem.AddHs(Chem.MolFromSmiles(smiles))
rdkit_mol.SetProp("smiles", smiles)
return get_conformations(rdkit_mol, nconfs, name, forcefield, rms)
[docs]def from_smarts(smarts, nconfs=1, name=None, forcefield=None, rms=0.1):
"""
Generates PLAMS molecule(s) from a smarts strings.
This allows for example to define hydrogens explicitly.
However it is less suitable for aromatic molecules (use from_smiles in that case).
:parameter str smarts: A smarts string
:parameter int nconfs: Number of conformers to be generated
:parameter str name: A name for the molecule
:parameter str forcefield: Choose 'uff' or 'mmff' forcefield for geometry
optimization and ranking of comformations. The default value None results
in skipping of the geometry optimization step.
:parameter float rms: Root Mean Square deviation threshold for removing
similar/equivalent conformations.
:return: A molecule with hydrogens and 3D coordinates or a list of molecules if nconfs > 1
:rtype: |Molecule| or list of PLAMS Molecules
"""
from rdkit import Chem
smiles = str(smarts.split()[0])
mol = Chem.MolFromSmarts(smiles)
Chem.SanitizeMol(mol)
molecule = Chem.AddHs(mol)
molecule.SetProp("smiles", smiles)
return get_conformations(molecule, nconfs, name, forcefield, rms)
[docs]def from_sequence(sequence, nconfs=1, name=None, forcefield=None, rms=0.1):
"""
Generates PLAMS molecule from a peptide sequence.
Includes explicit hydrogens and 3D coordinates.
:parameter str sequence: A peptide sequence, e.g. 'HAG'
:parameter int nconfs: Number of conformers to be generated
:parameter str name: A name for the molecule
:parameter str forcefield: Choose 'uff' or 'mmff' forcefield for geometry
optimization and ranking of comformations. The default value None results
in skipping of the geometry optimization step.
:parameter float rms: Root Mean Square deviation threshold for removing
similar/equivalent conformations.
:return: A peptide molecule with hydrogens and 3D coordinates
or a list of molecules if nconfs > 1
:rtype: |Molecule| or list of PLAMS Molecules
"""
from rdkit import Chem
rdkit_mol = Chem.AddHs(Chem.MolFromSequence(sequence))
rdkit_mol.SetProp("sequence", sequence)
return get_conformations(rdkit_mol, nconfs, name, forcefield, rms)
def calc_rmsd(mol1, mol2):
"""
Superimpose two molecules and calculate the root-mean-squared deviations of
the atomic positions.
The molecules should be identical, but the ordering of the atoms may differ.
:param mol1: Molecule 1
:param mol2: Molecule 2
:return: The rmsd after superposition
:rtype: float
"""
from rdkit.Chem import AllChem
rdkit_mol1 = to_rdmol(mol1)
rdkit_mol2 = to_rdmol(mol2)
try:
return AllChem.GetBestRMS(rdkit_mol1, rdkit_mol2)
except:
return -999
[docs]def modify_atom(mol, idx, element):
"""
Change atom "idx" in molecule "mol" to "element" and add or remove hydrogens accordingly
:parameter mol: molecule to be modified
:type mol: |Molecule| or rdkit.Chem.Mol
:parameter int idx: index of the atom to be modified
:parameter str element:
:return: Molecule with new element and possibly added or removed hydrogens
:rtype: |Molecule|
"""
from rdkit import Chem
rdmol = to_rdmol(mol)
if rdmol.GetAtomWithIdx(idx).GetSymbol() == element:
return mol
else:
e = Chem.EditableMol(rdmol)
for neighbor in reversed(rdmol.GetAtomWithIdx(idx - 1).GetNeighbors()):
if neighbor.GetSymbol() == "H":
e.RemoveAtom(neighbor.GetIdx())
e.ReplaceAtom(idx - 1, Chem.Atom(element))
newmol = e.GetMol()
Chem.SanitizeMol(newmol)
newmol = Chem.AddHs(newmol, addCoords=True)
return from_rdmol(newmol)
[docs]def apply_template(mol, template):
"""
Modifies bond orders in PLAMS molecule according template smiles structure.
:parameter mol: molecule to be modified
:type mol: |Molecule| or rdkit.Chem.Mol
:parameter str template: smiles string defining the correct chemical structure
:return: Molecule with correct chemical structure and provided 3D coordinates
:rtype: |Molecule|
"""
from rdkit import Chem
rdmol = to_rdmol(mol, sanitize=False)
template_mol = Chem.AddHs(Chem.MolFromSmiles(template))
newmol = Chem.AllChem.AssignBondOrdersFromTemplate(template_mol, rdmol)
return from_rdmol(newmol)
[docs]def apply_reaction_smarts(mol, reaction_smarts, complete=False, forcefield=None, return_rdmol=False):
"""
Applies reaction smirks and returns product.
If returned as a PLAMS molecule, thismolecule.properties.orig_atoms
is a list of indices of atoms that have not been changed
(which can for example be used partially optimize new atoms only with the freeze keyword)
:parameter mol: molecule to be modified
:type mol: |Molecule| or rdkit.Chem.Mol
:parameter str reactions_smarts: Reactions smarts to be applied to molecule
:parameter complete: Apply reaction until no further changes occur or given
fraction of reaction centers have been modified
:type complete: bool or float (value between 0 and 1)
:parameter forcefield: Specify 'uff' or 'mmff' to apply forcefield based
geometry optimization of product structures.
:type forcefield: str
:param bool return_rdmol: return a RDKit molecule if true, otherwise a PLAMS molecule
:return: (product molecule, list of unchanged atoms)
:rtype: (|Molecule|, list of int)
"""
from rdkit import Chem
from rdkit.Chem import AllChem
def react(reactant, reaction):
"""Apply reaction to reactant and return products"""
ps = reaction.RunReactants([reactant])
# if reaction doesn't apply, return the reactant
if len(ps) == 0:
return [(reactant, range(reactant.GetNumAtoms()))]
full = len(ps)
while complete: # when complete is True
# apply reaction until no further changes
r = random.randint(0, len(ps) - 1)
reactant = ps[r][0]
ps = reaction.RunReactants([reactant])
if len(ps) == 0 or len(ps) / full < (1 - complete):
ps = [[reactant]]
break
# add hydrogens and generate coordinates for new atoms
products = []
for p in ps[0]:
Chem.SanitizeMol(p)
q = Chem.AddHs(p)
Chem.SanitizeMol(q)
u = gen_coords_rdmol(q) # These are the atoms that have not changed
products.append((q, u))
return products
mol = to_rdmol(mol)
reaction = AllChem.ReactionFromSmarts(reaction_smarts)
# RDKit removes fragments that are disconnected from the reaction center
# In order to keep these, the molecule is first split in separate fragments
# and the results, including non-reacting parts, are re-combined afterwards
frags = Chem.GetMolFrags(mol, asMols=True)
product = Chem.Mol()
unchanged = [] # List of atoms that have not changed
for frag in frags:
for p, u in react(frag, reaction):
unchanged += [product.GetNumAtoms() + i for i in u]
product = Chem.CombineMols(product, p)
if forcefield:
optimize_coordinates(product, forcefield, fixed=unchanged)
# The molecule is returned together with a list of atom indices of the atoms
# that are identical to those
# in the reactants. This list can be used in subsequent partial optimization of the molecule
if not return_rdmol:
product = from_rdmol(product)
product.properties.orig_atoms = [a + 1 for a in unchanged]
return product
def gen_coords(plamsmol):
"""Calculate 3D positions only for atoms without coordinates"""
rdmol = to_rdmol(plamsmol)
unchanged = gen_coords_rdmol(rdmol)
conf = rdmol.GetConformer()
for a in range(len(plamsmol.atoms)):
pos = conf.GetAtomPosition(a)
atom = plamsmol.atoms[a]
atom._setx(pos.x)
atom._sety(pos.y)
atom._setz(pos.z)
return [a + 1 for a in unchanged]
def gen_coords_rdmol(rdmol):
from rdkit.Chem import AllChem
ref = rdmol.__copy__()
conf = rdmol.GetConformer()
coordDict = {}
unchanged = []
maps = []
# Put known coordinates in coordDict
for i in range(rdmol.GetNumAtoms()):
pos = conf.GetAtomPosition(i)
if (-0.0001 < pos.x < 0.0001) and (-0.0001 < pos.y < 0.0001) and (-0.0001 < pos.z < 0.0001):
continue # atom without coordinates
coordDict[i] = pos
unchanged.append(i)
maps.append((i, i))
# compute coordinates for new atoms, keeping known coordinates
rms = 1
rs = 1
# repeat embedding and alignment until the rms of mapped atoms is sufficiently small
if rdmol.GetNumAtoms() > len(maps):
while rms > 0.1:
AllChem.EmbedMolecule(rdmol, coordMap=coordDict, randomSeed=rs, useBasicKnowledge=True)
# align new molecule to original coordinates
rms = AllChem.AlignMol(rdmol, ref, atomMap=maps)
rs += 1
return unchanged
def optimize_coordinates(rdkit_mol, forcefield, fixed=[]):
from rdkit import Chem
from rdkit.Chem import AllChem
def MMFFminimize():
ff = AllChem.MMFFGetMoleculeForceField(rdkit_mol, AllChem.MMFFGetMoleculeProperties(rdkit_mol))
for f in fixed:
ff.AddFixedPoint(f)
try:
ff.Minimize()
except:
warn("MMFF geometry optimization failed for molecule: " + Chem.MolToSmiles(rdkit_mol))
def UFFminimize():
ff = AllChem.UFFGetMoleculeForceField(rdkit_mol, ignoreInterfragInteractions=True)
for f in fixed:
ff.AddFixedPoint(f)
try:
ff.Minimize()
except:
warn("UFF geometry optimization failed for molecule: " + Chem.MolToSmiles(rdkit_mol))
optimize_molecule = {"uff": UFFminimize, "mmff": MMFFminimize}[forcefield]
Chem.SanitizeMol(rdkit_mol)
optimize_molecule()
return
def write_molblock(plams_mol, file=sys.stdout):
from rdkit import Chem
file.write(Chem.MolToMolBlock(to_rdmol(plams_mol)))
[docs]def readpdb(pdb_file, sanitize=True, removeHs=False, proximityBonding=False, return_rdmol=False):
"""
Generate a molecule from a PDB file
:param pdb_file: The PDB file to read
:type pdb_file: path- or file-like
:param bool sanitize:
:param bool removeHs: Hydrogens are removed if True
:param bool return_rdmol: return a RDKit molecule if true, otherwise a PLAMS molecule
:return: The molecule
:rtype: |Molecule| or rdkit.Chem.Mol
"""
from rdkit import Chem
try:
pdb_file = open(pdb_file, "r")
except TypeError:
pass # pdb_file is a file-like object... hopefully
pdb_mol = Chem.MolFromPDBBlock(pdb_file.read(), sanitize=sanitize, removeHs=removeHs)
return pdb_mol if return_rdmol else from_rdmol(pdb_mol)
[docs]def writepdb(mol, pdb_file=sys.stdout):
"""
Write a PDB file from a molecule
:parameter mol: molecule to be exported to PDB
:type mol: |Molecule| or rdkit.Chem.Mol
:param pdb_file: The PDB file to write to, or a filename
:type pdb_file: path- or file-like
"""
from rdkit import Chem
try:
pdb_file = open(pdb_file, "w")
except TypeError:
pass # pdb_file is a file-like object... hopefully
mol = to_rdmol(mol, sanitize=False)
pdb_file.write(Chem.MolToPDBBlock(mol))
[docs]def add_Hs(mol, forcefield=None, return_rdmol=False):
"""
Add hydrogens to protein molecules read from PDB.
Makes sure that the hydrogens get the correct PDBResidue info.
:param mol: Molecule to be protonated
:type mol: |Molecule| or rdkit.Chem.Mol
:param str forcefield: Specify 'uff' or 'mmff' to apply forcefield based
geometry optimization on new atoms.
:param bool return_rdmol: return a RDKit molecule if true, otherwise a PLAMS molecule
:return: A molecule with explicit hydrogens added
:rtype: |Molecule| or rdkit.Chem.Mol
"""
from rdkit import Chem
mol = to_rdmol(mol)
retmol = Chem.AddHs(mol)
for atom in retmol.GetAtoms():
if atom.GetPDBResidueInfo() is None and atom.GetSymbol() == "H":
bond = atom.GetBonds()[0]
if bond.GetBeginAtom().GetIdx() == atom.GetIdx:
connected_atom = bond.GetEndAtom()
else:
connected_atom = bond.GetBeginAtom()
try:
ResInfo = connected_atom.GetPDBResidueInfo()
if ResInfo is None:
continue # Segmentation faults are raised if ResInfo is None
atom.SetMonomerInfo(ResInfo)
atomname = "H" + atom.GetPDBResidueInfo().GetName()[1:]
atom.GetPDBResidueInfo().SetName(atomname)
except:
pass
unchanged = gen_coords_rdmol(retmol)
if forcefield:
optimize_coordinates(retmol, forcefield, fixed=unchanged)
return retmol if return_rdmol else from_rdmol(retmol)
def add_fragment(rwmol, frag, rwmol_atom_idx=None, frag_atom_idx=None, bond_order=None):
from rdkit import Chem
molconf = rwmol.GetConformer()
fragconf = frag.GetConformer()
new_indices = []
for a in frag.GetAtoms():
new_index = rwmol.AddAtom(a)
new_indices.append(new_index)
molconf.SetAtomPosition(new_index, fragconf.GetAtomPosition(a.GetIdx()))
for b in frag.GetBonds():
ba = b.GetBeginAtomIdx()
ea = b.GetEndAtomIdx()
rwmol.AddBond(new_indices[ba], new_indices[ea], b.GetBondType())
if bond_order:
rwmol.AddBond(rwmol_atom_idx, new_indices[frag_atom_idx], Chem.BondType.values[bond_order])
rwmol.GetAtomWithIdx(new_indices[frag_atom_idx]).SetNumRadicalElectrons(0)
def get_fragment(mol, indices, incl_expl_Hs=True, neutralize=True):
from rdkit import Chem
molconf = mol.GetConformer()
fragment = Chem.RWMol(Chem.Mol())
fragconf = Chem.Conformer()
# Put atoms in fragment
for i in indices:
atom = mol.GetAtomWithIdx(i)
new_index = fragment.AddAtom(atom)
pos = molconf.GetAtomPosition(i)
fragconf.SetAtomPosition(new_index, pos)
# Put bonds in fragment
for b in mol.GetBonds():
ba = b.GetBeginAtomIdx()
ea = b.GetEndAtomIdx()
if ba in indices and ea in indices:
fragment.AddBond(indices.index(ba), indices.index(ea), b.GetBondType())
continue
if not incl_expl_Hs:
continue
if ba in indices and mol.GetAtomWithIdx(ea).GetSymbol() == "H":
hi = fragment.AddAtom(mol.GetAtomWithIdx(ea))
fragconf.SetAtomPosition(hi, molconf.GetAtomPosition(ea))
fragment.AddBond(indices.index(ba), hi, Chem.BondType.SINGLE)
continue
if ea in indices and mol.GetAtomWithIdx(ba).GetSymbol() == "H":
hi = fragment.AddAtom(mol.GetAtomWithIdx(ba))
fragconf.SetAtomPosition(hi, molconf.GetAtomPosition(ba))
fragment.AddBond(indices.index(ea), hi, Chem.BondType.SINGLE)
ret_frag = fragment.GetMol()
Chem.SanitizeMol(ret_frag)
if neutralize:
for atom in ret_frag.GetAtoms():
nrad = atom.GetNumRadicalElectrons()
if nrad > 0:
atom.SetNumExplicitHs(atom.GetNumExplicitHs() + nrad)
atom.SetNumRadicalElectrons(0)
Chem.SanitizeMol(ret_frag)
ret_frag.AddConformer(fragconf)
return ret_frag
[docs]def partition_protein(mol, residue_bonds=None, split_heteroatoms=True, return_rdmol=False):
"""
Splits a protein molecule into capped amino acid fragments and caps.
:param mol: A protein molecule
:type mol: |Molecule| or rdkit.Chem.Mol
:param tuple residue_bonds: a tuple of pairs of residue number indicating which
peptide bonds to split. If none, split all peptide bonds.
:param bool split_heteroatoms: if True, all bonds between a heteroatom and
a non-heteroatom across residues are removed
:return: list of fragments, list of caps
"""
from rdkit import Chem
mol = to_rdmol(mol)
caps = []
em = Chem.RWMol(mol)
if split_heteroatoms:
for bond in mol.GetBonds():
resinfa = bond.GetBeginAtom().GetPDBResidueInfo()
resinfb = bond.GetEndAtom().GetPDBResidueInfo()
if resinfa.GetIsHeteroAtom() is not resinfb.GetIsHeteroAtom():
if resinfa.GetResidueNumber() != resinfb.GetResidueNumber():
em.RemoveBond(bond.GetBeginAtomIdx(), bond.GetEndAtomIdx())
# Split peptide bonds
pept_bond = Chem.MolFromSmarts("[C;X4;H1,H2][CX3](=O)[NX3][C;X4;H1,H2][CX3](=O)")
for match in mol.GetSubstructMatches(pept_bond):
if residue_bonds:
resa = mol.GetAtomWithIdx(match[1]).GetPDBResidueInfo().GetResidueNumber()
resb = mol.GetAtomWithIdx(match[3]).GetPDBResidueInfo().GetResidueNumber()
if (resa, resb) not in residue_bonds and (resb, resa) not in residue_bonds:
continue
cap = get_fragment(mol, match[0:5])
cap = add_Hs(cap, return_rdmol=True)
caps.append(cap if return_rdmol else from_rdmol(cap))
cap_o_ind = cap.GetSubstructMatch(Chem.MolFromSmarts("[C;X4][CX3]=O"))
cap_o = get_fragment(cap, cap_o_ind, neutralize=False)
cap_n_ind = cap.GetSubstructMatch(Chem.MolFromSmarts("O=[CX3][NX3][C;X4]"))[2:]
cap_n = get_fragment(cap, cap_n_ind, neutralize=False)
em.RemoveBond(match[1], match[3])
add_fragment(em, cap_o, match[3], 1, 1)
add_fragment(em, cap_n, match[1], 0, 1)
# Split disulfide bonds
ss_bond = Chem.MolFromSmarts("[C;X4;H1,H2]SS[C;X4;H1,H2]")
for match in mol.GetSubstructMatches(ss_bond):
cap = get_fragment(mol, match[0:5])
cap = add_Hs(cap, return_rdmol=True)
caps.append(cap if return_rdmol else from_rdmol(cap))
cap_s_ind = cap.GetSubstructMatch(Chem.MolFromSmarts("[C;X4]SS[C;X4]"))
cap_s1 = get_fragment(cap, cap_s_ind[0:2], neutralize=False)
cap_s2 = get_fragment(cap, cap_s_ind[2:4], neutralize=False)
em.RemoveBond(match[1], match[2])
add_fragment(em, cap_s1, match[2], 1, 1)
add_fragment(em, cap_s2, match[1], 0, 1)
frags = Chem.GetMolFrags(em.GetMol(), asMols=True, sanitizeFrags=False)
if not return_rdmol:
frags = [from_rdmol(frag) for frag in frags]
return frags, caps
def charge_AAs(mol, return_rdmol=False):
from rdkit import Chem
ionizations = {"ARG_NH2": 1, "LYS_NZ": 1, "GLU_OE2": -1, "ASP_OD2": -1}
mol = to_rdmol(mol)
for atom in mol.GetAtoms():
resinfo = atom.GetPDBResidueInfo()
res_atom = resinfo.GetResidueName() + "_" + resinfo.GetName().strip()
try:
atom.SetFormalCharge(ionizations[res_atom])
Chem.SanitizeMol(mol)
except KeyError:
pass
Chem.SanitizeMol(mol)
return mol if return_rdmol else from_rdmol(mol)
[docs]def get_backbone_atoms(mol):
"""
Return a list of atom indices corresponding to the backbone atoms in a peptide molecule.
This function assumes PDB information in properties.pdb_info of each atom, which is the case
if the molecule is generated with the "readpdb" or "from_sequence" functions.
:parameter mol: a peptide molecule
:type mol: |Molecule| or rdkit.Chem.Mol
:return: a list of atom indices
:rtype: list
"""
mol = from_rdmol(mol)
backbone = ["N", "CA", "C", "O"]
return [a for a in range(1, len(mol) + 1) if str(mol[a].properties.pdb_info.Name).strip() in backbone]
[docs]def get_substructure(mol, func_list):
"""
Search for functional groups within a molecule based on a list of reference functional groups.
SMILES strings, PLAMS and/or RDKit molecules can be used interchangeably in "func_list".
Example:
.. code:: python
>>> mol = from_smiles('OCCO') # Ethylene glycol
>>> func_list = ['[H]O', 'C[N+]', 'O=PO']
>>> get_substructure(mol, func_list)
{'[H]O': [(<scm.plams.mol.atom.Atom at 0x125183518>,
<scm.plams.mol.atom.Atom at 0x1251836a0>),
(<scm.plams.mol.atom.Atom at 0x125183550>,
<scm.plams.mol.atom.Atom at 0x125183240>)]}
:parameter mol: A PLAMS molecule.
:type mol: |Molecule|
:parameter list func_list: A list of functional groups.
Functional groups can be represented by SMILES strings, PLAMS and/or RDKit molecules.
:return: A dictionary with functional groups from "func_list" as keys and a list of n-tuples
with matching PLAMS |Atom| as values.
"""
from rdkit import Chem
def _to_rdmol(functional_group):
"""Turn a SMILES strings, RDKit or PLAMS molecules into an RDKit molecule."""
if isinstance(functional_group, str):
# RDKit tends to remove explicit hydrogens if SANITIZE_ADJUSTHS is enabled
sanitize = Chem.SanitizeFlags.SANITIZE_ALL ^ Chem.SanitizeFlags.SANITIZE_ADJUSTHS
ret = Chem.MolFromSmiles(functional_group, sanitize=False)
Chem.rdmolops.SanitizeMol(ret, sanitizeOps=sanitize)
return ret
elif isinstance(functional_group, Molecule):
return to_rdmol(functional_group)
elif isinstance(functional_group, Chem.Mol):
return functional_group
raise TypeError(
"get_substructure: "
+ str(type(functional_group))
+ " is not a supported \
object type"
)
def _get_match(mol, rdmol, functional_group):
"""Perform a substructure match on "mol".
If a match is found, return a list of n-tuples consisting PLAMS |Atom|.
Otherwise return False."""
matches = rdmol.GetSubstructMatches(functional_group)
if matches:
return [tuple(mol[j + 1] for j in idx_tup) for idx_tup in matches]
return False
rdmol = to_rdmol(mol)
rdmol_func_list = [_to_rdmol(i) for i in func_list]
gen = (_get_match(mol, rdmol, i) for i in rdmol_func_list)
return {key: value for key, value in zip(func_list, gen) if value}
[docs]def yield_coords(rdmol, id=-1):
"""Take an rdkit molecule and yield its coordinates as 3-tuples.
.. code-block:: python
>>> from scm.plams import yield_coords
>>> from rdkit import Chem
>>> rdmol = Chem.Mol(...) # e.g. Methane
>>> for xyz in yield_coords(rdmol):
... print(xyz)
(-0.0, -0.0, -0.0)
(0.6405, 0.6405, -0.6405)
(0.6405, -0.6405, 0.6405)
(-0.6405, 0.6405, 0.6405)
(-0.6405, -0.6405, -0.6405)
The iterator produced by this function can, for example, be passed to
:meth:`Molecule.from_array()<scm.plams.mol.molecule.Molecule.from_array>`
the update the coordinates of a PLAMS Molecule in-place.
.. code-block:: python
>>> from scm.plams import Molecule
>>> mol = Molecule(...)
>>> xyz_iterator = yield_coords(rdmol)
>>> mol.from_array(xyz_iterator)
:parameter rdmol: An RDKit mol.
:type rdmol: rdkit.Chem.Mol
:parameter int id: The ID of the desired conformer.
:return: An iterator yielding 3-tuples with *rdmol*'s Cartesian coordinates.
:rtype: iterator
"""
conf = rdmol.GetConformer(id=id)
for atom in rdmol.GetAtoms():
pos = conf.GetAtomPosition(atom.GetIdx())
yield (pos.x, pos.y, pos.z)
@add_to_class(Molecule)
def assign_chirality(self):
"""
Assigns stereo-info to PLAMS molecule by invoking RDKIT
"""
rd_mol = to_rdmol(self, assignChirality=True)
pl_mol = from_rdmol(rd_mol)
# Add R/S info to self
for iat, pl_atom in enumerate(pl_mol.atoms):
# Check for R/S information
if pl_atom.properties.rdkit.stereo:
self.atoms[iat].properties.rdkit.stereo = pl_atom.properties.rdkit.stereo
# Add cis/trans information to self
for ibond, pl_bond in enumerate(pl_mol.bonds):
if pl_bond.properties.rdkit.stereo:
self.bonds[ibond] = pl_bond.properties.rdkit.stereo
@add_to_class(Molecule)
def get_chirality(self):
"""
Returns the chirality of the atoms
"""
from rdkit import Chem
rd_mol = to_rdmol(self, assignChirality=True)
return Chem.FindMolChiralCenters(rd_mol, force=True, includeUnassigned=True)
[docs]def canonicalize_mol(mol, inplace=False, **kwargs):
r"""Take a PLAMS molecule and sort its atoms based on their canonical rank.
Example:
.. code:: python
>>> from scm.plams import Molecule, canonicalize_mol
# Methane
>>> mol: Molecule = ...
>>> print(mol)
Atoms:
1 H 0.640510 0.640510 -0.640510
2 H 0.640510 -0.640510 0.640510
3 C 0.000000 0.000000 0.000000
4 H -0.640510 0.640510 0.640510
5 H -0.640510 -0.640510 -0.640510
>>> print(canonicalize_mol(mol))
Atoms:
1 C 0.000000 0.000000 0.000000
2 H -0.640510 -0.640510 -0.640510
3 H -0.640510 0.640510 0.640510
4 H 0.640510 -0.640510 0.640510
5 H 0.640510 0.640510 -0.640510
:parameter mol: The to-be canonicalized molecule.
:type mol: |Molecule|
:parameter bool inplace: Whether to sort the atoms inplace or to return a new molecule.
:parameter \**kwargs: Further keyword arguments for rdkit.Chem.CanonicalRankAtoms_.
:return: Either ``None`` or a newly sorted molecule, depending on the value of ``inplace``.
:rtype: None or |Molecule|
.. _rdkit.Chem.CanonicalRankAtoms: https://www.rdkit.org/docs/source/rdkit.Chem.rdmolfiles.html#rdkit.Chem.rdmolfiles.CanonicalRankAtoms
"""
from rdkit import Chem
if not isinstance(mol, Molecule):
raise TypeError("`mol` expected a plams Molecule")
rdmol = to_rdmol(mol)
idx_rank = Chem.CanonicalRankAtoms(rdmol, **kwargs)
if inplace:
mol.atoms = [at for _, at in sorted(zip(idx_rank, mol.atoms), reverse=True)]
return None
else:
ret = mol.copy()
ret.atoms = [at for _, at in sorted(zip(idx_rank, ret.atoms), reverse=True)]
return ret