Generating multiple conformers for a compound

Different conformers of a molecule can have significantly different sigma profiles, which can lead to big differences in predicted properties with COSMO-RS. For this reason, it’s important for COSMO-RS calculations to use geometries corresponding to the lowest-energy conformer or a set of low-energy conformers when it’s possible several conformers may exist in significant amounts. The script below allows the user to use the ConformerTools functionality in AMS to generate a set of low-energy conformers, refine the geometries with a semi-empirical method, and finally perform the ADF and COSMO calculations necessary to produce .coskf files. The example files are used in a simple binary mixture calculation with COSMO-RS. This calculation models one of the species as a mixture of conformers and plots activity coefficients and the conformer distribution over the mole fraction range.

Python code (Binary mixture)

[show/hide code]
import os
import matplotlib.pyplot as plt
from scm.conformers import ConformersJob
from scm.plams import from_smiles, init, finish, Settings, delete_job, KFFile, AMSJob, Molecule, CRSJob, Units

class COSKFConformers:
    '''
    Args:
        mol_identifier     (str)   : A smiles string or file (.xyz, .pdb, .mol, or mol2) with which to input a molecule
    Keyword Args:
        generator          (str)   : The method used to generate conformers.  Can be one of ['RDKit','CREST']
        initial_conformers (int)   : The number of initial conformers to sample
        coskf_name         (str)   : the filename base for the .coskf files this script produces.  multiple conformers will be saved with the filename base appended with a ``_n`` where n is the index of the conformer.
        max_energy_range   (float) : the largest acceptable energy (relative to the lowest energy conformer) for an adf calculation to be performed.  The energy is compared at the dftb level.
        max_num_confs      (int)   : the maximum number of conformers to produce .coskf files for
        print_output       (bool)  : Whether to print more detailed information about the progress of the script.
        dftb_solvent       (str)   : A solvent to use with GFN-xTB (DFTB).  Can be one of ['Acetone','Acetonitrile','CHCl3','CS2','DMSO','Ether','H2O','Methanol','THF','Toluene']
        coskf_dir          (str)   : The directory where produced .coskf files should be saved.  Defaults to the <plams run directory>/conformers
    '''
    def __init__(self,
                 mol_identifier : str,
                 generator          = 'RDKit',
                 initial_conformers = 500,
                 coskf_name         = None,
                 max_energy_range   = 2.0,
                 max_num_confs      = None,
                 print_output       = False,
                 dftb_solvent       = None,
                 coskf_dir          = None
                 ):

        self.print_output       = print_output
        self.coskf_name         = coskf_name
        self.generator          = generator
        self.initial_conformers = initial_conformers
        self.dftb_solvent       = dftb_solvent
        self.coskf_map          = {}
        self.coskf_dir          = coskf_dir

        self.mol                = self.process_molecule(mol_identifier)
        if self.mol is None:
            print("There was a problem reading the molecule: ", mol_identifier)
            return

        self.confs0 = self.get_conformers()
        self.confs1 = self.conformers_xtb()

        if max_num_confs and len(self.confs1.get_relative_energies())>max_num_confs:
            num_max_energy   = self.confs1.get_relative_energies()[max_num_confs-1]
            num_max_energy   = Units.convert(num_max_energy,'Hartree','kcal/mol') + 0.001
            max_energy_range = min(max_energy_range,num_max_energy) if max_energy_range else num_max_energy

        self.confs2 = self.conformers_adf(max_energy_range=max_energy_range)
        replay = self.conformers_cosmo()
        self.make_coskfs(replay)

        # Remove the results from the replay job because they can be large
        delete_job(replay.job)


    def process_molecule(self, mol_identifier):
        '''
        This function tries to process any molecular identifier passed to the COSKFConformers constructor.
        This first checks for a filename ending with .xyz, .pdb, .mol, or mol2.  If this is not found, the
        molecule is assumed to be input as a SMILES string.
        '''
        if any( mol_identifier.endswith(x) for x in ['.xyz','.pdb','.mol','mol2'] ):
            try:
                return Molecule(mol_identifier)
            except:
                return None
        try:
            return from_smiles(mol_identifier)
        except:
            return None

    def get_conformers(self):
        '''
        Optimize conformers at the UFF level
        '''
        sett = Settings()
        if self.generator == 'RDKit':
            sett.input.AMS.Generator.RDKit
            sett.input.AMS.Generator.RDKit.InitialNConformers = self.initial_conformers

        conformers_uff = ConformersJob(name="conformers_uff", molecule=self.mol).run()
        if self.print_output:
            print("Conformers at the UFF level:")
            print(conformers_uff)
        return conformers_uff

    def conformers_xtb(self):
        '''
        Optimize conformers with DFTB
        '''
        sett = Settings()
        sett.input.AMS.Task = "Optimize"
        sett.input.AMS.InputConformersSet = self.confs0
        sett.input.DFTB
        if self.dftb_solvent:
            sett.input.DFTB.Solvation.Solvent = self.dftb_solvent
        conformers_xtb = ConformersJob(name="conformers_xtb", settings=sett).run()
        if self.print_output:
            print(f"Conformers at the GFN1-xTB level with solvent {self.dftb_solvent}:")
            print(conformers_xtb)
        return conformers_xtb

    def conformers_adf(self,max_energy_range=None):
        '''
        Optimize conformers with ADF and the special settings for CRS
        '''
        sett = Settings()
        sett.input.AMS.Task = "Optimize"
        sett.input.AMS.GeometryOptimization.UseAMSWorker = "False"
        sett.input.AMS.InputConformersSet = self.confs1
        self._add_default_crs_settings(sett)
        if max_energy_range:
            sett.input.AMS.InputMaxEnergy = f'{max_energy_range} [kcal/mol]'
        conformers_adf = ConformersJob(name="conformers", settings=sett).run()
        if self.print_output:
            print("Conformers at the DFT level:")
            print(conformers_adf)
        return conformers_adf

    def conformers_cosmo(self):
        '''
        Replay job on the conformer set to generate all the ADF engine output files with the COSMO sections.
        These are then converted to .coskf files afterwards.
        '''
        sett = Settings()
        self._add_default_crs_settings(sett)
        sett.input.AMS.Task = "Replay"
        sett.input.AMS.Replay.File = self.confs2["conformers.rkf"]
        sett.input.AMS.Replay.StoreAllResultFiles = "True"
        sett.input.ADF.Symmetry = "NOSYM"
        self._add_default_crs_solvation(sett)
        replay = AMSJob(name="replay", settings=sett).run()
        return replay

    def make_coskfs(self,replay):
        '''
        Copy the COSMO sections from all the files back to the folder with the conformers
        '''
        base_name = self.coskf_name if self.coskf_name is not None else "conformer"
        if self.coskf_dir is None:
            self.coskf_dir = self.confs2.job.path
        for i, E in enumerate(self.confs2.get_energies("Ha")):
            if f"Frame{i+1}.rkf" in replay:
                cosmo_section = replay.read_rkf_section("COSMO", f"Frame{i+1}")
                cosmo_section["Gas Phase Bond Energy"] = E
                name = f"{base_name}_{i}.coskf"
                coskf = KFFile(os.path.join(self.coskf_dir, name), autosave=False)
                for key, val in cosmo_section.items():
                    coskf.write("COSMO", key, val)
                coskf.save()
                self.coskf_map[i] = name

    def _add_default_crs_settings(self,sett):
        '''
        This function adds the default settings for optimizing geometries in cosmo-rs/-sac.
        '''
        sett.input.ADF.Basis.Type        = "TZP"
        sett.input.ADF.Basis.Core        = "Small"
        sett.input.ADF.BeckeGrid.Quality = "Good"
        sett.input.ADF.XC.GGA            = "BP86"

    def _add_default_crs_solvation(self,sett):
        '''
        This function adds the solvation block required for generating sigma profiles.
        Note that in this section, only the radii of the most common elements have been added.  If your compound
        contains an element not listed below, you'll have to add its symbol and radius in the same format as the others.
        '''
        sett.input.ADF.Solvation = {
            "Surf": "Delley",
            "Solv": "Name=CRS Cav0=0.0 Cav1=0.0",
            "Charged": "Method=Conj Corr",
            "C-Mat": "Exact",
            "SCF": "Var All",
            "Radii": {
                "H": 1.30,
                "C": 2.00,
                "N": 1.83,
                "O": 1.72,
                "F": 1.72,
                "Si": 2.48,
                "P": 2.13,
                "S": 2.16,
                "Cl": 2.05,
                "Br": 2.16,
                "I": 2.32,
            },
        }

init()

coskf_name = "acetic_acid"
conformers = COSKFConformers(
    'CC(=O)O',
    print_output=True,
    coskf_name = coskf_name,
    max_energy_range=None,
    max_num_confs=100,
    dftb_solvent=None,
    coskf_dir='conformer_coskfs')

# here we'll do a simple cosmo-rs calculation with the conformers produced above
# this will be a binary mixture calculation with water (available in the cosmo-rs compound database)
settings = Settings()
settings.input.property._h = 'BINMIXCOEF'

# absolute paths are usually easier for input
path_to_confs = os.path.abspath(conformers.coskf_dir)

num_compounds = 2
compounds     = [Settings() for i in range(num_compounds)]

compounds[0].name = "acetic_acid"
form = [Settings() for i in range(len(conformers.coskf_map))]
for i,v in conformers.coskf_map.items():
    form[i]._h = os.path.join( path_to_confs, v )
compounds[0].form = form

compounds[1].name = "water"
compounds[1]._h   = os.path.join( os.getcwd(), "Water.coskf" )

settings.input.temperature = 298.15
settings.input.compound    = compounds
crs_job                    = CRSJob(settings=settings)
out                        = crs_job.run()
res                        = out.get_results()
comp_acetic                = out.get_multispecies_dist()[0]

mf1 = res['molar fraction'][0]
fig, axs = plt.subplots(2)

axs[0].plot( mf1, res['gamma'][0], label = '$\gamma_1$ (acetic)')
axs[0].plot( mf1, res['gamma'][1], label = '$\gamma_2$ (water)')

for struct, val in comp_acetic.items():
    axs[1].plot( mf1, val, label = os.path.basename(struct))

plt.setp(axs[0],ylabel='Activity coefficients')
plt.setp(axs[1],ylabel='Distribution ')
axs[0].legend()
axs[0].grid()
axs[1].legend()
axs[1].grid()
plt.xlabel('$x_1$ (acetic acid)')
plt.show()

finish()

This code produces the following figure which plots activity coefficients and the distribution of two conformers.

/scm-uploads/doc.2023/COSMO-RS/_images/as_conformers.png