Screening for cocrystals

In this section, we provide an example application that can be used as a template for many high-throughput screening scripts. Cocrystals are crystals formed from two or more compounds in a defined stoichiometry. There are many uses for cocrystals, especially for pharmaceutical applications where one compound is an active pharmaceutical ingredient (API). This example problem screens multiple compounds for their potential as components of a (1:2) cocrystal with Itraconazole. This problem uses the excess enthalpy for a hypothetical supercooled liquid phase as a proxy for cocrystallization affinity. The rankings of the solvents are in good agreement with model and experimental results for this problem given in 1 .

Download relevant coskf file: coskf_Hex.zip

Python code using pyCRS

[show/hide code]
from scm.plams import Settings, init, finish, CRSJob, config, JobRunner, KFFile
from pyCRS.Database import COSKFDatabase
from pyCRS.CRSManager import CRSSystem
from pathlib import Path
import matplotlib.pyplot as plt
import numpy as np

init()

####### Step 1 : add compounds to a pyCRS.databse. Ensure coskf_Hex is downloaded #######

#######  Note: Ensure to download the coskf_Hex before running the script  ##############

db = COSKFDatabase("my_coskf_db.db")

coskf_path = Path("./coskf_Hex")

if not coskf_path.exists():
    raise OSError(f"The provided path does not exist. Exiting.")


CAS_ref = { "tartaric_acid" : "526-83-0",
            "fumaric_acid" : "110-17-8",
            "succinic_acid" : "110-15-6",
            "malic_acid" : "6915-15-7",
            "glutaric_acid" : "110-94-1",
            "malonic_acid" : "141-82-2",
            "adipic_acid" : "124-04-9",
            "maleic_acid" : "110-16-7",
            "itz" : "84625-61-6"}

for file in coskf_path.glob("*.coskf"):
    for key, val in CAS_ref.items():
        if(key in file.name):
            CAS = val
            name = key.replace("_"," ")
            if(name == "itz"): name = "itraconazole"

    db.add_compound(file.name,cas=CAS,name=name,coskf_path=coskf_path)

####### Step2 : Iterately set up calculation for solubility and activitycoef #######

## Set up for parallel run ##
if True:
    config.default_jobrunner = JobRunner(
        parallel=True, maxjobs=8
    )  # Set jobrunner to be parallel and specify the numbers of jobs run simutaneously (eg. multiprocessing.cpu_count())
    config.default_jobmanager.settings.hashing = None  # Disable rerun prevention
    config.job.runscript.nproc = 1  # Number of cores for each job
    config.log.stdout = 0  # suppress plams output default=3

solv_name = [x.replace("_"," ") for x in list(CAS_ref.keys())[0:-1]]

System = CRSSystem()
System2 = CRSSystem()

for co_solvent in solv_name:
    mixture = {}
    mixture[co_solvent] = 0.33333
    mixture["itraconazole"] = 0.66667

    System.add_Mixture(
        mixture, database="my_coskf_db.db", temperature=298.15, problem_type="VAPORPRESSURE", conformer=True, jobname="conf"
    )
    System2.add_Mixture(
        mixture, database="my_coskf_db.db", temperature=298.15, problem_type="VAPORPRESSURE", conformer=False, jobname="Emin"
    )

System.runCRSJob()
System2.runCRSJob()


####### Step3 : Output processing to retrive results for plotting #######

print("Solvent".ljust(14), "Hex_Emin", "Hex_conf", "Population of solvent's conformers")
Hex_conf = []
Hex_Emin = []
for out, out2, name in zip(System.outputs, System2.outputs, solv_name):
    res = out.get_results()
    res2 = out2.get_results()

    Hex = res["excess H"]
    Hex2 = res2["excess H"]

    Hex_conf.append(Hex)
    Hex_Emin.append(Hex2)

    compositions = out.get_multispecies_dist()[0]

    print(name.ljust(15), end="")
    print(f"{Hex2:.3f}", end="   ")
    print(f"{Hex:.3f}", end="    ")
    for conf, frac in compositions.items():
        print(f"{frac[0]:.5f}", end=" ")
    print("")

if True:
    plt_index = [i for i in range(len(Hex_conf))]
    plt.xlabel("Excess enthalpy (kcal/mol)")
    plt.barh(plt_index, Hex_conf, zorder=3)
    plt.yticks(plt_index, solv_name)
    plt.grid(axis="x", ls="--", zorder=0)
    plt.gca().invert_xaxis()
    # plt.savefig('./Cocrystal_screening.png',dpi=300)
    plt.title("Hex with conformers")
    plt.tight_layout()
    
    plt.show()

if True:
    plt_index = [i for i in range(len(Hex_Emin))]
    plt.xlabel("Excess enthalpy (kcal/mol)")
    plt.barh(plt_index, Hex_Emin, zorder=3)
    plt.yticks(plt_index, solv_name)
    plt.grid(axis="x", ls="--", zorder=0)
    plt.gca().invert_xaxis()
    # plt.savefig('./Cocrystal_screening_Emin.png',dpi=300)
    plt.title("Hex with Lowest energy conformer")
    plt.tight_layout()
    plt.show()

Python code using PLAMS

[show/hide code]
import os, time
import multiprocessing
import numpy as np
import matplotlib.pyplot as plt
from scm.plams import Settings, init, finish, CRSJob, config, JobRunner


def set_CRSJob_Hex_conf(index, ncomp, coskf, database_path, cal_type, method, temperature, comp_input={}, comp_type=[]):

    s = Settings()  # initialize a settings object
    s.input.property._h = cal_type  # specify problem type
    s.input.method = method  # specify method
    s.input.temperature = temperature  # specify temperature

    compounds = [Settings() for i in range(ncomp)]  # initialization of compounds
    for i in range(ncomp):
        if len(comp_type) > i:
            if "conf" in comp_type[i]:
                form = [Settings() for i in range(len(coskf[i]))]  # initialize compound in multiple form
                for j in range(len(coskf[i])):
                    form[j]._h = os.path.join(
                        database_path, coskf[i][j]
                    )  # specify conformer's coskf file for each form
                compounds[i].form = form
            else:
                compounds[i]._h = os.path.join(database_path, coskf[i])  # specify absolute directory of coskf file
        else:
            compounds[i]._h = os.path.join(database_path, coskf[i])  # specify absolute directory of coskf file

        for column, value in comp_input.items():  # specify compound's information through comp_input, for example
            if value[i] != None:  # column: frac1, meltingpoint, hfusion
                compounds[i][column] = value[i]

    s.input.compound = compounds

    my_job = CRSJob(settings=s)  # create jobs
    my_job.name = cal_type + "_" + str(index)  # specify job name

    return my_job


init()

Parallel_run = True
Plot_option = True

if Parallel_run:
    config.default_jobrunner = JobRunner(
        parallel=True, maxjobs=8
    )  # Set jobrunner to be parallel and specify the numbers of jobs run simutaneously (eg. multiprocessing.cpu_count())
    config.default_jobmanager.settings.hashing = None  # Disable rerun prevention
    config.job.runscript.nproc = 1  # Number of cores for each job
    config.log.stdout = 1  # suppress plams output default=3

###----INPUT YOUR DATA HERE----###

database_path = os.path.join(os.getcwd(), "coskf_Hex")
cal_type = "VAPORPRESSURE"
method = "COSMORS"
ncomp = 2
solute = "itz_c1.coskf"

# a list of different conformers for the screening each line contains 3 conformers of the same molecule
solvents = [
    ["tartaric_acid_c1.coskf", "tartaric_acid_c2.coskf", "tartaric_acid_c3.coskf"],
    ["fumaric_acid_c1.coskf", "fumaric_acid_c2.coskf", "fumaric_acid_c3.coskf"],
    ["succinic_acid_c1.coskf", "succinic_acid_c2.coskf", "succinic_acid_c3.coskf"],
    ["malic_acid_c1.coskf", "malic_acid_c2.coskf", "malic_acid_c3.coskf"],
    ["glutaric_acid_c1.coskf", "glutaric_acid_c2.coskf", "glutaric_acid_c3.coskf"],
    ["malonic_acid_c1.coskf", "malonic_acid_c2.coskf", "malonic_acid_c3.coskf"],
    ["adipic_acid_c1.coskf", "adipic_acid_c2.coskf", "adipic_acid_c3.coskf"],
    ["maleic_acid_c1.coskf", "maleic_acid_c2.coskf", "maleic_acid_c3.coskf"],
]

temperature = 298.15  # K

# store input data along with the necessary thermal property in comp_input dictionary
comp_input = {}
comp_input["frac1"] = [0.33333, 0.66667]  # the stoichiometric ratio of the co-crystal (solvent:solute)

# enter 'conf' for compound with multiple conformers; '' for compound with single structure
comp_type = ["conf", ""]

###----INPUT END----###

outputs = []
solv_name = []
for index, solv in enumerate(solvents):
    solv_name.append(solv[0].replace("_c1.coskf", ""))
    coskf = [solv, solute]
    comp_input["name"] = [solv[0].replace("_c1.coskf", ""), solute.replace("_c1.coskf", "")]

    job = set_CRSJob_Hex_conf(index, ncomp, coskf, database_path, cal_type, method, temperature, comp_input, comp_type)
    outputs.append(job.run())

finish()

# In a parallel run, the get_results function will wait for the completion of the corresponding jobs.
results = []
excess_h = []
print("")
print("Solvent".ljust(14), "Population of solvent's conformers")
for out, name in zip(outputs, solv_name):
    res = out.get_results()
    results.append(res)
    excess_h.append(res["excess H"])

    compositions = out.get_multispecies_dist()[0]
    print(name.ljust(15), end="")
    for conf, frac in compositions.items():
        print(f"{frac[0]:.5f}", end=" ")
    print("")

print("")
print("Solvent".ljust(15), "Excess enthalpy (kcal/mol)")
for (
    name,
    Hex,
) in zip(solv_name, excess_h):
    print(name.ljust(15), round(Hex, 5))


if Plot_option:
    plt_index = [i for i in range(len(excess_h))]
    plt.xlabel("Excess enthalpy (kcal/mol)")
    plt.barh(plt_index, excess_h, zorder=3)
    plt.yticks(plt_index, solv_name)
    plt.grid(axis="x", ls="--", zorder=0)
    plt.gca().invert_xaxis()
    plt.tight_layout()
    # plt.savefig('./Cocrystal_screening.png',dpi=300)
    plt.show()

This figure (produced by the code) shows the excess enthalpy values of all solvents in a supercooled liquid mixture with Itraconazole. The lowest 4 excess enthalpy values correspond to 4 solvent for which a stable co-crystal with Itraconazole is known 1 .

/scm-uploads/doc/COSMO-RS/_images/Cocrystal_screening.png

References

1(1,2)

Abramov, Yuriy A., Christoph Loschen, and Andreas Klamt. “Rational coformer or solvent selection for pharmaceutical cocrystallization or desolvation.” Journal of pharmaceutical sciences 101.10 (2012): 3687-3697.