3. Getting started with prenspire#
[1]:
import random
from pathlib import Path
import lmfit
import numpy as np
from clophfit import prenspire
from clophfit.binding.fitting import (
DataArray,
Dataset,
analyze_spectra,
analyze_spectra_glob,
fit_binding_glob,
weight_multi_ds_titration,
)
from clophfit.binding.plotting import plot_emcee
%load_ext autoreload
%autoreload 2
tpath = Path("../../tests/EnSpire")
2025-04-18 12:56:03,996 - clophfit.binding.fitting - INFO - Plotting module started
[2]:
ef1 = prenspire.EnspireFile(tpath / "h148g-spettroC.csv")
ef2 = prenspire.EnspireFile(tpath / "e2-T-without_sample_column.csv")
ef3 = prenspire.EnspireFile(tpath / "24well_clop0_95.csv")
[3]:
ef3.wells, ef3._wells_platemap, ef3._platemap
[3]:
(['A03', 'A04', 'A05', 'A06', 'B01', 'B02', 'C01', 'C02', 'C03'],
['A03', 'A04', 'A05', 'A06', 'B01', 'B02', 'C01', 'C02', 'C03'],
[['A', ' ', ' ', '- ', '- ', '- ', '- '],
['B', '- ', '- ', ' ', ' ', ' ', ' '],
['C', '- ', '- ', '- ', ' ', ' ', ' '],
['D', ' ', ' ', ' ', ' ', ' ', ' ']])
[4]:
ef1.__dict__.keys()
[4]:
dict_keys(['file', 'verbose', 'metadata', 'measurements', 'wells', '_ini', '_fin', '_wells_platemap', '_platemap'])
[5]:
ef1.measurements.keys(), ef2.measurements.keys()
[5]:
(dict_keys(['A']), dict_keys(['B', 'A', 'C', 'D', 'E', 'F', 'G', 'H']))
when testing each spectra for the presence of a single wavelength in the appropriate monochromator
[6]:
ef2.measurements["A"]["metadata"]
[6]:
{'temp': '25',
'Monochromator': 'Excitation',
'Min wavelength': '400',
'Max wavelength': '510',
'Wavelength': '530',
'Using of excitation filter': 'Top',
'Measurement height': '8.9',
'Number of flashes': '50',
'Number of flashes integrated': '50',
'Flash power': '100'}
[7]:
ef2.measurements["A"].keys()
[7]:
dict_keys(['metadata', 'lambda', 'F01', 'F02', 'F03', 'F04', 'F05', 'F06', 'F07'])
[8]:
random.seed(11)
random.sample(ef1.measurements["A"]["F01"], 7)
[8]:
[2163.0, 607.0, 1846.0, 517.0, 572.0, 2145.0, 2028.0]
[9]:
fp = tpath / "h148g-spettroC-nota.csv"
n1 = prenspire.Note(fp, verbose=1)
n1._note.set_index("Well").loc["A01", ["Name", "Temp"]]
Wells ['A01', 'A02']...['G04', 'G05'] generated.
[9]:
Name H148G
Temp 20.0
Name: A01, dtype: object
[10]:
n1.__dict__.keys()
[10]:
dict_keys(['fpath', 'verbose', 'wells', '_note', 'titrations'])
[11]:
n1.wells == ef1.wells, n1.wells == ef2.wells
[11]:
(True, False)
[12]:
n1.build_titrations(ef1)
tit0 = n1.titrations["H148G"][20.0]["Cl_0.0"]["A"]
tit3 = n1.titrations["H148G"][20.0]["pH_7.4"]["A"]
tit0
[12]:
5.2 | 6.3 | 7.4 | 8.1 | 8.2 | |
---|---|---|---|---|---|
272.0 | 3151.0 | 4181.0 | 16413.0 | 29192.0 | 28816.0 |
273.0 | 3130.0 | 4204.0 | 16926.0 | 29909.0 | 29545.0 |
274.0 | 3043.0 | 4232.0 | 17331.0 | 30900.0 | 30750.0 |
275.0 | 3079.0 | 4283.0 | 17680.0 | 31717.0 | 31547.0 |
276.0 | 2975.0 | 4264.0 | 18020.0 | 32564.0 | 32336.0 |
... | ... | ... | ... | ... | ... |
496.0 | 636.0 | 4689.0 | 43230.0 | 87203.0 | 87842.0 |
497.0 | 683.0 | 4923.0 | 45173.0 | 89719.0 | 90666.0 |
498.0 | 632.0 | 4900.0 | 46725.0 | 93452.0 | 94101.0 |
499.0 | 854.0 | 5140.0 | 48452.0 | 96643.0 | 97506.0 |
500.0 | 573.0 | 5573.0 | 50025.0 | 99847.0 | 100715.0 |
229 rows × 5 columns
[13]:
tit0.plot()
tit3.plot()
[13]:
<Axes: >


[14]:
ef = prenspire.EnspireFile(tpath / "G10.csv")
fp = tpath / "NTT-G10_note.csv"
nn = prenspire.Note(fp, verbose=1)
nn.build_titrations(ef)
spectra = nn.titrations["NTT-G10"][20.0]["Cl_0.0"]["C"]
Wells ['D01', 'D02']...['G07', 'G08'] generated.
[15]:
f_res = analyze_spectra(spectra, is_ph=True, band=None)
print(f_res.result.chisqr)
f_res.figure
551.9999999999978
[15]:

[16]:
def dataset_from_lres(lkey, lres, is_ph):
data = {}
for k, res in zip(lkey, lres, strict=False):
x = res.mini.userargs[0]["default"].x
y = res.mini.userargs[0]["default"].y
data[k] = DataArray(x, y)
return Dataset(data, is_ph)
spectra_A = nn.titrations["NTT-G10"][20.0]["Cl_0.0"]["A"]
spectra_C = nn.titrations["NTT-G10"][20.0]["Cl_0.0"]["C"]
spectra_D = nn.titrations["NTT-G10"][20.0]["Cl_0.0"]["D"]
resA = analyze_spectra(spectra_A, "pH", (466, 510))
resC = analyze_spectra(spectra_C, "pH", (470, 500))
resD = analyze_spectra(spectra_D, "pH", (450, 600))
ds_bands = dataset_from_lres(["A", "C", "D"], [resA, resC, resD], True)
resA = analyze_spectra(spectra_A, "pH")
resC = analyze_spectra(spectra_C, "pH")
resD = analyze_spectra(spectra_D, "pH")
ds_svd = dataset_from_lres(["A", "C", "D"], [resA, resC, resD], True)
[17]:
dbands = {"D": (466, 510)}
tit = nn.titrations["NTT-G10"][20.0]["Cl_0.0"]
sgres = analyze_spectra_glob(tit, ds_svd, dbands)
sgres.svd.figure
The chain is shorter than 50 times the integrated autocorrelation time for 1 parameter(s). Use this estimate with caution and run a longer chain!
N/50 = 108;
tau: [ 76.34195787 131.25986943 59.39022814 75.88564278 92.87846348]
[17]:

[18]:
sgres.gsvd.figure
[18]:

[19]:
dbands = {"A": (466, 510), "C": (470, 500), "D": (450, 600)}
sgres = analyze_spectra_glob(nn.titrations["NTT-G10"][20.0]["Cl_0.0"], ds_bands, dbands)
sgres.bands.figure
[19]:

[20]:
ci = lmfit.conf_interval(sgres.bands.mini, sgres.bands.result)
print(lmfit.ci_report(ci, ndigits=2, with_offset=False))
99.73% 95.45% 68.27% _BEST_ 68.27% 95.45% 99.73%
K : 7.89 7.93 7.97 8.00 8.04 8.07 8.11
S0_C: 9.79 9.99 10.19 10.38 10.58 10.79 11.02
S1_C: 1.58 1.74 1.88 2.02 2.16 2.31 2.46
S0_D: 0.25 0.48 0.69 0.90 1.10 1.30 1.51
S1_D: 9.22 9.37 9.51 9.65 9.79 9.94 10.09
S0_A: 1.54 1.75 1.96 2.15 2.34 2.53 2.73
S1_A: 9.18 9.33 9.47 9.61 9.75 9.89 10.04
[21]:
weight_multi_ds_titration(ds_svd)
res = fit_binding_glob(ds_svd)
res.figure
[21]:

[22]:
xx = np.array([5.2, 6.3, 7.4, 8.1, 8.2])
yy = np.array([6.05, 12.2, 20.38, 48.2, 80.3])
def kd(x, kd1, pka):
return kd1 * (1 + 10 ** (pka - x)) / 10 ** (pka - x)
model = lmfit.Model(kd)
params = lmfit.Parameters()
params.add("kd1", value=10.0)
params.add("pka", value=6.6)
result = model.fit(yy, params, x=xx)
result.plot_fit(numpoints=50, ylabel="$K_d$ (mM)", xlabel="pH")
[22]:
<Axes: title={'center': 'Model(kd)'}, xlabel='pH', ylabel='$K_d$ (mM)'>

[23]:
result_emcee = result.emcee(
steps=1800, burn=150, workers=8, nwalkers=10, seed=1, progress=False
)
fig = plot_emcee(result_emcee.flatchain)

[24]:
import pymc as pm
[25]:
# Assuming x_data and y_data are your data
with pm.Model() as model:
kd1 = pm.Normal(
"kd1", mu=result.params["kd1"].value, sigma=result.params["kd1"].stderr
)
pka = pm.Normal(
"pka", mu=result.params["pka"].value, sigma=result.params["pka"].stderr
)
y_pred = pm.Deterministic("y_pred", kd(xx, kd1, pka))
likelihood = pm.Normal("y", mu=y_pred, sigma=1, observed=yy)
trace = pm.sample(1000, tune=2000, random_seed=1, chains=8, cores=8)
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (8 chains in 8 jobs)
NUTS: [kd1, pka]
Sampling 8 chains for 2_000 tune and 1_000 draw iterations (16_000 + 8_000 draws total) took 11 seconds.
[26]:
pm.model_to_graphviz(model)
[26]:
[27]:
import arviz as az
ax = az.plot_pair(
trace,
divergences=1,
var_names=["kd1", "pka"],
kind=["kde", "scatter"],
kde_kwargs={"fill_last": False},
marginals=True,
point_estimate="mean",
figsize=(7, 7),
)

[28]:
import corner
# Extract the posterior samples for the parameters of interest
kd1_samples = trace.posterior["kd1"].to_numpy().flatten()
pka_samples = trace.posterior["pka"].to_numpy().flatten()
# Ensure the samples are in the correct format for the corner plotpm.plot_posterior(trace, var_names=["R0", "R1", "K"])
samples_array = np.column_stack([kd1_samples, pka_samples])
# Plot the corner plot
f = corner.corner(samples_array, labels=["kd1", "pka"])

[29]:
pm.plot_trace(trace, rug=True)
[29]:
array([[<Axes: title={'center': 'kd1'}>, <Axes: title={'center': 'kd1'}>],
[<Axes: title={'center': 'pka'}>, <Axes: title={'center': 'pka'}>],
[<Axes: title={'center': 'y_pred'}>,
<Axes: title={'center': 'y_pred'}>]], dtype=object)

[30]:
pm.plot_posterior(trace, var_names=["kd1", "pka"])
[30]:
array([<Axes: title={'center': 'kd1'}>, <Axes: title={'center': 'pka'}>],
dtype=object)

[31]:
pm.plot_forest(trace, var_names=["kd1", "pka"])
pm.plot_forest(trace, var_names=["pka"])
[31]:
array([<Axes: title={'center': '94.0% HDI'}>], dtype=object)

