3. Getting started with prenspire#

[1]:
import random
from pathlib import Path

import lmfit
import numpy as np

from clophfit import prenspire
from clophfit.binding.fitting import (
    DataArray,
    Dataset,
    analyze_spectra,
    analyze_spectra_glob,
    fit_binding_glob,
    weight_multi_ds_titration,
)
from clophfit.binding.plotting import plot_emcee

%load_ext autoreload
%autoreload 2
tpath = Path("../../tests/EnSpire")
2025-04-18 12:56:03,996 - clophfit.binding.fitting - INFO - Plotting module started
[2]:
ef1 = prenspire.EnspireFile(tpath / "h148g-spettroC.csv")
ef2 = prenspire.EnspireFile(tpath / "e2-T-without_sample_column.csv")
ef3 = prenspire.EnspireFile(tpath / "24well_clop0_95.csv")
[3]:
ef3.wells, ef3._wells_platemap, ef3._platemap
[3]:
(['A03', 'A04', 'A05', 'A06', 'B01', 'B02', 'C01', 'C02', 'C03'],
 ['A03', 'A04', 'A05', 'A06', 'B01', 'B02', 'C01', 'C02', 'C03'],
 [['A', '  ', '  ', '- ', '- ', '- ', '- '],
  ['B', '- ', '- ', '  ', '  ', '  ', '  '],
  ['C', '- ', '- ', '- ', '  ', '  ', '  '],
  ['D', '  ', '  ', '  ', '  ', '  ', '  ']])
[4]:
ef1.__dict__.keys()
[4]:
dict_keys(['file', 'verbose', 'metadata', 'measurements', 'wells', '_ini', '_fin', '_wells_platemap', '_platemap'])
[5]:
ef1.measurements.keys(), ef2.measurements.keys()
[5]:
(dict_keys(['A']), dict_keys(['B', 'A', 'C', 'D', 'E', 'F', 'G', 'H']))

when testing each spectra for the presence of a single wavelength in the appropriate monochromator

[6]:
ef2.measurements["A"]["metadata"]
[6]:
{'temp': '25',
 'Monochromator': 'Excitation',
 'Min wavelength': '400',
 'Max wavelength': '510',
 'Wavelength': '530',
 'Using of excitation filter': 'Top',
 'Measurement height': '8.9',
 'Number of flashes': '50',
 'Number of flashes integrated': '50',
 'Flash power': '100'}
[7]:
ef2.measurements["A"].keys()
[7]:
dict_keys(['metadata', 'lambda', 'F01', 'F02', 'F03', 'F04', 'F05', 'F06', 'F07'])
[8]:
random.seed(11)
random.sample(ef1.measurements["A"]["F01"], 7)
[8]:
[2163.0, 607.0, 1846.0, 517.0, 572.0, 2145.0, 2028.0]
[9]:
fp = tpath / "h148g-spettroC-nota.csv"
n1 = prenspire.Note(fp, verbose=1)
n1._note.set_index("Well").loc["A01", ["Name", "Temp"]]
Wells ['A01', 'A02']...['G04', 'G05'] generated.
[9]:
Name    H148G
Temp     20.0
Name: A01, dtype: object
[10]:
n1.__dict__.keys()
[10]:
dict_keys(['fpath', 'verbose', 'wells', '_note', 'titrations'])
[11]:
n1.wells == ef1.wells, n1.wells == ef2.wells
[11]:
(True, False)
[12]:
n1.build_titrations(ef1)
tit0 = n1.titrations["H148G"][20.0]["Cl_0.0"]["A"]
tit3 = n1.titrations["H148G"][20.0]["pH_7.4"]["A"]
tit0
[12]:
5.2 6.3 7.4 8.1 8.2
272.0 3151.0 4181.0 16413.0 29192.0 28816.0
273.0 3130.0 4204.0 16926.0 29909.0 29545.0
274.0 3043.0 4232.0 17331.0 30900.0 30750.0
275.0 3079.0 4283.0 17680.0 31717.0 31547.0
276.0 2975.0 4264.0 18020.0 32564.0 32336.0
... ... ... ... ... ...
496.0 636.0 4689.0 43230.0 87203.0 87842.0
497.0 683.0 4923.0 45173.0 89719.0 90666.0
498.0 632.0 4900.0 46725.0 93452.0 94101.0
499.0 854.0 5140.0 48452.0 96643.0 97506.0
500.0 573.0 5573.0 50025.0 99847.0 100715.0

229 rows × 5 columns

[13]:
tit0.plot()
tit3.plot()
[13]:
<Axes: >
../_images/tutorials_prenspire_14_1.png
../_images/tutorials_prenspire_14_2.png
@dataclass class Metadata: @dataclass class Datum: well: str pH: float Cl: float T: float mut: str labels: list[str] metadata: dict[str, Metadata]
[14]:
ef = prenspire.EnspireFile(tpath / "G10.csv")
fp = tpath / "NTT-G10_note.csv"
nn = prenspire.Note(fp, verbose=1)
nn.build_titrations(ef)
spectra = nn.titrations["NTT-G10"][20.0]["Cl_0.0"]["C"]
Wells ['D01', 'D02']...['G07', 'G08'] generated.
[15]:
f_res = analyze_spectra(spectra, is_ph=True, band=None)
print(f_res.result.chisqr)
f_res.figure
551.9999999999978
[15]:
../_images/tutorials_prenspire_17_1.png
[16]:
def dataset_from_lres(lkey, lres, is_ph):
    data = {}
    for k, res in zip(lkey, lres, strict=False):
        x = res.mini.userargs[0]["default"].x
        y = res.mini.userargs[0]["default"].y
        data[k] = DataArray(x, y)
    return Dataset(data, is_ph)


spectra_A = nn.titrations["NTT-G10"][20.0]["Cl_0.0"]["A"]
spectra_C = nn.titrations["NTT-G10"][20.0]["Cl_0.0"]["C"]
spectra_D = nn.titrations["NTT-G10"][20.0]["Cl_0.0"]["D"]

resA = analyze_spectra(spectra_A, "pH", (466, 510))
resC = analyze_spectra(spectra_C, "pH", (470, 500))
resD = analyze_spectra(spectra_D, "pH", (450, 600))

ds_bands = dataset_from_lres(["A", "C", "D"], [resA, resC, resD], True)

resA = analyze_spectra(spectra_A, "pH")
resC = analyze_spectra(spectra_C, "pH")
resD = analyze_spectra(spectra_D, "pH")
ds_svd = dataset_from_lres(["A", "C", "D"], [resA, resC, resD], True)
[17]:
dbands = {"D": (466, 510)}
tit = nn.titrations["NTT-G10"][20.0]["Cl_0.0"]
sgres = analyze_spectra_glob(tit, ds_svd, dbands)

sgres.svd.figure
The chain is shorter than 50 times the integrated autocorrelation time for 1 parameter(s). Use this estimate with caution and run a longer chain!
N/50 = 108;
tau: [ 76.34195787 131.25986943  59.39022814  75.88564278  92.87846348]
[17]:
../_images/tutorials_prenspire_19_1.png
[18]:
sgres.gsvd.figure
[18]:
../_images/tutorials_prenspire_20_0.png
[19]:
dbands = {"A": (466, 510), "C": (470, 500), "D": (450, 600)}
sgres = analyze_spectra_glob(nn.titrations["NTT-G10"][20.0]["Cl_0.0"], ds_bands, dbands)
sgres.bands.figure
[19]:
../_images/tutorials_prenspire_21_0.png
[20]:
ci = lmfit.conf_interval(sgres.bands.mini, sgres.bands.result)
print(lmfit.ci_report(ci, ndigits=2, with_offset=False))
      99.73% 95.45% 68.27% _BEST_ 68.27% 95.45% 99.73%
 K   :   7.89   7.93   7.97   8.00   8.04   8.07   8.11
 S0_C:   9.79   9.99  10.19  10.38  10.58  10.79  11.02
 S1_C:   1.58   1.74   1.88   2.02   2.16   2.31   2.46
 S0_D:   0.25   0.48   0.69   0.90   1.10   1.30   1.51
 S1_D:   9.22   9.37   9.51   9.65   9.79   9.94  10.09
 S0_A:   1.54   1.75   1.96   2.15   2.34   2.53   2.73
 S1_A:   9.18   9.33   9.47   9.61   9.75   9.89  10.04
[21]:
weight_multi_ds_titration(ds_svd)
res = fit_binding_glob(ds_svd)
res.figure
[21]:
../_images/tutorials_prenspire_23_0.png
[22]:
xx = np.array([5.2, 6.3, 7.4, 8.1, 8.2])
yy = np.array([6.05, 12.2, 20.38, 48.2, 80.3])


def kd(x, kd1, pka):
    return kd1 * (1 + 10 ** (pka - x)) / 10 ** (pka - x)


model = lmfit.Model(kd)
params = lmfit.Parameters()
params.add("kd1", value=10.0)
params.add("pka", value=6.6)
result = model.fit(yy, params, x=xx)

result.plot_fit(numpoints=50, ylabel="$K_d$ (mM)", xlabel="pH")
[22]:
<Axes: title={'center': 'Model(kd)'}, xlabel='pH', ylabel='$K_d$ (mM)'>
../_images/tutorials_prenspire_24_1.png
[23]:
result_emcee = result.emcee(
    steps=1800, burn=150, workers=8, nwalkers=10, seed=1, progress=False
)
fig = plot_emcee(result_emcee.flatchain)
../_images/tutorials_prenspire_25_0.png
[24]:
import pymc as pm
[25]:
# Assuming x_data and y_data are your data
with pm.Model() as model:
    kd1 = pm.Normal(
        "kd1", mu=result.params["kd1"].value, sigma=result.params["kd1"].stderr
    )
    pka = pm.Normal(
        "pka", mu=result.params["pka"].value, sigma=result.params["pka"].stderr
    )

    y_pred = pm.Deterministic("y_pred", kd(xx, kd1, pka))

    likelihood = pm.Normal("y", mu=y_pred, sigma=1, observed=yy)

    trace = pm.sample(1000, tune=2000, random_seed=1, chains=8, cores=8)
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (8 chains in 8 jobs)
NUTS: [kd1, pka]
Sampling 8 chains for 2_000 tune and 1_000 draw iterations (16_000 + 8_000 draws total) took 11 seconds.
[26]:
pm.model_to_graphviz(model)
[26]:
../_images/tutorials_prenspire_28_0.svg
[27]:
import arviz as az

ax = az.plot_pair(
    trace,
    divergences=1,
    var_names=["kd1", "pka"],
    kind=["kde", "scatter"],
    kde_kwargs={"fill_last": False},
    marginals=True,
    point_estimate="mean",
    figsize=(7, 7),
)
../_images/tutorials_prenspire_29_0.png
[28]:
import corner

# Extract the posterior samples for the parameters of interest
kd1_samples = trace.posterior["kd1"].to_numpy().flatten()
pka_samples = trace.posterior["pka"].to_numpy().flatten()
# Ensure the samples are in the correct format for the corner plotpm.plot_posterior(trace, var_names=["R0", "R1", "K"])
samples_array = np.column_stack([kd1_samples, pka_samples])
# Plot the corner plot
f = corner.corner(samples_array, labels=["kd1", "pka"])
../_images/tutorials_prenspire_30_0.png
[29]:
pm.plot_trace(trace, rug=True)
[29]:
array([[<Axes: title={'center': 'kd1'}>, <Axes: title={'center': 'kd1'}>],
       [<Axes: title={'center': 'pka'}>, <Axes: title={'center': 'pka'}>],
       [<Axes: title={'center': 'y_pred'}>,
        <Axes: title={'center': 'y_pred'}>]], dtype=object)
../_images/tutorials_prenspire_31_1.png
[30]:
pm.plot_posterior(trace, var_names=["kd1", "pka"])
[30]:
array([<Axes: title={'center': 'kd1'}>, <Axes: title={'center': 'pka'}>],
      dtype=object)
../_images/tutorials_prenspire_32_1.png
[31]:
pm.plot_forest(trace, var_names=["kd1", "pka"])
pm.plot_forest(trace, var_names=["pka"])
[31]:
array([<Axes: title={'center': '94.0% HDI'}>], dtype=object)
../_images/tutorials_prenspire_33_1.png
../_images/tutorials_prenspire_33_2.png