3. Getting started with prenspire#

[1]:

import random
from pathlib import Path

import lmfit
import numpy as np

from clophfit import prenspire
from clophfit.binding.fitting import (
    DataArray,
    Dataset,
    analyze_spectra,
    analyze_spectra_glob,
    fit_binding_glob,
    weight_multi_ds_titration,
)
from clophfit.binding.plotting import plot_emcee

%load_ext autoreload
%autoreload 2
tpath = Path("../../tests/EnSpire")

2025-06-23 11:53:34,498 - clophfit.binding.fitting - INFO - Plotting module started

[2]:

ef1 = prenspire.EnspireFile(tpath / "h148g-spettroC.csv")
ef2 = prenspire.EnspireFile(tpath / "e2-T-without_sample_column.csv")
ef3 = prenspire.EnspireFile(tpath / "24well_clop0_95.csv")

[3]:

ef3.wells, ef3._wells_platemap, ef3._platemap

[3]:

(['A03', 'A04', 'A05', 'A06', 'B01', 'B02', 'C01', 'C02', 'C03'],
 ['A03', 'A04', 'A05', 'A06', 'B01', 'B02', 'C01', 'C02', 'C03'],
 [['A', '  ', '  ', '- ', '- ', '- ', '- '],
  ['B', '- ', '- ', '  ', '  ', '  ', '  '],
  ['C', '- ', '- ', '- ', '  ', '  ', '  '],
  ['D', '  ', '  ', '  ', '  ', '  ', '  ']])

[4]:

ef1.__dict__.keys()

[4]:

dict_keys(['file', 'verbose', 'metadata', 'measurements', 'wells', '_ini', '_fin', '_wells_platemap', '_platemap'])

[5]:

ef1.measurements.keys(), ef2.measurements.keys()

[5]:

(dict_keys(['A']), dict_keys(['B', 'A', 'C', 'D', 'E', 'F', 'G', 'H']))

when testing each spectra for the presence of a single wavelength in the appropriate monochromator

[6]:

ef2.measurements["A"]["metadata"]

[6]:

{'temp': '25',
 'Monochromator': 'Excitation',
 'Min wavelength': '400',
 'Max wavelength': '510',
 'Wavelength': '530',
 'Using of excitation filter': 'Top',
 'Measurement height': '8.9',
 'Number of flashes': '50',
 'Number of flashes integrated': '50',
 'Flash power': '100'}

[7]:

ef2.measurements["A"].keys()

[7]:

dict_keys(['metadata', 'lambda', 'F01', 'F02', 'F03', 'F04', 'F05', 'F06', 'F07'])

[8]:

random.seed(11)
random.sample(ef1.measurements["A"]["F01"], 7)

[8]:

[2163.0, 607.0, 1846.0, 517.0, 572.0, 2145.0, 2028.0]

[9]:

fp = tpath / "h148g-spettroC-nota.csv"
n1 = prenspire.Note(fp, verbose=1)
n1._note.set_index("Well").loc["A01", ["Name", "Temp"]]

Wells ['A01', 'A02']...['G04', 'G05'] generated.

[9]:

Name    H148G
Temp     20.0
Name: A01, dtype: object

[10]:

n1.__dict__.keys()

[10]:

dict_keys(['fpath', 'verbose', 'wells', '_note', 'titrations'])

[11]:

n1.wells == ef1.wells, n1.wells == ef2.wells

[11]:

(True, False)

[12]:

n1.build_titrations(ef1)
tit0 = n1.titrations["H148G"][20.0]["Cl_0.0"]["A"]
tit3 = n1.titrations["H148G"][20.0]["pH_7.4"]["A"]
tit0

[12]:

	5.2	6.3	7.4	8.1	8.2
272.0	3151.0	4181.0	16413.0	29192.0	28816.0
273.0	3130.0	4204.0	16926.0	29909.0	29545.0
274.0	3043.0	4232.0	17331.0	30900.0	30750.0
275.0	3079.0	4283.0	17680.0	31717.0	31547.0
276.0	2975.0	4264.0	18020.0	32564.0	32336.0
...	...	...	...	...	...
496.0	636.0	4689.0	43230.0	87203.0	87842.0
497.0	683.0	4923.0	45173.0	89719.0	90666.0
498.0	632.0	4900.0	46725.0	93452.0	94101.0
499.0	854.0	5140.0	48452.0	96643.0	97506.0
500.0	573.0	5573.0	50025.0	99847.0	100715.0

229 rows × 5 columns

[13]:

tit0.plot()
tit3.plot()

[13]:

<Axes: >

@dataclass class Metadata: @dataclass class Datum: well: str pH: float Cl: float T: float mut: str labels: list[str] metadata: dict[str, Metadata]

[14]:

ef = prenspire.EnspireFile(tpath / "G10.csv")
fp = tpath / "NTT-G10_note.csv"
nn = prenspire.Note(fp, verbose=1)
nn.build_titrations(ef)
spectra = nn.titrations["NTT-G10"][20.0]["Cl_0.0"]["C"]

Wells ['D01', 'D02']...['G07', 'G08'] generated.

[15]:

f_res = analyze_spectra(spectra, is_ph=True, band=None)
print(f_res.result.chisqr)
f_res.figure

551.9999999999978

[15]:

[16]:

def dataset_from_lres(lkey, lres, is_ph):
    data = {}
    for k, res in zip(lkey, lres, strict=False):
        x = res.mini.userargs[0]["default"].x
        y = res.mini.userargs[0]["default"].y
        data[k] = DataArray(x, y)
    return Dataset(data, is_ph)


spectra_A = nn.titrations["NTT-G10"][20.0]["Cl_0.0"]["A"]
spectra_C = nn.titrations["NTT-G10"][20.0]["Cl_0.0"]["C"]
spectra_D = nn.titrations["NTT-G10"][20.0]["Cl_0.0"]["D"]

resA = analyze_spectra(spectra_A, "pH", (466, 510))
resC = analyze_spectra(spectra_C, "pH", (470, 500))
resD = analyze_spectra(spectra_D, "pH", (450, 600))

ds_bands = dataset_from_lres(["A", "C", "D"], [resA, resC, resD], True)

resA = analyze_spectra(spectra_A, "pH")
resC = analyze_spectra(spectra_C, "pH")
resD = analyze_spectra(spectra_D, "pH")
ds_svd = dataset_from_lres(["A", "C", "D"], [resA, resC, resD], True)

[17]:

dbands = {"D": (466, 510)}
tit = nn.titrations["NTT-G10"][20.0]["Cl_0.0"]
sgres = analyze_spectra_glob(tit, ds_svd, dbands)

sgres.svd.figure

The chain is shorter than 50 times the integrated autocorrelation time for 2 parameter(s). Use this estimate with caution and run a longer chain!
N/50 = 108;
tau: [ 84.57221279 162.24365462  38.02619732 118.90480235  63.5238942 ]

[17]:

[18]:

sgres.gsvd.figure

[18]:

[19]:

dbands = {"A": (466, 510), "C": (470, 500), "D": (450, 600)}
sgres = analyze_spectra_glob(nn.titrations["NTT-G10"][20.0]["Cl_0.0"], ds_bands, dbands)
sgres.bands.figure

[19]:

[20]:

ci = lmfit.conf_interval(sgres.bands.mini, sgres.bands.result)
print(lmfit.ci_report(ci, ndigits=2, with_offset=False))

      99.73% 95.45% 68.27% _BEST_ 68.27% 95.45% 99.73%
 K   :   7.89   7.93   7.97   8.00   8.04   8.07   8.11
 S0_A:   1.54   1.75   1.96   2.15   2.34   2.53   2.73
 S1_A:   9.18   9.33   9.47   9.61   9.75   9.89  10.04
 S0_C:   9.79   9.99  10.19  10.38  10.58  10.79  11.02
 S1_C:   1.58   1.74   1.88   2.02   2.16   2.31   2.46
 S0_D:   0.25   0.48   0.69   0.90   1.10   1.30   1.51
 S1_D:   9.22   9.37   9.51   9.65   9.79   9.94  10.09

[21]:

weight_multi_ds_titration(ds_svd)
res = fit_binding_glob(ds_svd)
res.figure

[21]:

[22]:

xx = np.array([5.2, 6.3, 7.4, 8.1, 8.2])
yy = np.array([6.05, 12.2, 20.38, 48.2, 80.3])


def kd(x, kd1, pka):
    return kd1 * (1 + 10 ** (pka - x)) / 10 ** (pka - x)


model = lmfit.Model(kd)
params = lmfit.Parameters()
params.add("kd1", value=10.0)
params.add("pka", value=6.6)
result = model.fit(yy, params, x=xx)

result.plot_fit(numpoints=50, ylabel="$K_d$ (mM)", xlabel="pH")

[22]:

<Axes: title={'center': 'Model(kd)'}, xlabel='pH', ylabel='$K_d$ (mM)'>

[23]:

result_emcee = result.emcee(
    steps=1800, burn=150, workers=8, nwalkers=10, seed=1, progress=False
)
fig = plot_emcee(result_emcee.flatchain)

[24]:

import pymc as pm

[25]:

# Assuming x_data and y_data are your data
with pm.Model() as model:
    kd1 = pm.Normal(
        "kd1", mu=result.params["kd1"].value, sigma=result.params["kd1"].stderr
    )
    pka = pm.Normal(
        "pka", mu=result.params["pka"].value, sigma=result.params["pka"].stderr
    )

    y_pred = pm.Deterministic("y_pred", kd(xx, kd1, pka))

    likelihood = pm.Normal("y", mu=y_pred, sigma=1, observed=yy)

    trace = pm.sample(1000, tune=2000, random_seed=1, chains=8, cores=8)

Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (8 chains in 8 jobs)
NUTS: [kd1, pka]

Sampling 8 chains for 2_000 tune and 1_000 draw iterations (16_000 + 8_000 draws total) took 12 seconds.

[26]:

pm.model_to_graphviz(model)

[26]:

[27]:

import arviz as az

ax = az.plot_pair(
    trace,
    divergences=1,
    var_names=["kd1", "pka"],
    kind=["kde", "scatter"],
    kde_kwargs={"fill_last": False},
    marginals=True,
    point_estimate="mean",
    figsize=(7, 7),
)

[28]:

import corner

# Extract the posterior samples for the parameters of interest
kd1_samples = trace.posterior["kd1"].to_numpy().flatten()
pka_samples = trace.posterior["pka"].to_numpy().flatten()
# Ensure the samples are in the correct format for the corner plotpm.plot_posterior(trace, var_names=["R0", "R1", "K"])
samples_array = np.column_stack([kd1_samples, pka_samples])
# Plot the corner plot
f = corner.corner(samples_array, labels=["kd1", "pka"])

[29]:

pm.plot_trace(trace, rug=True)

[29]:

array([[<Axes: title={'center': 'kd1'}>, <Axes: title={'center': 'kd1'}>],
       [<Axes: title={'center': 'pka'}>, <Axes: title={'center': 'pka'}>],
       [<Axes: title={'center': 'y_pred'}>,
        <Axes: title={'center': 'y_pred'}>]], dtype=object)

[30]:

pm.plot_posterior(trace, var_names=["kd1", "pka"])

[30]:

array([<Axes: title={'center': 'kd1'}>, <Axes: title={'center': 'pka'}>],
      dtype=object)

[31]:

pm.plot_forest(trace, var_names=["kd1", "pka"])
pm.plot_forest(trace, var_names=["pka"])

[31]:

array([<Axes: title={'center': '94.0% HDI'}>], dtype=object)

3. Getting started with prenspire#

This Page