{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Getting started with prenspire"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import random\n",
    "from pathlib import Path\n",
    "\n",
    "import lmfit\n",
    "import numpy as np\n",
    "\n",
    "from clophfit import prenspire\n",
    "from clophfit.fitting.core import (\n",
    "    DataArray,\n",
    "    Dataset,\n",
    "    analyze_spectra,\n",
    "    analyze_spectra_glob,\n",
    "    fit_binding_glob,\n",
    "    weight_multi_ds_titration,\n",
    ")\n",
    "from clophfit.fitting.plotting import plot_emcee\n",
    "\n",
    "%load_ext autoreload\n",
    "%autoreload 2\n",
    "tpath = Path(\"../../tests/EnSpire\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "ef1 = prenspire.EnspireFile(tpath / \"h148g-spettroC.csv\")\n",
    "ef2 = prenspire.EnspireFile(tpath / \"e2-T-without_sample_column.csv\")\n",
    "ef3 = prenspire.EnspireFile(tpath / \"24well_clop0_95.csv\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "ef3.wells, ef3._wells_platemap, ef3._platemap"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "ef1.__dict__.keys()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "ef1.measurements.keys(), ef2.measurements.keys()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "when testing each spectra for the presence of a single wavelength in the appropriate monochromator"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "ef2.measurements[\"A\"][\"metadata\"]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "ef2.measurements[\"A\"].keys()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "random.seed(11)\n",
    "random.sample(ef1.measurements[\"A\"][\"F01\"], 7)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "fp = tpath / \"h148g-spettroC-nota.csv\"\n",
    "n1 = prenspire.Note(fp, verbose=1)\n",
    "n1._note.set_index(\"Well\").loc[\"A01\", [\"Name\", \"Temp\"]]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "n1.__dict__.keys()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "n1.wells == ef1.wells, n1.wells == ef2.wells"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "n1.build_titrations(ef1)\n",
    "tit0 = n1.titrations[\"H148G\"][20.0][\"Cl_0.0\"][\"A\"]\n",
    "tit3 = n1.titrations[\"H148G\"][20.0][\"pH_7.4\"][\"A\"]\n",
    "tit0"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "tit0.plot()\n",
    "tit3.plot()"
   ]
  },
  {
   "cell_type": "raw",
   "metadata": {},
   "source": [
    "@dataclass\n",
    "class Metadata:\n",
    "    \n",
    "@dataclass\n",
    "class Datum:\n",
    "    well: str\n",
    "    pH: float\n",
    "    Cl: float\n",
    "    T: float\n",
    "    mut: str\n",
    "    labels: list[str]\n",
    "    metadata: dict[str, Metadata]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "ef = prenspire.EnspireFile(tpath / \"G10.csv\")\n",
    "fp = tpath / \"NTT-G10_note.csv\"\n",
    "nn = prenspire.Note(fp, verbose=1)\n",
    "nn.build_titrations(ef)\n",
    "spectra = nn.titrations[\"NTT-G10\"][20.0][\"Cl_0.0\"][\"C\"]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "f_res = analyze_spectra(spectra, is_ph=True, band=None)\n",
    "print(f_res.result.chisqr)\n",
    "f_res.figure"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def dataset_from_lres(lkey, lres, is_ph):\n",
    "    data = {}\n",
    "    for k, res in zip(lkey, lres, strict=False):\n",
    "        x = res.mini.userargs[0][\"default\"].x\n",
    "        y = res.mini.userargs[0][\"default\"].y\n",
    "        data[k] = DataArray(x, y)\n",
    "    return Dataset(data, is_ph=is_ph)\n",
    "\n",
    "\n",
    "spectra_A = nn.titrations[\"NTT-G10\"][20.0][\"Cl_0.0\"][\"A\"]\n",
    "spectra_C = nn.titrations[\"NTT-G10\"][20.0][\"Cl_0.0\"][\"C\"]\n",
    "spectra_D = nn.titrations[\"NTT-G10\"][20.0][\"Cl_0.0\"][\"D\"]\n",
    "\n",
    "resA = analyze_spectra(spectra_A, is_ph=True, band=(466, 510))\n",
    "resC = analyze_spectra(spectra_C, is_ph=True, band=(470, 500))\n",
    "resD = analyze_spectra(spectra_D, is_ph=True, band=(450, 600))\n",
    "\n",
    "ds_bands = dataset_from_lres([\"A\", \"C\", \"D\"], [resA, resC, resD], is_ph=True)\n",
    "\n",
    "resA = analyze_spectra(spectra_A, is_ph=True)\n",
    "resC = analyze_spectra(spectra_C, is_ph=True)\n",
    "resD = analyze_spectra(spectra_D, is_ph=True)\n",
    "ds_svd = dataset_from_lres([\"A\", \"C\", \"D\"], [resA, resC, resD], is_ph=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "dbands = {\"D\": (466, 510)}\n",
    "tit = nn.titrations[\"NTT-G10\"][20.0][\"Cl_0.0\"]\n",
    "sgres = analyze_spectra_glob(tit, ds_svd, dbands)\n",
    "\n",
    "sgres.svd.figure"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "sgres.gsvd.figure"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "dbands = {\"A\": (466, 510), \"C\": (470, 500), \"D\": (450, 600)}\n",
    "sgres = analyze_spectra_glob(nn.titrations[\"NTT-G10\"][20.0][\"Cl_0.0\"], ds_bands, dbands)\n",
    "sgres.bands.figure"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "ci = lmfit.conf_interval(sgres.bands.mini, sgres.bands.result)\n",
    "print(lmfit.ci_report(ci, ndigits=2, with_offset=False))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "weight_multi_ds_titration(ds_svd)\n",
    "res = fit_binding_glob(ds_svd)\n",
    "res.figure"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "xx = np.array([5.2, 6.3, 7.4, 8.1, 8.2])\n",
    "yy = np.array([6.05, 12.2, 20.38, 48.2, 80.3])\n",
    "\n",
    "\n",
    "def kd(x, kd1, pka):\n",
    "    return kd1 * (1 + 10 ** (pka - x)) / 10 ** (pka - x)\n",
    "\n",
    "\n",
    "model = lmfit.Model(kd)\n",
    "params = lmfit.Parameters()\n",
    "params.add(\"kd1\", value=10.0)\n",
    "params.add(\"pka\", value=6.6)\n",
    "result = model.fit(yy, params, x=xx)\n",
    "\n",
    "result.plot_fit(numpoints=50, ylabel=\"$K_d$ (mM)\", xlabel=\"pH\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "result_emcee = result.emcee(\n",
    "    steps=1800, burn=150, workers=8, nwalkers=10, seed=1, progress=False\n",
    ")\n",
    "fig = plot_emcee(result_emcee.flatchain)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pymc as pm"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Assuming x_data and y_data are your data\n",
    "with pm.Model() as model:\n",
    "    kd1 = pm.Normal(\n",
    "        \"kd1\", mu=result.params[\"kd1\"].value, sigma=result.params[\"kd1\"].stderr\n",
    "    )\n",
    "    pka = pm.Normal(\n",
    "        \"pka\", mu=result.params[\"pka\"].value, sigma=result.params[\"pka\"].stderr\n",
    "    )\n",
    "\n",
    "    y_pred = pm.Deterministic(\"y_pred\", kd(xx, kd1, pka))\n",
    "\n",
    "    likelihood = pm.Normal(\"y\", mu=y_pred, sigma=1, observed=yy)\n",
    "\n",
    "    trace = pm.sample(1000, tune=2000, random_seed=1, chains=8, cores=8)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "pm.model_to_graphviz(model)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import arviz as az\n",
    "\n",
    "ax = az.plot_pair(\n",
    "    trace,\n",
    "    divergences=1,\n",
    "    var_names=[\"kd1\", \"pka\"],\n",
    "    kind=[\"kde\", \"scatter\"],\n",
    "    kde_kwargs={\"fill_last\": False},\n",
    "    marginals=True,\n",
    "    point_estimate=\"mean\",\n",
    "    figsize=(7, 7),\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import corner\n",
    "\n",
    "# Extract the posterior samples for the parameters of interest\n",
    "kd1_samples = trace.posterior[\"kd1\"].to_numpy().flatten()\n",
    "pka_samples = trace.posterior[\"pka\"].to_numpy().flatten()\n",
    "# Ensure the samples are in the correct format for the corner plotpm.plot_posterior(trace, var_names=[\"R0\", \"R1\", \"K\"])\n",
    "samples_array = np.column_stack([kd1_samples, pka_samples])\n",
    "# Plot the corner plot\n",
    "f = corner.corner(samples_array, labels=[\"kd1\", \"pka\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "pm.plot_trace(trace, rug=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "pm.plot_posterior(trace, var_names=[\"kd1\", \"pka\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "pm.plot_forest(trace, var_names=[\"kd1\", \"pka\"])\n",
    "pm.plot_forest(trace, var_names=[\"pka\"])"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}