{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Levenberg Marquardt Fitting\n", "\n", "\n", "## Conventions\n", "\n", "- S0 Signal for unbound state\n", "- S1 Signal for bound state\n", "- K equilibrium constant (Kd or pKa)\n", "- order data from unbound to bound (e.g. cl: 0–\\>150 mM; pH 9–\\>5)\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import arviz as az\n", "import corner\n", "import lmfit\n", "import matplotlib.pyplot as plt\n", "import numpy as np\n", "import pandas as pd\n", "import pymc as pm\n", "import scipy\n", "import seaborn as sb\n", "\n", "from clophfit.binding.fitting import (\n", " DataArray,\n", " Dataset,\n", " fit_binding_glob,\n", " weight_multi_ds_titration,\n", ")\n", "from clophfit.binding.plotting import plot_emcee, print_emcee\n", "\n", "%load_ext autoreload\n", "%autoreload 2" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Single Cl titration." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "df = pd.read_table(\"../../tests/data/copyIP.txt\")\n", "df[\"F\"] /= np.max(df[\"F\"])\n", "sb.scatterplot(data=df, x=\"cl\", y=\"F\", hue=df.cl, palette=\"crest\", s=200)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "In general we can use either `lmfit.minimize() -> res` or `lmfit.Minimizer -> mini`." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "def fz(x, S0, S1, Kd):\n", " return (S0 + S1 * x / Kd) / (1 + x / Kd)\n", "\n", "\n", "def residual(pars, x, y=None):\n", " S0 = pars[\"S0\"]\n", " S1 = pars[\"S1\"]\n", " Kd = pars[\"Kd\"]\n", " model = fz(x, S0, S1, Kd)\n", " if y is None:\n", " return model\n", " return y - model\n", "\n", "\n", "params = lmfit.Parameters()\n", "params.add(\"S0\", value=df.F[0], min=0, max=2)\n", "params.add(\"S1\", value=100, min=-0.1, max=2)\n", "params.add(\"Kd\", value=50, min=0, max=200)\n", "res = lmfit.minimize(residual, params, args=(df.cl, df.F))\n", "\n", "xdelta = (df.cl.max() - df.cl.min()) / 500\n", "xfit = np.arange(df.cl.min() - xdelta, df.cl.max() + xdelta, xdelta)\n", "yfit = residual(res.params, xfit)\n", "print(lmfit.fit_report(res.params))\n", "plt.plot(df.cl, df.F, \"o\", xfit, yfit, \"-\")" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "mini = lmfit.Minimizer(residual, params, fcn_args=(df.cl, df.F))\n", "res = mini.minimize()\n", "ci, tr = lmfit.conf_interval(mini, res, sigmas=[0.68, 0.95], trace=True)\n", "print(lmfit.ci_report(ci, with_offset=False, ndigits=2))\n", "print(lmfit.fit_report(res, show_correl=False, sort_pars=True))" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "mini.params.add(\"__lnsigma\", value=np.log(0.1), min=np.log(0.001), max=np.log(2))\n", "emcee_res = mini.emcee(\n", " nwalkers=40,\n", " workers=8,\n", " is_weighted=False,\n", " thin=80,\n", " burn=300,\n", " steps=4000,\n", " progress=False,\n", ")" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "fig = plt.figure(figsize=(7, 7))\n", "f = corner.corner(emcee_res.flatchain.to_numpy(), fig=fig)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Extract parameter values and standard errors from lmfit result\n", "S0_mu = res.params[\"S0\"].value\n", "S1_mu = res.params[\"S1\"].value\n", "Kd_mu = res.params[\"Kd\"].value\n", "S0_std = res.params[\"S0\"].stderr\n", "S1_std = res.params[\"S1\"].stderr\n", "Kd_std = res.params[\"Kd\"].stderr\n", "\n", "cov_matrix = res.covar\n", "\n", "# Define PyMC3 model\n", "with pm.Model() as model:\n", " # Define multivariate normal priors for parameters\n", " pars = pm.MvNormal(\"pars\", mu=[S0_mu, S1_mu, Kd_mu], cov=cov_matrix, shape=3)\n", " S0 = pars[0]\n", " S1 = pars[1]\n", " Kd = pars[2]\n", "\n", " y_pred = pm.Deterministic(\"y_pred\", fz(df.cl.to_numpy(), S0, S1, Kd))\n", " # Define likelihood\n", " likelihood = pm.Normal(\"likelihood\", mu=y_pred, sigma=1, observed=df.F.to_numpy())\n", " # Run the inference\n", " trace = pm.sample(2000, tune=1000, cores=8, progressbar=False)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "az.plot_posterior(\n", " trace,\n", " var_names=[\"pars\"],\n", " hdi_prob=0.95,\n", " textsize=11,\n", " round_to=3,\n", " point_estimate=\"median\",\n", " ref_val=[res.params[\"S0\"].value, res.params[\"S1\"].value, res.params[\"Kd\"].value],\n", " figsize=(12, 3),\n", ")" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "pm.summary(trace)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "f = corner.corner(trace)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "pm.plot_trace(trace, combined=True, rug=True)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "def plot_scatter_matrix(res, tr):\n", " names = res.params.keys()\n", " gs = plt.GridSpec(4, 4)\n", " sx = {}\n", " sy = {}\n", "\n", " for i, fixed in enumerate(names):\n", " for j, free in enumerate(names):\n", " sharex = sx.get(j)\n", " sharey = sy.get(i)\n", " ax = plt.subplot(gs[i, j], sharex=sharex, sharey=sharey)\n", "\n", " if sharex is None:\n", " sx[j] = ax\n", " if sharey is None:\n", " sy[i] = ax\n", "\n", " if i < 3:\n", " plt.setp(ax.get_xticklabels(), visible=True)\n", " else:\n", " ax.set_xlabel(free)\n", "\n", " if j > 0:\n", " plt.setp(ax.get_yticklabels(), visible=False)\n", " else:\n", " ax.set_ylabel(fixed)\n", "\n", " rest = tr[fixed]\n", " prob = rest[\"prob\"]\n", " f = prob < 0.96\n", "\n", " x, y = rest[free], rest[fixed]\n", " ax.scatter(x[f], y[f], c=1 - prob[f], s=25 * (1 - prob[f] + 0.5))\n", " ax.autoscale(1, 1)\n", "\n", "\n", "plot_scatter_matrix(res, tr)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "The plots shown below, akin to the examples provided in [lmfit documentation](https://lmfit.github.io/lmfit-py/examples/example_confidence_interval.html#sphx-glr-examples-example-confidence-interval-py), are computationally intensive. They operate under the assumption of a parabolic parameter space. However, it's worth noting that these plots provide similar information to that yielded by a Monte Carlo simulation." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "def plot_conf_interval_matrix(res, tr):\n", " names = list(res.params.keys())\n", " plt.figure()\n", "\n", " for i in range(3):\n", " for j in range(3):\n", " indx = 9 - j * 3 - i\n", " ax = plt.subplot(3, 3, indx)\n", " ax.ticklabel_format(style=\"sci\", scilimits=(-2, 2), axis=\"y\")\n", "\n", " # Set-up labels and tick marks\n", " ax.tick_params(labelleft=False, labelbottom=False)\n", " if indx in (1, 4, 7):\n", " plt.ylabel(names[j])\n", " ax.tick_params(labelleft=True)\n", " if indx == 1:\n", " ax.tick_params(labelleft=True)\n", " if indx in (7, 8, 9):\n", " plt.xlabel(names[i])\n", " ax.tick_params(labelbottom=True)\n", " [label.set_rotation(45) for label in ax.get_xticklabels()]\n", "\n", " if i != j:\n", " x, y, m = lmfit.conf_interval2d(mini, res, names[i], names[j], 20, 20)\n", " plt.contourf(x, y, m, np.linspace(0, 1, 10))\n", "\n", " x = tr[names[i]][names[i]]\n", " y = tr[names[i]][names[j]]\n", " pr = tr[names[i]][\"prob\"]\n", " s = np.argsort(x)\n", " plt.scatter(x[s], y[s], c=pr[s], s=30, lw=1)\n", "\n", " else:\n", " x = tr[names[i]][names[i]]\n", " y = tr[names[i]][\"prob\"]\n", "\n", " t, s = np.unique(x, True)\n", " f = scipy.interpolate.interp1d(t, y[s], \"slinear\")\n", " xn = np.linspace(x.min(), x.max(), 50)\n", " plt.plot(xn, f(xn), lw=1)\n", " plt.ylabel(\"prob\")\n", " ax.tick_params(labelleft=True)\n", "\n", " plt.tight_layout()\n", "\n", "\n", "plot_conf_interval_matrix(res, tr)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "def plot_conf_intervals(mini, res):\n", " # Report fit parameters\n", " lmfit.report_fit(res.params, min_correl=0.25)\n", "\n", " # Calculate confidence intervals and traces\n", " ci, trace = lmfit.conf_interval(mini, res, sigmas=[1, 2], trace=True)\n", " lmfit.printfuncs.report_ci(ci)\n", "\n", " # Create subplots\n", " fig, axes = plt.subplots(2, 2, figsize=(12.8, 9.6), sharey=True)\n", "\n", " # Plot scatter plots for S0 and S1\n", " plot_scatter_trace(axes[0][0], trace[\"S0\"], \"S0\", \"Kd\")\n", " plot_scatter_trace(axes[0][1], trace[\"S1\"], \"S1\", \"Kd\")\n", "\n", " # Plot 2D confidence intervals for S0 and S1\n", " plot_2d_conf_interval(axes[1][0], mini, res, \"S0\", \"Kd\")\n", " plot_2d_conf_interval(axes[1][1], mini, res, \"S1\", \"Kd\")\n", "\n", " plt.tight_layout()\n", "\n", "\n", "def plot_scatter_trace(ax, trace_data, xlabel, ylabel):\n", " cx, cy, prob = trace_data[xlabel], trace_data[ylabel], trace_data[\"prob\"]\n", " ax.scatter(cx, cy, c=prob, s=30)\n", " ax.set_xlabel(xlabel)\n", " ax.set_ylabel(ylabel)\n", "\n", "\n", "def plot_2d_conf_interval(ax, mini, res, xparam, yparam):\n", " # Calculate 2D confidence interval\n", " cx, cy, grid = lmfit.conf_interval2d(mini, res, xparam, yparam, 30, 30)\n", " # Plot the contourf with a colorbar\n", " ctp = ax.contourf(cx, cy, grid, np.linspace(0, 1, 11))\n", " fig = ax.figure\n", " fig.colorbar(ctp, ax=ax)\n", " ax.set_xlabel(xparam)\n", " ax.set_ylabel(yparam)\n", "\n", "\n", "plot_conf_intervals(mini, res)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "f, ax = plt.subplots(1, 1)\n", "plot_2d_conf_interval(ax, mini, res, \"S0\", \"S1\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Using clophfit.binding" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "da = DataArray(df[\"cl\"].to_numpy(), df[\"F\"].to_numpy())\n", "ds = Dataset.from_da(da)\n", "weight_multi_ds_titration(ds)\n", "f_res = fit_binding_glob(ds)\n", "\n", "f_res.figure" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "emcee_res = f_res.mini.emcee(\n", " steps=3000, burn=300, workers=8, nwalkers=30, seed=1, progress=False\n", ")\n", "plot_emcee(emcee_res.flatchain)\n", "print_emcee(emcee_res)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "emcee_res.flatchain.describe()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Fit titration with multiple datasets\n", "\n", "For example data collected with multiple labelblocks in Tecan plate reader.\n", "\n", "“A01”: pH titration with y1 and y2." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "df = pd.read_csv(\"../../tests/data/A01.dat\", sep=\" \", names=[\"x\", \"y1\", \"y2\"])\n", "df = df[::-1].reset_index(drop=True)\n", "df" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### lmfit of single y1 using analytical Jacobian\n", "\n", "It computes the Jacobian of the fz. Mind that the residual (i.e. y - fz)\n", "will be actually minimized." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import sympy\n", "\n", "x, S0_1, S1_1, K = sympy.symbols(\"x S0_1 S1_1 K\")\n", "f = (S0_1 + S1_1 * 10 ** (K - x)) / (1 + 10 ** (K - x))\n", "print(sympy.diff(f, S0_1))\n", "print(sympy.diff(f, S1_1))\n", "print(sympy.diff(f, K))" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "x, S0, S1, K = sympy.symbols(\"x S0 S1 K\")\n", "f = S0 + (S1 - S0) * x / K / (1 + x / K)\n", "sympy.diff(f, S0)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "sympy.diff(f, S1)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "sympy.diff(f, K)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# if is_ph:\n", "f = S0 + (S1 - S0) * 10 ** (K - x) / (1 + 10 ** (K - x))\n", "sympy.diff(f, S0)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "sympy.diff(f, S1)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "sympy.diff(f, K)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "def residual(pars, x, data):\n", " S0 = pars[\"S0\"]\n", " S1 = pars[\"S1\"]\n", " K = pars[\"K\"]\n", " # model = (S0 + S1 * x / Kd) / (1 + x / Kd)\n", " x = np.array(x)\n", " y = np.array(data)\n", " model = (S0 + S1 * 10 ** (K - x)) / (1 + 10 ** (K - x))\n", " if data is None:\n", " return model\n", " return y - model\n", "\n", "\n", "# Try Jacobian\n", "def dfunc(pars, x, data=None):\n", " if data is None:\n", " pass\n", " S0_1 = pars[\"S0\"]\n", " S1_1 = pars[\"S1\"]\n", " K = pars[\"K\"]\n", " kx = np.array(10 ** (K - x))\n", " return np.array(\n", " [\n", " -1 / (kx + 1),\n", " -kx / (kx + 1),\n", " -kx * np.log(10) * (S1_1 / (kx + 1) - (kx * S1_1 + S0_1) / (kx + 1) ** 2),\n", " ]\n", " )\n", "\n", "\n", "params = lmfit.Parameters()\n", "params.add(\"S0\", value=25000)\n", "params.add(\"S1\", value=50000, min=0.0)\n", "params.add(\"K\", value=7, min=2.0, max=12.0)\n", "\n", "mini = lmfit.Minimizer(residual, params, fcn_args=(df.x,), fcn_kws={\"data\": df.y1})\n", "res = mini.leastsq(Dfun=dfunc, col_deriv=True, ftol=1e-8)\n", "print(lmfit.report_fit(res))\n", "ci = lmfit.conf_interval(mini, res, sigmas=[1, 2, 3])" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "print(lmfit.ci_report(ci, with_offset=False, ndigits=2))" ] }, { "cell_type": "markdown", "metadata": { "jp-MarkdownHeadingCollapsed": true }, "source": [ "### using lmfit with np.r_ trick" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "def residual2(pars, x, data=None):\n", " K = pars[\"K\"]\n", " S0_1 = pars[\"S0_1\"]\n", " S1_1 = pars[\"S1_1\"]\n", " S0_2 = pars[\"S0_2\"]\n", " S1_2 = pars[\"S1_2\"]\n", " model_0 = (S0_1 + S1_1 * 10 ** (K - x[0])) / (1 + 10 ** (K - x[0]))\n", " model_1 = (S0_2 + S1_2 * 10 ** (K - x[1])) / (1 + 10 ** (K - x[1]))\n", " if data is None:\n", " return np.r_[model_0, model_1]\n", " return np.r_[data[0] - model_0, data[1] - model_1]\n", "\n", "\n", "params2 = lmfit.Parameters()\n", "params2.add(\"K\", value=7.0, min=2.0, max=12.0)\n", "params2.add(\"S0_1\", value=df.y1[0], min=0.0)\n", "params2.add(\"S0_2\", value=df.y2[0], min=0.0)\n", "params2.add(\"S1_1\", value=df.y1.iloc[-1], min=0.0)\n", "params2.add(\"S1_2\", value=df.y2.iloc[-1], min=0.0)\n", "mini2 = lmfit.Minimizer(\n", " residual2, params2, fcn_args=([df.x, df.x],), fcn_kws={\"data\": [df.y1, df.y2]}\n", ")\n", "res2 = mini2.minimize()\n", "print(lmfit.fit_report(res2))\n", "\n", "ci2, tr2 = lmfit.conf_interval(mini2, res2, sigmas=[0.68, 0.95], trace=True)\n", "print(lmfit.ci_report(ci2, with_offset=False, ndigits=2))" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "xfit = np.linspace(df.x.min(), df.x.max(), 100)\n", "yfit0 = residual2(params2, [xfit, xfit])\n", "yfit0 = yfit0.reshape(2, 100)\n", "yfit = residual2(res2.params, [xfit, xfit])\n", "yfit = yfit.reshape(2, 100)\n", "plt.plot(df.x, df.y1, \"o\", df.x, df.y2, \"s\")\n", "plt.plot(xfit, yfit[0], \"-\", xfit, yfit[1], \"-\")\n", "plt.plot(xfit, yfit0[0], \"--\", xfit, yfit0[1], \"--\")\n", "plt.grid(True)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### lmfit constraints aiming for generality\n", "\n", "I believe a name convention would be more robust than relying on\n", "OrderedDict Params object." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "[\"S0\", \"1\"][0]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "def exception_fcn_handler(func):\n", " def inner_function(*args, **kwargs):\n", " try:\n", " return func(*args, **kwargs)\n", " except TypeError:\n", " print(\n", " f\"{func.__name__} only takes (1D) vector as argument besides lmfit.Parameters.\"\n", " )\n", "\n", " return inner_function\n", "\n", "\n", "@exception_fcn_handler\n", "def titration_pH(params, pH):\n", " p = {k.split(\"_\")[0]: v for k, v in params.items()}\n", " return (p[\"S0\"] + p[\"S1\"] * 10 ** (p[\"K\"] - pH)) / (1 + 10 ** (p[\"K\"] - pH))\n", "\n", "\n", "def residues(params, x, y, fcn):\n", " return y - fcn(params, x)\n", "\n", "\n", "p1 = lmfit.Parameters()\n", "p2 = lmfit.Parameters()\n", "p1.add(\"K_1\", value=7.0, min=2.0, max=12.0)\n", "p2.add(\"K_2\", value=7.0, min=2.0, max=12.0)\n", "p1.add(\"S0_1\", value=df.y1.iloc[0], min=0.0)\n", "p2.add(\"S0_2\", value=df.y2.iloc[0], min=0.0)\n", "p1.add(\"S1_1\", value=df.y1.iloc[-1], min=0.0)\n", "p2.add(\"S1_2\", value=df.y2.iloc[-1])\n", "\n", "print(\n", " residues(p1, np.array(df.x), [1.97, 1.8, 1.7, 0.1, 0.1, 0.16, 0.01], titration_pH)\n", ")\n", "\n", "\n", "def gobjective(params, xl, yl, fcnl):\n", " nset = len(xl)\n", " res = []\n", " for i in range(nset):\n", " pi = {k: v for k, v in params.valuesdict().items() if k[-1] == f\"{i + 1}\"}\n", " res = np.r_[res, residues(pi, xl[i], yl[i], fcnl[i])]\n", " # res = np.r_[res, yl[i] - fcnl[i](parsl[i], x[i])]\n", " return res\n", "\n", "\n", "print(gobjective(p1 + p2, [df.x, df.x], [df.y1, df.y2], [titration_pH, titration_pH]))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Here single." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "mini = lmfit.Minimizer(\n", " residues,\n", " p1,\n", " fcn_args=(\n", " df.x,\n", " df.y1,\n", " titration_pH,\n", " ),\n", ")\n", "res = mini.minimize()\n", "\n", "fit = titration_pH(res.params, df.x)\n", "print(lmfit.report_fit(res))\n", "plt.plot(df.x, df.y1, \"o\", df.x, fit, \"--\")\n", "ci = lmfit.conf_interval(mini, res, sigmas=[1, 2])\n", "lmfit.printfuncs.report_ci(ci)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Now global." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "pg = p1 + p2\n", "pg[\"K_2\"].expr = \"K_1\"\n", "gmini = lmfit.Minimizer(\n", " gobjective,\n", " pg,\n", " fcn_args=([df.x[:], df.x], [df.y1[:], df.y2], [titration_pH, titration_pH]),\n", ")\n", "gres = gmini.minimize()\n", "print(lmfit.fit_report(gres))\n", "\n", "pp1 = {k: v for k, v in gres.params.valuesdict().items() if k.split(\"_\")[1] == f\"{1}\"}\n", "pp2 = {k: v for k, v in gres.params.valuesdict().items() if k.split(\"_\")[1] == f\"{2}\"}\n", "xfit = np.linspace(df.x.min(), df.x.max(), 100)\n", "yfit1 = titration_pH(pp1, xfit)\n", "yfit2 = titration_pH(pp2, xfit)\n", "plt.plot(df.x, df.y1, \"o\", xfit, yfit1, \"--\")\n", "plt.plot(df.x, df.y2, \"s\", xfit, yfit2, \"--\")" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "ci = lmfit.conf_interval(gmini, gres)\n", "print(lmfit.ci_report(ci, with_offset=False, ndigits=2))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "To plot ci for the 5 parameters." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "fig, axes = plt.subplots(1, 4, figsize=(24.2, 4.8), sharey=True)\n", "cx, cy, grid = lmfit.conf_interval2d(gmini, gres, \"S0_1\", \"K_1\", 25, 25)\n", "ctp = axes[0].contourf(cx, cy, grid, np.linspace(0, 1, 11))\n", "fig.colorbar(ctp, ax=axes[0])\n", "axes[0].set_xlabel(\"SA1\")\n", "axes[0].set_ylabel(\"pK1\")\n", "cx, cy, grid = lmfit.conf_interval2d(gmini, gres, \"S0_2\", \"K_1\", 25, 25)\n", "ctp = axes[1].contourf(cx, cy, grid, np.linspace(0, 1, 11))\n", "fig.colorbar(ctp, ax=axes[1])\n", "axes[1].set_xlabel(\"SA2\")\n", "axes[1].set_ylabel(\"pK1\")\n", "cx, cy, grid = lmfit.conf_interval2d(gmini, gres, \"S1_1\", \"K_1\", 25, 25)\n", "ctp = axes[2].contourf(cx, cy, grid, np.linspace(0, 1, 11))\n", "fig.colorbar(ctp, ax=axes[2])\n", "axes[2].set_xlabel(\"SB1\")\n", "axes[2].set_ylabel(\"pK1\")\n", "cx, cy, grid = lmfit.conf_interval2d(gmini, gres, \"S1_2\", \"K_1\", 25, 25)\n", "ctp = axes[3].contourf(cx, cy, grid, np.linspace(0, 1, 11))\n", "fig.colorbar(ctp, ax=axes[3])\n", "axes[3].set_xlabel(\"SB2\")\n", "axes[3].set_ylabel(\"pK1\")" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "plt.plot(np.r_[df.x, df.x], gres.residual, \"o\")\n", "std = gres.residual.std()\n", "for i in range(-3, 4):\n", " plt.hlines(i * std, 5, 9, alpha=0.4)\n", "print(std)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "This next block comes from:\n", "" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### lmfit.Model\n", "\n", "It took 9 vs 5 ms. It is not possible to do global fitting. In the\n", "documentation it is stressed the need to convert the output of the\n", "residue to be 1D vectors." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "mod = lmfit.models.ExpressionModel(\"(SB + SA * 10**(pK-x)) / (1 + 10**(pK-x))\")\n", "result = mod.fit(np.array(df.y1), x=np.array(df.x), pK=7, SB=7e3, SA=10000)\n", "print(result.fit_report())" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "plt.plot(df.x, df.y1, \"o\")\n", "plt.plot(df.x, result.init_fit, \"--\", label=\"initial fit\")\n", "plt.plot(df.x, result.best_fit, \"-\", label=\"best fit\")\n", "plt.legend()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "print(result.ci_report())" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "which is faster but still I failed to find the way to global fitting." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "def tit_pH(x, S0, S1, K):\n", " return (S0 + S1 * 10 ** (K - x)) / (1 + 10 ** (K - x))\n", "\n", "\n", "tit_model1 = lmfit.Model(tit_pH, prefix=\"ds1_\")\n", "tit_model2 = lmfit.Model(tit_pH, prefix=\"ds2_\")\n", "print(f\"parameter names: {tit_model1.param_names}\")\n", "print(f\"parameter names: {tit_model2.param_names}\")\n", "print(f\"independent variables: {tit_model1.independent_vars}\")\n", "print(f\"independent variables: {tit_model2.independent_vars}\")\n", "\n", "tit_model1.set_param_hint(\"K\", value=7.0, min=2.0, max=12.0)\n", "tit_model1.set_param_hint(\"S0\", value=df.y1[0], min=0.0)\n", "tit_model1.set_param_hint(\"S1\", value=df.y1.iloc[-1], min=0.0)\n", "tit_model2.set_param_hint(\"K\", value=7.0, min=2.0, max=12.0)\n", "tit_model2.set_param_hint(\"S0\", value=df.y1[0], min=0.0)\n", "tit_model2.set_param_hint(\"S1\", value=df.y1.iloc[-1], min=0.0)\n", "pars1 = tit_model1.make_params()\n", "pars2 = tit_model2.make_params()\n", "# gmodel = tit_model1 + tit_model2\n", "# result = gmodel.fit(df.y1 + df.y2, pars, x=df.x)\n", "res1 = tit_model1.fit(df.y1, pars1, x=df.x)\n", "res2 = tit_model2.fit(df.y2, pars2, x=df.x)\n", "print(res1.fit_report())\n", "print(res2.fit_report())" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "xfit_delta = (df.x.max() - df.x.min()) / 100\n", "xfit = np.arange(df.x.min() - xfit_delta, df.x.max() + xfit_delta, xfit_delta)\n", "dely1 = res1.eval_uncertainty(x=xfit) * 1\n", "dely2 = res2.eval_uncertainty(x=xfit) * 1\n", "best_fit1 = res1.eval(x=xfit)\n", "best_fit2 = res2.eval(x=xfit)\n", "plt.plot(df.x, df.y1, \"o\")\n", "plt.plot(df.x, df.y2, \"o\")\n", "plt.plot(xfit, best_fit1, \"-.\")\n", "plt.plot(xfit, best_fit2, \"-.\")\n", "plt.fill_between(xfit, best_fit1 - dely1, best_fit1 + dely1, color=\"#FEDCBA\", alpha=0.5)\n", "plt.fill_between(xfit, best_fit2 - dely2, best_fit2 + dely2, color=\"#FEDCBA\", alpha=0.5)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Please mind the difference in the uncertainty between the 2 label\n", "blocks." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "def tit_pH2(x, S0_1, S0_2, S1_1, S1_2, K):\n", " y1 = (S0_1 + S1_1 * 10 ** (K - x)) / (1 + 10 ** (K - x))\n", " y2 = (S0_2 + S1_2 * 10 ** (K - x)) / (1 + 10 ** (K - x))\n", " # return y1, y2\n", " return np.r_[y1, y2]\n", "\n", "\n", "tit_model = lmfit.Model(tit_pH2)\n", "tit_model.set_param_hint(\"K\", value=7.0, min=2.0, max=12.0)\n", "tit_model.set_param_hint(\"S0_1\", value=df.y1[0], min=0.0)\n", "tit_model.set_param_hint(\"S0_2\", value=df.y2[0], min=0.0)\n", "tit_model.set_param_hint(\"S1_1\", value=df.y1.iloc[-1], min=0.0)\n", "tit_model.set_param_hint(\"S1_2\", value=df.y2.iloc[-1], min=0.0)\n", "pars = tit_model.make_params()\n", "# res = tit_model.fit([df.y1, df.y2], pars, x=df.x)\n", "res = tit_model.fit(np.r_[df.y1, df.y2], pars, x=df.x)\n", "print(res.fit_report())" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "dely = res.eval_uncertainty(x=xfit)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "def fit_pH(fp):\n", " df = pd.read_csv(fp)\n", "\n", " def tit_pH(x, SA, SB, pK):\n", " return (SB + SA * 10 ** (pK - x)) / (1 + 10 ** (pK - x))\n", "\n", " mod = lmfit.Model(tit_pH)\n", " pars = mod.make_params(SA=10000, SB=7e3, pK=7)\n", " result = mod.fit(df.y2, pars, x=df.x)\n", " return result, df.y2, df.x, mod\n", "\n", "\n", "# r,y,x,model = fit_pH(\"/home/dati/ibf/IBF/Database/Random mutag results/Liasan-analyses/2016-05-19/2014-02-20/pH/dat/C12.dat\")\n", "r, y, x, model = fit_pH(\"../../tests/data/H04.dat\")\n", "xfit = np.linspace(x.min(), x.max(), 50)\n", "dely = r.eval_uncertainty(x=xfit) * 1\n", "best_fit = r.eval(x=xfit)\n", "plt.plot(x, y, \"o\")\n", "plt.plot(xfit, best_fit, \"-.\")\n", "plt.fill_between(xfit, best_fit - dely, best_fit + dely, color=\"#FEDCBA\", alpha=0.5)\n", "r.conf_interval(sigmas=[2])\n", "print(r.ci_report(with_offset=False, ndigits=2))" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "g = r.plot()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "print(r.ci_report())" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### using clophfit.binding" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "dictionary = df.loc[:, [\"y1\", \"y2\"]].to_dict(orient=\"series\")\n", "dictionary = {key: value.to_numpy() for key, value in dictionary.items()}\n", "ds_data = {k: DataArray(df.x.to_numpy(), v) for k, v in dictionary.items()}\n", "ds = Dataset(ds_data, True)\n", "weight_multi_ds_titration(ds)\n", "f_res2 = fit_binding_glob(ds)\n", "f_res2.figure" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "emcee_res2 = f_res2.mini.emcee(\n", " steps=4000, burn=100, workers=8, nwalkers=40, seed=1, progress=False\n", ")\n", "plot_emcee(emcee_res2.flatchain)\n", "print_emcee(emcee_res2)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Model: example 2P Cl–ratio" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "filepath = \"../../tests/data/ratio2P.txt\"\n", "df = pd.read_table(filepath)\n", "\n", "\n", "def R_Cl(cl, R0, R1, K):\n", " return (R1 * cl + R0 * K) / (K + cl)\n", "\n", "\n", "def fit_Rcl(df):\n", " mod = lmfit.Model(R_Cl)\n", " # pars = mod.make_params(R0=0.8, R1=0.05, K=10)\n", " pars = lmfit.Parameters()\n", " pars.add(\"R0\", value=df.R[0], min=0.2, max=1.2)\n", " pars.add(\"R1\", value=0.05, min=-0.4, max=0.6)\n", " pars.add(\"K\", value=10, min=0, max=60)\n", " result = mod.fit(df.R, pars, cl=df.cl)\n", " return result, mod\n", "\n", "\n", "def plot_fit(result, x, y):\n", " \"\"\"Plot the original data and the best fit line with uncertainty.\"\"\"\n", " xfit = np.linspace(x.min(), x.max(), 50)\n", " dely = result.eval_uncertainty(cl=xfit) * 3\n", " best_fit = result.eval(cl=xfit)\n", " plt.plot(x, y, \"o\")\n", " plt.grid()\n", " plt.plot(xfit, best_fit, \"-.\")\n", " plt.fill_between(xfit, best_fit - dely, best_fit + dely, color=\"#FEDCBA\", alpha=0.5)\n", " result.conf_interval(sigmas=[2])\n", " print(result.ci_report(with_offset=False, ndigits=2))\n", "\n", "\n", "result, model = fit_Rcl(df)\n", "plot_fit(result, df.cl, df.R)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "emcee_kws = {\n", " \"is_weighted\": False,\n", " \"steps\": 2000,\n", " \"burn\": 150,\n", " \"thin\": 20,\n", " \"nwalkers\": 60,\n", " \"seed\": 11,\n", " \"workers\": 8,\n", " \"progress\": False,\n", "}\n", "emcee_params = result.params.copy()\n", "emcee_params.add(\"__lnsigma\", value=np.log(0.1), min=np.log(0.001), max=np.log(2.0))\n", "\n", "result_emcee = model.fit(\n", " data=df.R,\n", " cl=df.cl,\n", " params=emcee_params,\n", " method=\"emcee\",\n", " scale_covar=1,\n", " nan_policy=\"omit\",\n", " fit_kws=emcee_kws,\n", ")" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "lmfit.report_fit(result_emcee)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "f = plot_emcee(result_emcee.flatchain)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "print_emcee(result_emcee)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "def residual(pars, x, data=None):\n", " \"\"\"Model a decaying sine wave and subtract data.\"\"\"\n", " vals = pars.valuesdict()\n", " R0 = vals[\"R0\"]\n", " R1 = vals[\"R1\"]\n", " K = vals[\"K\"]\n", " model = R_Cl(x, R0, R1, K)\n", " if data is None:\n", " return model\n", " return model - data\n", "\n", "\n", "params = lmfit.Parameters()\n", "params.add(\"R0\", value=df[\"R\"][0], min=0, max=1)\n", "params.add(\"R1\", value=df[\"R\"].iloc[-1], min=0, max=0.2)\n", "target_y = (df[\"R\"][0] + df[\"R\"].iloc[-1]) / 2\n", "k_initial = df[\"cl\"][np.argmin(np.abs(df[\"R\"] - target_y))]\n", "params.add(\"K\", value=k_initial, min=3, max=30)\n", "mini = lmfit.Minimizer(residual, params, fcn_args=(df[\"cl\"], df[\"R\"]))\n", "result = mini.minimize()\n", "# Print a report of the fit\n", "lmfit.report_fit(result)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "emcee_params.add(\"__lnsigma\", value=np.log(0.1), min=np.log(0.01), max=np.log(0.1))\n", "\n", "emcee_res3 = mini.emcee(\n", " steps=4000,\n", " burn=300,\n", " workers=16,\n", " nwalkers=30,\n", " seed=1,\n", " is_weighted=False,\n", " progress=False,\n", ")\n", "plot_emcee(emcee_res3.flatchain)\n", "print_emcee(emcee_res3)\n", "# out = lmfit.minimize(residual, params, args=(df[\"cl\"],), kws={'data': df[\"R\"]})" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import pymc as pm\n", "\n", "with pm.Model() as model:\n", " R0 = pm.Normal(\"R0\", mu=result.params[\"R0\"].value, sigma=result.params[\"R0\"].stderr)\n", " R1 = pm.Normal(\"R1\", mu=result.params[\"R1\"].value, sigma=result.params[\"R1\"].stderr)\n", " K = pm.Normal(\"K\", mu=result.params[\"K\"].value, sigma=result.params[\"K\"].stderr)\n", "\n", " y_pred = pm.Deterministic(\"y_pred\", R_Cl(df[\"cl\"], R0, R1, K))\n", "\n", " likelihood = pm.Normal(\"y\", mu=y_pred, sigma=1, observed=df[\"R\"])\n", "\n", " trace = pm.sample(2000, tune=2000, chains=6, cores=8)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "R0_samples = trace.posterior[\"R0\"]\n", "az.hdi(R0_samples, hdi_prob=0.95)[\"R0\"]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Extract the posterior samples for the parameters of interest\n", "K_samples = trace.posterior[\"K\"].to_numpy().flatten()\n", "R0_samples = trace.posterior[\"R0\"].to_numpy().flatten()\n", "R1_samples = trace.posterior[\"R1\"].to_numpy().flatten()\n", "# Ensure the samples are in the correct format for the corner plot\n", "samples_array = np.column_stack([K_samples, R0_samples, R1_samples])\n", "# Plot the corner plot\n", "f = corner.corner(samples_array, labels=[\"K\", \"R0\", \"R1\"])" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "ax = az.plot_pair(\n", " trace,\n", " divergences=1,\n", " var_names=[\"K\", \"R0\", \"R1\"],\n", " kind=[\"kde\", \"scatter\"],\n", " kde_kwargs={\"fill_last\": False},\n", " marginals=True,\n", " point_estimate=\"mean\",\n", " figsize=(9, 9),\n", ")" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "pm.plot_trace(trace)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "pm.plot_posterior(\n", " trace,\n", " var_names=[\"R0\", \"R1\", \"K\"],\n", " figsize=(12, 4),\n", " textsize=12,\n", ")" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.12.2" } }, "nbformat": 4, "nbformat_minor": 4 }