{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# nima_io: Microscopy Data Reading Tutorial\n",
    "\n",
    "This notebook demonstrates reading various microscopy file formats,\n",
    "comparing `nima_io` (bioio-based) with `tifffile`, and inspecting\n",
    "OME metadata for all available test data."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Setup"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%load_ext autoreload\n",
    "%autoreload 2\n",
    "\n",
    "from pathlib import Path\n",
    "\n",
    "import matplotlib.pyplot as plt\n",
    "import numpy as np\n",
    "import tifffile\n",
    "from bioio import BioImage\n",
    "\n",
    "import nima_io.read as ir\n",
    "\n",
    "tdata = Path(\"../../tests/data\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 1. Single-scene OME-TIFF\n",
    "\n",
    "A simple multi-channel time-series OME-TIFF with known structure:\n",
    "5 timepoints, 3 channels, 17x13 pixels."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Lazy read: nima_io returns an xarray.DataArray backed by dask.\n",
    "single_scene_path = tdata / \"im1s1z3c5t_a.ome.tif\"\n",
    "da = ir.read_image(str(single_scene_path))\n",
    "print(f\"dims={da.dims}, shape={da.shape}, dtype={da.dtype}\")\n",
    "# Keep the dask array as the last expression so the notebook renders it.\n",
    "da.data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# The parsed OME object is stored in attrs; show the channels of image 0.\n",
    "first_image = da.attrs[\"ome_metadata\"].images[0]\n",
    "first_image.pixels.channels"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Structured metadata (consolidated from OME)\n",
    "md = da.attrs[\"metadata\"]\n",
    "# A doubled newline leaves the same blank line a bare print() would.\n",
    "print(md, end=\"\\n\\n\")\n",
    "print(f\"Objective: {md.objective[0]}\")\n",
    "print(f\"Pixel size: {md.voxel_size[0]}\")\n",
    "print(f\"Date: {md.date[0]}\", end=\"\\n\\n\")\n",
    "# One summary line per channel of the first entry in md.channels.\n",
    "for ci, ch in enumerate(md.channels[0]):\n",
    "    channel_line = (\n",
    "        f\"Ch[{ci}]: wavelength={ch.wavelength}nm, \"\n",
    "        f\"attenuation={ch.attenuation}, \"\n",
    "        f\"exposure={ch.exposure}s, \"\n",
    "        f\"gain={ch.gain}, \"\n",
    "        f\"binning={ch.binning}\"\n",
    "    )\n",
    "    print(channel_line)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# The same file opened with tifffile, for comparison.\n",
    "ome_tif_path = tdata / \"im1s1z3c5t_a.ome.tif\"\n",
    "with tifffile.TiffFile(ome_tif_path) as tif:\n",
    "    all_series = tif.series\n",
    "    print(f\"Series: {len(all_series)}\")\n",
    "    s = all_series[0]\n",
    "    print(f\"shape={s.shape}, axes={s.axes}, dtype={s.dtype}\")\n",
    "    print(f\"OME: {tif.is_ome}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### OME Metadata\n",
    "\n",
    "Access the full OME metadata object via bioio."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# BioImage comes from the imports in the Setup cell.\n",
    "img = BioImage(tdata / \"im1s1z3c5t_a.ome.tif\")\n",
    "ome = img.ome_metadata\n",
    "px = ome.images[0].pixels\n",
    "print(f\"Image: {ome.images[0].name or ome.images[0].id}\")\n",
    "print(\n",
    "    f\"Dims: X={px.size_x}, Y={px.size_y}, C={px.size_c}, T={px.size_t}, Z={px.size_z}\"\n",
    ")\n",
    "print(f\"Pixel sizes: {img.physical_pixel_sizes}\")\n",
    "print(\"Channels:\")\n",
    "for ci, ch in enumerate(px.channels):\n",
    "    ls = ch.light_source_settings\n",
    "    # Light-source settings may be absent, so fall back to None.\n",
    "    wl = ls.wavelength if ls else None\n",
    "    print(f\"  [{ci}] id={ch.id}, wavelength={wl}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Per-channel acquisition settings\n",
    "\n",
    "Exposure time and timestamps live in `planes` (one per T/C/Z\n",
    "combination). Channel-level settings (wavelength, attenuation,\n",
    "binning, gain) are on the `Channel` object. Combine both for\n",
    "a complete per-channel summary."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Per-channel acquisition summary (uses `px` from the OME Metadata cell)\n",
    "for ci, ch in enumerate(px.channels):\n",
    "    ls = ch.light_source_settings\n",
    "    ds = ch.detector_settings\n",
    "    # Exposure from the first plane of this channel. Default to None when\n",
    "    # no plane matches; a bare next() would raise StopIteration instead.\n",
    "    plane = next((p for p in px.planes if p.the_c == ci), None)\n",
    "    print(\n",
    "        f\"Ch[{ci}]: \"\n",
    "        f\"wl={ls.wavelength if ls else None}, \"\n",
    "        f\"att={ls.attenuation if ls else None}, \"\n",
    "        f\"exposure={plane.exposure_time if plane else None}, \"\n",
    "        f\"binning={ds.binning if ds else None}, \"\n",
    "        f\"gain={ds.gain if ds else None}\"\n",
    "    )"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Channel naming\n",
    "\n",
    "Assign semantic channel names for ratio analysis."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Label the channels so they can be selected by name.\n",
    "channel_labels = [\"G\", \"R\", \"C\"]\n",
    "named_path = str(tdata / \"im1s1z3c5t_a.ome.tif\")\n",
    "da_named = ir.read_image(named_path, channels=channel_labels)\n",
    "print(f\"Channel coords: {list(da_named.coords['C'].values)}\")\n",
    "# Select channel \"G\" at T=0 and show the underlying array.\n",
    "da_named.sel(C=\"G\", T=0).data"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 2. Multi-channel time-series OME-TIFF\n",
    "\n",
    "A 7-timepoint, 3-channel image without wavelength metadata."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "mcts_path = tdata / \"multi-channel-time-series.ome.tif\"\n",
    "\n",
    "# Pixel data through nima_io.\n",
    "da_mcts = ir.read_image(str(mcts_path))\n",
    "print(f\"dims={da_mcts.dims}, shape={da_mcts.shape}, dtype={da_mcts.dtype}\")\n",
    "\n",
    "# Descriptive metadata through bioio.\n",
    "img_mcts = BioImage(mcts_path)\n",
    "print(f\"Channel names: {img_mcts.channel_names}\")\n",
    "print(f\"Pixel sizes: {img_mcts.physical_pixel_sizes}\")\n",
    "print(f\"Image name: {img_mcts.ome_metadata.images[0].name}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 3. File sequences with tifffile.TiffSequence\n",
    "\n",
    "For sets of related TIFF files, `tifffile.TiffSequence` stacks them\n",
    "into a single array. This is useful when an acquisition is split\n",
    "across multiple files."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "fp_glob = str(tdata / \"im1s1z3c5t_?.ome.tif\")\n",
    "\n",
    "# Open the sequence as a context manager so it is closed once the stack\n",
    "# has been read, instead of staying open for the rest of the session.\n",
    "with tifffile.TiffSequence(fp_glob) as tifs:\n",
    "    d = tifs.asarray()\n",
    "    # `.files` is the list of paths matched by the glob.\n",
    "    matched_files = sorted(tifs.files)\n",
    "print(f\"Glob matched {len(matched_files)} files\")\n",
    "print(f\"Stacked shape: {d.shape}\")\n",
    "print(\"Individual files:\")\n",
    "for f in matched_files:\n",
    "    print(f\"  {Path(f).name}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 4. Tiled images (FEI multi-scene)\n",
    "\n",
    "FEI microscopes save tiled acquisitions as multi-scene OME-TIFFs.\n",
    "Each scene is one tile with stage position metadata."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 4a. Regular tile grid (t4_1.tif)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "tile_path = tdata / \"t4_1.tif\"\n",
    "\n",
    "# bioio sees each tile as a separate scene\n",
    "img_tile = BioImage(tile_path)\n",
    "print(f\"Scenes: {len(img_tile.scenes)}\")\n",
    "print(f\"Per-tile shape: {img_tile.shape}\")\n",
    "print(f\"Pixel sizes: {img_tile.physical_pixel_sizes}\")\n",
    "\n",
    "# tifffile comparison\n",
    "with tifffile.TiffFile(tile_path) as tif:\n",
    "    series = tif.series\n",
    "    print(f\"\\ntifffile series: {len(series)}\")\n",
    "    print(f\"Per-series shape: {series[0].shape}, axes={series[0].axes}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Combine the scenes of t4_1.tif into a single DataArray.\n",
    "stitched = ir.stitch_scenes(str(tdata / \"t4_1.tif\"))\n",
    "sizes = stitched.sizes\n",
    "print(f\"Stitched: dims={stitched.dims}, shape=T{sizes['T']}\")\n",
    "print(f\"  Y={sizes['Y']}, X={sizes['X']}\")\n",
    "# Last expression: render the underlying array.\n",
    "stitched.data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Tilemap shows scene layout (row, col) -> scene_index.\n",
    "# It is read from the attrs of the stitched DataArray built above.\n",
    "tilemap = stitched.attrs[\"tilemap\"]\n",
    "print(f\"Tile grid: {tilemap.shape[0]} rows x {tilemap.shape[1]} cols\")\n",
    "print(tilemap)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 4b. Tile grid with void tiles (tile6_1.tif)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "stitched_void = ir.stitch_scenes(str(tdata / \"tile6_1.tif\"))\n",
    "void_sizes = stitched_void.sizes\n",
    "print(f\"Stitched: Y={void_sizes['Y']}, X={void_sizes['X']}\")\n",
    "\n",
    "tilemap_void = stitched_void.attrs[\"tilemap\"]\n",
    "print(f\"Tile grid ({tilemap_void.shape}):\")\n",
    "print(tilemap_void)\n",
    "# Count the grid entries equal to -1, reported below as void tiles.\n",
    "n_void = np.sum(tilemap_void == -1)\n",
    "print(f\"Void tiles (=-1): {n_void}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Explicit figure/axes so the image, labels and colorbar are tied together.\n",
    "fig, ax = plt.subplots()\n",
    "mappable = ax.imshow(\n",
    "    stitched_void.sel(T=1, Z=0, C=1), cmap=\"Reds\", vmax=1000, vmin=1\n",
    ")\n",
    "ax.set(\n",
    "    title=\"tile6_1.tif stitched (T=1, Z=0, C=1)\",\n",
    "    xlabel=\"X (px)\",\n",
    "    ylabel=\"Y (px)\",\n",
    ")\n",
    "# Trailing semicolon suppresses the Colorbar repr in the cell output.\n",
    "fig.colorbar(mappable, ax=ax, label=\"Intensity\");"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Stage positions\n",
    "\n",
    "OME metadata provides physical stage positions for each tile."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "ome_tile = BioImage(tdata / \"tile6_1.tif\").ome_metadata\n",
    "print(f\"{'Scene':>5} {'X pos':>10} {'Y pos':>10}\")\n",
    "# One row per image, using the position recorded on its first plane.\n",
    "for scene_idx, image in enumerate(ome_tile.images):\n",
    "    first_plane = image.pixels.planes[0]\n",
    "    x_pos = float(first_plane.position_x)\n",
    "    y_pos = float(first_plane.position_y)\n",
    "    print(f\"{scene_idx:5d} {x_pos:10.2f} {y_pos:10.2f}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 5. TF8 format (.tf8)\n",
    "\n",
    "TF8 files are TIFFs with a non-standard extension.\n",
    "`nima_io` handles this transparently via a temp symlink."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Read the .tf8 file through the same entry point used for the TIFFs above.\n",
    "tf8_path = tdata / \"LC26GFP_1.tf8\"\n",
    "da_tf8 = ir.read_image(str(tf8_path))\n",
    "print(f\"dims={da_tf8.dims}, shape={da_tf8.shape}, dtype={da_tf8.dtype}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 6. Large single-scene image (exp2_2.tif)\n",
    "\n",
    "81 timepoints, 2 channels (340nm/380nm ratiometric), 1200x1600 pixels."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "img_exp = BioImage(tdata / \"exp2_2.tif\")\n",
    "print(f\"Shape: {img_exp.shape}\")\n",
    "print(f\"Pixel sizes: {img_exp.physical_pixel_sizes}\")\n",
    "\n",
    "ome_exp = img_exp.ome_metadata\n",
    "px_exp = ome_exp.images[0].pixels\n",
    "for ci, ch in enumerate(px_exp.channels):\n",
    "    # Wavelength stays None when the channel has no light-source settings.\n",
    "    wl = None\n",
    "    ls = ch.light_source_settings\n",
    "    if ls:\n",
    "        wl = ls.wavelength\n",
    "    print(f\"  Ch[{ci}]: wavelength={wl}\")\n",
    "\n",
    "# Instrument metadata: report only what the first instrument provides.\n",
    "instruments = ome_exp.instruments\n",
    "inst = instruments[0] if instruments else None\n",
    "if inst is not None and inst.objectives:\n",
    "    obj = inst.objectives[0]\n",
    "    print(f\"Objective: NA={obj.lens_na}, mag={obj.nominal_magnification}\")\n",
    "if inst is not None and inst.detectors:\n",
    "    print(f\"Detector: {inst.detectors[0].model}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Lazy read - no data loaded until .values or .compute()\n",
    "exp_path = tdata / \"exp2_2.tif\"\n",
    "da_exp = ir.read_image(str(exp_path))\n",
    "print(f\"Lazy DataArray: {da_exp.dims}, {da_exp.shape}\")\n",
    "print(f\"Dask chunks: {da_exp.data.chunks}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 7. Leica LIF files\n",
    "\n",
    "Multi-scene confocal Z-stacks read via bioio-lif (pure Python, no Java)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "img_lif = BioImage(tdata / \"2015Aug28_TransHXB2_50min+DMSO.lif\")\n",
    "print(f\"Reader: {type(img_lif.reader).__module__}\")\n",
    "print(f\"Scenes ({len(img_lif.scenes)}): {img_lif.scenes}\")\n",
    "\n",
    "for si, scene in enumerate(img_lif.scenes):\n",
    "    # Switch the active scene before reading its per-scene properties.\n",
    "    img_lif.set_scene(si)\n",
    "    scene_shape = img_lif.shape\n",
    "    scene_channels = img_lif.channel_names\n",
    "    voxel_z = img_lif.physical_pixel_sizes.Z\n",
    "    print(\n",
    "        f\"  {scene}: shape={scene_shape}, \"\n",
    "        f\"channels={scene_channels}, \"\n",
    "        f\"voxel_z={voxel_z}\"\n",
    "    )"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 8. File comparison (diff)\n",
    "\n",
    "Compare two files for pixel-level equality."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "a = str(tdata / \"im1s1z3c5t_a.ome.tif\")\n",
    "b = str(tdata / \"im1s1z3c5t_b.ome.tif\")\n",
    "bpix = str(tdata / \"im1s1z3c5t_bpix.ome.tif\")\n",
    "\n",
    "# Each entry pairs the printed label with the file compared against `a`.\n",
    "comparisons = [\n",
    "    (\"a vs a (identical):  \", a),\n",
    "    (\"a vs b (same data):  \", b),\n",
    "    (\"a vs bpix (1px off): \", bpix),\n",
    "]\n",
    "for label, other in comparisons:\n",
    "    print(f\"{label}{ir.diff(a, other)}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 9. Backend comparison: nima_io vs tifffile\n",
    "\n",
    "Key differences between reading with `nima_io` (bioio) and raw `tifffile`."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "t4_path = tdata / \"t4_1.tif\"\n",
    "\n",
    "# tifffile: raw arrays, manual dimension handling\n",
    "with tifffile.TiffFile(t4_path) as tif:\n",
    "    # Each series is a tile - tifffile doesn't auto-stitch\n",
    "    first_series = tif.series[0]\n",
    "    tf_data = first_series.asarray()\n",
    "    print(f\"tifffile single series: shape={tf_data.shape}, axes={first_series.axes}\")\n",
    "\n",
    "# nima_io: auto-stitched, named dims, lazy\n",
    "nio_data = ir.stitch_scenes(str(t4_path))\n",
    "print(f\"nima_io stitched:     shape={dict(nio_data.sizes)}\")\n",
    "print(f\"  lazy (dask):        {type(nio_data.data).__name__}\")"
   ]
  },
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# tifffile: reading OME metadata requires manual XML parsing\n",
    "with tifffile.TiffFile(tdata / \"im1s1z3c5t_a.ome.tif\") as tif:\n",
    "    # tifffile exposes raw OME-XML string\n",
    "    ome_xml = tif.ome_metadata  # raw XML string\n",
    "    print(f\"tifffile OME-XML: {type(ome_xml).__name__}, {len(ome_xml)} chars\")\n",
    "\n",
    "# nima_io/bioio: parsed OME object with typed attributes\n",
    "img = BioImage(tdata / \"im1s1z3c5t_a.ome.tif\")\n",
    "ome = img.ome_metadata  # ome_types.OME object\n",
    "print(f\"bioio OME: {type(ome).__name__}\")\n",
    "print(f\"  images: {len(ome.images)}\")\n",
    "print(f\"  instruments: {len(ome.instruments)}\")\n",
    "ch0 = ome.images[0].pixels.channels[0]\n",
    "wl = ch0.light_source_settings.wavelength\n",
    "print(f\"  channels[0].wavelength: {wl}\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}