From 23ab6095add56479f07d058b714e12a62d90d13e Mon Sep 17 00:00:00 2001
From: d33bs <ekgto445@gmail.com>
Date: Thu, 19 Feb 2026 11:42:16 -0700
Subject: [PATCH 01/10] add lazy read capabilities

---
 README.md                 |  15 ++
 docs/src/dlpack.md        |  15 ++
 src/ome_arrow/__init__.py |   2 +-
 src/ome_arrow/core.py     | 383 +++++++++++++++++++++++++++++++-------
 src/ome_arrow/tensor.py   | 251 ++++++++++++++++++++++++-
 tests/test_core.py        |  42 +++++
 tests/test_tensor.py      |  24 ++-
 7 files changed, 663 insertions(+), 69 deletions(-)

diff --git a/README.md b/README.md
index 1a2a8bf..a9bed3c 100644
--- a/README.md
+++ b/README.md
@@ -97,6 +97,21 @@ for cap in view3d.iter_tiles_3d(tile_size=(2, 64, 64), mode="numpy"):
     pass
 ```
 
+Lazy scan-style convention (Polars-like):
+
+```python
+from ome_arrow import OMEArrow
+
+oa = OMEArrow.scan("your_image.ome.parquet")  # deferred load
+# First: queue lazy spatial/index slicing
+lazy_crop = oa.slice_lazy(0, 512, 0, 512).slice_lazy(64, 256, 64, 256)
+cropped = lazy_crop.collect()
+
+# Then: build lazy tensor views from a source scan
+lazy_view = oa.tensor_view(t=0, z=slice(0, 4), roi=(0, 0, 512, 512))
+arr = lazy_view.to_numpy()
+```
+
 Advanced options:
 
 - `chunk_policy="auto" | "combine" | "keep"` controls ChunkedArray handling.
diff --git a/docs/src/dlpack.md b/docs/src/dlpack.md
index 01947d7..aaf15b7 100644
--- a/docs/src/dlpack.md
+++ b/docs/src/dlpack.md
@@ -41,6 +41,21 @@ flat = torch.utils.dlpack.from_dlpack(capsule)
 tensor = flat.reshape(view.shape)
 ```
 
+## Lazy scan-style slicing
+
+```python
+from ome_arrow import OMEArrow
+
+obj = OMEArrow.scan("example.ome.parquet")
+# Prioritize lazy slice planning first.
+lazy_crop = obj.slice_lazy(0, 512, 0, 512).slice_lazy(64, 256, 64, 256)
+cropped = lazy_crop.collect()
+
+# Then execute lazy tensor selections.
+lazy_view = obj.tensor_view(t=0, z=slice(0, 8), roi=(128, 128, 256, 256))
+arr = lazy_view.to_numpy()
+```
+
 ## JAX
 
 ```python
diff --git a/src/ome_arrow/__init__.py b/src/ome_arrow/__init__.py
index 509201f..a446e30 100644
--- a/src/ome_arrow/__init__.py
+++ b/src/ome_arrow/__init__.py
@@ -20,7 +20,7 @@
     to_ome_arrow,
 )
 from ome_arrow.meta import OME_ARROW_STRUCT, OME_ARROW_TAG_TYPE, OME_ARROW_TAG_VERSION
-from ome_arrow.tensor import TensorView
+from ome_arrow.tensor import LazyTensorView, TensorView
 from ome_arrow.utils import describe_ome_arrow, verify_ome_arrow
 from ome_arrow.view import view_matplotlib, view_pyvista
 
diff --git a/src/ome_arrow/core.py b/src/ome_arrow/core.py
index ad975cc..e7f40c1 100644
--- a/src/ome_arrow/core.py
+++ b/src/ome_arrow/core.py
@@ -5,6 +5,7 @@
 from __future__ import annotations
 
 import pathlib
+from dataclasses import dataclass
 from typing import (
     TYPE_CHECKING,
     Any,
@@ -36,7 +37,7 @@
     from_tiff,
 )
 from ome_arrow.meta import OME_ARROW_STRUCT
-from ome_arrow.tensor import TensorView
+from ome_arrow.tensor import LazyTensorView, TensorView
 from ome_arrow.transform import slice_ome_arrow
 from ome_arrow.utils import describe_ome_arrow
 from ome_arrow.view import view_matplotlib, view_pyvista
@@ -46,6 +47,30 @@
     import pyvista
 
 
+@dataclass(frozen=True)
+class _LazySourceSpec:
+    """Deferred source description for lazy OMEArrow loading."""
+
+    data: str
+    column_name: str
+    row_index: int
+    image_type: str | None
+
+
+@dataclass(frozen=True)
+class _LazySliceSpec:
+    """Deferred spatial/index slice specification."""
+
+    x_min: int
+    x_max: int
+    y_min: int
+    y_max: int
+    t_indices: tuple[int, ...] | None
+    c_indices: tuple[int, ...] | None
+    z_indices: tuple[int, ...] | None
+    fill_missing: bool
+
+
 class OMEArrow:
     """
     Small convenience toolkit for working with ome-arrow data.
@@ -70,6 +95,7 @@ def __init__(
         column_name: str = "ome_arrow",
         row_index: int = 0,
         image_type: str | None = None,
+        lazy: bool = False,
     ) -> None:
         """
         Construct an OMEArrow from:
@@ -83,11 +109,26 @@ def __init__(
         - a dict already matching the OME-Arrow schema
         - a pa.StructScalar already typed to OME_ARROW_STRUCT
         - optionally override/set image_type metadata on ingest
+        - optionally defer source-file ingestion with lazy=True
         """
 
         # set the tcz for viewing
         self.tcz = tcz
+        self._data: pa.StructScalar | None = None
         self._struct_array: pa.StructArray | None = None
+        self._lazy_source: _LazySourceSpec | None = None
+        self._lazy_slices: list[_LazySliceSpec] = []
+
+        if lazy:
+            if not isinstance(data, str):
+                raise TypeError("lazy=True currently supports only string file inputs.")
+            self._lazy_source = _LazySourceSpec(
+                data=data,
+                column_name=column_name,
+                row_index=row_index,
+                image_type=image_type,
+            )
+            return
 
         # --- 1) Stack pattern (Bio-Formats-style) --------------------------------
         if isinstance(data, str) and any(c in data for c in "<>*"):
@@ -101,69 +142,12 @@ def __init__(
 
         # --- 2) String path/URL: OME-Zarr / OME-Parquet / OME-TIFF ---------------
         elif isinstance(data, str):
-            s = data.strip()
-            path = pathlib.Path(s)
-
-            # Zarr detection
-            if (
-                s.lower().endswith(".zarr")
-                or s.lower().endswith(".ome.zarr")
-                or ".zarr/" in s.lower()
-                or (path.exists() and path.is_dir() and path.suffix.lower() == ".zarr")
-            ):
-                self.data = from_ome_zarr(s)
-                if image_type is not None:
-                    self.data = self._wrap_with_image_type(self.data, image_type)
-
-            # OME-Parquet
-            elif s.lower().endswith((".parquet", ".pq")) or path.suffix.lower() in {
-                ".parquet",
-                ".pq",
-            }:
-                parquet_result = from_ome_parquet(
-                    s,
-                    column_name=column_name,
-                    row_index=row_index,
-                    return_array=True,
-                )
-                self.data, self._struct_array = parquet_result
-                if image_type is not None:
-                    self.data = self._wrap_with_image_type(self.data, image_type)
-
-            # Vortex
-            elif s.lower().endswith(".vortex") or path.suffix.lower() == ".vortex":
-                vortex_result = from_ome_vortex(
-                    s,
-                    column_name=column_name,
-                    row_index=row_index,
-                    return_array=True,
-                )
-                self.data, self._struct_array = vortex_result
-                if image_type is not None:
-                    self.data = self._wrap_with_image_type(self.data, image_type)
-
-            # TIFF
-            elif path.suffix.lower() in {".tif", ".tiff"} or s.lower().endswith(
-                (".tif", ".tiff")
-            ):
-                self.data = from_tiff(s)
-                if image_type is not None:
-                    self.data = self._wrap_with_image_type(self.data, image_type)
-
-            elif path.exists() and path.is_dir():
-                raise ValueError(
-                    f"Directory '{s}' exists but does not look like an OME-Zarr store "
-                    "(expected suffix '.zarr' or '.ome.zarr')."
-                )
-            else:
-                raise ValueError(
-                    "String input must be one of:\n"
-                    "  • Bio-Formats pattern string (contains '<', '>' or '*')\n"
-                    "  • OME-Zarr path/URL ending with '.zarr' or '.ome.zarr'\n"
-                    "  • OME-Parquet file ending with '.parquet' or '.pq'\n"
-                    "  • Vortex file ending with '.vortex'\n"
-                    "  • OME-TIFF path/URL ending with '.tif' or '.tiff'"
-                )
+            self.data, self._struct_array = self._load_from_string_source(
+                data,
+                column_name=column_name,
+                row_index=row_index,
+                image_type=image_type,
+            )
 
         # --- 3) NumPy ndarray ----------------------------------------------------
         elif isinstance(data, np.ndarray):
@@ -191,6 +175,178 @@ def __init__(
                 "input data must be str, dict, pa.StructScalar, or numpy.ndarray"
             )
 
+    @classmethod
+    def scan(
+        cls,
+        data: str,
+        *,
+        tcz: Tuple[int, int, int] = (0, 0, 0),
+        column_name: str = "ome_arrow",
+        row_index: int = 0,
+        image_type: str | None = None,
+    ) -> "OMEArrow":
+        """Create a lazily-loaded OMEArrow, similar to Polars scan semantics.
+
+        Args:
+            data: Input source path/URL.
+            tcz: Default `(t, c, z)` indices used for view helpers.
+            column_name: OME-Arrow column name for tabular sources.
+            row_index: Row index for tabular sources.
+            image_type: Optional image type override.
+
+        Returns:
+            OMEArrow: Lazily planned OMEArrow instance.
+        """
+        return cls(
+            data=data,
+            tcz=tcz,
+            column_name=column_name,
+            row_index=row_index,
+            image_type=image_type,
+            lazy=True,
+        )
+
+    @property
+    def is_lazy(self) -> bool:
+        """Return whether this instance still has deferred work."""
+        return self._lazy_source is not None or bool(self._lazy_slices)
+
+    @property
+    def data(self) -> pa.StructScalar:
+        """Return the materialized OME-Arrow StructScalar.
+
+        Returns:
+            pa.StructScalar: Materialized OME-Arrow record.
+
+        Raises:
+            RuntimeError: If the record could not be initialized.
+        """
+        self._ensure_materialized()
+        if self._data is None:
+            raise RuntimeError("OMEArrow data is not initialized.")
+        return self._data
+
+    @data.setter
+    def data(self, value: pa.StructScalar) -> None:
+        self._data = value
+
+    def collect(self) -> "OMEArrow":
+        """Materialize deferred source data and return ``self``.
+
+        Returns:
+            OMEArrow: The same instance after materialization.
+        """
+        self._ensure_materialized()
+        return self
+
+    @staticmethod
+    def _load_from_string_source(
+        data: str,
+        *,
+        column_name: str,
+        row_index: int,
+        image_type: str | None,
+    ) -> tuple[pa.StructScalar, pa.StructArray | None]:
+        s = data.strip()
+        path = pathlib.Path(s)
+        struct_array: pa.StructArray | None = None
+
+        if (
+            s.lower().endswith(".zarr")
+            or s.lower().endswith(".ome.zarr")
+            or ".zarr/" in s.lower()
+            or (path.exists() and path.is_dir() and path.suffix.lower() == ".zarr")
+        ):
+            scalar = from_ome_zarr(s)
+            if image_type is not None:
+                scalar = OMEArrow._wrap_with_image_type(scalar, image_type)
+            return scalar, None
+
+        if s.lower().endswith((".parquet", ".pq")) or path.suffix.lower() in {
+            ".parquet",
+            ".pq",
+        }:
+            parquet_result = from_ome_parquet(
+                s,
+                column_name=column_name,
+                row_index=row_index,
+                return_array=True,
+            )
+            scalar, struct_array = parquet_result
+            if image_type is not None:
+                scalar = OMEArrow._wrap_with_image_type(scalar, image_type)
+            return scalar, struct_array
+
+        if s.lower().endswith(".vortex") or path.suffix.lower() == ".vortex":
+            vortex_result = from_ome_vortex(
+                s,
+                column_name=column_name,
+                row_index=row_index,
+                return_array=True,
+            )
+            scalar, struct_array = vortex_result
+            if image_type is not None:
+                scalar = OMEArrow._wrap_with_image_type(scalar, image_type)
+            return scalar, struct_array
+
+        if path.suffix.lower() in {".tif", ".tiff"} or s.lower().endswith(
+            (".tif", ".tiff")
+        ):
+            scalar = from_tiff(s)
+            if image_type is not None:
+                scalar = OMEArrow._wrap_with_image_type(scalar, image_type)
+            return scalar, None
+
+        if path.exists() and path.is_dir():
+            raise ValueError(
+                f"Directory '{s}' exists but does not look like an OME-Zarr store "
+                "(expected suffix '.zarr' or '.ome.zarr')."
+            )
+
+        raise ValueError(
+            "String input must be one of:\n"
+            "  • Bio-Formats pattern string (contains '<', '>' or '*')\n"
+            "  • OME-Zarr path/URL ending with '.zarr' or '.ome.zarr'\n"
+            "  • OME-Parquet file ending with '.parquet' or '.pq'\n"
+            "  • Vortex file ending with '.vortex'\n"
+            "  • OME-TIFF path/URL ending with '.tif' or '.tiff'"
+        )
+
+    def _ensure_materialized(self) -> None:
+        if self._lazy_source is None:
+            return
+        lazy_source = self._lazy_source
+        scalar, struct_array = self._load_from_string_source(
+            lazy_source.data,
+            column_name=lazy_source.column_name,
+            row_index=lazy_source.row_index,
+            image_type=lazy_source.image_type,
+        )
+        if self._lazy_slices:
+            data = scalar
+            for spec in self._lazy_slices:
+                data = slice_ome_arrow(
+                    data=data,
+                    x_min=spec.x_min,
+                    x_max=spec.x_max,
+                    y_min=spec.y_min,
+                    y_max=spec.y_max,
+                    t_indices=spec.t_indices,
+                    c_indices=spec.c_indices,
+                    z_indices=spec.z_indices,
+                    fill_missing=spec.fill_missing,
+                )
+            self.data = data
+            self._struct_array = None
+        else:
+            self.data, self._struct_array = scalar, struct_array
+        self._lazy_source = None
+        self._lazy_slices = []
+
+    def _tensor_source(self) -> pa.StructScalar | pa.StructArray:
+        self._ensure_materialized()
+        return self._struct_array if self._struct_array is not None else self.data
+
     @staticmethod
     def _wrap_with_image_type(
         data: pa.StructScalar, image_type: str
@@ -284,6 +440,8 @@ def export(  # noqa: PLR0911
         ValueError:
             Unknown 'how' or missing required params.
         """
+        self._ensure_materialized()
+
         # existing modes
         if how == "numpy":
             return to_numpy(self.data, dtype=dtype, strict=strict, clamp=clamp)
@@ -364,6 +522,7 @@ def info(self) -> Dict[str, Any]:
                 - type: classification string
                 - summary: human-readable text
         """
+        self._ensure_materialized()
         return describe_ome_arrow(self.data)
 
     def view(
@@ -455,6 +614,8 @@ def view(
 
         >>> plotter = obj.view(how="pyvista", clim=(100, 2000), show_axes=False)
         """
+        self._ensure_materialized()
+
         if how == "matplotlib":
             return view_matplotlib(
                 self.data,
@@ -529,7 +690,7 @@ def tensor_view(
         dtype: np.dtype | None = None,
         chunk_policy: Literal["auto", "combine", "keep"] = "auto",
         channel_policy: Literal["error", "first"] = "error",
-    ) -> TensorView:
+    ) -> TensorView | LazyTensorView:
         """Create a TensorView of the pixel data.
 
         Args:
@@ -551,7 +712,9 @@ def tensor_view(
                 "first" keeps the first channel.
 
         Returns:
-            TensorView: Tensor view over the selected pixels.
+            TensorView | LazyTensorView: Tensor view over selected pixels.
+            In lazy mode, this returns a deferred ``LazyTensorView`` that
+            materializes on first execution call (for example ``to_numpy()``).
 
         Raises:
             ValueError: If an unsupported scene is requested.
@@ -560,6 +723,21 @@ def tensor_view(
         if scene not in (None, 0):
             raise ValueError("Only scene=0 is supported for single-image records.")
 
+        if self._lazy_source is not None:
+            return LazyTensorView(
+                loader=self._tensor_source,
+                t=t,
+                z=z,
+                c=c,
+                roi=roi,
+                roi3d=roi3d,
+                tile=tile,
+                layout=layout,
+                dtype=dtype,
+                chunk_policy=chunk_policy,
+                channel_policy=channel_policy,
+            )
+
         # TensorView uses an internal canonical axis basis (TZCHW) for shape/stride
         # math, then applies the requested layout permutation for output.
         return TensorView(
@@ -608,6 +786,7 @@ def slice(
         OMEArrow object
             New OME-Arrow record with updated sizes and planes.
         """
+        self._ensure_materialized()
 
         return OMEArrow(
             data=slice_ome_arrow(
@@ -623,11 +802,83 @@ def slice(
             )
         )
 
+    def slice_lazy(
+        self,
+        x_min: int,
+        x_max: int,
+        y_min: int,
+        y_max: int,
+        t_indices: Optional[Iterable[int]] = None,
+        c_indices: Optional[Iterable[int]] = None,
+        z_indices: Optional[Iterable[int]] = None,
+        fill_missing: bool = True,
+    ) -> OMEArrow:
+        """Return a lazily planned slice, collected on first execution.
+
+        For lazy sources created with ``OMEArrow.scan(...)``, this queues a
+        deferred slice operation and returns a new lazy OMEArrow plan.
+        For already materialized sources, this falls back to eager ``slice()``.
+
+        Args:
+            x_min: Inclusive minimum X index for the crop.
+            x_max: Exclusive maximum X index for the crop.
+            y_min: Inclusive minimum Y index for the crop.
+            y_max: Exclusive maximum Y index for the crop.
+            t_indices: Optional time indices to retain.
+            c_indices: Optional channel indices to retain.
+            z_indices: Optional depth indices to retain.
+            fill_missing: Whether to zero-fill missing `(t, c, z)` planes.
+
+        Returns:
+            OMEArrow: Lazy plan when source is lazy; eager slice result otherwise.
+        """
+        if self._lazy_source is None:
+            return self.slice(
+                x_min=x_min,
+                x_max=x_max,
+                y_min=y_min,
+                y_max=y_max,
+                t_indices=t_indices,
+                c_indices=c_indices,
+                z_indices=z_indices,
+                fill_missing=fill_missing,
+            )
+
+        lazy_source = self._lazy_source
+        planned = OMEArrow.scan(
+            lazy_source.data,
+            tcz=self.tcz,
+            column_name=lazy_source.column_name,
+            row_index=lazy_source.row_index,
+            image_type=lazy_source.image_type,
+        )
+        planned._lazy_slices = [
+            *self._lazy_slices,
+            _LazySliceSpec(
+                x_min=int(x_min),
+                x_max=int(x_max),
+                y_min=int(y_min),
+                y_max=int(y_max),
+                t_indices=(
+                    None if t_indices is None else tuple(int(i) for i in t_indices)
+                ),
+                c_indices=(
+                    None if c_indices is None else tuple(int(i) for i in c_indices)
+                ),
+                z_indices=(
+                    None if z_indices is None else tuple(int(i) for i in z_indices)
+                ),
+                fill_missing=bool(fill_missing),
+            ),
+        ]
+        return planned
+
     def _repr_html_(self) -> str:
         """
         Auto-render a plane as inline PNG in Jupyter.
         """
         try:
+            self._ensure_materialized()
             view_matplotlib(
                 data=self.data,
                 tcz=self.tcz,
diff --git a/src/ome_arrow/tensor.py b/src/ome_arrow/tensor.py
index a1d13f4..699d901 100644
--- a/src/ome_arrow/tensor.py
+++ b/src/ome_arrow/tensor.py
@@ -5,7 +5,7 @@
 import random
 import warnings
 from dataclasses import dataclass
-from typing import Any, Iterator, List, Literal, Sequence
+from typing import Any, Callable, Iterator, List, Literal, Sequence
 
 import numpy as np
 import pyarrow as pa
@@ -38,6 +38,255 @@ class _Selection:
     roi: tuple[int, int, int, int]
 
 
+class LazyTensorView:
+    """Deferred TensorView plan with Polars-style collect semantics."""
+
+    def __init__(
+        self,
+        *,
+        loader: Callable[
+            [],
+            dict[str, Any] | pa.StructScalar | pa.StructArray | pa.ChunkedArray,
+        ],
+        t: int | slice | Sequence[int] | None = None,
+        z: int | slice | Sequence[int] | None = None,
+        c: int | slice | Sequence[int] | None = None,
+        roi: tuple[int, int, int, int] | None = None,
+        roi3d: tuple[int, int, int, int, int, int] | None = None,
+        tile: tuple[int, int] | None = None,
+        layout: str | None = None,
+        dtype: np.dtype | None = None,
+        chunk_policy: Literal["auto", "combine", "keep"] = "auto",
+        channel_policy: Literal["error", "first"] = "error",
+    ) -> None:
+        """Initialize a deferred TensorView plan.
+
+        Args:
+            loader: Callable that returns concrete OME-Arrow pixel data.
+            t: Time index selection.
+            z: Depth index selection.
+            c: Channel index selection.
+            roi: Spatial crop as ``(x, y, w, h)``.
+            roi3d: Spatial + depth crop as ``(x, y, z, w, h, d)``.
+            tile: Tile index as ``(tile_y, tile_x)``.
+            layout: Requested output layout (TZCHW letters).
+            dtype: Output dtype override.
+            chunk_policy: Chunk handling strategy for ChunkedArray inputs.
+            channel_policy: Behavior when dropping ``C`` from layout.
+        """
+        self._loader = loader
+        self._kwargs: dict[str, Any] = {
+            "t": t,
+            "z": z,
+            "c": c,
+            "roi": roi,
+            "roi3d": roi3d,
+            "tile": tile,
+            "layout": layout,
+            "dtype": dtype,
+            "chunk_policy": chunk_policy,
+            "channel_policy": channel_policy,
+        }
+        self._resolved: TensorView | None = None
+
+    def _spawn(self, **updates: Any) -> "LazyTensorView":
+        kwargs = dict(self._kwargs)
+        kwargs.update(updates)
+        return LazyTensorView(loader=self._loader, **kwargs)
+
+    def collect(self) -> "TensorView":
+        """Materialize this lazy plan into a concrete TensorView."""
+        if self._resolved is None:
+            self._resolved = TensorView(self._loader(), **self._kwargs)
+        return self._resolved
+
+    def with_layout(self, layout: str) -> "LazyTensorView":
+        """Return a new lazy view with an updated layout."""
+        return self._spawn(layout=layout)
+
+    def select(
+        self,
+        *,
+        t: int | slice | Sequence[int] | None = None,
+        z: int | slice | Sequence[int] | None = None,
+        c: int | slice | Sequence[int] | None = None,
+        roi: tuple[int, int, int, int] | None = None,
+        roi3d: tuple[int, int, int, int, int, int] | None = None,
+        tile: tuple[int, int] | None = None,
+    ) -> "LazyTensorView":
+        """Return a new lazy plan with updated index/ROI selections."""
+        return self._spawn(t=t, z=z, c=c, roi=roi, roi3d=roi3d, tile=tile)
+
+    @property
+    def dtype(self) -> np.dtype:
+        """Return the tensor dtype."""
+        return self.collect().dtype
+
+    @property
+    def device(self) -> str:
+        """Return the tensor storage device."""
+        return self.collect().device
+
+    @property
+    def layout(self) -> str:
+        """Return the effective tensor layout."""
+        return self.collect().layout
+
+    @property
+    def shape(self) -> tuple[int, ...]:
+        """Return the tensor shape."""
+        return self.collect().shape
+
+    @property
+    def strides(self) -> tuple[int, ...]:
+        """Return tensor strides in bytes."""
+        return self.collect().strides
+
+    def to_numpy(self, *, contiguous: bool = False) -> np.ndarray:
+        """Materialize as a NumPy array.
+
+        Args:
+            contiguous: When True, return a contiguous array copy.
+
+        Returns:
+            np.ndarray: Materialized array.
+        """
+        return self.collect().to_numpy(contiguous=contiguous)
+
+    def to_dlpack(
+        self,
+        *,
+        device: str = "cpu",
+        contiguous: bool = True,
+        mode: str = "arrow",
+    ) -> Any:
+        """Export the planned view as a DLPack object.
+
+        Args:
+            device: Target device (``"cpu"`` or ``"cuda"``).
+            contiguous: When True, materialize contiguous data when needed.
+            mode: Export mode (``"arrow"`` or ``"numpy"``).
+
+        Returns:
+            Any: DLPack-compatible object.
+        """
+        return self.collect().to_dlpack(device=device, contiguous=contiguous, mode=mode)
+
+    def to_torch(
+        self,
+        *,
+        device: str = "cpu",
+        contiguous: bool = True,
+        mode: str = "arrow",
+    ) -> Any:
+        """Convert the planned view to a torch tensor.
+
+        Args:
+            device: Target device (``"cpu"`` or ``"cuda"``).
+            contiguous: When True, materialize contiguous data when needed.
+            mode: Export mode (``"arrow"`` or ``"numpy"``).
+
+        Returns:
+            Any: ``torch.Tensor`` when torch is installed.
+        """
+        return self.collect().to_torch(device=device, contiguous=contiguous, mode=mode)
+
+    def to_jax(
+        self,
+        *,
+        device: str = "cpu",
+        contiguous: bool = True,
+        mode: str = "arrow",
+    ) -> Any:
+        """Convert the planned view to a JAX array.
+
+        Args:
+            device: Target device (``"cpu"`` or ``"cuda"``).
+            contiguous: When True, materialize contiguous data when needed.
+            mode: Export mode (``"arrow"`` or ``"numpy"``).
+
+        Returns:
+            Any: JAX array when JAX is installed.
+        """
+        return self.collect().to_jax(device=device, contiguous=contiguous, mode=mode)
+
+    def iter_dlpack(
+        self,
+        *,
+        batch_size: int | None = None,
+        tile_size: tuple[int, int] | None = None,
+        tiles: tuple[int, int] | None = None,
+        shuffle: bool = False,
+        seed: int | None = None,
+        prefetch: int = 0,
+        device: str = "cpu",
+        contiguous: bool = True,
+        mode: str = "arrow",
+    ) -> Iterator[Any]:
+        """Iterate DLPack outputs in batches or 2D tiles.
+
+        Args:
+            batch_size: Number of time indices per batch.
+            tile_size: Optional tile size as ``(tile_h, tile_w)``.
+            tiles: Deprecated alias for ``tile_size``.
+            shuffle: Whether to shuffle iteration order.
+            seed: Optional random seed for deterministic shuffling.
+            prefetch: Placeholder prefetch count.
+            device: Target device (``"cpu"`` or ``"cuda"``).
+            contiguous: When True, materialize contiguous data when needed.
+            mode: Export mode (``"arrow"`` or ``"numpy"``).
+
+        Returns:
+            Iterator[Any]: Iterator of DLPack-compatible objects.
+        """
+        return self.collect().iter_dlpack(
+            batch_size=batch_size,
+            tile_size=tile_size,
+            tiles=tiles,
+            shuffle=shuffle,
+            seed=seed,
+            prefetch=prefetch,
+            device=device,
+            contiguous=contiguous,
+            mode=mode,
+        )
+
+    def iter_tiles_3d(
+        self,
+        *,
+        tile_size: tuple[int, int, int],
+        shuffle: bool = False,
+        seed: int | None = None,
+        prefetch: int = 0,
+        device: str = "cpu",
+        contiguous: bool = True,
+        mode: str = "numpy",
+    ) -> Iterator[Any]:
+        """Iterate DLPack outputs in 3D tiles.
+
+        Args:
+            tile_size: Tile shape as ``(tile_z, tile_h, tile_w)``.
+            shuffle: Whether to shuffle iteration order.
+            seed: Optional random seed for deterministic shuffling.
+            prefetch: Placeholder prefetch count.
+            device: Target device (``"cpu"`` or ``"cuda"``).
+            contiguous: When True, materialize contiguous data when needed.
+            mode: Export mode (currently ``"numpy"`` only).
+
+        Returns:
+            Iterator[Any]: Iterator of DLPack-compatible objects.
+        """
+        return self.collect().iter_tiles_3d(
+            tile_size=tile_size,
+            shuffle=shuffle,
+            seed=seed,
+            prefetch=prefetch,
+            device=device,
+            contiguous=contiguous,
+            mode=mode,
+        )
+
+
 class TensorView:
     """View OME-Arrow pixel data as a tensor-like object.
 
diff --git a/tests/test_core.py b/tests/test_core.py
index 27f17a1..56515c0 100644
--- a/tests/test_core.py
+++ b/tests/test_core.py
@@ -408,3 +408,45 @@ def test_vortex_custom_column_name(tmp_path: pathlib.Path) -> None:
     reloaded = OMEArrow(str(out), column_name="custom_ome_arrow")
 
     assert reloaded.info() == oa.info()
+
+
+def test_scan_collect_roundtrip() -> None:
+    """Materialize a lazily scanned parquet source via collect()."""
+    oa = OMEArrow.scan("tests/data/JUMP-BR00117006/BR00117006.ome.parquet")
+    assert oa.is_lazy
+
+    with pytest.warns(UserWarning, match="Requested column 'ome_arrow'"):
+        oa.collect()
+    assert not oa.is_lazy
+    assert oa.info()["shape"] == (1, 1, 1, 72, 84)
+
+
+def test_slice_lazy_scan_collect() -> None:
+    """Queue a lazy slice and materialize it via collect()."""
+    oa = OMEArrow.scan("tests/data/JUMP-BR00117006/BR00117006.ome.parquet")
+    sliced = oa.slice_lazy(0, 10, 0, 8)
+
+    assert sliced.is_lazy
+    with pytest.warns(UserWarning, match="Requested column 'ome_arrow'"):
+        sliced.collect()
+    assert sliced.info()["shape"] == (1, 1, 1, 8, 10)
+
+
+def test_slice_lazy_chain_scan_collect() -> None:
+    """Allow chaining lazy slices before materialization."""
+    oa = OMEArrow.scan("tests/data/JUMP-BR00117006/BR00117006.ome.parquet")
+    sliced = oa.slice_lazy(0, 20, 0, 20).slice_lazy(5, 15, 2, 12)
+
+    with pytest.warns(UserWarning, match="Requested column 'ome_arrow'"):
+        shape = sliced.collect().info()["shape"]
+    assert shape == (1, 1, 1, 10, 10)
+
+
+def test_slice_lazy_on_materialized_falls_back_to_eager() -> None:
+    """Use eager slice behavior when source is already materialized."""
+    arr = np.arange(1 * 1 * 1 * 6 * 7, dtype=np.uint16).reshape(1, 1, 1, 6, 7)
+    oa = OMEArrow(arr)
+    out = oa.slice_lazy(1, 5, 1, 4)
+
+    assert not out.is_lazy
+    assert out.info()["shape"] == (1, 1, 1, 3, 4)
diff --git a/tests/test_tensor.py b/tests/test_tensor.py
index 0fb870c..d787ccc 100644
--- a/tests/test_tensor.py
+++ b/tests/test_tensor.py
@@ -10,7 +10,7 @@
 from ome_arrow import OMEArrow
 from ome_arrow.export import to_ome_parquet
 from ome_arrow.meta import OME_ARROW_STRUCT
-from ome_arrow.tensor import TensorView
+from ome_arrow.tensor import LazyTensorView, TensorView
 
 
 def _from_dlpack_capsule(capsule: object) -> np.ndarray:
@@ -87,6 +87,28 @@ def test_tensor_view_chunk_policy_invalid(example_correct_data: dict) -> None:
         oa.tensor_view(chunk_policy="invalid")
 
 
+def test_lazy_tensor_view_collects_on_execution() -> None:
+    """Defer source loading until lazy tensor view execution."""
+    oa = OMEArrow.scan("tests/data/JUMP-BR00117006/BR00117006.ome.parquet")
+    assert oa.is_lazy
+
+    view = oa.tensor_view(t=0, z=0, c=0, layout="HW")
+    assert isinstance(view, LazyTensorView)
+    assert oa.is_lazy
+
+    with pytest.warns(UserWarning, match="Requested column 'ome_arrow'"):
+        arr = view.to_numpy(contiguous=True)
+    assert arr.shape == (72, 84)
+    assert not oa.is_lazy
+
+
+def test_lazy_reader_requires_string_source() -> None:
+    """Reject lazy mode for non-file inputs."""
+    arr = np.zeros((1, 1, 1, 2, 2), dtype=np.uint16)
+    with pytest.raises(TypeError, match="lazy=True currently supports only string"):
+        OMEArrow(arr, lazy=True)
+
+
 def test_dlpack_roundtrip_torch(example_correct_data: dict) -> None:
     """Round-trip DLPack export/import through torch on CPU."""
     torch = pytest.importorskip("torch")

From e0609b09f018b3ac638d1ce17f253e17d9662767 Mon Sep 17 00:00:00 2001
From: d33bs <ekgto445@gmail.com>
Date: Thu, 19 Feb 2026 12:25:43 -0700
Subject: [PATCH 02/10] address copilot and coderabbit reviews

---
 README.md               |  5 +++--
 docs/src/dlpack.md      |  4 ++--
 src/ome_arrow/core.py   | 30 +++++++++++++++++++++++++++++-
 src/ome_arrow/tensor.py | 28 +++++++++++++++++++++-------
 tests/test_core.py      |  9 +++++++++
 tests/test_tensor.py    | 37 +++++++++++++++++++++++++++++++++++++
 6 files changed, 101 insertions(+), 12 deletions(-)

diff --git a/README.md b/README.md
index a9bed3c..f7e8f7f 100644
--- a/README.md
+++ b/README.md
@@ -107,8 +107,9 @@ oa = OMEArrow.scan("your_image.ome.parquet")  # deferred load
 lazy_crop = oa.slice_lazy(0, 512, 0, 512).slice_lazy(64, 256, 64, 256)
 cropped = lazy_crop.collect()
 
-# Then: build lazy tensor views from a source scan
-lazy_view = oa.tensor_view(t=0, z=slice(0, 4), roi=(0, 0, 512, 512))
+# slice_lazy returns a new OMEArrow plan; collect does not mutate `oa`.
+# Build tensor_view from the returned sliced object to reuse that plan.
+lazy_view = cropped.tensor_view(t=0, z=slice(0, 4), roi=(0, 0, 512, 512))
 arr = lazy_view.to_numpy()
 ```
 
diff --git a/docs/src/dlpack.md b/docs/src/dlpack.md
index aaf15b7..fbb1784 100644
--- a/docs/src/dlpack.md
+++ b/docs/src/dlpack.md
@@ -51,8 +51,8 @@ obj = OMEArrow.scan("example.ome.parquet")
 lazy_crop = obj.slice_lazy(0, 512, 0, 512).slice_lazy(64, 256, 64, 256)
 cropped = lazy_crop.collect()
 
-# Then execute lazy tensor selections.
-lazy_view = obj.tensor_view(t=0, z=slice(0, 8), roi=(128, 128, 256, 256))
+# Then execute tensor selections on the sliced result.
+lazy_view = cropped.tensor_view(t=0, z=slice(0, 8), roi=(128, 128, 256, 256))
 arr = lazy_view.to_numpy()
 ```
 
diff --git a/src/ome_arrow/core.py b/src/ome_arrow/core.py
index e7f40c1..30eef38 100644
--- a/src/ome_arrow/core.py
+++ b/src/ome_arrow/core.py
@@ -122,6 +122,12 @@ def __init__(
         if lazy:
             if not isinstance(data, str):
                 raise TypeError("lazy=True currently supports only string file inputs.")
+            if any(c in data for c in "<>*"):
+                raise TypeError(
+                    "lazy=True does not support Bio-Formats pattern strings. "
+                    "Use OMEArrow(..., lazy=False) for pattern ingestion via "
+                    "from_stack_pattern_path."
+                )
             self._lazy_source = _LazySourceSpec(
                 data=data,
                 column_name=column_name,
@@ -316,6 +322,9 @@ def _ensure_materialized(self) -> None:
         if self._lazy_source is None:
             return
         lazy_source = self._lazy_source
+        # Intentionally do not clear `_lazy_source` / `_lazy_slices` before load.
+        # If `_load_from_string_source(...)` raises, lazy state is preserved so
+        # callers can inspect/retry without losing the deferred plan.
         scalar, struct_array = self._load_from_string_source(
             lazy_source.data,
             column_name=lazy_source.column_name,
@@ -336,10 +345,16 @@ def _ensure_materialized(self) -> None:
                     z_indices=spec.z_indices,
                     fill_missing=spec.fill_missing,
                 )
+            # Applying lazy slices via `slice_ome_arrow` materializes through a
+            # StructScalar path, so we intentionally drop `_struct_array` here.
+            # Consequence: Arrow-backed zero-copy tensor paths
+            # (for example `tensor_view(...).to_dlpack(mode="arrow")`) are not
+            # available after lazy slicing.
             self.data = data
             self._struct_array = None
         else:
             self.data, self._struct_array = scalar, struct_array
+        # Lazy state is cleared only after a successful materialization.
         self._lazy_source = None
         self._lazy_slices = []
 
@@ -816,8 +831,21 @@ def slice_lazy(
         """Return a lazily planned slice, collected on first execution.
 
         For lazy sources created with ``OMEArrow.scan(...)``, this queues a
-        deferred slice operation and returns a new lazy OMEArrow plan.
+        deferred slice operation and returns a new lazy OMEArrow plan produced
+        from ``OMEArrow.scan(...)``.
         For already materialized sources, this falls back to eager ``slice()``.
+        This method does not mutate ``self``.
+
+        Notes:
+            ``slice_lazy`` always returns a new plan object. Internally, the
+            returned plan gets a fresh ``_lazy_slices`` list
+            (``[*self._lazy_slices, new_slice]``), so chained plans do not
+            share mutable slice state with the original ``OMEArrow``.
+            A common footgun is:
+            ``oa.slice_lazy(...).collect()`` followed by ``oa.tensor_view(...)``.
+            Those calls can load/materialize the same source twice because
+            ``oa`` remains the original plan. For a single-load workflow, keep
+            working from the value returned by ``slice_lazy`` / ``collect``.
 
         Args:
             x_min: Inclusive minimum X index for the crop.
diff --git a/src/ome_arrow/tensor.py b/src/ome_arrow/tensor.py
index 699d901..0aaf46c 100644
--- a/src/ome_arrow/tensor.py
+++ b/src/ome_arrow/tensor.py
@@ -17,6 +17,7 @@
 _TZCHW = "TZCHW"
 _ALLOWED_DIMS = set(_TZCHW)
 _ALLOWED_MODES = {"arrow", "numpy"}
+_UNSET = object()
 
 
 @dataclass(frozen=True)
@@ -107,15 +108,28 @@ def with_layout(self, layout: str) -> "LazyTensorView":
     def select(
         self,
         *,
-        t: int | slice | Sequence[int] | None = None,
-        z: int | slice | Sequence[int] | None = None,
-        c: int | slice | Sequence[int] | None = None,
-        roi: tuple[int, int, int, int] | None = None,
-        roi3d: tuple[int, int, int, int, int, int] | None = None,
-        tile: tuple[int, int] | None = None,
+        t: Any = _UNSET,
+        z: Any = _UNSET,
+        c: Any = _UNSET,
+        roi: Any = _UNSET,
+        roi3d: Any = _UNSET,
+        tile: Any = _UNSET,
     ) -> "LazyTensorView":
         """Return a new lazy plan with updated index/ROI selections."""
-        return self._spawn(t=t, z=z, c=c, roi=roi, roi3d=roi3d, tile=tile)
+        updates = {}
+        if t is not _UNSET:
+            updates["t"] = t
+        if z is not _UNSET:
+            updates["z"] = z
+        if c is not _UNSET:
+            updates["c"] = c
+        if roi is not _UNSET:
+            updates["roi"] = roi
+        if roi3d is not _UNSET:
+            updates["roi3d"] = roi3d
+        if tile is not _UNSET:
+            updates["tile"] = tile
+        return self._spawn(**updates)
 
     @property
     def dtype(self) -> np.dtype:
diff --git a/tests/test_core.py b/tests/test_core.py
index 56515c0..fa0e999 100644
--- a/tests/test_core.py
+++ b/tests/test_core.py
@@ -450,3 +450,12 @@ def test_slice_lazy_on_materialized_falls_back_to_eager() -> None:
 
     assert not out.is_lazy
     assert out.info()["shape"] == (1, 1, 1, 3, 4)
+
+
+def test_scan_stack_pattern_rejected_in_lazy_mode() -> None:
+    """Reject Bio-Formats stack pattern strings when lazy scan is requested."""
+    pattern = "tests/data/nviz-artificial-4d-dataset/E99_C<111,222>_ZS<000-021>.tif"
+    with pytest.raises(
+        TypeError, match="lazy=True does not support Bio-Formats pattern strings"
+    ):
+        OMEArrow.scan(pattern)
diff --git a/tests/test_tensor.py b/tests/test_tensor.py
index d787ccc..ece02fc 100644
--- a/tests/test_tensor.py
+++ b/tests/test_tensor.py
@@ -109,6 +109,43 @@ def test_lazy_reader_requires_string_source() -> None:
         OMEArrow(arr, lazy=True)
 
 
+def test_lazy_tensor_view_select_preserves_existing_dims() -> None:
+    """Preserve existing lazy selections when select() updates one axis."""
+    oa = OMEArrow.scan(
+        "tests/data/ome-artificial-5d-datasets/multi-channel-time-series.ome.tiff"
+    )
+    assert oa.is_lazy
+
+    view = oa.tensor_view(t=2, c=1)
+    assert isinstance(view, LazyTensorView)
+
+    view_z = view.select(z=0)
+    assert isinstance(view_z, LazyTensorView)
+    assert oa.is_lazy
+
+    arr = view_z.to_numpy(contiguous=True)
+    assert arr.shape == (1, 167, 439)
+    assert not oa.is_lazy
+
+
+def test_lazy_tensor_view_with_layout_defers_materialization() -> None:
+    """Update layout lazily and materialize only on execution."""
+    oa = OMEArrow.scan("tests/data/JUMP-BR00117006/BR00117006.ome.parquet")
+    assert oa.is_lazy
+
+    view = oa.tensor_view(t=0, z=0, c=0)
+    view_hw = view.with_layout("HW")
+
+    assert isinstance(view_hw, LazyTensorView)
+    assert view_hw._kwargs["layout"] == "HW"
+    assert oa.is_lazy
+
+    with pytest.warns(UserWarning, match="Requested column 'ome_arrow'"):
+        arr = view_hw.to_numpy(contiguous=True)
+    assert arr.shape == (72, 84)
+    assert not oa.is_lazy
+
+
 def test_dlpack_roundtrip_torch(example_correct_data: dict) -> None:
     """Round-trip DLPack export/import through torch on CPU."""
     torch = pytest.importorskip("torch")

From d214c7e251ac92b6972a57ff77246f53d067d3af Mon Sep 17 00:00:00 2001
From: d33bs <ekgto445@gmail.com>
Date: Thu, 19 Feb 2026 12:29:31 -0700
Subject: [PATCH 03/10] ignore warnings

---
 tests/test_tensor.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/tests/test_tensor.py b/tests/test_tensor.py
index ece02fc..7010da0 100644
--- a/tests/test_tensor.py
+++ b/tests/test_tensor.py
@@ -109,6 +109,9 @@ def test_lazy_reader_requires_string_source() -> None:
         OMEArrow(arr, lazy=True)
 
 
+@pytest.mark.filterwarnings(
+    "ignore:As of version 0.4.0, the parser argument is ignored.*:DeprecationWarning"
+)
 def test_lazy_tensor_view_select_preserves_existing_dims() -> None:
     """Preserve existing lazy selections when select() updates one axis."""
     oa = OMEArrow.scan(

From df6f1b9832ad08141c2e0af37e34f7163e8ea77f Mon Sep 17 00:00:00 2001
From: d33bs <ekgto445@gmail.com>
Date: Fri, 20 Feb 2026 08:15:40 -0700
Subject: [PATCH 04/10] address coderabbit review comments

---
 .pre-commit-config.yaml |  2 +-
 README.md               |  4 ++--
 docs/src/dlpack.md      |  4 ++--
 src/ome_arrow/core.py   |  6 +++++-
 src/ome_arrow/tensor.py | 35 ++++++++++++++++++++++++++++++-----
 tests/test_tensor.py    | 16 +++++++++++-----
 6 files changed, 51 insertions(+), 16 deletions(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 34f8f7b..5d15765 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -39,7 +39,7 @@ repos:
     -   id: yamllint
         exclude: pre-commit-config.yaml
 -   repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: "v0.15.1"
+    rev: "v0.15.2"
     hooks:
     -   id: ruff-format
     -   id: ruff-check
diff --git a/README.md b/README.md
index f7e8f7f..b504f4f 100644
--- a/README.md
+++ b/README.md
@@ -109,8 +109,8 @@ cropped = lazy_crop.collect()
 
 # slice_lazy returns a new OMEArrow plan; collect does not mutate `oa`.
 # Build tensor_view from the returned sliced object to reuse that plan.
-lazy_view = cropped.tensor_view(t=0, z=slice(0, 4), roi=(0, 0, 512, 512))
-arr = lazy_view.to_numpy()
+tensor_view_result = cropped.tensor_view(t=0, z=slice(0, 4), roi=(0, 0, 192, 192))
+arr = tensor_view_result.to_numpy()
 ```
 
 Advanced options:
diff --git a/docs/src/dlpack.md b/docs/src/dlpack.md
index fbb1784..f5e191e 100644
--- a/docs/src/dlpack.md
+++ b/docs/src/dlpack.md
@@ -52,8 +52,8 @@ lazy_crop = obj.slice_lazy(0, 512, 0, 512).slice_lazy(64, 256, 64, 256)
 cropped = lazy_crop.collect()
 
 # Then execute tensor selections on the sliced result.
-lazy_view = cropped.tensor_view(t=0, z=slice(0, 8), roi=(128, 128, 256, 256))
-arr = lazy_view.to_numpy()
+tensor_view = cropped.tensor_view(t=0, z=slice(0, 8), roi=(64, 64, 128, 128))
+arr = tensor_view.to_numpy()
 ```
 
 ## JAX
diff --git a/src/ome_arrow/core.py b/src/ome_arrow/core.py
index 30eef38..a9163ea 100644
--- a/src/ome_arrow/core.py
+++ b/src/ome_arrow/core.py
@@ -360,7 +360,11 @@ def _ensure_materialized(self) -> None:
 
     def _tensor_source(self) -> pa.StructScalar | pa.StructArray:
         self._ensure_materialized()
-        return self._struct_array if self._struct_array is not None else self.data
+        if self._struct_array is not None:
+            return self._struct_array
+        if self._data is None:
+            raise RuntimeError("OMEArrow data is not initialized.")
+        return self._data
 
     @staticmethod
     def _wrap_with_image_type(
diff --git a/src/ome_arrow/tensor.py b/src/ome_arrow/tensor.py
index 0aaf46c..b8e48f4 100644
--- a/src/ome_arrow/tensor.py
+++ b/src/ome_arrow/tensor.py
@@ -133,27 +133,52 @@ def select(
 
     @property
     def dtype(self) -> np.dtype:
-        """Return the tensor dtype."""
+        """Return the tensor dtype.
+
+        Note:
+            Accessing this property calls ``collect()`` and may materialize data
+            from source files (for example Parquet/TIFF), which can be expensive.
+        """
         return self.collect().dtype
 
     @property
     def device(self) -> str:
-        """Return the tensor storage device."""
+        """Return the tensor storage device.
+
+        Note:
+            Accessing this property calls ``collect()`` and may materialize data
+            from source files (for example Parquet/TIFF), which can be expensive.
+        """
         return self.collect().device
 
     @property
     def layout(self) -> str:
-        """Return the effective tensor layout."""
+        """Return the effective tensor layout.
+
+        Note:
+            Accessing this property calls ``collect()`` and may materialize data
+            from source files (for example Parquet/TIFF), which can be expensive.
+        """
         return self.collect().layout
 
     @property
     def shape(self) -> tuple[int, ...]:
-        """Return the tensor shape."""
+        """Return the tensor shape.
+
+        Note:
+            Accessing this property calls ``collect()`` and may materialize data
+            from source files (for example Parquet/TIFF), which can be expensive.
+        """
         return self.collect().shape
 
     @property
     def strides(self) -> tuple[int, ...]:
-        """Return tensor strides in bytes."""
+        """Return tensor strides in bytes.
+
+        Note:
+            Accessing this property calls ``collect()`` and may materialize data
+            from source files (for example Parquet/TIFF), which can be expensive.
+        """
         return self.collect().strides
 
     def to_numpy(self, *, contiguous: bool = False) -> np.ndarray:
diff --git a/tests/test_tensor.py b/tests/test_tensor.py
index 7010da0..bdb8c2f 100644
--- a/tests/test_tensor.py
+++ b/tests/test_tensor.py
@@ -87,7 +87,9 @@ def test_tensor_view_chunk_policy_invalid(example_correct_data: dict) -> None:
         oa.tensor_view(chunk_policy="invalid")
 
 
-def test_lazy_tensor_view_collects_on_execution() -> None:
+def test_lazy_tensor_view_collects_on_execution(
+    recwarn: pytest.WarningsRecorder,
+) -> None:
     """Defer source loading until lazy tensor view execution."""
     oa = OMEArrow.scan("tests/data/JUMP-BR00117006/BR00117006.ome.parquet")
     assert oa.is_lazy
@@ -96,10 +98,13 @@ def test_lazy_tensor_view_collects_on_execution() -> None:
     assert isinstance(view, LazyTensorView)
     assert oa.is_lazy
 
-    with pytest.warns(UserWarning, match="Requested column 'ome_arrow'"):
-        arr = view.to_numpy(contiguous=True)
+    arr = view.to_numpy(contiguous=True)
     assert arr.shape == (72, 84)
     assert not oa.is_lazy
+    if recwarn:
+        assert any(
+            "Requested column 'ome_arrow'" in str(w.message) for w in recwarn.list
+        )
 
 
 def test_lazy_reader_requires_string_source() -> None:
@@ -140,11 +145,12 @@ def test_lazy_tensor_view_with_layout_defers_materialization() -> None:
     view_hw = view.with_layout("HW")
 
     assert isinstance(view_hw, LazyTensorView)
-    assert view_hw._kwargs["layout"] == "HW"
     assert oa.is_lazy
 
     with pytest.warns(UserWarning, match="Requested column 'ome_arrow'"):
-        arr = view_hw.to_numpy(contiguous=True)
+        concrete = view_hw.collect()
+    assert concrete.layout == "HW"
+    arr = concrete.to_numpy(contiguous=True)
     assert arr.shape == (72, 84)
     assert not oa.is_lazy
 

From 66f090572746f2065603cc9fd857453b60942708 Mon Sep 17 00:00:00 2001
From: d33bs <ekgto445@gmail.com>
Date: Fri, 20 Feb 2026 08:25:58 -0700
Subject: [PATCH 05/10] address coderabbit review comment

---
 src/ome_arrow/tensor.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/ome_arrow/tensor.py b/src/ome_arrow/tensor.py
index b8e48f4..4bb8be6 100644
--- a/src/ome_arrow/tensor.py
+++ b/src/ome_arrow/tensor.py
@@ -159,6 +159,9 @@ def layout(self) -> str:
             Accessing this property calls ``collect()`` and may materialize data
             from source files (for example Parquet/TIFF), which can be expensive.
         """
+        layout = self._kwargs.get("layout")
+        if layout is not None:
+            return layout
         return self.collect().layout
 
     @property

From 57f2941765b8c0d5d5a5a55c160e237eee1fa7bb Mon Sep 17 00:00:00 2001
From: d33bs <ekgto445@gmail.com>
Date: Fri, 20 Feb 2026 08:46:19 -0700
Subject: [PATCH 06/10] address coderabbit comments

---
 src/ome_arrow/tensor.py | 43 ++++++++++++++++++++++++++++-------------
 1 file changed, 30 insertions(+), 13 deletions(-)

diff --git a/src/ome_arrow/tensor.py b/src/ome_arrow/tensor.py
index 4bb8be6..6da9911 100644
--- a/src/ome_arrow/tensor.py
+++ b/src/ome_arrow/tensor.py
@@ -3,6 +3,7 @@
 from __future__ import annotations
 
 import random
+import threading
 import warnings
 from dataclasses import dataclass
 from typing import Any, Callable, Iterator, List, Literal, Sequence
@@ -17,7 +18,13 @@
 _TZCHW = "TZCHW"
 _ALLOWED_DIMS = set(_TZCHW)
 _ALLOWED_MODES = {"arrow", "numpy"}
-_UNSET = object()
+
+
+class _Unset:
+    """Typed sentinel for arguments that were not provided."""
+
+
+_UNSET: _Unset = _Unset()
 
 
 @dataclass(frozen=True)
@@ -89,6 +96,7 @@ def __init__(
             "channel_policy": channel_policy,
         }
         self._resolved: TensorView | None = None
+        self._collect_lock = threading.Lock()
 
     def _spawn(self, **updates: Any) -> "LazyTensorView":
         kwargs = dict(self._kwargs)
@@ -97,9 +105,16 @@ def _spawn(self, **updates: Any) -> "LazyTensorView":
 
     def collect(self) -> "TensorView":
         """Materialize this lazy plan into a concrete TensorView."""
-        if self._resolved is None:
-            self._resolved = TensorView(self._loader(), **self._kwargs)
-        return self._resolved
+        resolved = self._resolved
+        if resolved is not None:
+            return resolved
+
+        with self._collect_lock:
+            resolved = self._resolved
+            if resolved is None:
+                resolved = TensorView(self._loader(), **self._kwargs)
+                self._resolved = resolved
+        return resolved
 
     def with_layout(self, layout: str) -> "LazyTensorView":
         """Return a new lazy view with an updated layout."""
@@ -108,12 +123,12 @@ def with_layout(self, layout: str) -> "LazyTensorView":
     def select(
         self,
         *,
-        t: Any = _UNSET,
-        z: Any = _UNSET,
-        c: Any = _UNSET,
-        roi: Any = _UNSET,
-        roi3d: Any = _UNSET,
-        tile: Any = _UNSET,
+        t: int | slice | Sequence[int] | None | _Unset = _UNSET,
+        z: int | slice | Sequence[int] | None | _Unset = _UNSET,
+        c: int | slice | Sequence[int] | None | _Unset = _UNSET,
+        roi: tuple[int, int, int, int] | None | _Unset = _UNSET,
+        roi3d: tuple[int, int, int, int, int, int] | None | _Unset = _UNSET,
+        tile: tuple[int, int] | None | _Unset = _UNSET,
     ) -> "LazyTensorView":
         """Return a new lazy plan with updated index/ROI selections."""
         updates = {}
@@ -146,10 +161,12 @@ def device(self) -> str:
         """Return the tensor storage device.
 
         Note:
-            Accessing this property calls ``collect()`` and may materialize data
-            from source files (for example Parquet/TIFF), which can be expensive.
+            For unresolved lazy plans, this returns ``"cpu"`` without calling
+            ``collect()``.
         """
-        return self.collect().device
+        if self._resolved is None:
+            return "cpu"
+        return self._resolved.device
 
     @property
     def layout(self) -> str:

From b1401be08960e9f900d51f8538d75b14a7fa8478 Mon Sep 17 00:00:00 2001
From: d33bs <ekgto445@gmail.com>
Date: Wed, 25 Feb 2026 09:11:13 -0700
Subject: [PATCH 07/10] address mikes review comments

Co-Authored-By: Mike Lippincott <58147848+MikeLippincott@users.noreply.github.com>
---
 .pre-commit-config.yaml |   2 +-
 README.md               |   6 +-
 docs/src/dlpack.md      |  23 +++----
 src/ome_arrow/core.py   |  15 ++++-
 src/ome_arrow/tensor.py | 141 +++++++++++++++++++++++++++++++++++++---
 tests/test_core.py      |  32 +++++++++
 tests/test_tensor.py    |  98 +++++++++++++++++++++-------
 7 files changed, 269 insertions(+), 48 deletions(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 5d15765..1b3e714 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -9,7 +9,7 @@ repos:
     -   id: check-yaml
     -   id: detect-private-key
 -   repo: https://github.com/tox-dev/pyproject-fmt
-    rev: "v2.16.1"
+    rev: "v2.16.2"
     hooks:
     -   id: pyproject-fmt
 -   repo: https://github.com/citation-file-format/cffconvert
diff --git a/README.md b/README.md
index b504f4f..d03c40e 100644
--- a/README.md
+++ b/README.md
@@ -86,11 +86,11 @@ from ome_arrow import OMEArrow
 
 oa = OMEArrow("your_image.ome.parquet")
 
-# Spatial ROI per plane
-view = oa.tensor_view(t=0, z=0, roi=(32, 32, 128, 128), layout="CHW")
+# Spatial ROI per plane (YX convention)
+view = oa.tensor_view(t=0, z=0, roi=(32, 32, 128, 128), layout="CYX")
 
 # Convenience 3D ROI (x, y, z, w, h, d)
-view3d = oa.tensor_view(roi3d=(32, 32, 2, 128, 128, 4), layout="TZCHW")
+view3d = oa.tensor_view(roi3d=(32, 32, 2, 128, 128, 4), layout="TZCYX")
 
 # 3D tiled iteration over (z, y, x)
 for cap in view3d.iter_tiles_3d(tile_size=(2, 64, 64), mode="numpy"):
diff --git a/docs/src/dlpack.md b/docs/src/dlpack.md
index f5e191e..86f19ed 100644
--- a/docs/src/dlpack.md
+++ b/docs/src/dlpack.md
@@ -7,23 +7,24 @@ and (optionally) GPU.
 Key defaults:
 
 - OME-Arrow tensor layouts always include channels (`C`) as a tensor axis.
-- Default layout is `CHW` when both `T` and `Z` are singleton in the source.
-- Otherwise, default layout is `TZCHW` (with singleton `T`/`Z` retained unless you override layout).
-- You can override with any valid TZCHW permutation/subset, for example `HWC`, `ZCHW`, or `CHW`.
+- Default layout is `CHW` (equivalent to `CYX`) when both `T` and `Z` are singleton in the source.
+- Otherwise, default layout is `TZCHW` (equivalent to `TZCYX`, with singleton `T`/`Z` retained unless you override layout).
+- You can override with any valid TZCHW/TZCYX permutation/subset, for example `YXC`, `ZCYX`, or `CYX`.
 
 Layout nomenclature:
 
 - `T`: time index
 - `Z`: z/depth index
 - `C`: channel index
-- `H`: image height (Y axis)
-- `W`: image width (X axis)
+- `Y`: image row axis (height)
+- `X`: image column axis (width)
+  (`H/W` aliases are also accepted for compatibility).
 
 Practical mapping:
 
-- 2D image content (`YX`) is typically exposed as `CHW`.
-- 3D z-stack content (`ZYX`) is typically exposed as `ZCHW` or `TZCHW` (with `T=1`).
-- Time-lapse and volumetric content use `TZCHW` by default.
+- 2D image content (`YX`) is typically exposed as `CYX`.
+- 3D z-stack content (`ZYX`) is typically exposed as `ZCYX` or `TZCYX` (with `T=1`).
+- Time-lapse and volumetric content use `TZCYX`/`TZCHW` by default.
 
 ## PyTorch
 
@@ -62,7 +63,7 @@ arr = tensor_view.to_numpy()
 from ome_arrow import OMEArrow
 
 obj = OMEArrow("example.ome.parquet")
-view = obj.tensor_view(t=0, z=0, c=0, layout="CHW")
+view = obj.tensor_view(t=0, z=0, c=0, layout="CYX")
 
 import jax.numpy as jnp
 
@@ -83,7 +84,7 @@ view = obj.tensor_view()
 # Batch over time (T) dimension.
 for cap in view.iter_dlpack(batch_size=2, shuffle=False, mode="numpy"):
     batch = np.from_dlpack(cap)
-    # batch shape: (batch, Z, C, H, W) in TZCHW layout
+    # batch shape: (batch, Z, C, Y, X) in TZCYX layout
 ```
 
 ```python
@@ -98,7 +99,7 @@ for cap in view.iter_dlpack(
     tile_size=(256, 256), shuffle=True, seed=123, mode="numpy"
 ):
     tile = np.from_dlpack(cap)
-    # tile shape: (C, H, W) in CHW layout
+    # tile shape: (C, Y, X) in CYX layout
 ```
 
 ## Ownership and lifetime
diff --git a/src/ome_arrow/core.py b/src/ome_arrow/core.py
index a9163ea..207b708 100644
--- a/src/ome_arrow/core.py
+++ b/src/ome_arrow/core.py
@@ -704,6 +704,8 @@ def tensor_view(
         c: int | slice | Sequence[int] | None = None,
         roi: tuple[int, int, int, int] | None = None,
         roi3d: tuple[int, int, int, int, int, int] | None = None,
+        roi_nd: tuple[int, ...] | None = None,
+        roi_type: Literal["2d", "2d_timelapse", "3d", "4d"] | None = None,
         tile: tuple[int, int] | None = None,
         layout: str | None = None,
         dtype: np.dtype | None = None,
@@ -721,9 +723,13 @@ def tensor_view(
             roi3d: Spatial + depth crop (x, y, z, w, h, d) in pixels/planes.
                 This is a convenience alias for ``roi=(x, y, w, h)`` and
                 ``z=slice(z, z + d)``.
+            roi_nd: General ROI tuple with min/max bounds.
+            roi_type: ROI interpretation mode for ``roi_nd``. Supported values:
+                ``"2d"``, ``"2d_timelapse"``, ``"3d"``, and ``"4d"``.
             tile: Tile index (tile_y, tile_x) based on chunk grid.
-            layout: Desired layout string using TZCHW letters where
-                T=time, Z=depth, C=channel, H=height (Y), W=width (X).
+            layout: Desired layout string using `TZCYX` letters where
+                T=time, Z=depth, C=channel, Y=row axis, X=column axis.
+                `TZCHW` aliases are also accepted for compatibility.
             dtype: Output dtype override.
             chunk_policy: Handling for ``pyarrow.ChunkedArray`` inputs.
             channel_policy: Behavior when dropping `C` from layout while
@@ -750,6 +756,8 @@ def tensor_view(
                 c=c,
                 roi=roi,
                 roi3d=roi3d,
+                roi_nd=roi_nd,
+                roi_type=roi_type,
                 tile=tile,
                 layout=layout,
                 dtype=dtype,
@@ -759,6 +767,7 @@ def tensor_view(
 
         # TensorView uses an internal canonical axis basis (TZCHW) for shape/stride
         # math, then applies the requested layout permutation for output.
+        # Public layout examples prefer TZCYX (Y/X), with H/W accepted as aliases.
         return TensorView(
             self._struct_array if self._struct_array is not None else self.data,
             t=t,
@@ -766,6 +775,8 @@ def tensor_view(
             c=c,
             roi=roi,
             roi3d=roi3d,
+            roi_nd=roi_nd,
+            roi_type=roi_type,
             tile=tile,
             layout=layout,
             dtype=dtype,
diff --git a/src/ome_arrow/tensor.py b/src/ome_arrow/tensor.py
index 6da9911..b52f489 100644
--- a/src/ome_arrow/tensor.py
+++ b/src/ome_arrow/tensor.py
@@ -18,6 +18,8 @@
 _TZCHW = "TZCHW"
 _ALLOWED_DIMS = set(_TZCHW)
 _ALLOWED_MODES = {"arrow", "numpy"}
+_ROI_TYPES = {"2d", "2d_timelapse", "3d", "4d"}
+_LAYOUT_ALIASES = str.maketrans({"Y": "H", "X": "W"})
 
 
 class _Unset:
@@ -61,6 +63,8 @@ def __init__(
         c: int | slice | Sequence[int] | None = None,
         roi: tuple[int, int, int, int] | None = None,
         roi3d: tuple[int, int, int, int, int, int] | None = None,
+        roi_nd: tuple[int, ...] | None = None,
+        roi_type: Literal["2d", "2d_timelapse", "3d", "4d"] | None = None,
         tile: tuple[int, int] | None = None,
         layout: str | None = None,
         dtype: np.dtype | None = None,
@@ -76,8 +80,11 @@ def __init__(
             c: Channel index selection.
             roi: Spatial crop as ``(x, y, w, h)``.
             roi3d: Spatial + depth crop as ``(x, y, z, w, h, d)``.
+            roi_nd: General ROI tuple with min/max bounds.
+            roi_type: ROI interpretation mode for ``roi_nd``.
             tile: Tile index as ``(tile_y, tile_x)``.
-            layout: Requested output layout (TZCHW letters).
+            layout: Requested output layout (`TZCYX` preferred, `TZCHW`
+                aliases accepted).
             dtype: Output dtype override.
             chunk_policy: Chunk handling strategy for ChunkedArray inputs.
             channel_policy: Behavior when dropping ``C`` from layout.
@@ -89,6 +96,8 @@ def __init__(
             "c": c,
             "roi": roi,
             "roi3d": roi3d,
+            "roi_nd": roi_nd,
+            "roi_type": roi_type,
             "tile": tile,
             "layout": layout,
             "dtype": dtype,
@@ -128,6 +137,8 @@ def select(
         c: int | slice | Sequence[int] | None | _Unset = _UNSET,
         roi: tuple[int, int, int, int] | None | _Unset = _UNSET,
         roi3d: tuple[int, int, int, int, int, int] | None | _Unset = _UNSET,
+        roi_nd: tuple[int, ...] | None | _Unset = _UNSET,
+        roi_type: Literal["2d", "2d_timelapse", "3d", "4d"] | None | _Unset = _UNSET,
         tile: tuple[int, int] | None | _Unset = _UNSET,
     ) -> "LazyTensorView":
         """Return a new lazy plan with updated index/ROI selections."""
@@ -142,6 +153,10 @@ def select(
             updates["roi"] = roi
         if roi3d is not _UNSET:
             updates["roi3d"] = roi3d
+        if roi_nd is not _UNSET:
+            updates["roi_nd"] = roi_nd
+        if roi_type is not _UNSET:
+            updates["roi_type"] = roi_type
         if tile is not _UNSET:
             updates["tile"] = tile
         return self._spawn(**updates)
@@ -358,9 +373,19 @@ class TensorView:
         roi3d: Spatial + depth crop (x, y, z, w, h, d). This is a
             convenience alias for ``roi=(x, y, w, h)`` and
             ``z=slice(z, z + d)``.
+        roi_nd: General ROI tuple with min/max bounds, interpreted by
+            ``roi_type``.
+        roi_type: ROI interpretation mode for ``roi_nd``. Supported values:
+            ``"2d"`` = ``(ymin, xmin, ymax, xmax)``;
+            ``"2d_timelapse"`` =
+            ``(tmin, ymin, xmin, tmax, ymax, xmax)``;
+            ``"3d"`` = ``(zmin, ymin, xmin, zmax, ymax, xmax)``;
+            ``"4d"`` =
+            ``(tmin, zmin, ymin, xmin, tmax, zmax, ymax, xmax)``.
         tile: Tile index (tile_y, tile_x) based on chunk grid.
-        layout: Desired layout string using TZCHW letters where
-            T=time, Z=depth, C=channel, H=height (Y), W=width (X).
+        layout: Desired layout string using `TZCYX` letters where
+            T=time, Z=depth, C=channel, Y=row axis, X=column axis.
+            `TZCHW` aliases are also accepted for compatibility.
         dtype: Output dtype override. Defaults to pixels_meta.type when valid.
         chunk_policy: Handling for ``pyarrow.ChunkedArray`` inputs. "auto"
             keeps multi-chunk arrays and unwraps single-chunk arrays.
@@ -380,6 +405,8 @@ def __init__(
         c: int | slice | Sequence[int] | None = None,
         roi: tuple[int, int, int, int] | None = None,
         roi3d: tuple[int, int, int, int, int, int] | None = None,
+        roi_nd: tuple[int, ...] | None = None,
+        roi_type: Literal["2d", "2d_timelapse", "3d", "4d"] | None = None,
         tile: tuple[int, int] | None = None,
         layout: str | None = None,
         dtype: np.dtype | None = None,
@@ -397,9 +424,13 @@ def __init__(
             roi3d: Spatial + depth crop (x, y, z, w, h, d). This is a
                 convenience alias for ``roi=(x, y, w, h)`` and
                 ``z=slice(z, z + d)``.
+            roi_nd: General ROI tuple with min/max bounds, interpreted by
+                ``roi_type``.
+            roi_type: ROI interpretation mode for ``roi_nd``.
             tile: Tile index (tile_y, tile_x) derived from chunk_grid.
-            layout: Desired layout string using TZCHW letters where
-                T=time, Z=depth, C=channel, H=height (Y), W=width (X).
+            layout: Desired layout string using `TZCYX` letters where
+                T=time, Z=depth, C=channel, Y=row axis, X=column axis.
+                `TZCHW` aliases are also accepted for compatibility.
             dtype: Output dtype override.
             chunk_policy: Handling for ``pyarrow.ChunkedArray`` inputs.
             channel_policy: Behavior when dropping `C` from layout while
@@ -448,7 +479,14 @@ def __init__(
         self._dtype = np.dtype(dtype)
 
         self._selection = self._normalize_selection(
-            t=t, z=z, c=c, roi=roi, roi3d=roi3d, tile=tile
+            t=t,
+            z=z,
+            c=c,
+            roi=roi,
+            roi3d=roi3d,
+            roi_nd=roi_nd,
+            roi_type=roi_type,
+            tile=tile,
         )
         self._array: np.ndarray | None = None
         self._array_layout: str | None = None
@@ -492,8 +530,9 @@ def with_layout(self, layout: str) -> "TensorView":
         """Return a new TensorView with a layout override.
 
         Args:
-            layout: Desired layout string using TZCHW letters where
-                T=time, Z=depth, C=channel, H=height (Y), W=width (X).
+            layout: Desired layout string using `TZCYX` letters where
+                T=time, Z=depth, C=channel, Y=row axis, X=column axis.
+                `TZCHW` aliases are also accepted for compatibility.
 
         Returns:
             TensorView: New view with the requested layout.
@@ -910,8 +949,29 @@ def _normalize_selection(
         c: int | slice | Sequence[int] | None,
         roi: tuple[int, int, int, int] | None,
         roi3d: tuple[int, int, int, int, int, int] | None,
+        roi_nd: tuple[int, ...] | None,
+        roi_type: Literal["2d", "2d_timelapse", "3d", "4d"] | None,
         tile: tuple[int, int] | None,
     ) -> _Selection:
+        if roi_nd is not None:
+            if roi is not None or roi3d is not None or tile is not None:
+                raise ValueError("Provide only one of roi_nd, roi3d, roi, or tile.")
+            roi, t_from_roi, z_from_roi = self._parse_roi_nd(roi_nd, roi_type)
+            if t_from_roi is not None:
+                if t is not None:
+                    raise ValueError(
+                        "Provide either t or roi_nd time bounds, not both."
+                    )
+                t = t_from_roi
+            if z_from_roi is not None:
+                if z is not None:
+                    raise ValueError(
+                        "Provide either z or roi_nd depth bounds, not both."
+                    )
+                z = z_from_roi
+        elif roi_type is not None:
+            raise ValueError("roi_type requires roi_nd.")
+
         if roi3d is not None:
             if roi is not None or tile is not None:
                 raise ValueError("Provide only one of roi3d, roi, or tile.")
@@ -951,6 +1011,65 @@ def _normalize_selection(
 
         return _Selection(t=t_idx, z=z_idx, c=c_idx, roi=roi)
 
+    def _parse_roi_nd(
+        self,
+        roi_nd: tuple[int, ...],
+        roi_type: Literal["2d", "2d_timelapse", "3d", "4d"] | None,
+    ) -> tuple[tuple[int, int, int, int], slice | None, slice | None]:
+        vals = tuple(int(v) for v in roi_nd)
+        if roi_type is None:
+            if len(vals) == 4:
+                roi_type = "2d"
+            elif len(vals) == 8:
+                roi_type = "4d"
+            elif len(vals) == 6:
+                raise ValueError(
+                    "roi_nd with 6 values is ambiguous; provide roi_type "
+                    "('2d_timelapse' or '3d')."
+                )
+            else:
+                raise ValueError("roi_nd must have length 4, 6, or 8.")
+        if roi_type not in _ROI_TYPES:
+            raise ValueError(f"Unsupported roi_type: {roi_type!r}.")
+
+        t_sel: slice | None = None
+        z_sel: slice | None = None
+
+        if roi_type == "2d":
+            if len(vals) != 4:
+                raise ValueError("roi_type='2d' requires 4 values.")
+            ymin, xmin, ymax, xmax = vals
+        elif roi_type == "2d_timelapse":
+            if len(vals) != 6:
+                raise ValueError("roi_type='2d_timelapse' requires 6 values.")
+            tmin, ymin, xmin, tmax, ymax, xmax = vals
+            if tmin < 0 or tmax > self._size_t or tmax <= tmin:
+                raise ValueError("roi_nd time bounds are out of range.")
+            t_sel = slice(tmin, tmax)
+        elif roi_type == "3d":
+            if len(vals) != 6:
+                raise ValueError("roi_type='3d' requires 6 values.")
+            zmin, ymin, xmin, zmax, ymax, xmax = vals
+            if zmin < 0 or zmax > self._size_z or zmax <= zmin:
+                raise ValueError("roi_nd depth bounds are out of range.")
+            z_sel = slice(zmin, zmax)
+        else:
+            if len(vals) != 8:
+                raise ValueError("roi_type='4d' requires 8 values.")
+            tmin, zmin, ymin, xmin, tmax, zmax, ymax, xmax = vals
+            if tmin < 0 or tmax > self._size_t or tmax <= tmin:
+                raise ValueError("roi_nd time bounds are out of range.")
+            if zmin < 0 or zmax > self._size_z or zmax <= zmin:
+                raise ValueError("roi_nd depth bounds are out of range.")
+            t_sel = slice(tmin, tmax)
+            z_sel = slice(zmin, zmax)
+
+        x, y = int(xmin), int(ymin)
+        w, h = int(xmax - xmin), int(ymax - ymin)
+        if w <= 0 or h <= 0:
+            raise ValueError("roi_nd spatial bounds must satisfy max > min.")
+        return (x, y, w, h), t_sel, z_sel
+
     def _data_py_dict(self) -> dict[str, Any]:
         """Return the backing record as a Python dict.
 
@@ -1117,8 +1236,12 @@ def _normalize_layout(layout: str | None) -> str | None:
     layout = layout.strip().upper()
     if not layout:
         raise ValueError("layout must be non-empty")
+    layout = layout.translate(_LAYOUT_ALIASES)
     if any(dim not in _ALLOWED_DIMS for dim in layout):
-        raise ValueError("layout must use only TZCHW letters")
+        raise ValueError(
+            "layout must use only TZC + (Y/X or H/W) letters "
+            "(for example CYX, YXC, TZCYX)."
+        )
     if len(set(layout)) != len(layout):
         raise ValueError("layout cannot repeat dimensions")
     return layout
diff --git a/tests/test_core.py b/tests/test_core.py
index fa0e999..f85f5c0 100644
--- a/tests/test_core.py
+++ b/tests/test_core.py
@@ -421,6 +421,38 @@ def test_scan_collect_roundtrip() -> None:
     assert oa.info()["shape"] == (1, 1, 1, 72, 84)
 
 
+@pytest.mark.parametrize(
+    ("input_data", "expected_shape"),
+    [
+        (
+            "tests/data/ome-artificial-5d-datasets/single-channel.ome.tiff",
+            (1, 1, 1, 167, 439),
+        ),  # 2D
+        (
+            "tests/data/ome-artificial-5d-datasets/time-series.ome.tif",
+            (7, 1, 1, 167, 439),
+        ),  # 2D timelapse
+        (
+            "tests/data/ome-artificial-5d-datasets/z-series.ome.tiff",
+            (1, 1, 5, 167, 439),
+        ),  # 3D
+    ],
+)
+@pytest.mark.filterwarnings(
+    "ignore:As of version 0.4.0, the parser argument is ignored.*:DeprecationWarning"
+)
+def test_scan_collect_roundtrip_non4d(
+    input_data: str, expected_shape: tuple[int, int, int, int, int]
+) -> None:
+    """Materialize lazy scans for non-4D sources and preserve shapes."""
+    oa = OMEArrow.scan(input_data)
+    assert oa.is_lazy
+
+    info = oa.collect().info()
+    assert not oa.is_lazy
+    assert info["shape"] == expected_shape
+
+
 def test_slice_lazy_scan_collect() -> None:
     """Queue a lazy slice and materialize it via collect()."""
     oa = OMEArrow.scan("tests/data/JUMP-BR00117006/BR00117006.ome.parquet")
diff --git a/tests/test_tensor.py b/tests/test_tensor.py
index bdb8c2f..4f7686c 100644
--- a/tests/test_tensor.py
+++ b/tests/test_tensor.py
@@ -53,15 +53,15 @@ def test_tensor_view_layout_and_values(example_correct_data: dict) -> None:
     )
     np.testing.assert_array_equal(arr, expected)
 
-    view_hwc = oa.tensor_view(t=0, z=0, layout="HWC")
-    arr_hwc = view_hwc.to_numpy(contiguous=False)
-    assert arr_hwc.shape == (3, 4, 2)
-    assert arr_hwc[0, 0, 0] == expected[0, 0, 0]
-    assert arr_hwc[0, 0, 1] == expected[1, 0, 0]
-    assert not arr_hwc.flags["C_CONTIGUOUS"]
+    view_yxc = oa.tensor_view(t=0, z=0, layout="YXC")
+    arr_yxc = view_yxc.to_numpy(contiguous=False)
+    assert arr_yxc.shape == (3, 4, 2)
+    assert arr_yxc[0, 0, 0] == expected[0, 0, 0]
+    assert arr_yxc[0, 0, 1] == expected[1, 0, 0]
+    assert not arr_yxc.flags["C_CONTIGUOUS"]
 
-    arr_hwc_contig = view_hwc.to_numpy(contiguous=True)
-    assert arr_hwc_contig.flags["C_CONTIGUOUS"]
+    arr_yxc_contig = view_yxc.to_numpy(contiguous=True)
+    assert arr_yxc_contig.flags["C_CONTIGUOUS"]
 
 
 def test_tensor_view_chunk_policy_modes(example_correct_data: dict) -> None:
@@ -94,7 +94,7 @@ def test_lazy_tensor_view_collects_on_execution(
     oa = OMEArrow.scan("tests/data/JUMP-BR00117006/BR00117006.ome.parquet")
     assert oa.is_lazy
 
-    view = oa.tensor_view(t=0, z=0, c=0, layout="HW")
+    view = oa.tensor_view(t=0, z=0, c=0, layout="YX")
     assert isinstance(view, LazyTensorView)
     assert oa.is_lazy
 
@@ -142,14 +142,15 @@ def test_lazy_tensor_view_with_layout_defers_materialization() -> None:
     assert oa.is_lazy
 
     view = oa.tensor_view(t=0, z=0, c=0)
-    view_hw = view.with_layout("HW")
+    view_yx = view.with_layout("YX")
 
-    assert isinstance(view_hw, LazyTensorView)
+    assert isinstance(view_yx, LazyTensorView)
+    assert view_yx.layout == "YX"
     assert oa.is_lazy
 
     with pytest.warns(UserWarning, match="Requested column 'ome_arrow'"):
-        concrete = view_hw.collect()
-    assert concrete.layout == "HW"
+        concrete = view_yx.collect()
+    assert concrete.layout in {"YX", "HW"}
     arr = concrete.to_numpy(contiguous=True)
     assert arr.shape == (72, 84)
     assert not oa.is_lazy
@@ -180,18 +181,18 @@ def test_dlpack_roundtrip_torch(example_correct_data: dict) -> None:
     assert tensor.data_ptr() == arr.__array_interface__["data"][0]
 
 
-def test_tensor_view_layout_hw_with_first_channel_policy(
+def test_tensor_view_layout_yx_with_first_channel_policy(
     example_correct_data: dict,
 ) -> None:
-    """Allow HW layout by selecting the first channel when requested."""
+    """Allow YX layout by selecting the first channel when requested."""
     oa = OMEArrow(example_correct_data)
 
-    view_hw = oa.tensor_view(t=0, z=0, layout="HW", channel_policy="first")
-    arr_hw = view_hw.to_numpy(contiguous=False)
+    view_yx = oa.tensor_view(t=0, z=0, layout="YX", channel_policy="first")
+    arr_yx = view_yx.to_numpy(contiguous=False)
 
-    expected_chw = oa.tensor_view(t=0, z=0, layout="CHW").to_numpy(contiguous=True)
-    np.testing.assert_array_equal(arr_hw, expected_chw[0])
-    assert arr_hw.shape == (3, 4)
+    expected_cyx = oa.tensor_view(t=0, z=0, layout="CYX").to_numpy(contiguous=True)
+    np.testing.assert_array_equal(arr_yx, expected_cyx[0])
+    assert arr_yx.shape == (3, 4)
 
 
 def test_tensor_view_roi3d_selects_z_and_roi() -> None:
@@ -199,7 +200,7 @@ def test_tensor_view_roi3d_selects_z_and_roi() -> None:
     arr = np.arange(1 * 1 * 3 * 4 * 5, dtype=np.uint16).reshape(1, 1, 3, 4, 5)
     oa = OMEArrow(arr)
 
-    view = oa.tensor_view(roi3d=(1, 1, 1, 3, 2, 2), layout="TZCHW")
+    view = oa.tensor_view(roi3d=(1, 1, 1, 3, 2, 2), layout="TZCYX")
     out = view.to_numpy(contiguous=True)
 
     expected = arr[:, :, 1:3, 1:3, 1:4]
@@ -217,6 +218,59 @@ def test_tensor_view_roi3d_conflicts_with_z() -> None:
         oa.tensor_view(z=0, roi3d=(0, 0, 0, 2, 2, 1))
 
 
+def test_tensor_view_roi_nd_3d_selects_z_and_roi() -> None:
+    """Support roi_nd 3D bounds with explicit roi_type."""
+    arr = np.arange(1 * 1 * 3 * 4 * 5, dtype=np.uint16).reshape(1, 1, 3, 4, 5)
+    oa = OMEArrow(arr)
+
+    view = oa.tensor_view(roi_nd=(1, 1, 1, 3, 3, 4), roi_type="3d", layout="TZCYX")
+    out = view.to_numpy(contiguous=True)
+
+    expected = arr[:, :, 1:3, 1:3, 1:4]
+    expected = np.transpose(expected, (0, 2, 1, 3, 4))
+    np.testing.assert_array_equal(out, expected)
+    assert out.shape == (1, 2, 1, 2, 3)
+
+
+def test_tensor_view_roi_nd_2d_timelapse_selects_t_and_roi() -> None:
+    """Support roi_nd timelapse bounds with explicit roi_type."""
+    arr = np.arange(3 * 1 * 1 * 4 * 5, dtype=np.uint16).reshape(3, 1, 1, 4, 5)
+    oa = OMEArrow(arr)
+
+    view = oa.tensor_view(
+        roi_nd=(1, 1, 1, 3, 3, 4), roi_type="2d_timelapse", layout="TZCYX"
+    )
+    out = view.to_numpy(contiguous=True)
+
+    expected = arr[1:3, :, :, 1:3, 1:4]
+    expected = np.transpose(expected, (0, 2, 1, 3, 4))
+    np.testing.assert_array_equal(out, expected)
+    assert out.shape == (2, 1, 1, 2, 3)
+
+
+def test_tensor_view_roi_nd_4d_selects_t_z_and_roi() -> None:
+    """Support roi_nd 4D bounds with implicit roi_type by tuple length."""
+    arr = np.arange(3 * 1 * 4 * 5 * 6, dtype=np.uint16).reshape(3, 1, 4, 5, 6)
+    oa = OMEArrow(arr)
+
+    view = oa.tensor_view(roi_nd=(1, 1, 1, 2, 3, 3, 4, 5), layout="TZCYX")
+    out = view.to_numpy(contiguous=True)
+
+    expected = arr[1:3, :, 1:3, 1:4, 2:5]
+    expected = np.transpose(expected, (0, 2, 1, 3, 4))
+    np.testing.assert_array_equal(out, expected)
+    assert out.shape == (2, 2, 1, 3, 3)
+
+
+def test_tensor_view_roi_nd_len6_requires_roi_type() -> None:
+    """Reject ambiguous roi_nd tuples with 6 values unless roi_type is set."""
+    arr = np.arange(1 * 1 * 3 * 4 * 5, dtype=np.uint16).reshape(1, 1, 3, 4, 5)
+    oa = OMEArrow(arr)
+
+    with pytest.raises(ValueError, match="roi_nd with 6 values is ambiguous"):
+        oa.tensor_view(roi_nd=(0, 0, 0, 1, 2, 3))
+
+
 def test_dlpack_roundtrip_jax(example_correct_data: dict) -> None:
     """Round-trip DLPack export/import through JAX on CPU."""
     jnp = pytest.importorskip("jax.numpy")
@@ -263,7 +317,7 @@ def test_layout_drop_non_singleton_errors() -> None:
     """Reject layout drops when the omitted axis is non-singleton."""
     arr = np.zeros((2, 1, 1, 2, 2), dtype=np.uint16)
     oa = OMEArrow(arr)
-    view = oa.tensor_view(layout="CHW")
+    view = oa.tensor_view(layout="CYX")
     with pytest.raises(ValueError, match="drops non-singleton"):
         view.to_numpy()
 

From 1d4703c77ad5554915c2034f2dd81b7afca96642 Mon Sep 17 00:00:00 2001
From: d33bs <ekgto445@gmail.com>
Date: Wed, 25 Feb 2026 10:40:52 -0700
Subject: [PATCH 08/10] make ome-arrow internals lazy; benchmark

---
 .github/workflows/run-tests.yml     |  84 ++++++++
 README.md                           |  26 +++
 benchmarks/benchmark_lazy_tensor.py | 322 ++++++++++++++++++++++++++++
 benchmarks/ci-baseline.json         |   7 +
 docs/src/dlpack.md                  |  38 ++++
 src/ome_arrow/core.py               |  51 ++++-
 src/ome_arrow/ingest.py             | 112 +++++++++-
 src/ome_arrow/tensor.py             | 106 ++++++++-
 tests/test_core.py                  |  14 ++
 tests/test_tensor.py                |  78 ++++++-
 10 files changed, 829 insertions(+), 9 deletions(-)
 create mode 100644 benchmarks/benchmark_lazy_tensor.py
 create mode 100644 benchmarks/ci-baseline.json

diff --git a/.github/workflows/run-tests.yml b/.github/workflows/run-tests.yml
index 54d6745..5edf855 100644
--- a/.github/workflows/run-tests.yml
+++ b/.github/workflows/run-tests.yml
@@ -46,3 +46,87 @@ jobs:
         run: uv sync --frozen --extra viz --extra dlpack
       - name: Run pytest
         run: uv run --frozen pytest
+
+  benchmark_canary:
+    runs-on: ubuntu-24.04
+    env:
+      BENCH_ENFORCE: "0"
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v6
+      - name: Python setup
+        uses: actions/setup-python@v6
+        with:
+          python-version: "3.11"
+      - name: Install the latest version of uv
+        uses: astral-sh/setup-uv@v7
+      - name: Sync dependencies
+        run: uv sync --frozen
+      - name: Run lazy tensor canary benchmark
+        run: |
+          extra_args=""
+          if [ "${BENCH_ENFORCE}" = "1" ]; then
+            extra_args="--fail-on-regression"
+          fi
+          uv run --frozen python benchmarks/benchmark_lazy_tensor.py \
+            --repeats 7 \
+            --warmup 2 \
+            --baseline-json benchmarks/ci-baseline.json \
+            --regression-factor 1.5 \
+            --absolute-slack-ms 5.0 \
+            --json-out benchmark-results.json \
+            ${extra_args}
+      - name: Write benchmark summary
+        run: |
+          python - <<'PY'
+          import json
+          from pathlib import Path
+
+          payload = json.loads(Path("benchmark-results.json").read_text())
+          lines = [
+              "## Lazy Tensor Benchmark Canary",
+              "",
+              f"- repeats: `{payload['repeats']}`",
+              f"- warmup: `{payload['warmup']}`",
+              "",
+              "| case | median (ms) | min (ms) | max (ms) |",
+              "|---|---:|---:|---:|",
+          ]
+          for r in payload["results"]:
+              lines.append(
+                  "| "
+                  f"{r['name']} | {r['median_ms']:.2f} "
+                  f"| {r['min_ms']:.2f} | {r['max_ms']:.2f} |"
+              )
+          lines.extend(
+              [
+                  "",
+                  "| case | baseline (ms) | threshold (ms) | status |",
+                  "|---|---:|---:|---|",
+              ]
+          )
+          for c in payload["regression_checks"]:
+              baseline = (
+                  "-"
+                  if c["baseline_ms"] is None
+                  else f"{c['baseline_ms']:.2f}"
+              )
+              threshold = (
+                  "-"
+                  if c["threshold_ms"] is None
+                  else f"{c['threshold_ms']:.2f}"
+              )
+              status = "regressed" if c["regressed"] else "ok"
+              lines.append(
+                  f"| {c['name']} | {baseline} | "
+                  f"{threshold} | {status} |"
+              )
+
+          summary_path = Path(__import__("os").environ["GITHUB_STEP_SUMMARY"])
+          summary_path.write_text("\n".join(lines) + "\n")
+          PY
+      - name: Upload benchmark artifact
+        uses: actions/upload-artifact@v4
+        with:
+          name: lazy-tensor-benchmark-${{ github.run_id }}
+          path: benchmark-results.json
diff --git a/README.md b/README.md
index d03c40e..ac7e1fc 100644
--- a/README.md
+++ b/README.md
@@ -120,6 +120,32 @@ Advanced options:
 
 See full docs: [`docs/src/dlpack.md`](docs/src/dlpack.md)
 
+## Benchmarking lazy reads
+
+Use the lightweight benchmark utility in `benchmarks/` to compare lazy tensor
+read paths (TIFF source-backed, Parquet planes, Parquet chunks):
+
+```bash
+uv run python benchmarks/benchmark_lazy_tensor.py --repeats 5 --warmup 1
+```
+
+Notes:
+
+- This benchmark is for local iteration and relative comparisons.
+- It is not part of CI pass/fail checks.
+- CI also runs this benchmark in a dedicated `benchmark_canary` job and
+  uploads `benchmark-results.json` as a workflow artifact.
+
+Recalibrating `benchmarks/ci-baseline.json`:
+
+1. Run the benchmark on `main` a few times (for example 3-5 runs):
+   `uv run python benchmarks/benchmark_lazy_tensor.py --repeats 7 --warmup 2 --json-out benchmark-results.json`
+1. For each case, collect the observed `median_ms` values.
+1. Update `benchmarks/ci-baseline.json` with stable medians from those runs
+   (prefer a conservative value near the slower side, not the fastest sample).
+1. Keep CI canary tolerance (`regression_factor` + `absolute_slack_ms`) unchanged
+   unless you have repeated false positives.
+
 ## Contributing, Development, and Testing
 
 Please see our [contributing documentation](https://github.com/wayscience/ome-arrow/tree/main/CONTRIBUTING.md) for more details on contributions, development, and testing.
diff --git a/benchmarks/benchmark_lazy_tensor.py b/benchmarks/benchmark_lazy_tensor.py
new file mode 100644
index 0000000..12aee96
--- /dev/null
+++ b/benchmarks/benchmark_lazy_tensor.py
@@ -0,0 +1,322 @@
+"""Lightweight benchmark for lazy tensor read paths.
+
+This script compares `OMEArrow.scan(...).tensor_view(...).to_numpy()` across:
+- TIFF source-backed lazy plane loading
+- OME-Parquet (planes payload)
+- OME-Parquet (chunked payload)
+
+It is intended as a quick local signal, not a rigorous microbenchmark.
+"""
+
+from __future__ import annotations
+
+import argparse
+import json
+import statistics
+import sys
+import tempfile
+import time
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Callable
+
+import numpy as np
+
+from ome_arrow import OMEArrow
+from ome_arrow.export import to_ome_parquet
+from ome_arrow.ingest import from_numpy
+
+
+@dataclass(frozen=True)
+class BenchmarkResult:
+    """Summary stats for one benchmark case."""
+
+    name: str
+    median_ms: float
+    min_ms: float
+    max_ms: float
+    shape: tuple[int, ...]
+
+
+@dataclass(frozen=True)
+class RegressionCheck:
+    """Regression check output for one benchmark case."""
+
+    name: str
+    baseline_ms: float | None
+    threshold_ms: float | None
+    regressed: bool
+
+
+def _time_case(
+    name: str,
+    fn: Callable[[], np.ndarray],
+    *,
+    repeats: int,
+    warmup: int,
+) -> BenchmarkResult:
+    """Run one benchmark case and return timing stats."""
+    out: np.ndarray | None = None
+    for _ in range(warmup):
+        out = fn()
+
+    times_ms: list[float] = []
+    for _ in range(repeats):
+        start = time.perf_counter()
+        out = fn()
+        end = time.perf_counter()
+        times_ms.append((end - start) * 1000.0)
+
+    if out is None:
+        raise RuntimeError("Benchmark case did not produce output.")
+    return BenchmarkResult(
+        name=name,
+        median_ms=statistics.median(times_ms),
+        min_ms=min(times_ms),
+        max_ms=max(times_ms),
+        shape=tuple(out.shape),
+    )
+
+
+def _build_parquet_fixtures(workdir: Path) -> tuple[Path, Path]:
+    """Create small planes/chunks parquet fixtures for local benchmarking."""
+    arr = np.arange(1 * 2 * 3 * 256 * 256, dtype=np.uint16).reshape(1, 2, 3, 256, 256)
+
+    planes_scalar = from_numpy(arr, build_chunks=False, image_id="bench-planes")
+    chunks_scalar = from_numpy(
+        arr,
+        build_chunks=True,
+        chunk_shape=(1, 64, 64),
+        image_id="bench-chunks",
+    )
+
+    planes_path = workdir / "bench_planes.ome.parquet"
+    chunks_path = workdir / "bench_chunks.ome.parquet"
+    to_ome_parquet(planes_scalar, out_path=str(planes_path), column_name="ome_arrow")
+    to_ome_parquet(chunks_scalar, out_path=str(chunks_path), column_name="ome_arrow")
+    return planes_path, chunks_path
+
+
+def _print_results(results: list[BenchmarkResult]) -> None:
+    """Print benchmark results in a compact table."""
+    print("")
+    print("Lazy tensor benchmark (ms)")
+    print(f"{'case':38} {'median':>10} {'min':>10} {'max':>10} {'shape':>16}")
+    print("-" * 92)
+    for r in results:
+        print(
+            f"{r.name:38} {r.median_ms:10.2f} {r.min_ms:10.2f} {r.max_ms:10.2f} {str(r.shape):>16}"
+        )
+
+
+def _load_baseline(path: Path | None) -> dict[str, float]:
+    """Load baseline medians from JSON, if provided."""
+    if path is None or not path.exists():
+        return {}
+    payload = json.loads(path.read_text())
+    cases = payload.get("cases", {})
+    return {str(k): float(v) for k, v in cases.items()}
+
+
+def _check_regressions(
+    results: list[BenchmarkResult],
+    *,
+    baseline: dict[str, float],
+    regression_factor: float,
+    absolute_slack_ms: float,
+) -> list[RegressionCheck]:
+    """Compare benchmark medians against baseline thresholds."""
+    checks: list[RegressionCheck] = []
+    for r in results:
+        baseline_ms = baseline.get(r.name)
+        if baseline_ms is None:
+            checks.append(
+                RegressionCheck(
+                    name=r.name,
+                    baseline_ms=None,
+                    threshold_ms=None,
+                    regressed=False,
+                )
+            )
+            continue
+
+        threshold_ms = baseline_ms * regression_factor + absolute_slack_ms
+        checks.append(
+            RegressionCheck(
+                name=r.name,
+                baseline_ms=baseline_ms,
+                threshold_ms=threshold_ms,
+                regressed=r.median_ms > threshold_ms,
+            )
+        )
+    return checks
+
+
+def _print_regressions(checks: list[RegressionCheck]) -> None:
+    """Print regression-comparison details."""
+    with_baseline = [c for c in checks if c.baseline_ms is not None]
+    if not with_baseline:
+        print("\nNo baseline cases configured; skipping regression checks.")
+        return
+    print("\nCanary comparison")
+    print(f"{'case':38} {'baseline':>10} {'threshold':>10} {'status':>10}")
+    print("-" * 74)
+    for c in checks:
+        if c.baseline_ms is None or c.threshold_ms is None:
+            status = "no-baseline"
+            baseline = "-"
+            threshold = "-"
+        else:
+            status = "regressed" if c.regressed else "ok"
+            baseline = f"{c.baseline_ms:.2f}"
+            threshold = f"{c.threshold_ms:.2f}"
+        print(f"{c.name:38} {baseline:>10} {threshold:>10} {status:>10}")
+
+
+def _write_json_report(
+    path: Path,
+    *,
+    results: list[BenchmarkResult],
+    checks: list[RegressionCheck],
+    repeats: int,
+    warmup: int,
+) -> None:
+    """Write a machine-readable benchmark report for CI artifacts."""
+    payload = {
+        "repeats": repeats,
+        "warmup": warmup,
+        "results": [
+            {
+                "name": r.name,
+                "median_ms": r.median_ms,
+                "min_ms": r.min_ms,
+                "max_ms": r.max_ms,
+                "shape": list(r.shape),
+            }
+            for r in results
+        ],
+        "regression_checks": [
+            {
+                "name": c.name,
+                "baseline_ms": c.baseline_ms,
+                "threshold_ms": c.threshold_ms,
+                "regressed": c.regressed,
+            }
+            for c in checks
+        ],
+    }
+    path.write_text(json.dumps(payload, indent=2))
+
+
+def main() -> None:
+    """Run lightweight lazy tensor benchmarks."""
+    parser = argparse.ArgumentParser(description=__doc__)
+    parser.add_argument("--repeats", type=int, default=5, help="Timed iterations.")
+    parser.add_argument("--warmup", type=int, default=1, help="Warmup iterations.")
+    parser.add_argument(
+        "--json-out",
+        type=Path,
+        default=None,
+        help="Optional JSON output path for CI/reporting.",
+    )
+    parser.add_argument(
+        "--baseline-json",
+        type=Path,
+        default=None,
+        help="Optional baseline JSON with {'cases': {'case-name': median_ms}}.",
+    )
+    parser.add_argument(
+        "--regression-factor",
+        type=float,
+        default=1.25,
+        help="Allowed multiplicative slowdown vs baseline.",
+    )
+    parser.add_argument(
+        "--absolute-slack-ms",
+        type=float,
+        default=2.0,
+        help="Allowed absolute slowdown slack in ms.",
+    )
+    parser.add_argument(
+        "--fail-on-regression",
+        action="store_true",
+        help="Exit non-zero when any case exceeds regression threshold.",
+    )
+    parser.add_argument(
+        "--tiff-path",
+        type=Path,
+        default=Path("tests/data/ome-artificial-5d-datasets/single-channel.ome.tiff"),
+        help="TIFF file used for the source-backed lazy case.",
+    )
+    args = parser.parse_args()
+
+    if args.repeats <= 0:
+        raise ValueError("--repeats must be > 0.")
+    if args.warmup < 0:
+        raise ValueError("--warmup must be >= 0.")
+    if args.regression_factor <= 0:
+        raise ValueError("--regression-factor must be > 0.")
+    if args.absolute_slack_ms < 0:
+        raise ValueError("--absolute-slack-ms must be >= 0.")
+
+    with tempfile.TemporaryDirectory(prefix="ome_arrow_lazy_bench_") as tmp:
+        tmpdir = Path(tmp)
+        planes_path, chunks_path = _build_parquet_fixtures(tmpdir)
+
+        cases: list[tuple[str, Callable[[], np.ndarray]]] = []
+        if args.tiff_path.exists():
+            cases.append(
+                (
+                    "scan+tiff -> tensor_view(YX)",
+                    lambda: OMEArrow.scan(str(args.tiff_path))
+                    .tensor_view(t=0, z=0, c=0, layout="YX")
+                    .to_numpy(contiguous=True),
+                )
+            )
+        else:
+            print(f"Skipping TIFF case; file not found: {args.tiff_path}")
+
+        cases.extend(
+            [
+                (
+                    "scan+parquet(planes) -> tensor_view(YX)",
+                    lambda: OMEArrow.scan(str(planes_path))
+                    .tensor_view(t=0, z=1, c=1, layout="YX")
+                    .to_numpy(contiguous=True),
+                ),
+                (
+                    "scan+parquet(chunks) -> tensor_view(YX)",
+                    lambda: OMEArrow.scan(str(chunks_path))
+                    .tensor_view(t=0, z=1, c=1, layout="YX")
+                    .to_numpy(contiguous=True),
+                ),
+            ]
+        )
+
+        results = [
+            _time_case(name, fn, repeats=args.repeats, warmup=args.warmup)
+            for name, fn in cases
+        ]
+        _print_results(results)
+        baseline = _load_baseline(args.baseline_json)
+        checks = _check_regressions(
+            results,
+            baseline=baseline,
+            regression_factor=args.regression_factor,
+            absolute_slack_ms=args.absolute_slack_ms,
+        )
+        _print_regressions(checks)
+        if args.json_out is not None:
+            _write_json_report(
+                args.json_out,
+                results=results,
+                checks=checks,
+                repeats=args.repeats,
+                warmup=args.warmup,
+            )
+        if args.fail_on_regression and any(c.regressed for c in checks):
+            sys.exit(1)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/benchmarks/ci-baseline.json b/benchmarks/ci-baseline.json
new file mode 100644
index 0000000..c720277
--- /dev/null
+++ b/benchmarks/ci-baseline.json
@@ -0,0 +1,7 @@
+{
+  "cases": {
+    "scan+tiff -> tensor_view(YX)": 20.0,
+    "scan+parquet(planes) -> tensor_view(YX)": 12.0,
+    "scan+parquet(chunks) -> tensor_view(YX)": 12.0
+  }
+}
diff --git a/docs/src/dlpack.md b/docs/src/dlpack.md
index 86f19ed..0b99cf9 100644
--- a/docs/src/dlpack.md
+++ b/docs/src/dlpack.md
@@ -55,6 +55,10 @@ cropped = lazy_crop.collect()
 # Then execute tensor selections on the sliced result.
 tensor_view = cropped.tensor_view(t=0, z=slice(0, 8), roi=(64, 64, 128, 128))
 arr = tensor_view.to_numpy()
+
+# Note: executing a LazyTensorView from OMEArrow.scan(...) does not
+# materialize the original OMEArrow object itself.
+# Call obj.collect() explicitly if you need to materialize `obj`.
 ```
 
 ## JAX
@@ -132,3 +136,37 @@ pip install "ome-arrow[dlpack-torch]"  # torch only
 pip install "ome-arrow[dlpack-jax]"    # jax only
 pip install "ome-arrow[dlpack]"        # both
 ```
+
+## Benchmarking lazy reads
+
+To quickly compare lazy tensor read paths (TIFF source-backed execution,
+Parquet planes, Parquet chunks), run:
+
+```bash
+uv run python benchmarks/benchmark_lazy_tensor.py --repeats 5 --warmup 1
+```
+
+This is a lightweight local benchmark intended for directional performance
+checks during development.
+
+In CI, the `tests` workflow runs a `benchmark_canary` job that executes the
+same script and uploads a JSON report artifact.
+
+### Recalibrating `ci-baseline.json`
+
+When performance changes are intentional (or runner behavior shifts), update
+`benchmarks/ci-baseline.json` as follows:
+
+1. Check out the latest `main`.
+1. Run the benchmark multiple times:
+   `uv run python benchmarks/benchmark_lazy_tensor.py --repeats 7 --warmup 2 --json-out benchmark-results.json`
+1. Record `median_ms` per case across runs.
+1. Set each baseline value to a stable, slightly conservative median.
+1. Open a PR that updates baseline values only, with benchmark evidence.
+
+Expected variability:
+
+- Small fluctuations are normal on GitHub-hosted runners.
+- Relative ordering of cases is usually stable.
+- Typical drift should be modest, but occasional jumps can happen due to
+  runner image or dependency changes.
diff --git a/src/ome_arrow/core.py b/src/ome_arrow/core.py
index 207b708..6a3b5ea 100644
--- a/src/ome_arrow/core.py
+++ b/src/ome_arrow/core.py
@@ -35,6 +35,7 @@
     from_ome_zarr,
     from_stack_pattern_path,
     from_tiff,
+    open_lazy_plane_source,
 )
 from ome_arrow.meta import OME_ARROW_STRUCT
 from ome_arrow.tensor import LazyTensorView, TensorView
@@ -366,6 +367,51 @@ def _tensor_source(self) -> pa.StructScalar | pa.StructArray:
             raise RuntimeError("OMEArrow data is not initialized.")
         return self._data
 
+    def _resolve_lazy_tensor_view(self, view_kwargs: dict[str, Any]) -> TensorView:
+        """Resolve a lazy tensor view plan without mutating this OMEArrow state.
+
+        Args:
+            view_kwargs: TensorView constructor kwargs captured by LazyTensorView.
+
+        Returns:
+            TensorView: Concrete tensor view for the planned selection.
+        """
+        # Deferred slice plans rely on slice_ome_arrow over a materialized scalar;
+        # keep the existing behavior for those plans.
+        if self._lazy_slices:
+            self._ensure_materialized()
+            return TensorView(self._tensor_source(), **view_kwargs)
+
+        if self._lazy_source is None:
+            return TensorView(self._tensor_source(), **view_kwargs)
+
+        lazy_source = self._lazy_source
+        lazy_plane_source = open_lazy_plane_source(lazy_source.data)
+        if lazy_plane_source is not None:
+            pixels_meta, plane_loader = lazy_plane_source
+            lazy_record = {
+                "id": None,
+                "name": None,
+                "image_type": lazy_source.image_type,
+                "acquisition_datetime": None,
+                "pixels_meta": pixels_meta,
+                "channels": [],
+                "planes": [],
+                "masks": [],
+                "chunk_grid": None,
+                "chunks": [],
+            }
+            return TensorView(lazy_record, plane_loader=plane_loader, **view_kwargs)
+
+        scalar, struct_array = self._load_from_string_source(
+            lazy_source.data,
+            column_name=lazy_source.column_name,
+            row_index=lazy_source.row_index,
+            image_type=lazy_source.image_type,
+        )
+        source = struct_array if struct_array is not None else scalar
+        return TensorView(source, **view_kwargs)
+
     @staticmethod
     def _wrap_with_image_type(
         data: pa.StructScalar, image_type: str
@@ -739,7 +785,9 @@ def tensor_view(
         Returns:
             TensorView | LazyTensorView: Tensor view over selected pixels.
             In lazy mode, this returns a deferred ``LazyTensorView`` that
-            materializes on first execution call (for example ``to_numpy()``).
+            resolves on first execution call (for example ``to_numpy()``)
+            without forcing ``self`` to materialize unless deferred
+            ``slice_lazy`` operations are queued.
 
         Raises:
             ValueError: If an unsupported scene is requested.
@@ -751,6 +799,7 @@ def tensor_view(
         if self._lazy_source is not None:
             return LazyTensorView(
                 loader=self._tensor_source,
+                resolver=self._resolve_lazy_tensor_view,
                 t=t,
                 z=z,
                 c=c,
diff --git a/src/ome_arrow/ingest.py b/src/ome_arrow/ingest.py
index 93d5d72..38643f8 100644
--- a/src/ome_arrow/ingest.py
+++ b/src/ome_arrow/ingest.py
@@ -8,7 +8,7 @@
 import warnings
 from datetime import datetime, timezone
 from pathlib import Path
-from typing import Any, Dict, List, Optional, Sequence, Tuple
+from typing import Any, Callable, Dict, List, Optional, Sequence, Tuple
 
 import bioio_ome_tiff
 import bioio_tifffile
@@ -200,6 +200,75 @@ def _read_physical_pixel_sizes(
     return psize_x, psize_y, psize_z, unit, True
 
 
+def open_lazy_plane_source(
+    source: str,
+) -> tuple[dict[str, Any], Callable[[int, int, int], np.ndarray]] | None:
+    """Open a source-backed per-plane loader for lazy tensor execution.
+
+    Args:
+        source: Input path/URL string for TIFF or OME-Zarr sources.
+
+    Returns:
+        A tuple of ``(pixels_meta, plane_loader)`` when source-backed lazy plane
+        loading is supported for ``source``; otherwise ``None``.
+    """
+    s = source.strip()
+    path = Path(s)
+    lower = s.lower()
+
+    if path.suffix.lower() in {".tif", ".tiff"} or lower.endswith((".tif", ".tiff")):
+        img = BioImage(
+            image=str(path),
+            reader=(
+                bioio_ome_tiff.Reader
+                if str(path).lower().endswith(("ome.tif", "ome.tiff"))
+                else bioio_tifffile.Reader
+            ),
+        )
+    elif (
+        lower.endswith(".zarr")
+        or lower.endswith(".ome.zarr")
+        or ".zarr/" in lower
+        or (path.exists() and path.is_dir() and path.suffix.lower() == ".zarr")
+    ):
+        img = BioImage(image=str(path), reader=OMEZarrReader)
+    else:
+        return None
+
+    dims = img.dims
+    size_t = int(dims.T or 1)
+    size_c = int(dims.C or 1)
+    size_z = int(dims.Z or 1)
+    size_y = int(dims.Y or 0)
+    size_x = int(dims.X or 0)
+    if size_x <= 0 or size_y <= 0:
+        sample = np.asarray(img.get_image_data("YX", T=0, C=0, Z=0))
+        size_y, size_x = int(sample.shape[-2]), int(sample.shape[-1])
+
+    dim_order = "XYCT" if size_z == 1 else "XYZCT"
+    pixels_meta = {
+        "dimension_order": dim_order,
+        "type": "uint16",
+        "size_x": size_x,
+        "size_y": size_y,
+        "size_z": size_z,
+        "size_c": size_c,
+        "size_t": size_t,
+        "physical_size_x": None,
+        "physical_size_y": None,
+        "physical_size_z": None,
+        "physical_size_unit": None,
+    }
+
+    def _plane_loader(t: int, z: int, c: int) -> np.ndarray:
+        plane = np.asarray(img.get_image_data("YX", T=t, C=c, Z=z))
+        if plane.dtype != np.uint16:
+            plane = np.clip(plane, 0, 65535).astype(np.uint16)
+        return plane
+
+    return pixels_meta, _plane_loader
+
+
 def _load_zarr_attrs(zarr_path: Path) -> dict:
     zarr_json = zarr_path / "zarr.json"
     if zarr_json.exists():
@@ -1306,16 +1375,53 @@ def from_ome_parquet(
     Raises:
         FileNotFoundError: If the Parquet path does not exist.
         ValueError: If the row index is out of range or no suitable column exists.
+
+    Notes:
+        This reader targets the row group containing ``row_index`` and requests
+        only ``column_name`` when provided, avoiding eager full-table reads.
     """
     p = Path(parquet_path)
     if not p.exists():
         raise FileNotFoundError(f"No such file: {p}")
 
-    table = pq.read_table(p)
+    parquet_file = pq.ParquetFile(p)
+    metadata = parquet_file.metadata
+    if metadata is None or metadata.num_rows == 0:
+        raise ValueError("Table contains 0 rows; expected at least 1.")
+    if not (0 <= row_index < metadata.num_rows):
+        raise ValueError(
+            f"row_index {row_index} out of range [0, {metadata.num_rows})."
+        )
+
+    row_group_index = 0
+    row_index_in_group = row_index
+    for i in range(metadata.num_row_groups):
+        group_rows = metadata.row_group(i).num_rows
+        if row_index_in_group < group_rows:
+            row_group_index = i
+            break
+        row_index_in_group -= group_rows
+
+    requested_columns = [column_name] if column_name is not None else None
+    try:
+        table = parquet_file.read_row_group(row_group_index, columns=requested_columns)
+    except (KeyError, ValueError, pa.ArrowInvalid):
+        if requested_columns is None:
+            raise
+        # If the requested column is unavailable in the row group read path, fall
+        # back to all columns so downstream auto-detection/warnings remain intact.
+        table = parquet_file.read_row_group(row_group_index)
+    else:
+        if requested_columns is not None and column_name not in table.column_names:
+            # Some parquet backends return an empty projected table when a column
+            # is missing rather than raising. Retry with full row-group columns so
+            # _ome_arrow_from_table can auto-detect and emit the usual warning.
+            table = parquet_file.read_row_group(row_group_index)
+
     return _ome_arrow_from_table(
         table,
         column_name=column_name,
-        row_index=row_index,
+        row_index=row_index_in_group,
         strict_schema=strict_schema,
         return_array=return_array,
     )
diff --git a/src/ome_arrow/tensor.py b/src/ome_arrow/tensor.py
index b52f489..2c7813e 100644
--- a/src/ome_arrow/tensor.py
+++ b/src/ome_arrow/tensor.py
@@ -58,6 +58,7 @@ def __init__(
             [],
             dict[str, Any] | pa.StructScalar | pa.StructArray | pa.ChunkedArray,
         ],
+        resolver: Callable[[dict[str, Any]], "TensorView"] | None = None,
         t: int | slice | Sequence[int] | None = None,
         z: int | slice | Sequence[int] | None = None,
         c: int | slice | Sequence[int] | None = None,
@@ -75,6 +76,8 @@ def __init__(
 
         Args:
             loader: Callable that returns concrete OME-Arrow pixel data.
+            resolver: Optional callable that resolves this lazy plan directly to
+                a concrete ``TensorView`` using current selection kwargs.
             t: Time index selection.
             z: Depth index selection.
             c: Channel index selection.
@@ -90,6 +93,7 @@ def __init__(
             channel_policy: Behavior when dropping ``C`` from layout.
         """
         self._loader = loader
+        self._resolver = resolver
         self._kwargs: dict[str, Any] = {
             "t": t,
             "z": z,
@@ -110,7 +114,7 @@ def __init__(
     def _spawn(self, **updates: Any) -> "LazyTensorView":
         kwargs = dict(self._kwargs)
         kwargs.update(updates)
-        return LazyTensorView(loader=self._loader, **kwargs)
+        return LazyTensorView(loader=self._loader, resolver=self._resolver, **kwargs)
 
     def collect(self) -> "TensorView":
         """Materialize this lazy plan into a concrete TensorView."""
@@ -121,7 +125,10 @@ def collect(self) -> "TensorView":
         with self._collect_lock:
             resolved = self._resolved
             if resolved is None:
-                resolved = TensorView(self._loader(), **self._kwargs)
+                if self._resolver is not None:
+                    resolved = self._resolver(dict(self._kwargs))
+                else:
+                    resolved = TensorView(self._loader(), **self._kwargs)
                 self._resolved = resolved
         return resolved
 
@@ -400,6 +407,7 @@ def __init__(
         self,
         data: dict[str, Any] | pa.StructScalar | pa.StructArray | pa.ChunkedArray,
         *,
+        plane_loader: Callable[[int, int, int], np.ndarray] | None = None,
         t: int | slice | Sequence[int] | None = None,
         z: int | slice | Sequence[int] | None = None,
         c: int | slice | Sequence[int] | None = None,
@@ -417,6 +425,9 @@ def __init__(
 
         Args:
             data: OME-Arrow record as dict/StructScalar/StructArray/ChunkedArray.
+            plane_loader: Optional callback that returns one YX plane for
+                ``(t, z, c)``. When provided, per-plane reads use this loader
+                instead of ``planes``/``chunks`` payloads in ``data``.
             t: Time index selection (int, slice, or sequence). Default: all.
             z: Z index selection (int, slice, or sequence). Default: all.
             c: Channel index selection (int, slice, or sequence). Default: all.
@@ -464,6 +475,7 @@ def __init__(
         # Keep normalized backing data so child TensorViews do not repeatedly
         # combine chunked Arrow arrays during iteration.
         self._data = data
+        self._plane_loader = plane_loader
         self._layout_override = _normalize_layout(layout) if layout else None
         self._channel_policy = _normalize_channel_policy(channel_policy)
 
@@ -540,6 +552,7 @@ def with_layout(self, layout: str) -> "TensorView":
 
         return TensorView(
             self._data,
+            plane_loader=self._plane_loader,
             t=self._selection.t,
             z=self._selection.z,
             c=self._selection.c,
@@ -792,6 +805,7 @@ def iter_tiles_3d(
             t, z_batch, x, y, w, h = batch[0]
             view = TensorView(
                 self._data,
+                plane_loader=self._plane_loader,
                 t=[t],
                 z=list(z_batch),
                 c=self._selection.c,
@@ -828,6 +842,7 @@ def _iter_batches(
         for batch in _batched(t_indices, batch_size, prefetch=prefetch):
             view = TensorView(
                 self._data,
+                plane_loader=self._plane_loader,
                 t=batch,
                 z=self._selection.z,
                 c=self._selection.c,
@@ -870,6 +885,7 @@ def _iter_tiles(
             x, y, w, h = batch[0]
             view = TensorView(
                 self._data,
+                plane_loader=self._plane_loader,
                 t=self._selection.t,
                 z=self._selection.z,
                 c=self._selection.c,
@@ -912,6 +928,9 @@ def _build_tzchw(self) -> np.ndarray:
     def _plane_map(self) -> dict[tuple[int, int, int], list[Any]]:
         if self._has_chunks():
             return {}
+        if self._struct_array is not None:
+            # Arrow-backed plane reads use _select_plane_values in _read_plane.
+            return {}
         plane_map = {}
         data_py = self._data_py_dict()
         for plane in data_py.get("planes", []):
@@ -927,6 +946,30 @@ def _read_plane(
         z: int,
         c: int,
     ) -> np.ndarray:
+        if self._plane_loader is not None:
+            arr = np.asarray(self._plane_loader(t, z, c), dtype=self._dtype)
+            return arr.reshape(self._size_y, self._size_x)
+
+        if self._struct_array is not None and not self._has_chunks():
+            values = _select_plane_values(self._struct_array, t=t, z=z, c=c)
+            arr = values.to_numpy(zero_copy_only=False)
+            return np.asarray(arr, dtype=self._dtype).reshape(
+                self._size_y, self._size_x
+            )
+
+        if self._struct_array is not None and self._has_chunks():
+            chunk_order = str(self._chunk_grid().get("chunk_order") or "ZYX").upper()
+            return _plane_from_chunks_arrow(
+                self._struct_array,
+                t=t,
+                z=z,
+                c=c,
+                size_x=self._size_x,
+                size_y=self._size_y,
+                dtype=self._dtype,
+                chunk_order=chunk_order,
+            )
+
         if self._has_chunks():
             data_py = self._data_py_dict()
             return plane_from_chunks(data_py, t=t, z=z, c=c, dtype=self._dtype)
@@ -1553,3 +1596,62 @@ def _select_chunk_values(
 
     pixels_list = selected.field("pixels")[0]
     return pixels_list.values
+
+
+def _plane_from_chunks_arrow(
+    struct_arr: pa.StructArray,
+    *,
+    t: int,
+    z: int,
+    c: int,
+    size_x: int,
+    size_y: int,
+    dtype: np.dtype,
+    chunk_order: str = "ZYX",
+) -> np.ndarray:
+    """Reconstruct one YX plane from Arrow chunk rows without Python dict casts."""
+    if chunk_order != "ZYX":
+        raise ValueError("Only chunk_order='ZYX' is supported for now.")
+
+    chunks_arr = struct_arr.field("chunks")
+    if len(chunks_arr) == 0 or chunks_arr.is_null().to_pylist()[0]:
+        return np.zeros((size_y, size_x), dtype=dtype)
+
+    chunks = chunks_arr[0].values
+    z_field = chunks.field("z")
+    sz_field = chunks.field("shape_z")
+    z_end = pc.add(z_field, sz_field)
+    mask = pc.and_(
+        pc.equal(chunks.field("t"), t),
+        pc.and_(
+            pc.equal(chunks.field("c"), c),
+            pc.and_(pc.less_equal(z_field, z), pc.greater(z_end, z)),
+        ),
+    )
+    selected = pc.filter(chunks, mask)
+
+    out = np.zeros((size_y, size_x), dtype=dtype)
+    for i in range(len(selected)):
+        z0 = int(selected.field("z")[i].as_py())
+        y0 = int(selected.field("y")[i].as_py())
+        x0 = int(selected.field("x")[i].as_py())
+        sz = int(selected.field("shape_z")[i].as_py())
+        sy = int(selected.field("shape_y")[i].as_py())
+        sx = int(selected.field("shape_x")[i].as_py())
+        if y0 < 0 or x0 < 0 or sz <= 0 or sy <= 0 or sx <= 0:
+            raise ValueError("Chunk has invalid shape or origin.")
+        if z0 + sz <= z or z0 > z:
+            continue
+        if y0 + sy > size_y or x0 + sx > size_x:
+            raise ValueError("Chunk extent out of range.")
+
+        pixels_values = selected.field("pixels")[i].values
+        pix = np.asarray(pixels_values.to_numpy(zero_copy_only=False), dtype=dtype)
+        expected_len = sz * sy * sx
+        if pix.size != expected_len:
+            raise ValueError(
+                f"Chunk pixels length {pix.size} != expected {expected_len}."
+            )
+        arr3d = pix.reshape(sz, sy, sx)
+        out[y0 : y0 + sy, x0 : x0 + sx] = arr3d[z - z0]
+    return out
diff --git a/tests/test_core.py b/tests/test_core.py
index f85f5c0..bb15a43 100644
--- a/tests/test_core.py
+++ b/tests/test_core.py
@@ -8,6 +8,7 @@
 import numpy as np
 import pytest
 
+from ome_arrow import ingest
 from ome_arrow.core import OMEArrow
 
 
@@ -421,6 +422,19 @@ def test_scan_collect_roundtrip() -> None:
     assert oa.info()["shape"] == (1, 1, 1, 72, 84)
 
 
+def test_parquet_read_avoids_full_table_scan(monkeypatch: pytest.MonkeyPatch) -> None:
+    """Use row-group reads instead of eager pq.read_table for parquet ingest."""
+    path = "tests/data/JUMP-BR00117006/BR00117006.ome.parquet"
+
+    def _fail_read_table(*_args: object, **_kwargs: object) -> None:
+        raise AssertionError("pq.read_table should not be used for from_ome_parquet")
+
+    monkeypatch.setattr(ingest.pq, "read_table", _fail_read_table)
+    with pytest.warns(UserWarning, match="Requested column 'ome_arrow'"):
+        oa = OMEArrow(path)
+    assert oa.info()["shape"] == (1, 1, 1, 72, 84)
+
+
 @pytest.mark.parametrize(
     ("input_data", "expected_shape"),
     [
diff --git a/tests/test_tensor.py b/tests/test_tensor.py
index 4f7686c..cbde9e4 100644
--- a/tests/test_tensor.py
+++ b/tests/test_tensor.py
@@ -7,6 +7,7 @@
 import pyarrow.compute as pc
 import pytest
 
+import ome_arrow.core as core_module
 from ome_arrow import OMEArrow
 from ome_arrow.export import to_ome_parquet
 from ome_arrow.meta import OME_ARROW_STRUCT
@@ -100,7 +101,7 @@ def test_lazy_tensor_view_collects_on_execution(
 
     arr = view.to_numpy(contiguous=True)
     assert arr.shape == (72, 84)
-    assert not oa.is_lazy
+    assert oa.is_lazy
     if recwarn:
         assert any(
             "Requested column 'ome_arrow'" in str(w.message) for w in recwarn.list
@@ -133,7 +134,7 @@ def test_lazy_tensor_view_select_preserves_existing_dims() -> None:
 
     arr = view_z.to_numpy(contiguous=True)
     assert arr.shape == (1, 167, 439)
-    assert not oa.is_lazy
+    assert oa.is_lazy
 
 
 def test_lazy_tensor_view_with_layout_defers_materialization() -> None:
@@ -153,7 +154,7 @@ def test_lazy_tensor_view_with_layout_defers_materialization() -> None:
     assert concrete.layout in {"YX", "HW"}
     arr = concrete.to_numpy(contiguous=True)
     assert arr.shape == (72, 84)
-    assert not oa.is_lazy
+    assert oa.is_lazy
 
 
 def test_dlpack_roundtrip_torch(example_correct_data: dict) -> None:
@@ -446,6 +447,77 @@ def test_arrow_mode_zero_copy_parquet_jax(
         pytest.skip("JAX did not preserve zero-copy for pyarrow DLPack.")
 
 
+def test_scan_tensor_view_to_numpy_avoids_python_record_materialization(
+    tmp_path: pathlib.Path,
+    example_correct_data: dict,
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    """Read selected planes from Arrow without building a full Python record."""
+    out = tmp_path / "lazy_arrow_plane.parquet"
+    record = dict(example_correct_data)
+    record["chunk_grid"] = None
+    record["chunks"] = []
+    to_ome_parquet(record, out_path=str(out), column_name="ome_arrow")
+
+    oa = OMEArrow.scan(str(out))
+
+    def _fail_data_py_dict(self: TensorView) -> dict:
+        raise AssertionError("_data_py_dict should not be used for Arrow plane reads")
+
+    monkeypatch.setattr(TensorView, "_data_py_dict", _fail_data_py_dict)
+    arr = oa.tensor_view(t=0, z=0, c=0, layout="YX").to_numpy(contiguous=True)
+
+    assert arr.shape == (3, 4)
+    assert oa.is_lazy
+
+
+def test_scan_chunked_parquet_tensor_view_avoids_python_record_materialization(
+    tmp_path: pathlib.Path,
+    example_correct_data: dict,
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    """Read chunked parquet planes through Arrow path without _data_py_dict."""
+    out = tmp_path / "lazy_chunked.parquet"
+    to_ome_parquet(example_correct_data, out_path=str(out), column_name="ome_arrow")
+
+    oa = OMEArrow.scan(str(out))
+
+    def _fail_data_py_dict(self: TensorView) -> dict:
+        raise AssertionError(
+            "_data_py_dict should not be used for Arrow chunked plane reads"
+        )
+
+    monkeypatch.setattr(TensorView, "_data_py_dict", _fail_data_py_dict)
+    arr = oa.tensor_view(t=0, z=0, c=1, layout="YX").to_numpy(contiguous=True)
+
+    expected = np.array(
+        [[100, 101, 102, 103], [110, 111, 112, 113], [120, 121, 122, 123]],
+        dtype=np.uint16,
+    )
+    np.testing.assert_array_equal(arr, expected)
+    assert oa.is_lazy
+
+
+@pytest.mark.filterwarnings(
+    "ignore:As of version 0.4.0, the parser argument is ignored.*:DeprecationWarning"
+)
+def test_scan_tiff_tensor_view_uses_source_plane_loader(
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    """Execute lazy TIFF tensor views without calling eager from_tiff ingestion."""
+    path = "tests/data/ome-artificial-5d-datasets/single-channel.ome.tiff"
+    oa = OMEArrow.scan(path)
+
+    def _fail_from_tiff(*_args: object, **_kwargs: object) -> None:
+        raise AssertionError("from_tiff should not be used for lazy tensor execution")
+
+    monkeypatch.setattr(core_module, "from_tiff", _fail_from_tiff)
+    arr = oa.tensor_view(t=0, z=0, c=0, layout="YX").to_numpy(contiguous=True)
+
+    assert arr.shape == (167, 439)
+    assert oa.is_lazy
+
+
 def _jax_buffer_ptr(arr: object) -> int:
     """Return a best-effort device pointer for a JAX array.
 

From beff939f405c8f2db9d5ce115327bf04b6464bf6 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci-lite[bot]"
 <117423508+pre-commit-ci-lite[bot]@users.noreply.github.com>
Date: Wed, 25 Feb 2026 17:42:39 +0000
Subject: [PATCH 09/10] [pre-commit.ci lite] apply automatic fixes

---
 benchmarks/benchmark_lazy_tensor.py | 26 ++++++++++++++++----------
 1 file changed, 16 insertions(+), 10 deletions(-)

diff --git a/benchmarks/benchmark_lazy_tensor.py b/benchmarks/benchmark_lazy_tensor.py
index 12aee96..bce17cb 100644
--- a/benchmarks/benchmark_lazy_tensor.py
+++ b/benchmarks/benchmark_lazy_tensor.py
@@ -105,7 +105,7 @@ def _print_results(results: list[BenchmarkResult]) -> None:
     print("-" * 92)
     for r in results:
         print(
-            f"{r.name:38} {r.median_ms:10.2f} {r.min_ms:10.2f} {r.max_ms:10.2f} {str(r.shape):>16}"
+            f"{r.name:38} {r.median_ms:10.2f} {r.min_ms:10.2f} {r.max_ms:10.2f} {r.shape!s:>16}"
         )
 
 
@@ -268,9 +268,11 @@ def main() -> None:
             cases.append(
                 (
                     "scan+tiff -> tensor_view(YX)",
-                    lambda: OMEArrow.scan(str(args.tiff_path))
-                    .tensor_view(t=0, z=0, c=0, layout="YX")
-                    .to_numpy(contiguous=True),
+                    lambda: (
+                        OMEArrow.scan(str(args.tiff_path))
+                        .tensor_view(t=0, z=0, c=0, layout="YX")
+                        .to_numpy(contiguous=True)
+                    ),
                 )
             )
         else:
@@ -280,15 +282,19 @@ def main() -> None:
             [
                 (
                     "scan+parquet(planes) -> tensor_view(YX)",
-                    lambda: OMEArrow.scan(str(planes_path))
-                    .tensor_view(t=0, z=1, c=1, layout="YX")
-                    .to_numpy(contiguous=True),
+                    lambda: (
+                        OMEArrow.scan(str(planes_path))
+                        .tensor_view(t=0, z=1, c=1, layout="YX")
+                        .to_numpy(contiguous=True)
+                    ),
                 ),
                 (
                     "scan+parquet(chunks) -> tensor_view(YX)",
-                    lambda: OMEArrow.scan(str(chunks_path))
-                    .tensor_view(t=0, z=1, c=1, layout="YX")
-                    .to_numpy(contiguous=True),
+                    lambda: (
+                        OMEArrow.scan(str(chunks_path))
+                        .tensor_view(t=0, z=1, c=1, layout="YX")
+                        .to_numpy(contiguous=True)
+                    ),
                 ),
             ]
         )

From 0d0596ccde02dec02b8bfa8b582a496743ebaedb Mon Sep 17 00:00:00 2001
From: d33bs <ekgto445@gmail.com>
Date: Wed, 25 Feb 2026 10:55:10 -0700
Subject: [PATCH 10/10] lint

---
 benchmarks/benchmark_lazy_tensor.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/benchmarks/benchmark_lazy_tensor.py b/benchmarks/benchmark_lazy_tensor.py
index bce17cb..7abcc72 100644
--- a/benchmarks/benchmark_lazy_tensor.py
+++ b/benchmarks/benchmark_lazy_tensor.py
@@ -105,7 +105,8 @@ def _print_results(results: list[BenchmarkResult]) -> None:
     print("-" * 92)
     for r in results:
         print(
-            f"{r.name:38} {r.median_ms:10.2f} {r.min_ms:10.2f} {r.max_ms:10.2f} {r.shape!s:>16}"
+            f"{r.name:38} {r.median_ms:10.2f} "
+            f"{r.min_ms:10.2f} {r.max_ms:10.2f} {r.shape!s:>16}"
         )