From 26e5153574da7b3d3a4e1749c0d960ca7547e9a6 Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Thu, 14 May 2026 20:32:04 -0700 Subject: [PATCH] geotiff: extract GPU helpers to _backends/_gpu_helpers.py (#1884) Step 6 of #1813's multi-PR refactor of __init__.py. Pure code motion; no public API change. First PR to create the _backends/ subpackage. Created xrspatial/geotiff/_backends/__init__.py (empty placeholder) and moved into _backends/_gpu_helpers.py: - _is_gpu_data: cupy-backed array / DataArray detection. - _apply_nodata_mask_gpu: cupy mirror of the CPU sentinel-to-NaN mask (handles float and integer arrays, with the same out-of-range / fractional / non-finite skips as _resolve_masked_fill in _reader.py). - _gpu_decode_single_band_tiles: two-stage GPU decode (GDS then CPU-extracted bytes) for the PlanarConfiguration=2 path. - _apply_orientation_gpu: cupy mirror of the CPU orientation-flip helper for the eight TIFF Orientation tag values. - _apply_orientation_geo_info: GeoTransform update after an orientation flip, with the #1765 refusal for georeferenced orientations 5-8. - _gpu_apply_window_band: post-decode window + band slice on device. Every helper lazy-imports cupy at call time so the module imports cleanly on numpy-only environments. __init__.py re-imports the helpers from _backends._gpu_helpers so existing callers (read_geotiff_gpu, to_geotiff's GPU branch, write_geotiff_gpu) keep their identical signatures. Leading-underscore module name (_gpu_helpers.py) keeps _backends/gpu.py free for the read_geotiff_gpu entry-point body in step 7. Net __init__.py change: 4271 -> 3969 lines (-302). _gpu_helpers.py is 335 lines. Verification: pixel-parity matrix from #1889 (192 cells, all GPU cells included), runtime identity (9), GPU byteswap (#1508), overview nodata inheritance (#1739) -- 230/230 pass. Refs #1813. --- xrspatial/geotiff/__init__.py | 318 +------------------ xrspatial/geotiff/_backends/__init__.py | 8 + xrspatial/geotiff/_backends/_gpu_helpers.py | 335 ++++++++++++++++++++ 3 files changed, 351 insertions(+), 310 deletions(-) create mode 100644 xrspatial/geotiff/_backends/__init__.py create mode 100644 xrspatial/geotiff/_backends/_gpu_helpers.py diff --git a/xrspatial/geotiff/__init__.py b/xrspatial/geotiff/__init__.py index 2d3fa45f..f0b4cde8 100644 --- a/xrspatial/geotiff/__init__.py +++ b/xrspatial/geotiff/__init__.py @@ -68,6 +68,14 @@ _populate_attrs_from_geo_info, _resolve_nodata_attr, ) +from ._backends._gpu_helpers import ( + _apply_nodata_mask_gpu, + _apply_orientation_geo_info, + _apply_orientation_gpu, + _gpu_apply_window_band, + _gpu_decode_single_band_tiles, + _is_gpu_data, +) from ._crs import _resolve_crs_to_wkt, _wkt_to_epsg from ._reader import read_to_array as _read_to_array from ._runtime import ( @@ -503,76 +511,6 @@ def open_geotiff(source: str | BinaryIO, *, return da -def _is_gpu_data(data) -> bool: - """Check if data is CuPy-backed (raw array or DataArray).""" - try: - import cupy - _cupy_type = cupy.ndarray - except ImportError: - return False - - if isinstance(data, xr.DataArray): - raw = data.data - if hasattr(raw, 'compute'): - meta = getattr(raw, '_meta', None) - return isinstance(meta, _cupy_type) - return isinstance(raw, _cupy_type) - return isinstance(data, _cupy_type) - - -def _apply_nodata_mask_gpu(arr_gpu, nodata): - """Replace nodata sentinel values with NaN on a CuPy array. - - Mirrors the CPU eager path in :func:`open_geotiff` (lines around the - ``# Apply nodata mask`` comment). Float arrays get NaN substituted in - place of the sentinel; integer arrays are promoted to float64 with NaN - where the sentinel was. NaN nodata on a float array is a no-op (the - sentinel already matches NaN-aware code). - - Returns the (possibly promoted, possibly nodata-masked) CuPy array. - The caller is responsible for setting ``attrs['nodata']`` so the - sentinel is still discoverable downstream. - """ - import cupy - - if nodata is None: - return arr_gpu - arr_dtype = np.dtype(str(arr_gpu.dtype)) - if arr_dtype.kind == 'f': - if not np.isnan(nodata): - sentinel = arr_dtype.type(nodata) - arr_gpu = cupy.where(arr_gpu == sentinel, - arr_dtype.type('nan'), arr_gpu) - return arr_gpu - if arr_dtype.kind in ('u', 'i'): - # Out-of-range sentinels (e.g. uint16 + GDAL_NODATA="-9999") cannot - # match any decoded pixel; skip the cast that would otherwise raise - # OverflowError. A non-finite sentinel ("NaN" / "Inf" GDAL_NODATA - # strings) also cannot match an integer pixel and would raise - # ValueError on ``int(nodata)``; gate on ``np.isfinite`` first to - # mirror ``_resolve_masked_fill`` in ``_reader.py`` (#1774). A - # fractional sentinel (e.g. ``"3.5"`` on a ``uint16`` file) also - # cannot match an integer pixel and ``int(3.5)`` would truncate - # to 3, silently masking a real pixel value; gate on - # ``float(nodata).is_integer()`` as well (mirrors the - # ``_writer.py`` / ``_vrt.py`` pattern used for #1564 / #1616). - # attrs['nodata'] is still set by the caller so the original - # sentinel survives a write round-trip. - if not (np.isfinite(nodata) and float(nodata).is_integer()): - return arr_gpu - nodata_int = int(nodata) - info = np.iinfo(arr_dtype) - if not (info.min <= nodata_int <= info.max): - return arr_gpu - sentinel = arr_dtype.type(nodata_int) - mask = arr_gpu == sentinel - if bool(mask.any().item()): - arr_gpu = arr_gpu.astype(cupy.float64) - arr_gpu = cupy.where(mask, cupy.float64('nan'), arr_gpu) - return arr_gpu - return arr_gpu - - def to_geotiff(data: xr.DataArray | np.ndarray, path: str | BinaryIO, *, crs: int | str | None = None, @@ -1849,246 +1787,6 @@ def _read(http_meta): return _read(http_meta_key) -def _gpu_decode_single_band_tiles( - source, lazy_data, offsets, byte_counts, - tw, th, width, height, - compression, predictor, file_dtype, - *, - byte_order: str, - gpu: str, -): - """Decode one band's tile sequence into a 2-D ``(H, W)`` cupy array. - - Helper for the ``PlanarConfiguration=2`` GPU path: the existing - ``gpu_decode_tiles`` / ``gpu_decode_tiles_from_file`` kernels assume - a single chunky tile sequence with ``bytes_per_pixel = itemsize * - samples``. For planar=2 each band has its own list of tiles, so we - invoke those kernels once per band with ``samples=1`` and stack the - resulting 2-D arrays into ``(H, W, samples)`` afterwards. - - Mirrors the two-stage GPU pipeline in ``read_geotiff_gpu`` -- GDS - first, then CPU-extracted-tiles GPU decode. ``lazy_data`` is a - zero-arg callable that returns the full file bytes; it caches its - result so the first band that needs the stage-2 fallback pays the - ``read_all()``, and subsequent bands reuse the same buffer. When - every band's GDS path succeeds the file is never read at all. - Sparse tiles are not expected here; the caller routes sparse files - to the CPU path. - - Auto-mode semantics: a stage-1 GDS failure warns and falls through - to stage 2; a stage-2 failure warns and returns ``None`` so the - caller can trigger a whole-image CPU fallback (a per-band CPU - decode would require a single-band CPU path keyed off planar - config, which doesn't exist). Strict mode re-raises from either - stage. - """ - from ._gpu_decode import gpu_decode_tiles, gpu_decode_tiles_from_file - - arr_gpu = None - try: - arr_gpu = gpu_decode_tiles_from_file( - source, offsets, byte_counts, - tw, th, width, height, - compression, predictor, file_dtype, 1, - byte_order=byte_order, - ) - except Exception as e: - if gpu == 'strict' or _geotiff_strict_mode(): - raise - warnings.warn( - f"read_geotiff_gpu: GPU decode failed " - f"({type(e).__name__}: {e}); falling back to CPU.", - RuntimeWarning, - stacklevel=3, - ) - arr_gpu = None - - if arr_gpu is None: - shared_data = lazy_data() - compressed_tiles = [ - bytes(shared_data[offsets[i]:offsets[i] + byte_counts[i]]) - for i in range(len(offsets)) - ] - try: - arr_gpu = gpu_decode_tiles( - compressed_tiles, - tw, th, width, height, - compression, predictor, file_dtype, 1, - byte_order=byte_order, - ) - except Exception as e: - if gpu == 'strict' or _geotiff_strict_mode(): - raise - warnings.warn( - f"read_geotiff_gpu: GPU decode failed " - f"({type(e).__name__}: {e}); falling back to CPU.", - RuntimeWarning, - stacklevel=3, - ) - return None - - if arr_gpu.ndim != 2 or arr_gpu.shape != (height, width): - raise RuntimeError( - f"single-band GPU tile decode produced shape " - f"{arr_gpu.shape}, expected ({height}, {width})" - ) - return arr_gpu - - -def _apply_orientation_gpu(arr_gpu, orientation: int): - """cupy-side mirror of :func:`._reader._apply_orientation`. - - The CPU reader applies the TIFF Orientation tag (274) post-decode so - pixel (0, 0) is always the visual top-left. The GPU read path used - to skip this remap, so reads of any file with orientation != 1 - returned different pixel buffers than the CPU reader (#1540). - - Same eight orientations the CPU helper handles. Operates on a cupy - ndarray and returns a cupy ndarray; ``cupy.ascontiguousarray`` is - applied so downstream views (DataArray.data) work without surprise - re-strides on the GPU. - """ - import cupy - if orientation == 1: - return arr_gpu - if orientation == 2: - return cupy.ascontiguousarray(arr_gpu[:, ::-1]) - if orientation == 3: - return cupy.ascontiguousarray(arr_gpu[::-1, ::-1]) - if orientation == 4: - return cupy.ascontiguousarray(arr_gpu[::-1, :]) - if arr_gpu.ndim == 3: - if orientation == 5: - return cupy.ascontiguousarray(arr_gpu.transpose(1, 0, 2)) - if orientation == 6: - return cupy.ascontiguousarray(arr_gpu.transpose(1, 0, 2)[:, ::-1]) - if orientation == 7: - return cupy.ascontiguousarray( - arr_gpu.transpose(1, 0, 2)[::-1, ::-1]) - if orientation == 8: - return cupy.ascontiguousarray(arr_gpu.transpose(1, 0, 2)[::-1, :]) - else: - if orientation == 5: - return cupy.ascontiguousarray(arr_gpu.T) - if orientation == 6: - return cupy.ascontiguousarray(arr_gpu.T[:, ::-1]) - if orientation == 7: - return cupy.ascontiguousarray(arr_gpu.T[::-1, ::-1]) - if orientation == 8: - return cupy.ascontiguousarray(arr_gpu.T[::-1, :]) - raise ValueError( - f"Invalid TIFF Orientation tag value: {orientation} " - f"(must be 1-8 per TIFF 6.0)" - ) - - -def _apply_orientation_geo_info(geo_info, orientation: int, - file_h: int, file_w: int): - """Mirror the transform updates `_reader.read_to_array` does post-flip. - - Centralised so both ``read_to_array`` (CPU) and ``read_geotiff_gpu`` - (this module) update the GeoTransform consistently. Operates only - on ``geo_info.transform``; the rest of the GeoInfo struct stays as - parsed. - """ - if orientation == 1 or geo_info is None or geo_info.transform is None: - return geo_info - t = geo_info.transform - if orientation in (2, 3, 4): - if geo_info.raster_type == RASTER_PIXEL_IS_POINT: - x_shift = file_w - 1 - y_shift = file_h - 1 - else: - x_shift = file_w - y_shift = file_h - new_origin_x = t.origin_x - new_origin_y = t.origin_y - new_px_w = t.pixel_width - new_px_h = t.pixel_height - if orientation in (2, 3): - new_origin_x = t.origin_x + x_shift * t.pixel_width - new_px_w = -t.pixel_width - if orientation in (3, 4): - new_origin_y = t.origin_y + y_shift * t.pixel_height - new_px_h = -t.pixel_height - geo_info.transform = GeoTransform( - origin_x=new_origin_x, - origin_y=new_origin_y, - pixel_width=new_px_w, - pixel_height=new_px_h, - ) - elif orientation in (5, 6, 7, 8): - # Match the CPU reader's #1765 refusal: a pixel-size swap alone - # cannot express the per-orientation origin shift plus rotation - # these orientations require, so the x/y coords would be wrong. - # ``has_georef`` is True for any file carrying ModelTransformation, - # ModelPixelScale, or ModelTiepoint, with or without a CRS tag, so - # gate on that flag rather than CRS presence. - if getattr(geo_info, 'has_georef', False): - raise NotImplementedError( - f"TIFF Orientation {orientation} on a georeferenced file " - f"requires a per-orientation origin shift plus a rotation " - f"that the axis-aligned GeoTransform used here cannot " - f"represent, so the returned x/y coords would be wrong. " - f"Reproject the file with another tool (e.g. GDAL) or " - f"strip the Orientation tag before reading. See issue " - f"#1765." - ) - # Non-georeferenced file: swap pixel sizes to match the - # transposed array shape. No geographic claim to violate. - geo_info.transform = GeoTransform( - origin_x=t.origin_x, - origin_y=t.origin_y, - pixel_width=t.pixel_height, - pixel_height=t.pixel_width, - ) - return geo_info - - -def _gpu_apply_window_band(arr_gpu, geo_info, *, window, band): - """Slice a fully-decoded GPU array down to a window and/or band. - - Used by ``read_geotiff_gpu`` to keep the public surface in line with - ``open_geotiff`` and ``read_geotiff_dask``: callers can pass ``window`` - and ``band``, and the returned DataArray covers exactly that subset. - - The current implementation slices on device after the full-image GPU - decode is complete. That preserves correctness but does no I/O - savings -- a future PR can short-circuit tile decode for partial - windows. For ``band`` selection, the savings are also post-decode - because the planar=1 (chunky) tile assembly returns all bands in a - single GPU buffer. - - Returns ``(arr_gpu, coords)`` where ``coords`` is a dict with - ``y`` / ``x`` numpy arrays sized to the output array. The caller is - responsible for setting ``attrs['transform']`` to the windowed origin - via ``_populate_attrs_from_geo_info(..., window=window)`` so the array - and the transform agree. - """ - if window is not None: - r0, c0, r1, c1 = window - arr_gpu = arr_gpu[r0:r1, c0:c1] - out_h = r1 - r0 - out_w = c1 - c0 - # Mirror the eager-numpy windowed coord computation in - # open_geotiff so the GPU-windowed coords carry the same - # absolute pixel-center values as the CPU path. For files - # with no GeoTIFF tags (``has_georef=False``), fall back to - # integer pixel coords matching ``_geo_to_coords`` (#1710). - coords = _coords_from_geo_info( - geo_info, out_h, out_w, window=window, - ) - else: - out_h = arr_gpu.shape[0] - out_w = arr_gpu.shape[1] - coords = _geo_to_coords(geo_info, out_h, out_w) - - if band is not None and arr_gpu.ndim == 3: - arr_gpu = arr_gpu[:, :, band] - - return arr_gpu, coords - - def read_geotiff_gpu(source: str, *, dtype: str | np.dtype | None = None, overview_level: int | None = None, diff --git a/xrspatial/geotiff/_backends/__init__.py b/xrspatial/geotiff/_backends/__init__.py new file mode 100644 index 00000000..a26615d5 --- /dev/null +++ b/xrspatial/geotiff/_backends/__init__.py @@ -0,0 +1,8 @@ +"""Per-backend implementations for the geotiff read entry points. + +Step 6 of issue #1813 creates this subpackage and adds +``_gpu_helpers.py``. Later steps add ``gpu.py``, ``dask.py``, and +``vrt.py`` for the backend entry-point bodies. The package +``__init__`` stays empty so nothing leaks into ``xrspatial.geotiff`` +through implicit re-exports. +""" diff --git a/xrspatial/geotiff/_backends/_gpu_helpers.py b/xrspatial/geotiff/_backends/_gpu_helpers.py new file mode 100644 index 00000000..49399c40 --- /dev/null +++ b/xrspatial/geotiff/_backends/_gpu_helpers.py @@ -0,0 +1,335 @@ +"""GPU-only helpers consumed by ``read_geotiff_gpu`` and the GPU write path. + +Lazy-imports cupy at call time so the module imports cleanly on +numpy-only environments. Every public entry point that touches GPU +data routes through one of these helpers, so a single canonical copy +keeps nodata-mask, orientation-flip, and window/band-slice semantics +in lockstep with the CPU reader. + +Extracted in step 6 of issue #1813 so the next PR (step 7) can move +``read_geotiff_gpu`` itself into ``_backends/gpu.py`` as a near- +mechanical lift. +""" +from __future__ import annotations + +import warnings + +import numpy as np +import xarray as xr + +from .._coords import ( + coords_from_geo_info as _coords_from_geo_info, + geo_to_coords as _geo_to_coords, +) +from .._geotags import GeoTransform, RASTER_PIXEL_IS_POINT +from .._runtime import _geotiff_strict_mode + + +def _is_gpu_data(data) -> bool: + """Check if data is CuPy-backed (raw array or DataArray).""" + try: + import cupy + _cupy_type = cupy.ndarray + except ImportError: + return False + + if isinstance(data, xr.DataArray): + raw = data.data + if hasattr(raw, 'compute'): + meta = getattr(raw, '_meta', None) + return isinstance(meta, _cupy_type) + return isinstance(raw, _cupy_type) + return isinstance(data, _cupy_type) + + +def _apply_nodata_mask_gpu(arr_gpu, nodata): + """Replace nodata sentinel values with NaN on a CuPy array. + + Mirrors the CPU eager path in :func:`open_geotiff` (lines around the + ``# Apply nodata mask`` comment). Float arrays get NaN substituted in + place of the sentinel; integer arrays are promoted to float64 with NaN + where the sentinel was. NaN nodata on a float array is a no-op (the + sentinel already matches NaN-aware code). + + Returns the (possibly promoted, possibly nodata-masked) CuPy array. + The caller is responsible for setting ``attrs['nodata']`` so the + sentinel is still discoverable downstream. + """ + import cupy + + if nodata is None: + return arr_gpu + arr_dtype = np.dtype(str(arr_gpu.dtype)) + if arr_dtype.kind == 'f': + if not np.isnan(nodata): + sentinel = arr_dtype.type(nodata) + arr_gpu = cupy.where(arr_gpu == sentinel, + arr_dtype.type('nan'), arr_gpu) + return arr_gpu + if arr_dtype.kind in ('u', 'i'): + # Out-of-range sentinels (e.g. uint16 + GDAL_NODATA="-9999") cannot + # match any decoded pixel; skip the cast that would otherwise raise + # OverflowError. A non-finite sentinel ("NaN" / "Inf" GDAL_NODATA + # strings) also cannot match an integer pixel and would raise + # ValueError on ``int(nodata)``; gate on ``np.isfinite`` first to + # mirror ``_resolve_masked_fill`` in ``_reader.py`` (#1774). A + # fractional sentinel (e.g. ``"3.5"`` on a ``uint16`` file) also + # cannot match an integer pixel and ``int(3.5)`` would truncate + # to 3, silently masking a real pixel value; gate on + # ``float(nodata).is_integer()`` as well (mirrors the + # ``_writer.py`` / ``_vrt.py`` pattern used for #1564 / #1616). + # attrs['nodata'] is still set by the caller so the original + # sentinel survives a write round-trip. + if not (np.isfinite(nodata) and float(nodata).is_integer()): + return arr_gpu + nodata_int = int(nodata) + info = np.iinfo(arr_dtype) + if not (info.min <= nodata_int <= info.max): + return arr_gpu + sentinel = arr_dtype.type(nodata_int) + mask = arr_gpu == sentinel + if bool(mask.any().item()): + arr_gpu = arr_gpu.astype(cupy.float64) + arr_gpu = cupy.where(mask, cupy.float64('nan'), arr_gpu) + return arr_gpu + return arr_gpu + + +def _gpu_decode_single_band_tiles( + source, lazy_data, offsets, byte_counts, + tw, th, width, height, + compression, predictor, file_dtype, + *, + byte_order: str, + gpu: str, +): + """Decode one band's tile sequence into a 2-D ``(H, W)`` cupy array. + + Helper for the ``PlanarConfiguration=2`` GPU path: the existing + ``gpu_decode_tiles`` / ``gpu_decode_tiles_from_file`` kernels assume + a single chunky tile sequence with ``bytes_per_pixel = itemsize * + samples``. For planar=2 each band has its own list of tiles, so we + invoke those kernels once per band with ``samples=1`` and stack the + resulting 2-D arrays into ``(H, W, samples)`` afterwards. + + Mirrors the two-stage GPU pipeline in ``read_geotiff_gpu`` -- GDS + first, then CPU-extracted-tiles GPU decode. ``lazy_data`` is a + zero-arg callable that returns the full file bytes; it caches its + result so the first band that needs the stage-2 fallback pays the + ``read_all()``, and subsequent bands reuse the same buffer. When + every band's GDS path succeeds the file is never read at all. + Sparse tiles are not expected here; the caller routes sparse files + to the CPU path. + + Auto-mode semantics: a stage-1 GDS failure warns and falls through + to stage 2; a stage-2 failure warns and returns ``None`` so the + caller can trigger a whole-image CPU fallback (a per-band CPU + decode would require a single-band CPU path keyed off planar + config, which doesn't exist). Strict mode re-raises from either + stage. + """ + from .._gpu_decode import gpu_decode_tiles, gpu_decode_tiles_from_file + + arr_gpu = None + try: + arr_gpu = gpu_decode_tiles_from_file( + source, offsets, byte_counts, + tw, th, width, height, + compression, predictor, file_dtype, 1, + byte_order=byte_order, + ) + except Exception as e: + if gpu == 'strict' or _geotiff_strict_mode(): + raise + warnings.warn( + f"read_geotiff_gpu: GPU decode failed " + f"({type(e).__name__}: {e}); falling back to CPU.", + RuntimeWarning, + stacklevel=3, + ) + arr_gpu = None + + if arr_gpu is None: + shared_data = lazy_data() + compressed_tiles = [ + bytes(shared_data[offsets[i]:offsets[i] + byte_counts[i]]) + for i in range(len(offsets)) + ] + try: + arr_gpu = gpu_decode_tiles( + compressed_tiles, + tw, th, width, height, + compression, predictor, file_dtype, 1, + byte_order=byte_order, + ) + except Exception as e: + if gpu == 'strict' or _geotiff_strict_mode(): + raise + warnings.warn( + f"read_geotiff_gpu: GPU decode failed " + f"({type(e).__name__}: {e}); falling back to CPU.", + RuntimeWarning, + stacklevel=3, + ) + return None + + if arr_gpu.ndim != 2 or arr_gpu.shape != (height, width): + raise RuntimeError( + f"single-band GPU tile decode produced shape " + f"{arr_gpu.shape}, expected ({height}, {width})" + ) + return arr_gpu + + +def _apply_orientation_gpu(arr_gpu, orientation: int): + """cupy-side mirror of :func:`._reader._apply_orientation`. + + The CPU reader applies the TIFF Orientation tag (274) post-decode so + pixel (0, 0) is always the visual top-left. The GPU read path used + to skip this remap, so reads of any file with orientation != 1 + returned different pixel buffers than the CPU reader (#1540). + + Same eight orientations the CPU helper handles. Operates on a cupy + ndarray and returns a cupy ndarray; ``cupy.ascontiguousarray`` is + applied so downstream views (DataArray.data) work without surprise + re-strides on the GPU. + """ + import cupy + if orientation == 1: + return arr_gpu + if orientation == 2: + return cupy.ascontiguousarray(arr_gpu[:, ::-1]) + if orientation == 3: + return cupy.ascontiguousarray(arr_gpu[::-1, ::-1]) + if orientation == 4: + return cupy.ascontiguousarray(arr_gpu[::-1, :]) + if arr_gpu.ndim == 3: + if orientation == 5: + return cupy.ascontiguousarray(arr_gpu.transpose(1, 0, 2)) + if orientation == 6: + return cupy.ascontiguousarray(arr_gpu.transpose(1, 0, 2)[:, ::-1]) + if orientation == 7: + return cupy.ascontiguousarray( + arr_gpu.transpose(1, 0, 2)[::-1, ::-1]) + if orientation == 8: + return cupy.ascontiguousarray(arr_gpu.transpose(1, 0, 2)[::-1, :]) + else: + if orientation == 5: + return cupy.ascontiguousarray(arr_gpu.T) + if orientation == 6: + return cupy.ascontiguousarray(arr_gpu.T[:, ::-1]) + if orientation == 7: + return cupy.ascontiguousarray(arr_gpu.T[::-1, ::-1]) + if orientation == 8: + return cupy.ascontiguousarray(arr_gpu.T[::-1, :]) + raise ValueError( + f"Invalid TIFF Orientation tag value: {orientation} " + f"(must be 1-8 per TIFF 6.0)" + ) + + +def _apply_orientation_geo_info(geo_info, orientation: int, + file_h: int, file_w: int): + """Mirror the transform updates `_reader.read_to_array` does post-flip. + + Centralised so both ``read_to_array`` (CPU) and ``read_geotiff_gpu`` + (this module) update the GeoTransform consistently. Operates only + on ``geo_info.transform``; the rest of the GeoInfo struct stays as + parsed. + """ + if orientation == 1 or geo_info is None or geo_info.transform is None: + return geo_info + t = geo_info.transform + if orientation in (2, 3, 4): + if geo_info.raster_type == RASTER_PIXEL_IS_POINT: + x_shift = file_w - 1 + y_shift = file_h - 1 + else: + x_shift = file_w + y_shift = file_h + new_origin_x = t.origin_x + new_origin_y = t.origin_y + new_px_w = t.pixel_width + new_px_h = t.pixel_height + if orientation in (2, 3): + new_origin_x = t.origin_x + x_shift * t.pixel_width + new_px_w = -t.pixel_width + if orientation in (3, 4): + new_origin_y = t.origin_y + y_shift * t.pixel_height + new_px_h = -t.pixel_height + geo_info.transform = GeoTransform( + origin_x=new_origin_x, + origin_y=new_origin_y, + pixel_width=new_px_w, + pixel_height=new_px_h, + ) + elif orientation in (5, 6, 7, 8): + # Match the CPU reader's #1765 refusal: a pixel-size swap alone + # cannot express the per-orientation origin shift plus rotation + # these orientations require, so the x/y coords would be wrong. + # ``has_georef`` is True for any file carrying ModelTransformation, + # ModelPixelScale, or ModelTiepoint, with or without a CRS tag, so + # gate on that flag rather than CRS presence. + if getattr(geo_info, 'has_georef', False): + raise NotImplementedError( + f"TIFF Orientation {orientation} on a georeferenced file " + f"requires a per-orientation origin shift plus a rotation " + f"that the axis-aligned GeoTransform used here cannot " + f"represent, so the returned x/y coords would be wrong. " + f"Reproject the file with another tool (e.g. GDAL) or " + f"strip the Orientation tag before reading. See issue " + f"#1765." + ) + # Non-georeferenced file: swap pixel sizes to match the + # transposed array shape. No geographic claim to violate. + geo_info.transform = GeoTransform( + origin_x=t.origin_x, + origin_y=t.origin_y, + pixel_width=t.pixel_height, + pixel_height=t.pixel_width, + ) + return geo_info + + +def _gpu_apply_window_band(arr_gpu, geo_info, *, window, band): + """Slice a fully-decoded GPU array down to a window and/or band. + + Used by ``read_geotiff_gpu`` to keep the public surface in line with + ``open_geotiff`` and ``read_geotiff_dask``: callers can pass ``window`` + and ``band``, and the returned DataArray covers exactly that subset. + + The current implementation slices on device after the full-image GPU + decode is complete. That preserves correctness but does no I/O + savings -- a future PR can short-circuit tile decode for partial + windows. For ``band`` selection, the savings are also post-decode + because the planar=1 (chunky) tile assembly returns all bands in a + single GPU buffer. + + Returns ``(arr_gpu, coords)`` where ``coords`` is a dict with + ``y`` / ``x`` numpy arrays sized to the output array. The caller is + responsible for setting ``attrs['transform']`` to the windowed origin + via ``_populate_attrs_from_geo_info(..., window=window)`` so the array + and the transform agree. + """ + if window is not None: + r0, c0, r1, c1 = window + arr_gpu = arr_gpu[r0:r1, c0:c1] + out_h = r1 - r0 + out_w = c1 - c0 + # Mirror the eager-numpy windowed coord computation in + # open_geotiff so the GPU-windowed coords carry the same + # absolute pixel-center values as the CPU path. For files + # with no GeoTIFF tags (``has_georef=False``), fall back to + # integer pixel coords matching ``_geo_to_coords`` (#1710). + coords = _coords_from_geo_info( + geo_info, out_h, out_w, window=window, + ) + else: + out_h = arr_gpu.shape[0] + out_w = arr_gpu.shape[1] + coords = _geo_to_coords(geo_info, out_h, out_w) + + if band is not None and arr_gpu.ndim == 3: + arr_gpu = arr_gpu[:, :, band] + + return arr_gpu, coords