Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
64 changes: 52 additions & 12 deletions xrspatial/geotiff/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3326,7 +3326,8 @@ def write_geotiff_gpu(data: xr.DataArray | cupy.ndarray | np.ndarray,
bigtiff: bool | None = None,
max_z_error: float = 0.0,
streaming_buffer_bytes: int = 256 * 1024 * 1024,
photometric: str | int = 'auto') -> None:
photometric: str | int = 'auto',
allow_internal_only_jpeg: bool = False) -> None:
"""Write a CuPy-backed DataArray as a GeoTIFF with GPU compression.

Tiles are extracted and compressed on the GPU via nvCOMP, then
Expand Down Expand Up @@ -3371,17 +3372,16 @@ def write_geotiff_gpu(data: xr.DataArray | cupy.ndarray | np.ndarray,
- ``'zstd'`` (default) and ``'deflate'``: nvCOMP batch
compression on the GPU -- the fastest paths and the reason to
use this entry point.
- ``'jpeg'``: nvJPEG when libnvjpeg is loadable, Pillow
otherwise. Note that ``to_geotiff`` rejects
``compression='jpeg'`` at runtime because its CPU encoder
omits the required TIFF JPEGTables tag (347); this GPU entry
point instead emits self-contained JFIF tiles. The two
writers therefore disagree about JPEG-in-TIFF interop. Files
produced here decode through this library's own reader but
may not round-trip through GDAL, rasterio, or libtiff
readers that require the JPEGTables tag. Treat the JPEG path
as experimental and internal-reader-only until the
JPEGTables fix lands.
- ``'jpeg'``: rejected by default for parity with
``to_geotiff``. Both writers emit self-contained JFIF tiles
and skip the required TIFF JPEGTables tag (347), so the
resulting files are unreadable by libtiff, GDAL, and
rasterio. Pass ``allow_internal_only_jpeg=True`` to opt in
to the experimental encode path; the writer then routes to
nvJPEG when libnvjpeg is loadable and falls back to Pillow
otherwise, and emits a ``GeoTIFFFallbackWarning`` so the
caller knows the output will not round-trip through external
readers. See issue #1845.
- ``'jpeg2000'`` and ``'j2k'``: nvJPEG2K GPU encode when
available, glymur CPU encode otherwise. The two paths are
not byte-for-byte identical (different libraries, different
Expand Down Expand Up @@ -3444,6 +3444,15 @@ def write_geotiff_gpu(data: xr.DataArray | cupy.ndarray | np.ndarray,
GPU writer forwards this kwarg unchanged. Default ``'auto'``
writes MinIsBlack for any band count, so a 4-band raster is
not silently tagged as RGB+alpha (issue #1769).
allow_internal_only_jpeg : bool
Opt in to the experimental ``compression='jpeg'`` encode path
(default ``False``). The encoder emits self-contained JFIF
tiles without the TIFF JPEGTables tag (347); the file decodes
through this library's reader but not through libtiff, GDAL,
or rasterio. With the flag set, the write proceeds and a
``GeoTIFFFallbackWarning`` is emitted at call time. Without
the flag, ``compression='jpeg'`` raises ``ValueError`` for
parity with ``to_geotiff``. See issue #1845.

Raises
------
Expand All @@ -3461,6 +3470,37 @@ def write_geotiff_gpu(data: xr.DataArray | cupy.ndarray | np.ndarray,
"compression is tile-based; the strip layout is not "
"implemented on the GPU path. Use to_geotiff(..., gpu=False, "
"tiled=False) for strip output on CPU.")
# JPEG-in-TIFF parity with to_geotiff (issue #1845). The GPU encode
# path writes self-contained JFIF tiles without the TIFF JPEGTables
# tag (347), matching the broken CPU encoder. ``to_geotiff`` refuses
# the codec outright; this writer offered no rejection at all, so
# callers could produce GeoTIFFs that decoded through xrspatial but
# broke in libtiff/GDAL/rasterio. Reject by default with the same
# wording so both entry points agree, and surface an opt-in flag for
# users who explicitly want the internal-only path.
if (isinstance(compression, str)
and compression.lower() == 'jpeg'
and not allow_internal_only_jpeg):
raise ValueError(
"compression='jpeg' is not supported: the encoder writes "
"self-contained JFIF streams without the required "
"JPEGTables tag (347), so other readers (libtiff, GDAL, "
"rasterio) reject the file. Use 'deflate', 'zstd', or "
"'lzw' instead. Pass allow_internal_only_jpeg=True to opt "
"in to the experimental internal-reader-only path (issue "
"#1845).")
if (isinstance(compression, str)
and compression.lower() == 'jpeg'
and allow_internal_only_jpeg):
warnings.warn(
"write_geotiff_gpu(compression='jpeg', "
"allow_internal_only_jpeg=True) writes JFIF tiles without "
"the TIFF JPEGTables tag (347); the file decodes through "
"xrspatial but may fail in libtiff, GDAL, or rasterio. "
"See issue #1845.",
GeoTIFFFallbackWarning,
stacklevel=2,
)
# MinIsWhite pre-inversion (issue #1836) runs in the eager CPU writer.
# The GPU writer assembles tile bytes directly on device; threading
# the pixel + nodata-sentinel transform through that pipeline is out
Expand Down
151 changes: 151 additions & 0 deletions xrspatial/geotiff/tests/test_gpu_jpeg_interop_reject_issue_D_1845.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,151 @@
"""Issue #1845: ``write_geotiff_gpu`` must reject ``compression='jpeg'``
by default.

Background
----------
``to_geotiff`` raises a ``ValueError`` for ``compression='jpeg'`` because
the encoder writes self-contained JFIF tiles without the TIFF JPEGTables
tag (347); the resulting files are unreadable by libtiff, GDAL, and
rasterio. The GPU writer sat in the same module and silently accepted
the same kwarg, producing the same broken format. The fix introduces an
``allow_internal_only_jpeg`` opt-in: callers who want the experimental
internal-reader-only path must ask for it explicitly, and they get a
``GeoTIFFFallbackWarning`` reminding them the file will not round-trip
through external readers.

These tests pin the rejection (CPU-only, no CUDA required) and the
opt-in warning behaviour. The internal-only encode path itself is
covered by the updated tests in
``test_gpu_writer_compression_modes_2026_05_11.py`` which run only when
CUDA is present.
"""
from __future__ import annotations

import importlib.util

import numpy as np
import pytest
import xarray as xr

from xrspatial.geotiff import GeoTIFFFallbackWarning, write_geotiff_gpu


def _gpu_available() -> bool:
if importlib.util.find_spec("cupy") is None:
return False
try:
import cupy
return bool(cupy.cuda.is_available())
except Exception:
return False


_HAS_GPU = _gpu_available()


def _make_rgb_uint8_da() -> xr.DataArray:
"""64x64x3 uint8 RGB raster suitable for the JPEG encode path."""
rng = np.random.RandomState(0)
arr = rng.randint(0, 256, size=(64, 64, 3), dtype=np.uint8)
return xr.DataArray(
arr,
dims=("y", "x", "band"),
coords={
"y": np.arange(64, dtype=np.float64),
"x": np.arange(64, dtype=np.float64),
"band": np.array([1, 2, 3], dtype=np.int32),
},
)


def test_write_geotiff_gpu_rejects_jpeg_without_opt_in(tmp_path):
"""``compression='jpeg'`` without the opt-in raises ``ValueError``.

Mirrors the ``to_geotiff`` rejection so both writers in the same
module agree about JPEG-in-TIFF interop. The check runs before any
GPU work, so the test does not need CUDA.
"""
da = _make_rgb_uint8_da()
path = str(tmp_path / "rejected_issue_D_1845.tif")

with pytest.raises(ValueError, match="JPEGTables"):
write_geotiff_gpu(da, path, compression='jpeg')


def test_write_geotiff_gpu_rejects_jpeg_message_mentions_alternatives(tmp_path):
"""The rejection error mentions the same alternative codecs that
``to_geotiff`` recommends, so callers landing on either entry point
learn what to switch to."""
da = _make_rgb_uint8_da()
path = str(tmp_path / "rejected_msg_issue_D_1845.tif")

with pytest.raises(ValueError) as exc:
write_geotiff_gpu(da, path, compression='jpeg')

# The shared message wording from to_geotiff. If the two writers
# drift apart, callers reading the error get inconsistent advice.
msg = str(exc.value)
assert "deflate" in msg
assert "zstd" in msg


def test_write_geotiff_gpu_rejects_jpeg_case_insensitive(tmp_path):
"""Upper-case ``compression='JPEG'`` is rejected too.

``to_geotiff`` lower-cases the compression string before comparing,
so the GPU writer must follow the same rule -- otherwise a caller
who types ``'JPEG'`` slips past the gate."""
da = _make_rgb_uint8_da()
path = str(tmp_path / "rejected_upper_issue_D_1845.tif")

with pytest.raises(ValueError, match="JPEGTables"):
write_geotiff_gpu(da, path, compression='JPEG')


@pytest.mark.skipif(not _HAS_GPU, reason="cupy + CUDA required")
def test_write_geotiff_gpu_jpeg_opt_in_emits_warning(tmp_path):
"""Setting ``allow_internal_only_jpeg=True`` proceeds with the JPEG
encode and emits ``GeoTIFFFallbackWarning``.

The warning is the only signal the caller gets that their file may
not round-trip through GDAL or rasterio, so a missing warning here
would let the footgun back in.
"""
da = _make_rgb_uint8_da()
path = str(tmp_path / "opt_in_issue_D_1845.tif")

with pytest.warns(GeoTIFFFallbackWarning, match="JPEGTables"):
write_geotiff_gpu(
da, path,
compression='jpeg',
allow_internal_only_jpeg=True,
)

# File was actually written, not just warned-about.
import os
assert os.path.exists(path)
assert os.path.getsize(path) > 0


def test_write_geotiff_gpu_non_jpeg_unaffected_by_flag(tmp_path):
"""Setting ``allow_internal_only_jpeg=True`` on a non-JPEG codec is a
no-op (no warning, no error).

The flag is JPEG-specific; other codecs must not pay any cost for
it being present in the signature."""
pytest.importorskip("cupy")
if not _HAS_GPU:
pytest.skip("cupy + CUDA required")

da = _make_rgb_uint8_da()
path = str(tmp_path / "non_jpeg_flag_issue_D_1845.tif")

import warnings as _warnings
with _warnings.catch_warnings():
_warnings.simplefilter("error", GeoTIFFFallbackWarning)
# Should not raise (no JPEG-related warning fires).
write_geotiff_gpu(
da, path,
compression='zstd',
allow_internal_only_jpeg=True,
)
Original file line number Diff line number Diff line change
Expand Up @@ -228,7 +228,13 @@ def test_write_geotiff_gpu_jpeg_rgb_roundtrip(tmp_path):
da, arr = _make_rgb_uint8_da()
path = str(tmp_path / "jpeg_rgb.tif")

write_geotiff_gpu(da, path, compression='jpeg')
# Issue #1845: the JPEG encode path is opt-in. The writer also
# emits a GeoTIFFFallbackWarning, which is the documented contract.
with pytest.warns(Warning):
write_geotiff_gpu(
da, path, compression='jpeg',
allow_internal_only_jpeg=True,
)

out = open_geotiff(path)
assert out.shape == arr.shape
Expand All @@ -251,7 +257,12 @@ def test_write_geotiff_gpu_jpeg_uint8_single_band_roundtrip(tmp_path):
da, arr = _make_mono_uint8_da()
path = str(tmp_path / "jpeg_mono.tif")

write_geotiff_gpu(da, path, compression='jpeg')
# Issue #1845: opt-in flag required; warning fires.
with pytest.warns(Warning):
write_geotiff_gpu(
da, path, compression='jpeg',
allow_internal_only_jpeg=True,
)

out = open_geotiff(path)
assert out.shape == arr.shape
Expand All @@ -278,7 +289,12 @@ def test_write_geotiff_gpu_jpeg_uses_nvjpeg_when_available(tmp_path,
da, _ = _make_rgb_uint8_da()
path = str(tmp_path / "jpeg_nvjpeg_spy.tif")

write_geotiff_gpu(da, path, compression='jpeg')
# Issue #1845: opt-in flag required; warning fires.
with pytest.warns(Warning):
write_geotiff_gpu(
da, path, compression='jpeg',
allow_internal_only_jpeg=True,
)

assert spy.calls >= 1, (
"libnvjpeg is loadable but _nvjpeg_batch_encode was never called; "
Expand Down Expand Up @@ -317,7 +333,12 @@ def test_write_geotiff_gpu_jpeg_compression_tag(tmp_path):
da, _ = _make_rgb_uint8_da()
path = str(tmp_path / "jpeg_tag.tif")

write_geotiff_gpu(da, path, compression='jpeg')
# Issue #1845: opt-in flag required; warning fires.
with pytest.warns(Warning):
write_geotiff_gpu(
da, path, compression='jpeg',
allow_internal_only_jpeg=True,
)

assert _read_compression_tag(path) == _COMPRESSION_TAGS['jpeg']

Expand Down
Loading