diff --git a/CHANGELOG.md b/CHANGELOG.md
index a5b6115c..9ae2b872 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,6 +7,7 @@
#### Bug fixes and improvements
- Refresh the geotiff mmap cache when a file is replaced under the same path so re-reads after an atomic-rename overwrite no longer return stale bytes
- Decode TIFF predictor=3 un-transpose by file byte order so big-endian floating-point TIFFs read back exactly
+- Default internal `_vrt.read_vrt` `missing_sources` to `'raise'` so an unreadable VRT source no longer produces a silent zero-fill hole on integer rasters; pass `missing_sources='warn'` to opt back into the previous lenient behaviour (#1843)
### Version 0.9.9 - 2026-05-05
diff --git a/xrspatial/geotiff/_vrt.py b/xrspatial/geotiff/_vrt.py
index 812b5bf4..a070016a 100644
--- a/xrspatial/geotiff/_vrt.py
+++ b/xrspatial/geotiff/_vrt.py
@@ -252,13 +252,14 @@ class VRTDataset:
# coords must be emitted without the shift. Parsed from
# ``Point``.
raster_type: str = 'area' # 'area' or 'point'
- # Per-load record of sources skipped by ``read_vrt`` under the
- # lenient default (not strict mode). Each entry is a dict with
- # ``source``, ``band`` (1-based), ``dst_rect`` (xoff, yoff,
- # xsize, ysize), and ``error`` keys. Empty when no sources failed
- # to read. Populated by :func:`read_vrt` so callers can detect
- # holes by attribute lookup instead of parsing a UserWarning
- # message (issue #1734).
+ # Per-load record of sources skipped by ``read_vrt`` when called
+ # with ``missing_sources='warn'`` (the lenient opt-in; the default
+ # since #1843 is ``'raise'``, and strict mode always raises). Each
+ # entry is a dict with ``source``, ``band`` (1-based), ``dst_rect``
+ # (xoff, yoff, xsize, ysize), and ``error`` keys. Empty when no
+ # sources failed to read. Populated by :func:`read_vrt` so callers
+ # on the warn path can detect holes by attribute lookup instead of
+ # parsing the ``GeoTIFFFallbackWarning`` message (issue #1734).
holes: list[dict] = field(default_factory=list)
@@ -852,7 +853,7 @@ def _apply_integer_sentinel_mask(arr, vrt, band):
def read_vrt(vrt_path: str, *, window=None,
band: int | None = None,
max_pixels: int | None = None,
- missing_sources: str = 'warn',
+ missing_sources: str = 'raise',
parsed: VRTDataset | None = None,
) -> tuple[np.ndarray, VRTDataset]:
"""Read a VRT file by assembling pixel data from its source files.
@@ -868,11 +869,22 @@ def read_vrt(vrt_path: str, *, window=None,
max_pixels : int or None
Maximum allowed pixel count (width * height * samples) for the
assembled VRT region. None uses the reader default.
- missing_sources : {'warn', 'raise'}
+ missing_sources : {'raise', 'warn'}, default 'raise'
Policy for unreadable source files referenced by the VRT.
- ``'warn'`` emits ``GeoTIFFFallbackWarning`` and records
- ``vrt.holes`` unless ``XRSPATIAL_GEOTIFF_STRICT=1`` is set.
- ``'raise'`` fails immediately.
+ ``'raise'`` (the default) fails immediately on an unreadable
+ source so a partial mosaic never surfaces silently. This matches
+ the rest of the geotiff module's up-front rejection of malformed
+ input and avoids the ambiguity of a zero-fill hole on an integer
+ raster without a nodata sentinel (see issue #1843). Prior to
+ #1843 the default was ``'warn'``; callers that relied on the
+ lenient behaviour should pass ``missing_sources='warn'``
+ explicitly.
+ ``'warn'`` is the opt-in escape hatch for partial mosaics: it
+ emits ``GeoTIFFFallbackWarning`` and records the skipped source
+ on ``vrt.holes`` (surfaced as ``attrs['vrt_holes']`` on the
+ public DataArray).
+ ``XRSPATIAL_GEOTIFF_STRICT=1`` forces a raise across the whole
+ module regardless of this kwarg (see issue #1662).
parsed : VRTDataset or None
Pre-parsed VRT structure. When supplied, ``vrt_path`` is not
re-read or re-parsed and the source-path containment check is
@@ -1160,21 +1172,22 @@ def read_vrt(vrt_path: str, *, window=None,
) + _CODEC_DECODE_EXCEPTIONS as e:
if isinstance(e, PixelSafetyLimitError):
raise
- # Under XRSPATIAL_GEOTIFF_STRICT=1, surface the read failure
- # so partial mosaics are caught in CI. Default mode warns
- # once per missing source then continues, preserving the
- # historical behaviour. See issue #1662.
+ # Default (``missing_sources='raise'``) surfaces the read
+ # failure immediately so a partial mosaic never ships
+ # silently. ``XRSPATIAL_GEOTIFF_STRICT=1`` forces the same
+ # raise module-wide regardless of the kwarg (#1662).
#
- # The lenient default leaves zero-filled holes in
- # integer VRTs that downstream code cannot tell from
- # real data unless the band has a nodata sentinel set.
- # Recording the skipped source on ``vrt.holes`` lets the
- # public ``read_vrt`` surface a machine-readable
- # ``attrs['vrt_holes']`` entry on the returned
- # DataArray, alongside the warning. Callers that need
- # to fail loudly can either inspect that attr or set
- # ``XRSPATIAL_GEOTIFF_STRICT=1`` to raise here.
- # See issue #1734.
+ # The ``missing_sources='warn'`` opt-in keeps the legacy
+ # lenient path: emit ``GeoTIFFFallbackWarning`` once per
+ # unreadable source and record the skip on ``vrt.holes``,
+ # which the public ``read_vrt`` surfaces as
+ # ``attrs['vrt_holes']`` on the returned DataArray. That
+ # path is for callers that explicitly want a holey mosaic
+ # plus a machine-readable list of which sources failed --
+ # zero-filled holes in an integer VRT are otherwise
+ # indistinguishable from real data without a nodata
+ # sentinel, which is why the default was flipped to raise
+ # in #1843. See also issues #1734 and #1843.
import warnings
from . import _geotiff_strict_mode, GeoTIFFFallbackWarning
if missing_sources == 'raise' or _geotiff_strict_mode():
diff --git a/xrspatial/geotiff/tests/test_geotiff_vrt_srcrect_validation_1784.py b/xrspatial/geotiff/tests/test_geotiff_vrt_srcrect_validation_1784.py
index 5edb8ca4..07aa487a 100644
--- a/xrspatial/geotiff/tests/test_geotiff_vrt_srcrect_validation_1784.py
+++ b/xrspatial/geotiff/tests/test_geotiff_vrt_srcrect_validation_1784.py
@@ -117,14 +117,19 @@ def test_negative_srcrect_message_names_bad_values():
def test_missing_source_still_takes_lenient_warning_path():
"""A *valid* SrcRect with a missing source file must still hit the
lenient warning path -- the new SrcRect check must not swallow the
- missing-file case that PR #1675 narrowed."""
+ missing-file case that PR #1675 narrowed.
+
+ Issue #1843 flipped the default to ``missing_sources='raise'`` so
+ this test now passes ``'warn'`` explicitly to exercise the opt-in
+ lenient branch.
+ """
from xrspatial.geotiff import GeoTIFFFallbackWarning
with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as td:
# No source file written; SrcRect itself is well-formed.
vrt_path = _write_vrt(td, src_filename='does_not_exist.tif')
with warnings.catch_warnings(record=True) as caught:
warnings.simplefilter('always')
- arr, _ = read_vrt(vrt_path)
+ arr, _ = read_vrt(vrt_path, missing_sources='warn')
# The lenient path must produce a fallback warning and a result
# array (zero-filled at the hole), not raise.
fallback = [w for w in caught
diff --git a/xrspatial/geotiff/tests/test_vrt_missing_sources_default_raise_1843.py b/xrspatial/geotiff/tests/test_vrt_missing_sources_default_raise_1843.py
new file mode 100644
index 00000000..2f7ea9c1
--- /dev/null
+++ b/xrspatial/geotiff/tests/test_vrt_missing_sources_default_raise_1843.py
@@ -0,0 +1,83 @@
+"""Regression test for #1843: the internal ``read_vrt`` in ``_vrt.py``
+defaults to ``missing_sources='raise'`` so an unreadable source halts
+the call instead of leaving a silent zero-fill hole on integer rasters.
+
+Callers wanting the historical lenient behaviour pass
+``missing_sources='warn'`` explicitly. The strict-mode env var
+``XRSPATIAL_GEOTIFF_STRICT=1`` continues to force-raise across the
+whole module (orthogonal axis, not affected by this change).
+"""
+from __future__ import annotations
+
+import pytest
+
+from xrspatial.geotiff._vrt import read_vrt as _read_vrt_internal
+from xrspatial.geotiff import GeoTIFFFallbackWarning
+
+
+def _write_missing_source_vrt(path):
+ path.write_text(
+ '\n'
+ ' \n'
+ ' \n'
+ ' missing_1843.tif'
+ '\n'
+ ' 1\n'
+ ' \n'
+ ' \n'
+ ' \n'
+ ' \n'
+ '\n'
+ )
+
+
+def test_read_vrt_default_raises_on_unreadable_source(tmp_path):
+ """Without an explicit ``missing_sources`` kwarg, an unreadable
+ backing source must raise rather than silently zero-fill.
+
+ This is the behaviour change from #1843. Before this commit the
+ default was ``'warn'`` and a missing ``Byte`` tile produced a hole
+ of zero pixels that was indistinguishable from real data unless
+ the caller checked ``attrs['vrt_holes']``.
+ """
+ vrt = tmp_path / "tmp_1843_default_raise.vrt"
+ _write_missing_source_vrt(vrt)
+
+ with pytest.raises((OSError, ValueError)):
+ _read_vrt_internal(str(vrt))
+
+
+def test_read_vrt_explicit_warn_preserves_lenient_behaviour(tmp_path):
+ """``missing_sources='warn'`` is still the escape hatch for callers
+ that want partial mosaics with ``vrt.holes`` populated.
+
+ Pinning the lenient path here keeps the historical contract
+ available to callers who opt in. The warning and the hole record
+ must both still surface.
+ """
+ vrt = tmp_path / "tmp_1843_explicit_warn.vrt"
+ _write_missing_source_vrt(vrt)
+
+ with pytest.warns(GeoTIFFFallbackWarning, match="could not be read"):
+ arr, parsed = _read_vrt_internal(str(vrt), missing_sources='warn')
+
+ assert arr.shape == (2, 2)
+ assert len(parsed.holes) == 1
+ assert parsed.holes[0]['source'].endswith('missing_1843.tif')
+
+
+def test_read_vrt_strict_env_still_raises_under_warn(monkeypatch, tmp_path):
+ """``XRSPATIAL_GEOTIFF_STRICT=1`` continues to force-raise even
+ when the caller explicitly asks for the lenient ``'warn'`` policy.
+
+ The strict env var is a module-wide override (see #1662); it must
+ still win over per-call ``missing_sources='warn'`` so CI runs with
+ strict mode catch partial mosaics regardless of caller settings.
+ """
+ vrt = tmp_path / "tmp_1843_strict_env.vrt"
+ _write_missing_source_vrt(vrt)
+
+ monkeypatch.setenv("XRSPATIAL_GEOTIFF_STRICT", "1")
+
+ with pytest.raises((OSError, ValueError)):
+ _read_vrt_internal(str(vrt), missing_sources='warn')