diff --git a/Cargo.lock b/Cargo.lock index a5f233704..780dacc38 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5701,7 +5701,9 @@ dependencies = [ "approx", "arrow-array", "arrow-buffer", + "arrow-ipc", "arrow-schema", + "datafusion-common", "sedona-common", "sedona-schema", "sedona-testing", diff --git a/rust/sedona-raster-functions/src/rs_pixel_functions.rs b/rust/sedona-raster-functions/src/rs_pixel_functions.rs index c6bb048bf..dcb82eaa3 100644 --- a/rust/sedona-raster-functions/src/rs_pixel_functions.rs +++ b/rust/sedona-raster-functions/src/rs_pixel_functions.rs @@ -191,7 +191,7 @@ impl SedonaScalarKernel for RsPixelAsCentroid { let grid_x = (col_x - 1) as f64 + 0.5; let grid_y = (row_y - 1) as f64 + 0.5; - let affine = AffineMatrix::from_metadata(raster.metadata()); + let affine = AffineMatrix::from_metadata(&raster.metadata()); let (wx, wy) = affine.transform(grid_x, grid_y); write_wkb_point(&mut builder, (wx, wy)) diff --git a/rust/sedona-raster-gdal/src/gdal_common.rs b/rust/sedona-raster-gdal/src/gdal_common.rs index 0c96fd1cb..c4378366d 100644 --- a/rust/sedona-raster-gdal/src/gdal_common.rs +++ b/rust/sedona-raster-gdal/src/gdal_common.rs @@ -24,6 +24,8 @@ use sedona_gdal::mem::MemDatasetBuilder; use sedona_gdal::raster::types::DatasetOptions; use sedona_gdal::raster::types::GdalDataType; +use std::borrow::Cow; + use sedona_raster::traits::{MetadataRef, RasterMetadata, RasterRef}; use sedona_schema::raster::{BandDataType, StorageType}; @@ -182,19 +184,27 @@ pub(crate) fn convert_gdal_err(e: GdalError) -> DataFusionError { DataFusionError::External(Box::new(e)) } -/// This function creates a GDAL dataset backed by the MEM driver that directly -/// references the band data stored in the [RasterRef]. No data copying occurs - -/// the GDAL bands point to the same memory as the data buffer held by [RasterRef]. +/// Build a GDAL MEM dataset whose bands point at the bytes held by `raster`. +/// +/// Each band's bytes come from `BandRef::contiguous_data()`. 
When that returns +/// `Cow::Borrowed`, the GDAL band points directly at the StructArray's +/// backing buffer (zero-copy); the caller must keep `raster` alive for the +/// dataset's lifetime. When it returns `Cow::Owned` (e.g. a sliced or +/// permuted view materialized by the reader), the moved `Vec` is +/// returned alongside the dataset and the caller must keep it alive too. /// /// # Arguments /// * `raster` - The RasterRef value /// * `band_indices` - The indices of the bands to include in the GDAL dataset (1-based) /// /// # Returns -/// A [`Dataset`] that provides access to the GDAL dataset. +/// A pair `(Dataset, Vec>)`. The second element holds any +/// reader-allocated band bytes that GDAL pointers may reference; it must +/// outlive the dataset. /// /// # Errors /// Returns an error if: +/// - Any band is N-D (not the legacy `["y","x"]` 2-D shape with identity view) /// - Any band uses OutDb storage /// - GDAL driver operations fail /// - Accessing RasterRef fails @@ -202,7 +212,7 @@ pub unsafe fn raster_ref_to_gdal_mem( gdal: &Gdal, raster: &R, band_indices: &[usize], -) -> Result { +) -> Result<(Dataset, Vec>)> { let metadata = raster.metadata(); let bands = raster.bands(); @@ -212,7 +222,12 @@ pub unsafe fn raster_ref_to_gdal_mem( // Create internal MEM dataset via sedona-gdal shim to avoid open dataset list contention. let mut mem_ds_builder = MemDatasetBuilder::new(width, height); - // Add bands with DATAPOINTER option (zero-copy) + // Reader-allocated band bytes (Cow::Owned). Each entry is moved out of + // the Cow without a copy and must outlive the dataset, since GDAL holds + // a raw pointer into it. + let mut owned_band_bytes: Vec> = Vec::new(); + + // Add bands with DATAPOINTER option. 
// // Note: GDALAddBand always appends a new band, so the destination band index // is sequential (1..=band_indices.len()), even if the source band indices are @@ -222,6 +237,13 @@ pub unsafe fn raster_ref_to_gdal_mem( .band(src_band_index) .map_err(|e| arrow_datafusion_err!(e))?; + if !band.is_2d() { + return exec_err!( + "GDAL backend requires a 2-dim band; got dim_names={:?}", + band.dim_names() + ); + } + if band.metadata().storage_type()? != StorageType::InDb { return Err(DataFusionError::NotImplemented( "OutDb bands are not supported in raster_to_mem_dataset".to_string(), @@ -231,8 +253,20 @@ pub unsafe fn raster_ref_to_gdal_mem( let band_metadata = band.metadata(); let band_type = band_metadata.data_type()?; let gdal_type = band_data_type_to_gdal(&band_type); - let band_data = band.data(); - let data_ptr = band_data.as_ptr(); + let band_data = band + .contiguous_data() + .map_err(|e| arrow_datafusion_err!(e))?; + // For Cow::Borrowed the pointer is into the StructArray (caller keeps + // it alive). For Cow::Owned we move the Vec into `owned_band_bytes` + // — no extra copy of the reader's materialization — and point GDAL + // at it; the Vec is kept alive alongside the returned Dataset. + let data_ptr: *const u8 = match band_data { + Cow::Borrowed(b) => b.as_ptr(), + Cow::Owned(v) => { + owned_band_bytes.push(v); + owned_band_bytes.last().unwrap().as_ptr() + } + }; unsafe { mem_ds_builder = mem_ds_builder.add_band(gdal_type, data_ptr as *mut u8); } @@ -295,14 +329,17 @@ pub unsafe fn raster_ref_to_gdal_mem( } } - Ok(dataset) + Ok((dataset, owned_band_bytes)) } pub fn raster_ref_to_gdal_empty(gdal: &Gdal, raster: &R) -> Result { unsafe { // SAFETY: raster_ref_to_gdal_mem is safe to call with an empty band list. The - // returned dataset will have zero bands and references no external memory. - raster_ref_to_gdal_mem(gdal, raster, &[]) + // returned dataset has zero bands, references no external memory, and the + // owned-bytes Vec is necessarily empty. 
+ let (dataset, owned) = raster_ref_to_gdal_mem(gdal, raster, &[])?; + debug_assert!(owned.is_empty()); + Ok(dataset) } } @@ -770,7 +807,7 @@ mod tests { let raster = single_raster(&raster_array); with_gdal(|gdal| { - let dataset = unsafe { raster_ref_to_gdal_mem(gdal, &raster, &[3, 1])? }; + let (dataset, _owned) = unsafe { raster_ref_to_gdal_mem(gdal, &raster, &[3, 1])? }; assert_eq!(dataset.raster_size(), (2, 2)); assert_eq!(dataset.raster_count(), 2); assert_eq!( @@ -825,4 +862,40 @@ mod tests { .unwrap(); assert!(err.to_string().contains("OutDb bands are not supported")); } + + #[test] + fn test_raster_ref_to_gdal_mem_rejects_nd_bands() { + // Build a 3-D in-db band shaped ["time","y","x"] over a 2-D raster. + // The N-D guard should fire before any GDAL call. + let mut builder = RasterBuilder::new(1); + builder + .start_raster_2d(2, 2, 0.0, 2.0, 1.0, -1.0, 0.0, 0.0, None) + .unwrap(); + builder + .start_band_nd( + None, + &["time", "y", "x"], + &[3, 2, 2], + BandDataType::UInt8, + None, + None, + None, + ) + .unwrap(); + builder + .band_data_writer() + .append_value(vec![0u8; 3 * 2 * 2]); + builder.finish_band().unwrap(); + builder.finish_raster().unwrap(); + let raster_array = builder.finish().unwrap(); + let raster = single_raster(&raster_array); + + let err = with_gdal(|gdal| unsafe { raster_ref_to_gdal_mem(gdal, &raster, &[1]) }) + .err() + .unwrap(); + assert!( + err.to_string().contains("requires a 2-dim band"), + "got: {err}" + ); + } } diff --git a/rust/sedona-raster-gdal/src/gdal_dataset_provider.rs b/rust/sedona-raster-gdal/src/gdal_dataset_provider.rs index 03e45621e..1328bd938 100644 --- a/rust/sedona-raster-gdal/src/gdal_dataset_provider.rs +++ b/rust/sedona-raster-gdal/src/gdal_dataset_provider.rs @@ -69,6 +69,11 @@ pub(crate) struct RasterDataset<'a> { _gdal_mem_source: Option>, /// External datasets referenced by the VRT; kept alive for the lifetime of this struct. 
_gdal_outdb_sources: Vec>, + /// Reader-allocated band bytes that GDAL pointers in the MEM dataset may + /// reference (i.e. bytes returned by `BandRef::contiguous_data()` as + /// `Cow::Owned`, moved here without a copy). Kept alive for as long as + /// the MEM dataset that holds the pointers. + _owned_band_bytes: Vec>, /// Binds this dataset's lifetime to the borrowed source raster. _source_raster: PhantomData<&'a dyn RasterRef>, } @@ -259,6 +264,14 @@ impl GDALDatasetCache { for i in 1..=num_bands { let band = bands.band(i).map_err(|e| arrow_datafusion_err!(e))?; + + if !band.is_2d() { + return exec_err!( + "GDAL backend requires 2-dim bands; got dim_names={:?}", + band.dim_names() + ); + } + let band_metadata = band.metadata(); let band_type = band_metadata.data_type()?; let gdal_type = band_data_type_to_gdal(&band_type); @@ -402,6 +415,7 @@ impl<'a> GDALDatasetProvider<'a> { dataset: Rc::new(dataset), _gdal_mem_source: None, _gdal_outdb_sources: Vec::new(), + _owned_band_bytes: Vec::new(), _source_raster: PhantomData, }); } @@ -416,12 +430,12 @@ impl<'a> GDALDatasetProvider<'a> { } } - let mut gdal_mem_source = if !indb_band_indices.is_empty() { - Some(Rc::new(unsafe { - raster_ref_to_gdal_mem(self.gdal, raster, &indb_band_indices)? - })) + let (mut gdal_mem_source, owned_band_bytes) = if !indb_band_indices.is_empty() { + let (mem_ds, owned) = + unsafe { raster_ref_to_gdal_mem(self.gdal, raster, &indb_band_indices)? 
}; + (Some(Rc::new(mem_ds)), owned) } else { - None + (None, Vec::new()) }; if !has_outdb { @@ -430,6 +444,7 @@ impl<'a> GDALDatasetProvider<'a> { dataset, _gdal_mem_source: None, _gdal_outdb_sources: Vec::new(), + _owned_band_bytes: owned_band_bytes, _source_raster: PhantomData, }); } @@ -441,6 +456,7 @@ impl<'a> GDALDatasetProvider<'a> { dataset: Rc::clone(&cached.dataset), _gdal_mem_source: None, _gdal_outdb_sources: cached.outdb_sources.clone(), + _owned_band_bytes: Vec::new(), _source_raster: PhantomData, }); } @@ -460,6 +476,7 @@ impl<'a> GDALDatasetProvider<'a> { dataset, _gdal_mem_source: None, _gdal_outdb_sources: outdb_sources, + _owned_band_bytes: Vec::new(), _source_raster: PhantomData, }); } @@ -472,6 +489,7 @@ impl<'a> GDALDatasetProvider<'a> { dataset, _gdal_mem_source: gdal_mem_source, _gdal_outdb_sources: outdb_sources, + _owned_band_bytes: owned_band_bytes, _source_raster: PhantomData, }) } @@ -660,6 +678,32 @@ mod tests { path_str } + /// Two-band GeoTIFF on disk: band 1 is filled with `band1_fill`, band 2 + /// with `band2_fill`. Used to exercise `#band=2` selection end-to-end. 
+ fn create_two_band_source_tiff(temp_dir: &TempDir, band1_fill: u8, band2_fill: u8) -> String { + let path = temp_dir.path().join("two_band.tif"); + let path_str = path.to_string_lossy().to_string(); + + with_gdal(|gdal| { + let driver = gdal.get_driver_by_name("GTiff").unwrap(); + let dataset = driver + .create_with_band_type::(&path_str, 8, 8, 2) + .unwrap(); + dataset + .set_geo_transform(&[0.0, 1.0, 0.0, 8.0, 0.0, -1.0]) + .unwrap(); + for (i, fill) in [band1_fill, band2_fill].iter().enumerate() { + let band = dataset.rasterband(i + 1).unwrap(); + let mut buffer = Buffer::new((8, 8), vec![*fill; 8 * 8]); + band.write((0, 0), (8, 8), &mut buffer).unwrap(); + } + Ok(()) + }) + .unwrap(); + + path_str + } + fn build_outdb_raster(path: &str) -> arrow_array::StructArray { let mut builder = RasterBuilder::new(1); let metadata = RasterMetadata { @@ -976,4 +1020,109 @@ mod tests { assert!(key_a != key_b); } + + #[test] + fn test_provider_rejects_nd_band_in_vrt_path() { + let temp_dir = TempDir::new().unwrap(); + let path = create_source_tiff(&temp_dir); + + // Build a raster mixing one in-db 3-D band (forces N-D rejection inside + // build_vrt_from_sources) with one out-db band. 
+ let mut builder = RasterBuilder::new(1); + builder + .start_raster_2d(8, 8, 0.0, 8.0, 1.0, -1.0, 0.0, 0.0, None) + .unwrap(); + builder + .start_band_nd( + None, + &["time", "y", "x"], + &[2, 8, 8], + BandDataType::UInt8, + None, + None, + None, + ) + .unwrap(); + builder + .band_data_writer() + .append_value(vec![0u8; 2 * 8 * 8]); + builder.finish_band().unwrap(); + builder + .start_band_nd( + None, + &["y", "x"], + &[8, 8], + BandDataType::UInt8, + Some(&[0u8]), + Some(&path), + Some("geotiff"), + ) + .unwrap(); + builder.band_data_writer().append_value([]); + builder.finish_band().unwrap(); + builder.finish_raster().unwrap(); + let raster_struct = builder.finish().unwrap(); + let raster_array = RasterStructArray::new(&raster_struct); + let raster = raster_array.get(0).unwrap(); + let cache = Rc::new(GDALDatasetCache::try_new(4, 4).unwrap()); + + let err = with_gdal(|gdal| { + let provider = GDALDatasetProvider::new(gdal, Rc::clone(&cache)); + provider.raster_ref_to_gdal(&raster) + }) + .err() + .unwrap(); + assert!(err.to_string().contains("2-dim band"), "got: {err}"); + } + + #[test] + fn test_provider_selects_outdb_band_via_band_fragment() { + let temp_dir = TempDir::new().unwrap(); + // Source TIFF: band 1 filled with 7s, band 2 filled with 99s. + let path = create_two_band_source_tiff(&temp_dir, 7u8, 99u8); + + // Build a 1-band raster whose single band points at source band 2. 
+ let metadata = RasterMetadata { + width: 8, + height: 8, + upperleft_x: 0.0, + upperleft_y: 8.0, + scale_x: 1.0, + scale_y: -1.0, + skew_x: 0.0, + skew_y: 0.0, + }; + let mut builder = RasterBuilder::new(1); + builder.start_raster(&metadata, None).unwrap(); + builder + .start_band(BandMetadata { + nodata_value: Some(vec![0u8]), + storage_type: StorageType::OutDbRef, + datatype: BandDataType::UInt8, + outdb_url: Some(path.clone()), + outdb_band_id: Some(2), + }) + .unwrap(); + builder.band_data_writer().append_value([]); + builder.finish_band().unwrap(); + builder.finish_raster().unwrap(); + let raster_struct = builder.finish().unwrap(); + let raster_array = RasterStructArray::new(&raster_struct); + let raster = raster_array.get(0).unwrap(); + let cache = Rc::new(GDALDatasetCache::try_new(4, 4).unwrap()); + + let dataset = with_gdal(|gdal| { + let provider = GDALDatasetProvider::new(gdal, Rc::clone(&cache)); + provider.raster_ref_to_gdal(&raster) + }) + .unwrap(); + + let band = dataset + .as_dataset() + .rasterband(1) + .unwrap() + .read_as::((0, 0), (8, 8), (8, 8), None) + .unwrap(); + assert_eq!(band.data().to_vec(), vec![99u8; 8 * 8]); + } } diff --git a/rust/sedona-raster/Cargo.toml b/rust/sedona-raster/Cargo.toml index 7407a0507..37e7ecfbf 100644 --- a/rust/sedona-raster/Cargo.toml +++ b/rust/sedona-raster/Cargo.toml @@ -34,9 +34,11 @@ result_large_err = "allow" arrow-schema = { workspace = true } arrow-array = { workspace = true } arrow-buffer = { workspace = true } +datafusion-common = { workspace = true } sedona-common = { workspace = true } sedona-schema = { workspace = true } [dev-dependencies] sedona-testing = { workspace = true } approx = { workspace = true } +arrow-ipc = { workspace = true } diff --git a/rust/sedona-raster/src/affine_transformation.rs b/rust/sedona-raster/src/affine_transformation.rs index ca6441e73..032bf8e01 100644 --- a/rust/sedona-raster/src/affine_transformation.rs +++ b/rust/sedona-raster/src/affine_transformation.rs @@ -108,7 
+108,7 @@ pub fn rotation(raster: &dyn RasterRef) -> f64 { /// * `y` - Y coordinate in pixel space (row) #[inline] pub fn to_world_coordinate(raster: &dyn RasterRef, x: i64, y: i64) -> (f64, f64) { - AffineMatrix::from_metadata(raster.metadata()).transform(x as f64, y as f64) + AffineMatrix::from_metadata(&raster.metadata()).transform(x as f64, y as f64) } /// Performs the inverse affine transformation to convert world coordinates back to raster pixel coordinates. @@ -124,14 +124,14 @@ pub fn to_raster_coordinate( world_y: f64, ) -> Result<(i64, i64), ArrowError> { let (rx, ry) = - AffineMatrix::from_metadata(raster.metadata()).inv_transform(world_x, world_y)?; + AffineMatrix::from_metadata(&raster.metadata()).inv_transform(world_x, world_y)?; Ok((rx as i64, ry as i64)) } #[cfg(test)] mod tests { use super::*; - use crate::traits::{MetadataRef, RasterMetadata}; + use crate::traits::{BandRef, Bands, RasterMetadata}; use approx::assert_relative_eq; use std::f64::consts::FRAC_1_SQRT_2; use std::f64::consts::PI; @@ -141,14 +141,32 @@ mod tests { } impl RasterRef for TestRaster { - fn metadata(&self) -> &dyn MetadataRef { - &self.metadata + fn num_bands(&self) -> usize { + 0 + } + fn bands(&self) -> Bands<'_> { + Bands::new(self) + } + fn band(&self, _index: usize) -> Option> { + None + } + fn band_name(&self, _index: usize) -> Option<&str> { + None } fn crs(&self) -> Option<&str> { None } - fn bands(&self) -> &dyn crate::traits::BandsRef { - unimplemented!() + fn transform(&self) -> &[f64] { + &[] + } + fn spatial_dims(&self) -> Vec<&str> { + vec![] + } + fn spatial_shape(&self) -> &[i64] { + &[] + } + fn metadata(&self) -> RasterMetadata { + self.metadata.clone() } } diff --git a/rust/sedona-raster/src/array.rs b/rust/sedona-raster/src/array.rs index 07a4bce04..b570fa267 100644 --- a/rust/sedona-raster/src/array.rs +++ b/rust/sedona-raster/src/array.rs @@ -15,484 +15,724 @@ // specific language governing permissions and limitations // under the License. 
+use std::borrow::Cow; + use arrow_array::{ - Array, BinaryArray, BinaryViewArray, Float64Array, ListArray, StringArray, StringViewArray, - StructArray, UInt32Array, UInt64Array, + Array, BinaryArray, BinaryViewArray, Float64Array, Int64Array, ListArray, StringArray, + StringViewArray, StructArray, UInt32Array, UInt64Array, }; use arrow_schema::ArrowError; use crate::traits::{ - BandIterator, BandMetadataRef, BandRef, BandsRef, MetadataRef, RasterMetadata, RasterRef, -}; -use sedona_schema::raster::{ - band_indices, band_metadata_indices, metadata_indices, raster_indices, BandDataType, - StorageType, + validate_view, visible_shape_from_view, BandRef, Bands, NdBuffer, RasterRef, ViewEntry, }; +use sedona_schema::raster::{band_indices, band_view_indices, raster_indices, BandDataType}; -/// Implement MetadataRef for RasterMetadata to allow direct use with builder -impl MetadataRef for RasterMetadata { - fn width(&self) -> u64 { - self.width - } - fn height(&self) -> u64 { - self.height - } - fn upper_left_x(&self) -> f64 { - self.upperleft_x - } - fn upper_left_y(&self) -> f64 { - self.upperleft_y - } - fn scale_x(&self) -> f64 { - self.scale_x - } - fn scale_y(&self) -> f64 { - self.scale_y - } - fn skew_x(&self) -> f64 { - self.skew_x - } - fn skew_y(&self) -> f64 { - self.skew_y - } -} - -/// Implementation of MetadataRef for Arrow StructArray -struct MetadataRefImpl<'a> { - width_array: &'a UInt64Array, - height_array: &'a UInt64Array, - upper_left_x_array: &'a Float64Array, - upper_left_y_array: &'a Float64Array, - scale_x_array: &'a Float64Array, - scale_y_array: &'a Float64Array, - skew_x_array: &'a Float64Array, - skew_y_array: &'a Float64Array, - index: usize, +/// Arrow-backed implementation of BandRef for a single band within a raster. +/// +/// View-derived layout (`visible_shape`, `byte_strides`, `byte_offset`, +/// `is_identity_view`) is computed once at construction and reused by every +/// accessor. 
Source-shape and dim-name slices are borrowed directly from +/// the underlying Arrow buffers. +struct BandRefImpl<'a> { + dim_names_list: &'a ListArray, + dim_names_values: &'a StringArray, + source_shape_list: &'a ListArray, + source_shape_values: &'a UInt64Array, + nodata_array: &'a BinaryArray, + outdb_uri_array: &'a StringArray, + outdb_format_array: &'a StringViewArray, + data_array: &'a BinaryViewArray, + /// Absolute row index within the flattened bands arrays + band_row: usize, + /// Resolved at construction so accessors don't re-decode the discriminant. + data_type: BandDataType, + /// Per-visible-axis view, length = ndim + view_entries: Vec, + /// Visible shape (== `[v.steps for v in view_entries]`), length = ndim + visible_shape: Vec, + /// Byte strides per visible axis. May be 0 (broadcast) or negative. + byte_strides: Vec, + /// Byte offset into `data` of the visible region's `[0,...,0]` element. + byte_offset: u64, + /// True iff this view is the identity over a C-order source buffer — + /// `contiguous_data()` can then borrow `data` directly. Relies on + /// `validate_view` having enforced `view.len() == source_shape.len()`; + /// otherwise a "shorter identity" could be wrongly accepted here. + is_identity_view: bool, + /// Lazy row-major copy of the visible bytes, materialized on the first + /// `data()` call against a non-identity view. Identity views borrow + /// directly from the Arrow column and never touch this cell. 
+ materialized: std::cell::OnceCell>, } -impl<'a> MetadataRef for MetadataRefImpl<'a> { - #[inline(always)] - fn width(&self) -> u64 { - self.width_array.value(self.index) - } - - #[inline(always)] - fn height(&self) -> u64 { - self.height_array.value(self.index) +impl<'a> BandRef for BandRefImpl<'a> { + fn ndim(&self) -> usize { + self.view_entries.len() } - #[inline(always)] - fn upper_left_x(&self) -> f64 { - self.upper_left_x_array.value(self.index) + fn dim_names(&self) -> Vec<&str> { + let start = self.dim_names_list.value_offsets()[self.band_row] as usize; + let end = self.dim_names_list.value_offsets()[self.band_row + 1] as usize; + (start..end) + .map(|i| self.dim_names_values.value(i)) + .collect() } - #[inline(always)] - fn upper_left_y(&self) -> f64 { - self.upper_left_y_array.value(self.index) + fn shape(&self) -> &[u64] { + &self.visible_shape } - #[inline(always)] - fn scale_x(&self) -> f64 { - self.scale_x_array.value(self.index) + fn raw_source_shape(&self) -> &[u64] { + let start = self.source_shape_list.value_offsets()[self.band_row] as usize; + let end = self.source_shape_list.value_offsets()[self.band_row + 1] as usize; + &self.source_shape_values.values()[start..end] } - #[inline(always)] - fn scale_y(&self) -> f64 { - self.scale_y_array.value(self.index) + fn view(&self) -> &[ViewEntry] { + &self.view_entries } - #[inline(always)] - fn skew_x(&self) -> f64 { - self.skew_x_array.value(self.index) + fn data_type(&self) -> BandDataType { + self.data_type } - #[inline(always)] - fn skew_y(&self) -> f64 { - self.skew_y_array.value(self.index) + fn data(&self) -> &[u8] { + // OutDb: no in-line bytes available. Returns `&[]` to match main's + // pre-N-D behavior — callers that care must check `is_indb()` first. + if !self.is_indb() { + return &[]; + } + // Identity-view InDb: the column bytes ARE the row-major visible + // buffer. Borrow directly from Arrow — no allocation, no copy. 
+ if self.is_identity_view { + return self.data_array.value(self.band_row); + } + // Non-identity view: walk strides and cache the row-major copy in + // `materialized` so repeat `.data()` calls reuse the same buffer. + // The materialization is infallible on validated bands; an error + // here would indicate corruption beyond what `validate_view` + // catches, so we panic rather than hide it through `&[]`. + self.materialized.get_or_init(|| { + materialize_strided( + self.data_array.value(self.band_row), + &self.visible_shape, + &self.byte_strides, + self.byte_offset, + self.data_type.byte_size(), + ) + .expect("non-identity view materialization failed on a validated band") + }) } -} - -/// Implementation of BandMetadataRef for Arrow StructArray -struct BandMetadataRefImpl<'a> { - nodata_array: &'a BinaryArray, - storage_type_array: &'a UInt32Array, - datatype_array: &'a UInt32Array, - outdb_url_array: &'a StringArray, - outdb_band_id_array: &'a UInt32Array, - band_index: usize, -} -impl<'a> BandMetadataRef for BandMetadataRefImpl<'a> { - fn nodata_value(&self) -> Option<&[u8]> { - if self.nodata_array.is_null(self.band_index) { + fn nodata(&self) -> Option<&[u8]> { + if self.nodata_array.is_null(self.band_row) { None } else { - Some(self.nodata_array.value(self.band_index)) + Some(self.nodata_array.value(self.band_row)) } } - fn storage_type(&self) -> Result { - let value = self.storage_type_array.value(self.band_index); - let storage_type = match value { - 0 => StorageType::InDb, - 1 => StorageType::OutDbRef, - _ => { - return Err(ArrowError::InvalidArgumentError(format!( - "Unknown storage type: {}", - value - ))) - } - }; - Ok(storage_type) - } - - fn data_type(&self) -> Result { - let value = self.datatype_array.value(self.band_index); - let band_data_type = match value { - 1 => BandDataType::UInt8, - 2 => BandDataType::UInt16, - 3 => BandDataType::Int16, - 4 => BandDataType::UInt32, - 5 => BandDataType::Int32, - 6 => BandDataType::Float32, - 7 => 
BandDataType::Float64, - 8 => BandDataType::UInt64, - 9 => BandDataType::Int64, - 10 => BandDataType::Int8, - _ => { - return Err(ArrowError::InvalidArgumentError(format!( - "Unknown band data type: {}", - self.datatype_array.value(self.band_index) - ))) - } - }; - Ok(band_data_type) - } - - fn outdb_url(&self) -> Option<&str> { - if self.outdb_url_array.is_null(self.band_index) { + fn outdb_uri(&self) -> Option<&str> { + if self.outdb_uri_array.is_null(self.band_row) { None } else { - Some(self.outdb_url_array.value(self.band_index)) + Some(self.outdb_uri_array.value(self.band_row)) } } - fn outdb_band_id(&self) -> Option { - if self.outdb_band_id_array.is_null(self.band_index) { + fn outdb_format(&self) -> Option<&str> { + if self.outdb_format_array.is_null(self.band_row) { None } else { - Some(self.outdb_band_id_array.value(self.band_index)) + Some(self.outdb_format_array.value(self.band_row)) } } -} -/// Implementation of BandRef for accessing individual band data -struct BandRefImpl<'a> { - band_metadata: BandMetadataRefImpl<'a>, - band_data: &'a [u8], + fn is_indb(&self) -> bool { + !self.data_array.value(self.band_row).is_empty() + } + + fn nd_buffer(&self) -> Result, ArrowError> { + if !self.is_indb() { + return Err(ArrowError::NotYetImplemented( + "OutDb byte access via nd_buffer() is not yet implemented; \ + backend-specific OutDb resolvers are tracked separately" + .to_string(), + )); + } + // shape and strides are owned by NdBuffer (see its doc comment). + // Cloning here is cheap — both vecs are O(ndim), a handful of values. + Ok(NdBuffer { + buffer: self.data_array.value(self.band_row), + shape: self.visible_shape.clone(), + strides: self.byte_strides.clone(), + offset: self.byte_offset, + data_type: self.data_type, + }) + } + + fn contiguous_data(&self) -> Result, ArrowError> { + let buf = self.nd_buffer()?; + if self.is_identity_view { + // Identity view over a C-order source buffer: the source bytes + // ARE the visible bytes. Borrow them. 
+ return Ok(Cow::Borrowed(buf.buffer)); + } + let out = materialize_strided( + buf.buffer, + &buf.shape, + &buf.strides, + buf.offset, + buf.data_type.byte_size(), + )?; + Ok(Cow::Owned(out)) + } } -impl<'a> BandRef for BandRefImpl<'a> { - fn metadata(&self) -> &dyn BandMetadataRef { - &self.band_metadata +/// Walk a strided view over `buffer` and return its bytes in canonical +/// row-major (C-order) layout. Shared between `contiguous_data` (returns +/// the bytes through `Cow::Owned` + `Result`) and the `data()` shim +/// (caches the result in `BandRefImpl::materialized`). +/// +/// Uses checked arithmetic at every step so a pathological stride × index +/// product can't wrap and silently pass the subsequent bound checks. +/// Returns an error rather than panicking so the caller decides how to +/// surface the failure. +fn materialize_strided( + buffer: &[u8], + visible_shape: &[u64], + byte_strides: &[i64], + byte_offset: u64, + dtype_size: usize, +) -> Result, ArrowError> { + let ndim = visible_shape.len(); + let total: u64 = visible_shape.iter().product(); + if total == 0 { + return Ok(Vec::new()); } + let total = total as usize; + let mut out = Vec::with_capacity(total * dtype_size); + let base = byte_offset as i64; + + // Innermost-axis fast path. We always step the innermost (last) + // visible axis as the inner loop; everything outer drives the + // starting byte for that row. + let inner = ndim - 1; + let inner_steps = visible_shape[inner] as usize; + let inner_stride = byte_strides[inner]; + // `dtype_size` is always >= 1 (every BandDataType has a positive + // byte size), so it doesn't need its own guard. + let row_bytes_contiguous = inner_stride == dtype_size as i64 && inner_steps > 0; + + // Precompute a small index vector for outer axes (everything except + // the innermost). For 1D this is empty and we run a single pass. 
+ let mut outer_idx = vec![0u64; ndim.saturating_sub(1)]; + loop { + // Compute the byte offset of the row's first element from the + // current outer index combination. Checked arithmetic guards + // against a pathological outer-stride × index product wrapping. + let mut row_off = base; + for (k, &i) in outer_idx.iter().enumerate() { + let step_off = (i as i64).checked_mul(byte_strides[k]).ok_or_else(|| { + ArrowError::InvalidArgumentError(format!( + "view stride composition overflows i64 \ + at outer axis {k} (index {i}, stride {})", + byte_strides[k] + )) + })?; + row_off = row_off.checked_add(step_off).ok_or_else(|| { + ArrowError::InvalidArgumentError( + "view offset accumulation overflows i64".to_string(), + ) + })?; + } - fn data(&self) -> &[u8] { - self.band_data + if row_bytes_contiguous { + // Whole row is one flat slice — copy it in one go. + let len = inner_steps * dtype_size; + if row_off < 0 || (row_off as usize) + len > buffer.len() { + return Err(ArrowError::InvalidArgumentError(format!( + "view addresses out-of-bounds byte range {row_off}..{} (buffer len {})", + row_off as usize + len, + buffer.len() + ))); + } + out.extend_from_slice(&buffer[row_off as usize..row_off as usize + len]); + } else { + // Step element-by-element along the innermost axis. + let mut byte_off = row_off; + for _ in 0..inner_steps { + if byte_off < 0 || (byte_off as usize) + dtype_size > buffer.len() { + return Err(ArrowError::InvalidArgumentError(format!( + "view addresses out-of-bounds byte offset {byte_off} (buffer len {})", + buffer.len() + ))); + } + out.extend_from_slice(&buffer[byte_off as usize..byte_off as usize + dtype_size]); + byte_off = byte_off.checked_add(inner_stride).ok_or_else(|| { + ArrowError::InvalidArgumentError( + "view inner-axis advance overflows i64".to_string(), + ) + })?; + } + } + + // Increment outer indices in C-order (last outer axis fastest). 
+ if outer_idx.is_empty() { + break; + } + let mut k = inner; // outer axes are [0..inner) + let mut done = true; + while k > 0 { + k -= 1; + outer_idx[k] += 1; + if outer_idx[k] < visible_shape[k] { + done = false; + break; + } + outer_idx[k] = 0; + } + if done { + break; + } } + Ok(out) } -/// Implementation of BandsRef for accessing all bands in a raster -struct BandsRefImpl<'a> { +/// Arrow-backed implementation of RasterRef for a single raster row. +/// +/// Holds flat references to the underlying Arrow arrays so the impl does +/// not borrow from a `RasterStructArray` wrapper. That keeps +/// `RasterStructArray::get(&self, ...)` callable without a `&'a self` +/// constraint, which would otherwise force callers to hoist the +/// `RasterStructArray` into a `let` binding. +pub struct RasterRefImpl<'a> { + crs_array: &'a StringViewArray, + transform_list: &'a ListArray, + transform_values: &'a Float64Array, + spatial_dims_list: &'a ListArray, + spatial_dims_values: &'a StringViewArray, + spatial_shape_list: &'a ListArray, + spatial_shape_values: &'a Int64Array, bands_list: &'a ListArray, - raster_index: usize, - // Direct references to the metadata and data arrays - nodata_array: &'a BinaryArray, - storage_type_array: &'a UInt32Array, - datatype_array: &'a UInt32Array, - outdb_url_array: &'a StringArray, - outdb_band_id_array: &'a UInt32Array, + band_name_array: &'a StringArray, + band_dim_names_list: &'a ListArray, + band_dim_names_values: &'a StringArray, + band_source_shape_list: &'a ListArray, + band_source_shape_values: &'a UInt64Array, + band_datatype_array: &'a UInt32Array, + band_nodata_array: &'a BinaryArray, + band_view_list: &'a ListArray, + band_view_source_axis: &'a Int64Array, + band_view_start: &'a Int64Array, + band_view_step: &'a Int64Array, + band_view_steps: &'a Int64Array, + band_outdb_uri_array: &'a StringArray, + band_outdb_format_array: &'a StringViewArray, band_data_array: &'a BinaryViewArray, + raster_index: usize, +} + +impl<'a> 
RasterRefImpl<'a> { + /// Returns the raw CRS string reference with the array's lifetime. + pub fn crs_str_ref(&self) -> Option<&'a str> { + if self.crs_array.is_null(self.raster_index) { + None + } else { + Some(self.crs_array.value(self.raster_index)) + } + } } -impl<'a> BandsRef for BandsRefImpl<'a> { - fn len(&self) -> usize { +impl<'a> RasterRef for RasterRefImpl<'a> { + fn num_bands(&self) -> usize { self.bands_list.value_length(self.raster_index) as usize } - /// Get a specific band by number (1-based index) - fn band(&self, number: usize) -> Result, ArrowError> { - if number == 0 { - return Err(ArrowError::InvalidArgumentError(format!( - "Invalid band number {number}: band numbers must be 1-based" - ))); - } - // By convention, band numbers are 1-based. - // Convert to zero-based index. - let index = number - 1; - if index >= self.len() { - return Err(ArrowError::InvalidArgumentError(format!( - "Band number {} is out of range: this raster has {} bands", - number, - self.len() - ))); - } + fn bands(&self) -> Bands<'_> { + Bands::new(self) + } + fn band(&self, index: usize) -> Option> { + if index >= self.num_bands() { + return None; + } let start = self.bands_list.value_offsets()[self.raster_index] as usize; let band_row = start + index; - let band_metadata = BandMetadataRefImpl { - nodata_array: self.nodata_array, - storage_type_array: self.storage_type_array, - datatype_array: self.datatype_array, - outdb_url_array: self.outdb_url_array, - outdb_band_id_array: self.outdb_band_id_array, - band_index: band_row, - }; - - let band_data = self.band_data_array.value(band_row); + // Read source shape slice. 
+ let ss_start = self.band_source_shape_list.value_offsets()[band_row] as usize; + let ss_end = self.band_source_shape_list.value_offsets()[band_row + 1] as usize; + let source_shape: &[u64] = &self.band_source_shape_values.values()[ss_start..ss_end]; - Ok(Box::new(BandRefImpl { - band_metadata, - band_data, - })) - } + // Reject 0-D bands at the read boundary. Schema doesn't forbid them + // outright but every consumer assumes ndim >= 1. + if source_shape.is_empty() { + return None; + } - fn iter(&self) -> Box + '_> { - Box::new(BandIteratorImpl { - bands: self, - current: 1, // Start at 1 for 1-based band numbering - }) - } -} + // Resolve data type up front; an unknown discriminant is a + // schema-corruption bug, not user data, so failing the band is + // appropriate. + let data_type_value = self.band_datatype_array.value(band_row); + let data_type = BandDataType::try_from_u32(data_type_value)?; + + // Read view entries. A null row encodes the canonical identity view + // and is synthesised on the fly from `source_shape`. + let view_entries: Vec = if self.band_view_list.is_null(band_row) { + source_shape + .iter() + .enumerate() + .map(|(i, &s)| ViewEntry { + source_axis: i as i64, + start: 0, + step: 1, + steps: s as i64, + }) + .collect() + } else { + let v_start = self.band_view_list.value_offsets()[band_row] as usize; + let v_end = self.band_view_list.value_offsets()[band_row + 1] as usize; + (v_start..v_end) + .map(|i| ViewEntry { + source_axis: self.band_view_source_axis.value(i), + start: self.band_view_start.value(i), + step: self.band_view_step.value(i), + steps: self.band_view_steps.value(i), + }) + .collect() + }; -/// Concrete implementation of BandIterator trait -pub struct BandIteratorImpl<'a> { - bands: &'a dyn BandsRef, - current: usize, -} + // Full validation: length match, source_axis permutation, bounds, + // and steps >= 0. Anything malformed is treated as a schema-level + // corruption — return None so the caller can surface it. 
+ if validate_view(&view_entries, source_shape).is_err() { + return None; + } -impl<'a> Iterator for BandIteratorImpl<'a> { - type Item = Box; + let ndim = view_entries.len(); + let visible_shape = visible_shape_from_view(&view_entries); + + let dtype_size = data_type.byte_size() as i64; + + // C-order byte strides over the source_shape: + // source_strides_bytes[k] = dtype_size * Π_{j>k} source_shape[j] + // + // Computed with checked arithmetic so a corrupt source_shape (a u64 + // that doesn't fit in i64, or a product that overflows) is rejected + // here rather than producing a wrapped stride that silently passes + // later bound checks. + let mut source_strides_bytes = vec![0i64; source_shape.len()]; + source_strides_bytes[source_shape.len() - 1] = dtype_size; + for k in (0..source_shape.len() - 1).rev() { + let next_axis = i64::try_from(source_shape[k + 1]).ok()?; + source_strides_bytes[k] = source_strides_bytes[k + 1].checked_mul(next_axis)?; + } - fn next(&mut self) -> Option { - // current is 1-based, compare against len() + 1 - if self.current <= self.bands.len() { - let band = self.bands.band(self.current).ok(); // Convert Result to Option - self.current += 1; - band - } else { - None + // Compose view → byte strides + offset using checked arithmetic. + // validate_view bounded start/step within each source axis but can't + // bound the cumulative byte stride, so an axis with a huge stride + // and a non-trivial step can still wrap an unchecked product. + let mut byte_strides = vec![0i64; ndim]; + let mut byte_offset: i64 = 0; + let mut is_identity_view = true; + for (k, v) in view_entries.iter().enumerate() { + let sa_idx = v.source_axis as usize; + let src_stride = source_strides_bytes[sa_idx]; + byte_strides[k] = v.step.checked_mul(src_stride)?; + let start_off = v.start.checked_mul(src_stride)?; + byte_offset = byte_offset.checked_add(start_off)?; + // After validate_view: steps >= 0, so the u64 cast below is lossless. 
+ if !(sa_idx == k + && v.start == 0 + && v.step == 1 + && v.steps as u64 == source_shape[sa_idx]) + { + is_identity_view = false; + } + } + // byte_offset is non-negative by construction (start >= 0, + // src_stride > 0), but assert to keep the cast below honest. + if byte_offset < 0 { + return None; } - } - fn size_hint(&self) -> (usize, Option) { - // current is 1-based, so remaining calculation needs adjustment - let remaining = self.bands.len().saturating_sub(self.current - 1); - (remaining, Some(remaining)) + Some(Box::new(BandRefImpl { + dim_names_list: self.band_dim_names_list, + dim_names_values: self.band_dim_names_values, + source_shape_list: self.band_source_shape_list, + source_shape_values: self.band_source_shape_values, + nodata_array: self.band_nodata_array, + outdb_uri_array: self.band_outdb_uri_array, + outdb_format_array: self.band_outdb_format_array, + data_array: self.band_data_array, + band_row, + data_type, + view_entries, + visible_shape, + byte_strides, + byte_offset: byte_offset as u64, + is_identity_view, + materialized: std::cell::OnceCell::new(), + })) } -} -impl<'a> BandIterator<'a> for BandIteratorImpl<'a> { - fn len(&self) -> usize { - // current is 1-based, so remaining calculation needs adjustment - self.bands.len().saturating_sub(self.current - 1) + fn band_data_type(&self, index: usize) -> Option { + if index >= self.num_bands() { + return None; + } + let start = self.bands_list.value_offsets()[self.raster_index] as usize; + let band_row = start + index; + let value = self.band_datatype_array.value(band_row); + BandDataType::try_from_u32(value) } -} -impl ExactSizeIterator for BandIteratorImpl<'_> {} - -/// Implementation of RasterRef for complete raster access -pub struct RasterRefImpl<'a> { - metadata: MetadataRefImpl<'a>, - crs: &'a StringViewArray, - bands: BandsRefImpl<'a>, -} - -impl<'a> RasterRefImpl<'a> { - /// Creates a new RasterRefImpl that provides zero-copy access to the raster at the specified index. 
- /// - /// # Arguments - /// * `raster_struct_array` - The Arrow StructArray containing raster data - /// * `raster_index` - The zero-based index of the raster to access - #[inline(always)] - pub fn new(raster_struct_array: &RasterStructArray<'a>, raster_index: usize) -> Self { - let metadata = MetadataRefImpl { - width_array: raster_struct_array.width_array, - height_array: raster_struct_array.height_array, - upper_left_x_array: raster_struct_array.upper_left_x_array, - upper_left_y_array: raster_struct_array.upper_left_y_array, - scale_x_array: raster_struct_array.scale_x_array, - scale_y_array: raster_struct_array.scale_y_array, - skew_x_array: raster_struct_array.skew_x_array, - skew_y_array: raster_struct_array.skew_y_array, - index: raster_index, - }; - - let bands = BandsRefImpl { - bands_list: raster_struct_array.bands_list, - raster_index, - nodata_array: raster_struct_array.band_nodata_array, - storage_type_array: raster_struct_array.band_storage_type_array, - datatype_array: raster_struct_array.band_datatype_array, - outdb_url_array: raster_struct_array.band_outdb_url_array, - outdb_band_id_array: raster_struct_array.band_outdb_band_id_array, - band_data_array: raster_struct_array.band_data_array, - }; + fn band_outdb_uri(&self, index: usize) -> Option<&str> { + if index >= self.num_bands() { + return None; + } + let start = self.bands_list.value_offsets()[self.raster_index] as usize; + let band_row = start + index; + if self.band_outdb_uri_array.is_null(band_row) { + None + } else { + Some(self.band_outdb_uri_array.value(band_row)) + } + } - Self { - metadata, - crs: raster_struct_array.crs, - bands, + fn band_outdb_format(&self, index: usize) -> Option<&str> { + if index >= self.num_bands() { + return None; + } + let start = self.bands_list.value_offsets()[self.raster_index] as usize; + let band_row = start + index; + if self.band_outdb_format_array.is_null(band_row) { + None + } else { + Some(self.band_outdb_format_array.value(band_row)) } } - pub fn 
crs_str_ref(&self) -> Option<&'a str> { - if self.crs.is_null(self.bands.raster_index) { + fn band_nodata(&self, index: usize) -> Option<&[u8]> { + if index >= self.num_bands() { + return None; + } + let start = self.bands_list.value_offsets()[self.raster_index] as usize; + let band_row = start + index; + if self.band_nodata_array.is_null(band_row) { None } else { - Some(self.crs.value(self.bands.raster_index)) + Some(self.band_nodata_array.value(band_row)) } } -} -impl<'a> RasterRef for RasterRefImpl<'a> { - #[inline(always)] - fn metadata(&self) -> &dyn MetadataRef { - &self.metadata + fn band_name(&self, index: usize) -> Option<&str> { + if index >= self.num_bands() { + return None; + } + let start = self.bands_list.value_offsets()[self.raster_index] as usize; + let band_row = start + index; + if self.band_name_array.is_null(band_row) { + None + } else { + Some(self.band_name_array.value(band_row)) + } } - #[inline(always)] fn crs(&self) -> Option<&str> { self.crs_str_ref() } - #[inline(always)] - fn bands(&self) -> &dyn BandsRef { - &self.bands + fn transform(&self) -> &[f64] { + let start = self.transform_list.value_offsets()[self.raster_index] as usize; + let end = self.transform_list.value_offsets()[self.raster_index + 1] as usize; + assert!( + end - start >= 6, + "transform list must have at least 6 elements for raster {}, got {}", + self.raster_index, + end - start + ); + &self.transform_values.values()[start..start + 6] + } + + fn spatial_dims(&self) -> Vec<&str> { + let offsets = self.spatial_dims_list.value_offsets(); + let start = offsets[self.raster_index] as usize; + let end = offsets[self.raster_index + 1] as usize; + (start..end) + .map(|i| self.spatial_dims_values.value(i)) + .collect() + } + + fn spatial_shape(&self) -> &[i64] { + let offsets = self.spatial_shape_list.value_offsets(); + let start = offsets[self.raster_index] as usize; + let end = offsets[self.raster_index + 1] as usize; + &self.spatial_shape_values.values()[start..end] } } -/// 
Access rasters from the Arrow StructArray +/// Access rasters from the Arrow StructArray. /// -/// This provides efficient, zero-copy access to raster data stored in Arrow format. +/// Provides efficient, zero-copy access to N-D raster data stored in Arrow format. pub struct RasterStructArray<'a> { raster_array: &'a StructArray, - width_array: &'a UInt64Array, - height_array: &'a UInt64Array, - upper_left_x_array: &'a Float64Array, - upper_left_y_array: &'a Float64Array, - scale_x_array: &'a Float64Array, - scale_y_array: &'a Float64Array, - skew_x_array: &'a Float64Array, - skew_y_array: &'a Float64Array, - crs: &'a StringViewArray, + // Top-level fields + crs_array: &'a StringViewArray, + transform_list: &'a ListArray, + transform_values: &'a Float64Array, + spatial_dims_list: &'a ListArray, + spatial_dims_values: &'a StringViewArray, + spatial_shape_list: &'a ListArray, + spatial_shape_values: &'a Int64Array, bands_list: &'a ListArray, - band_nodata_array: &'a BinaryArray, - band_storage_type_array: &'a UInt32Array, + // Band-level fields (flattened across all bands in all rasters) + band_name_array: &'a StringArray, + band_dim_names_list: &'a ListArray, + band_dim_names_values: &'a StringArray, + band_source_shape_list: &'a ListArray, + band_source_shape_values: &'a UInt64Array, band_datatype_array: &'a UInt32Array, - band_outdb_url_array: &'a StringArray, - band_outdb_band_id_array: &'a UInt32Array, + band_nodata_array: &'a BinaryArray, + band_view_list: &'a ListArray, + band_view_source_axis: &'a Int64Array, + band_view_start: &'a Int64Array, + band_view_step: &'a Int64Array, + band_view_steps: &'a Int64Array, + band_outdb_uri_array: &'a StringArray, + band_outdb_format_array: &'a StringViewArray, band_data_array: &'a BinaryViewArray, } impl<'a> RasterStructArray<'a> { - /// Create a new RasterStructArray from an existing StructArray + /// Create a new RasterStructArray from an existing StructArray. 
#[inline] pub fn new(raster_array: &'a StructArray) -> Self { - let crs = raster_array + // Top-level fields + let crs_array = raster_array .column(raster_indices::CRS) .as_any() .downcast_ref::() .unwrap(); + let transform_list = raster_array + .column(raster_indices::TRANSFORM) + .as_any() + .downcast_ref::() + .unwrap(); + let transform_values = transform_list + .values() + .as_any() + .downcast_ref::() + .unwrap(); + let spatial_dims_list = raster_array + .column(raster_indices::SPATIAL_DIMS) + .as_any() + .downcast_ref::() + .unwrap(); + let spatial_dims_values = spatial_dims_list + .values() + .as_any() + .downcast_ref::() + .unwrap(); + let spatial_shape_list = raster_array + .column(raster_indices::SPATIAL_SHAPE) + .as_any() + .downcast_ref::() + .unwrap(); + let spatial_shape_values = spatial_shape_list + .values() + .as_any() + .downcast_ref::() + .unwrap(); - // Extract the metadata arrays for direct access - let metadata_struct = raster_array - .column(raster_indices::METADATA) + // Bands list and nested struct + let bands_list = raster_array + .column(raster_indices::BANDS) .as_any() - .downcast_ref::() + .downcast_ref::() .unwrap(); - let width_array = metadata_struct - .column(metadata_indices::WIDTH) + let bands_struct = bands_list + .values() .as_any() - .downcast_ref::() + .downcast_ref::() .unwrap(); - let height_array = metadata_struct - .column(metadata_indices::HEIGHT) + + // Band-level fields + let band_name_array = bands_struct + .column(band_indices::NAME) .as_any() - .downcast_ref::() + .downcast_ref::() .unwrap(); - let upper_left_x_array = metadata_struct - .column(metadata_indices::UPPERLEFT_X) + let band_dim_names_list = bands_struct + .column(band_indices::DIM_NAMES) .as_any() - .downcast_ref::() + .downcast_ref::() .unwrap(); - let upper_left_y_array = metadata_struct - .column(metadata_indices::UPPERLEFT_Y) + let band_dim_names_values = band_dim_names_list + .values() .as_any() - .downcast_ref::() + .downcast_ref::() .unwrap(); - 
let scale_x_array = metadata_struct - .column(metadata_indices::SCALE_X) + let band_source_shape_list = bands_struct + .column(band_indices::SOURCE_SHAPE) .as_any() - .downcast_ref::() + .downcast_ref::() .unwrap(); - let scale_y_array = metadata_struct - .column(metadata_indices::SCALE_Y) + let band_source_shape_values = band_source_shape_list + .values() .as_any() - .downcast_ref::() + .downcast_ref::() .unwrap(); - let skew_x_array = metadata_struct - .column(metadata_indices::SKEW_X) + let band_datatype_array = bands_struct + .column(band_indices::DATA_TYPE) .as_any() - .downcast_ref::() + .downcast_ref::() .unwrap(); - let skew_y_array = metadata_struct - .column(metadata_indices::SKEW_Y) + let band_nodata_array = bands_struct + .column(band_indices::NODATA) .as_any() - .downcast_ref::() + .downcast_ref::() .unwrap(); - - // Extract the band arrays for direct access - let bands_list = raster_array - .column(raster_indices::BANDS) + let band_view_list = bands_struct + .column(band_indices::VIEW) .as_any() .downcast_ref::() .unwrap(); - let bands_struct = bands_list + let band_view_struct = band_view_list .values() .as_any() .downcast_ref::() .unwrap(); - let band_metadata_struct = bands_struct - .column(band_indices::METADATA) + let band_view_source_axis = band_view_struct + .column(band_view_indices::SOURCE_AXIS) .as_any() - .downcast_ref::() + .downcast_ref::() .unwrap(); - let band_nodata_array = band_metadata_struct - .column(band_metadata_indices::NODATAVALUE) + let band_view_start = band_view_struct + .column(band_view_indices::START) .as_any() - .downcast_ref::() + .downcast_ref::() .unwrap(); - let band_storage_type_array = band_metadata_struct - .column(band_metadata_indices::STORAGE_TYPE) + let band_view_step = band_view_struct + .column(band_view_indices::STEP) .as_any() - .downcast_ref::() + .downcast_ref::() .unwrap(); - let band_datatype_array = band_metadata_struct - .column(band_metadata_indices::DATATYPE) + let band_view_steps = 
band_view_struct + .column(band_view_indices::STEPS) .as_any() - .downcast_ref::() + .downcast_ref::() .unwrap(); - let band_outdb_url_array = band_metadata_struct - .column(band_metadata_indices::OUTDB_URL) + let band_outdb_uri_array = bands_struct + .column(band_indices::OUTDB_URI) .as_any() .downcast_ref::() .unwrap(); - let band_outdb_band_id_array = band_metadata_struct - .column(band_metadata_indices::OUTDB_BAND_ID) + let band_outdb_format_array = bands_struct + .column(band_indices::OUTDB_FORMAT) .as_any() - .downcast_ref::() + .downcast_ref::() .unwrap(); let band_data_array = bands_struct .column(band_indices::DATA) @@ -502,38 +742,45 @@ impl<'a> RasterStructArray<'a> { Self { raster_array, - width_array, - height_array, - upper_left_x_array, - upper_left_y_array, - scale_x_array, - scale_y_array, - skew_x_array, - skew_y_array, - crs, + crs_array, + transform_list, + transform_values, + spatial_dims_list, + spatial_dims_values, + spatial_shape_list, + spatial_shape_values, bands_list, - band_nodata_array, - band_storage_type_array, + band_name_array, + band_dim_names_list, + band_dim_names_values, + band_source_shape_list, + band_source_shape_values, band_datatype_array, - band_outdb_url_array, - band_outdb_band_id_array, + band_nodata_array, + band_view_list, + band_view_source_axis, + band_view_start, + band_view_step, + band_view_steps, + band_outdb_uri_array, + band_outdb_format_array, band_data_array, } } - /// Get the total number of rasters in the array + /// Get the total number of rasters in the array. #[inline(always)] pub fn len(&self) -> usize { self.raster_array.len() } - /// Check if the array is empty + /// Check if the array is empty. #[inline(always)] pub fn is_empty(&self) -> bool { self.raster_array.is_empty() } - /// Get a specific raster by index without consuming the iterator + /// Get a specific raster by index. 
#[inline(always)] pub fn get(&self, index: usize) -> Result, ArrowError> { if index >= self.raster_array.len() { @@ -541,10 +788,35 @@ impl<'a> RasterStructArray<'a> { "Invalid raster index: {index}" ))); } - - Ok(RasterRefImpl::new(self, index)) + Ok(RasterRefImpl { + crs_array: self.crs_array, + transform_list: self.transform_list, + transform_values: self.transform_values, + spatial_dims_list: self.spatial_dims_list, + spatial_dims_values: self.spatial_dims_values, + spatial_shape_list: self.spatial_shape_list, + spatial_shape_values: self.spatial_shape_values, + bands_list: self.bands_list, + band_name_array: self.band_name_array, + band_dim_names_list: self.band_dim_names_list, + band_dim_names_values: self.band_dim_names_values, + band_source_shape_list: self.band_source_shape_list, + band_source_shape_values: self.band_source_shape_values, + band_datatype_array: self.band_datatype_array, + band_nodata_array: self.band_nodata_array, + band_view_list: self.band_view_list, + band_view_source_axis: self.band_view_source_axis, + band_view_start: self.band_view_start, + band_view_step: self.band_view_step, + band_view_steps: self.band_view_steps, + band_outdb_uri_array: self.band_outdb_uri_array, + band_outdb_format_array: self.band_outdb_format_array, + band_data_array: self.band_data_array, + raster_index: index, + }) } + /// Check if a raster at the given index is null. 
#[inline(always)] pub fn is_null(&self, index: usize) -> bool { self.raster_array.is_null(index) @@ -555,10 +827,17 @@ impl<'a> RasterStructArray<'a> { mod tests { use super::*; use crate::builder::RasterBuilder; - use crate::traits::{BandMetadata, RasterMetadata}; - use arrow_schema::DataType; - use sedona_schema::raster::{BandDataType, StorageType}; + use crate::traits::{BandMetadata, RasterMetadata, ViewEntry}; + use arrow_array::{ + types::Int64Type, ArrayRef, ListArray, StructArray, UInt32Array, UInt64Array, + }; + use arrow_buffer::{NullBuffer, OffsetBuffer, ScalarBuffer}; + use arrow_schema::{DataType, Field, Fields}; + use sedona_schema::raster::{ + band_indices, raster_indices, BandDataType, RasterSchema, StorageType, + }; use sedona_testing::rasters::generate_test_rasters; + use std::sync::Arc; #[test] fn test_array_basic_functionality() { @@ -708,44 +987,50 @@ mod tests { assert!(rasters.is_null(1)); } - /// Test that `data_type()` and `storage_type()` return `Err` for invalid values - /// instead of panicking. - #[test] - fn test_invalid_band_metadata_returns_err() { - use arrow_buffer::{OffsetBuffer, ScalarBuffer}; - use sedona_schema::raster::RasterSchema; - use std::sync::Arc; - - // Build a valid single-band raster first + /// Build a single-raster, single-band raster StructArray with an explicit + /// view. Used as the input to the surgery helpers below; callers replace + /// one band-level column to simulate schema corruption. 
+ fn build_explicit_view_raster() -> StructArray { let mut builder = RasterBuilder::new(1); - let metadata = RasterMetadata { - width: 2, - height: 2, - upperleft_x: 0.0, - upperleft_y: 0.0, - scale_x: 1.0, - scale_y: -1.0, - skew_x: 0.0, - skew_y: 0.0, - }; - builder.start_raster(&metadata, None).unwrap(); - let band_meta = BandMetadata { - nodata_value: None, - storage_type: StorageType::InDb, - datatype: BandDataType::UInt8, - outdb_url: None, - outdb_band_id: None, - }; - builder.start_band(band_meta).unwrap(); - builder.band_data_writer().append_value([1u8; 4]); + let transform = [0.0, 1.0, 0.0, 0.0, 0.0, -1.0]; + builder + .start_raster_nd(&transform, &["x"], &[3], None) + .unwrap(); + let view = [ViewEntry { + source_axis: 0, + start: 1, + step: 2, + steps: 3, + }]; + builder + .start_band_with_view( + None, + &["x"], + &[8], + &view, + BandDataType::UInt8, + None, + None, + None, + ) + .unwrap(); + builder + .band_data_writer() + .append_value(vec![0u8, 1, 2, 3, 4, 5, 6, 7]); builder.finish_band().unwrap(); builder.finish_raster().unwrap(); - let valid_array = builder.finish().unwrap(); + builder.finish().unwrap() + } - // Extract original columns from the valid raster - let metadata_col = valid_array.column(raster_indices::METADATA).clone(); - let crs_col = valid_array.column(raster_indices::CRS).clone(); - let bands_list = valid_array + /// Replace a single column of the bands struct, then rebuild the bands + /// list and the top-level raster struct. Schema-shape preserving — this + /// only swaps the array data, never the field type. 
+ fn replace_band_column( + array: &StructArray, + column_index: usize, + new_column: ArrayRef, + ) -> StructArray { + let bands_list = array .column(raster_indices::BANDS) .as_any() .downcast_ref::() @@ -755,75 +1040,701 @@ mod tests { .as_any() .downcast_ref::() .unwrap(); - let orig_band_meta_struct = bands_struct - .column(band_indices::METADATA) - .as_any() - .downcast_ref::() - .unwrap(); - let band_data_col = bands_struct.column(band_indices::DATA).clone(); - // Build tampered band metadata with invalid storage_type=99 and datatype=99 - let DataType::Struct(band_metadata_fields) = RasterSchema::band_metadata_type() else { - panic!("Expected struct type for band metadata"); + let mut columns: Vec = bands_struct.columns().to_vec(); + columns[column_index] = new_column; + let DataType::Struct(band_fields) = RasterSchema::band_type() else { + unreachable!("band_type must be Struct") }; - let tampered_band_metadata = StructArray::new( - band_metadata_fields, + let new_bands_struct = + StructArray::new(band_fields, columns, bands_struct.nulls().cloned()); + + let DataType::List(bands_field) = RasterSchema::bands_type() else { + unreachable!("bands_type must be List") + }; + let new_bands_list = ListArray::new( + bands_field, + bands_list.offsets().clone(), + Arc::new(new_bands_struct), + bands_list.nulls().cloned(), + ); + + let mut top_columns: Vec = array.columns().to_vec(); + top_columns[raster_indices::BANDS] = Arc::new(new_bands_list); + let raster_fields = RasterSchema::fields(); + StructArray::new( + Fields::from(raster_fields.to_vec()), + top_columns, + array.nulls().cloned(), + ) + } + + /// Rebuild the band view list with hand-rolled entries. `entries[i]` + /// supplies all four `(source_axis, start, step, steps)` Int64 values + /// for band-row `i`. `nulls` controls per-row validity bits — `None` + /// means every row is non-null. 
+ fn make_band_view_list( + entries: Vec>, + nulls: Option>, + ) -> ArrayRef { + let mut offsets: Vec = vec![0]; + let mut sa: Vec = vec![]; + let mut start: Vec = vec![]; + let mut step: Vec = vec![]; + let mut steps: Vec = vec![]; + for row in &entries { + for &(a, s, k, n) in row { + sa.push(a); + start.push(s); + step.push(k); + steps.push(n); + } + offsets.push(sa.len() as i32); + } + let view_struct_fields = Fields::from(vec![ + Field::new("source_axis", DataType::Int64, false), + Field::new("start", DataType::Int64, false), + Field::new("step", DataType::Int64, false), + Field::new("steps", DataType::Int64, false), + ]); + let view_struct = StructArray::new( + view_struct_fields, vec![ - orig_band_meta_struct - .column(band_metadata_indices::NODATAVALUE) - .clone(), - Arc::new(UInt32Array::from(vec![99u32])), // invalid storage_type - Arc::new(UInt32Array::from(vec![99u32])), // invalid datatype - orig_band_meta_struct - .column(band_metadata_indices::OUTDB_URL) - .clone(), - orig_band_meta_struct - .column(band_metadata_indices::OUTDB_BAND_ID) - .clone(), + Arc::new(arrow_array::PrimitiveArray::::from(sa)) as ArrayRef, + Arc::new(arrow_array::PrimitiveArray::::from(start)) as ArrayRef, + Arc::new(arrow_array::PrimitiveArray::::from(step)) as ArrayRef, + Arc::new(arrow_array::PrimitiveArray::::from(steps)) as ArrayRef, ], None, ); + let DataType::List(view_field) = RasterSchema::view_type() else { + unreachable!() + }; + let null_buf = nulls.map(NullBuffer::from); + Arc::new(ListArray::new( + view_field, + OffsetBuffer::new(ScalarBuffer::from(offsets)), + Arc::new(view_struct), + null_buf, + )) + } - // Rebuild band struct - let DataType::Struct(band_fields) = RasterSchema::band_type() else { - panic!("Expected struct type for band"); + // ---- Critical #1: malformed view entries ---- + + #[test] + fn band_returns_none_when_view_has_negative_steps() { + // Schema accepts negative Int64 in the steps field, but validate_view + // rejects it. 
The reader path must surface that as None — never + // hand back a band whose visible_shape would underflow. + let array = build_explicit_view_raster(); + let bad_view = make_band_view_list(vec![vec![(0, 0, 1, -1)]], None); + let mutated = replace_band_column(&array, band_indices::VIEW, bad_view); + let rasters = RasterStructArray::new(&mutated); + assert!(rasters.get(0).unwrap().band(0).is_none()); + } + + #[test] + fn band_returns_none_when_view_source_axis_out_of_range() { + let array = build_explicit_view_raster(); + let bad_view = make_band_view_list(vec![vec![(5, 0, 1, 3)]], None); + let mutated = replace_band_column(&array, band_indices::VIEW, bad_view); + let rasters = RasterStructArray::new(&mutated); + assert!(rasters.get(0).unwrap().band(0).is_none()); + } + + #[test] + fn band_returns_none_when_view_length_mismatches_source_shape() { + // source_shape has 1 dim but view encodes 2 entries. + let array = build_explicit_view_raster(); + let bad_view = make_band_view_list(vec![vec![(0, 0, 1, 3), (0, 0, 1, 3)]], None); + let mutated = replace_band_column(&array, band_indices::VIEW, bad_view); + let rasters = RasterStructArray::new(&mutated); + assert!(rasters.get(0).unwrap().band(0).is_none()); + } + + #[test] + fn band_returns_none_when_view_has_duplicate_source_axis() { + // Need a 2-D source_shape so two entries with source_axis=0 are + // legal in length but illegal as a permutation. 
+ let mut builder = RasterBuilder::new(1); + let transform = [0.0, 1.0, 0.0, 0.0, 0.0, -1.0]; + builder.start_raster_nd(&transform, &[], &[], None).unwrap(); + builder + .start_band_with_view( + None, + &["a", "b"], + &[2, 3], + &[ + ViewEntry { + source_axis: 0, + start: 0, + step: 1, + steps: 2, + }, + ViewEntry { + source_axis: 1, + start: 0, + step: 1, + steps: 3, + }, + ], + BandDataType::UInt8, + None, + None, + None, + ) + .unwrap(); + builder.band_data_writer().append_value(vec![0u8; 6]); + builder.finish_band().unwrap(); + builder.finish_raster().unwrap(); + let array = builder.finish().unwrap(); + + let bad_view = make_band_view_list(vec![vec![(0, 0, 1, 2), (0, 0, 1, 2)]], None); + let mutated = replace_band_column(&array, band_indices::VIEW, bad_view); + let rasters = RasterStructArray::new(&mutated); + assert!(rasters.get(0).unwrap().band(0).is_none()); + } + + // ---- Critical #2: bad data_type discriminant ---- + + #[test] + fn band_and_band_data_type_return_none_for_unknown_discriminant() { + let array = build_explicit_view_raster(); + let bad_dtype: ArrayRef = Arc::new(UInt32Array::from(vec![0xFFu32])); + let mutated = replace_band_column(&array, band_indices::DATA_TYPE, bad_dtype); + let rasters = RasterStructArray::new(&mutated); + let r = rasters.get(0).unwrap(); + assert!(r.band(0).is_none()); + assert!(r.band_data_type(0).is_none()); + } + + // empty source_shape + + #[test] + fn band_returns_none_when_source_shape_is_empty() { + let array = build_explicit_view_raster(); + // Replace source_shape with a single empty list row. 
+ let DataType::List(ss_field) = RasterSchema::source_shape_type() else { + unreachable!() }; - let tampered_band_struct = StructArray::new( - band_fields, - vec![Arc::new(tampered_band_metadata), band_data_col], + let empty_source_shape = ListArray::new( + ss_field, + OffsetBuffer::new(ScalarBuffer::from(vec![0i32, 0])), + Arc::new(UInt64Array::from(Vec::::new())), None, ); + let mutated = replace_band_column( + &array, + band_indices::SOURCE_SHAPE, + Arc::new(empty_source_shape), + ); + let rasters = RasterStructArray::new(&mutated); + assert!(rasters.get(0).unwrap().band(0).is_none()); + } + + // ---- Stride composition overflow guards ---- - // Rebuild bands list - let DataType::List(band_field) = RasterSchema::bands_type() else { - panic!("Expected list type for bands"); + /// Build a band source_shape list with hand-rolled u64 entries so tests + /// can inject values that the builder's writer-side checks would refuse. + fn make_band_source_shape_list(rows: Vec>) -> ArrayRef { + let mut offsets: Vec = vec![0]; + let mut values: Vec = vec![]; + for row in &rows { + values.extend_from_slice(row); + offsets.push(values.len() as i32); + } + let DataType::List(field) = RasterSchema::source_shape_type() else { + unreachable!() }; - let tampered_bands_list = ListArray::new( - band_field, - OffsetBuffer::new(ScalarBuffer::from(vec![0i32, 1])), - Arc::new(tampered_band_struct), + Arc::new(ListArray::new( + field, + OffsetBuffer::new(ScalarBuffer::from(offsets)), + Arc::new(UInt64Array::from(values)), None, + )) + } + + #[test] + fn band_returns_none_when_source_shape_value_exceeds_i64_max() { + // A u64 value > i64::MAX must not silently wrap to a negative i64 + // during stride composition. The 1-D fixture is replaced with a + // 2-D `[1, u64::MAX]`, view shaped to match, with steps=0 on the + // pathological axis so validate_view accepts it. 
+ let array = build_explicit_view_raster(); + let new_source_shape = make_band_source_shape_list(vec![vec![1u64, u64::MAX]]); + let mutated_ss = replace_band_column(&array, band_indices::SOURCE_SHAPE, new_source_shape); + let new_view = make_band_view_list(vec![vec![(0, 0, 1, 1), (1, 0, 1, 0)]], None); + let mutated = replace_band_column(&mutated_ss, band_indices::VIEW, new_view); + let rasters = RasterStructArray::new(&mutated); + assert!(rasters.get(0).unwrap().band(0).is_none()); + } + + #[test] + fn band_returns_none_when_source_strides_product_overflows() { + // dtype_size × Π source_shape[j>k] must not silently wrap. With a + // 3-D source_shape of `[1, 1<<32, 1<<32]` the product (1<<32) × + // (1<<32) = 1<<64 overflows i64 in the source-stride build. + let array = build_explicit_view_raster(); + let new_source_shape = + make_band_source_shape_list(vec![vec![1u64, 1u64 << 32, 1u64 << 32]]); + let mutated_ss = replace_band_column(&array, band_indices::SOURCE_SHAPE, new_source_shape); + // Pad the view to 3 entries; steps=0 on the giant axes keeps + // validate_view's start/last checks out of the casts-from-u64 path. + let new_view = + make_band_view_list(vec![vec![(0, 0, 1, 1), (1, 0, 1, 0), (2, 0, 1, 0)]], None); + let mutated = replace_band_column(&mutated_ss, band_indices::VIEW, new_view); + let rasters = RasterStructArray::new(&mutated); + assert!(rasters.get(0).unwrap().band(0).is_none()); + } + + #[test] + fn band_returns_none_when_view_step_times_source_stride_overflows() { + // `validate_view` bounds (steps-1)*step + start on the SOURCE axis + // but doesn't bound v.step × cumulative_byte_stride. A view with a + // small visible region but a step large enough to wrap the byte + // stride must be rejected at construction. + // + // Source `[3, 1<<60]`, dtype_size=1 (UInt8) → src_stride[0] = 1<<60. + // View on axis 0 with step=8 makes byte_strides[0] = 8 × (1<<60) = + // 1<<63 which overflows i64. 
The view itself only walks 1 step on + // that axis so validate_view's (steps-1)*step bound holds. + let array = build_explicit_view_raster(); + let new_source_shape = make_band_source_shape_list(vec![vec![3u64, 1u64 << 60]]); + let mutated_ss = replace_band_column(&array, band_indices::SOURCE_SHAPE, new_source_shape); + let new_view = make_band_view_list(vec![vec![(0, 0, 8, 1), (1, 0, 1, 1)]], None); + let mutated = replace_band_column(&mutated_ss, band_indices::VIEW, new_view); + let rasters = RasterStructArray::new(&mutated); + assert!(rasters.get(0).unwrap().band(0).is_none()); + } + + // ---- Combined-view test: 2-D transpose + axis reversal ---- + + #[test] + fn contiguous_data_transpose_and_reverse_combined() { + // Source is a 3-row × 4-col UInt8 array in C-order: + // 0x00 0x01 0x02 0x03 + // 0x04 0x05 0x06 0x07 + // 0x08 0x09 0x0a 0x0b + // View transposes (new_axis_0 = source x, new_axis_1 = source y) + // and reverses the new outer axis. Expected layout in C-order + // over the visible region: + // x=3: (y=0,1,2) = 03 07 0b + // x=2: = 02 06 0a + // x=1: = 01 05 09 + // x=0: = 00 04 08 + let mut builder = RasterBuilder::new(1); + let transform = [0.0, 1.0, 0.0, 0.0, 0.0, -1.0]; + // Top-level spatial_dims match the band's visible-axis names so + // finish_raster's dim-name lookup finds equal sizes. + builder + .start_raster_nd(&transform, &["x", "y"], &[4, 3], None) + .unwrap(); + // Source axis 0 = y (size 3, outer in C-order); source axis 1 = x + // (size 4, inner). View transposes them and reverses x. 
+ let view = [ + ViewEntry { + source_axis: 1, + start: 3, + step: -1, + steps: 4, + }, + ViewEntry { + source_axis: 0, + start: 0, + step: 1, + steps: 3, + }, + ]; + builder + .start_band_with_view( + None, + &["x", "y"], + &[3, 4], + &view, + BandDataType::UInt8, + None, + None, + None, + ) + .unwrap(); + builder.band_data_writer().append_value(vec![ + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, + ]); + builder.finish_band().unwrap(); + builder.finish_raster().unwrap(); + let array = builder.finish().unwrap(); + let rasters = RasterStructArray::new(&array); + let raster = rasters.get(0).unwrap(); + let band = raster.band(0).unwrap(); + let bytes = band.contiguous_data().unwrap(); + assert_eq!( + &*bytes, + &[0x03, 0x07, 0x0b, 0x02, 0x06, 0x0a, 0x01, 0x05, 0x09, 0x00, 0x04, 0x08][..] ); + } - // Rebuild the top-level raster struct - let tampered_raster = StructArray::new( - RasterSchema::fields(), - vec![metadata_col, crs_col, Arc::new(tampered_bands_list)], - None, + // ---- Important #7: direct fast-path tests ---- + + #[test] + fn data_materializes_non_identity_view_into_row_major_bytes() { + // build_explicit_view_raster() writes a 1-D source [0..8) with the + // view (start=1, step=2, steps=3), so the visible bytes are + // [1, 3, 5]. `.data()` on a non-identity view must return those + // materialized bytes, not the raw source column. + let array = build_explicit_view_raster(); + let rasters = RasterStructArray::new(&array); + let r = rasters.get(0).unwrap(); + let band = r.band(0).unwrap(); + assert_eq!(band.data(), &[1u8, 3, 5]); + } + + #[test] + fn data_returns_same_slice_across_repeated_calls() { + // The OnceCell-backed materialization must hand back the same + // borrowed slice on every call (proves we're not re-materializing). 
+ let array = build_explicit_view_raster(); + let rasters = RasterStructArray::new(&array); + let r = rasters.get(0).unwrap(); + let band = r.band(0).unwrap(); + let first = band.data().as_ptr(); + let second = band.data().as_ptr(); + assert_eq!(first, second); + } + + #[test] + fn data_and_contiguous_data_agree_on_non_identity_view() { + // `.data()` and `.contiguous_data()` must produce identical bytes + // for a non-identity view — the two share the strided-copy helper. + let array = build_explicit_view_raster(); + let rasters = RasterStructArray::new(&array); + let r = rasters.get(0).unwrap(); + let band = r.band(0).unwrap(); + let from_data = band.data().to_vec(); + let from_contiguous = band.contiguous_data().unwrap().into_owned(); + assert_eq!(from_data, from_contiguous); + } + + #[test] + fn raster_ref_fast_paths_return_expected_values() { + // Single 2-band raster: band 0 has explicit values for nodata, + // outdb_uri, outdb_format; band 1 has all-nullable fields null. + let mut builder = RasterBuilder::new(1); + let transform = [0.0, 1.0, 0.0, 0.0, 0.0, -1.0]; + builder + .start_raster_nd(&transform, &["x", "y"], &[3, 2], None) + .unwrap(); + builder + .start_band_nd( + Some("a"), + &["y", "x"], + &[2, 3], + BandDataType::UInt16, + Some(&[0xFFu8, 0xFE]), + Some("s3://bucket/a.tif"), + Some("GTiff"), + ) + .unwrap(); + builder.band_data_writer().append_value(vec![0u8; 12]); + builder.finish_band().unwrap(); + builder + .start_band_nd( + Some("b"), + &["y", "x"], + &[2, 3], + BandDataType::Float32, + None, + None, + None, + ) + .unwrap(); + builder.band_data_writer().append_value(vec![0u8; 24]); + builder.finish_band().unwrap(); + builder.finish_raster().unwrap(); + let array = builder.finish().unwrap(); + let rasters = RasterStructArray::new(&array); + let r = rasters.get(0).unwrap(); + + // Bounds: out-of-range indices yield None on every fast path. 
+ assert!(r.band_data_type(2).is_none()); + assert!(r.band_outdb_uri(2).is_none()); + assert!(r.band_outdb_format(2).is_none()); + assert!(r.band_nodata(2).is_none()); + + // Band 0 — non-null values. + assert_eq!(r.band_data_type(0), Some(BandDataType::UInt16)); + assert_eq!(r.band_outdb_uri(0), Some("s3://bucket/a.tif")); + assert_eq!(r.band_outdb_format(0), Some("GTiff")); + assert_eq!(r.band_nodata(0), Some(&[0xFFu8, 0xFE][..])); + + // Band 1 — null fields. + assert_eq!(r.band_data_type(1), Some(BandDataType::Float32)); + assert!(r.band_outdb_uri(1).is_none()); + assert!(r.band_outdb_format(1).is_none()); + assert!(r.band_nodata(1).is_none()); + + // Cross-check against the BandRef slow path. + let band0 = r.band(0).unwrap(); + assert_eq!(band0.data_type(), BandDataType::UInt16); + assert_eq!(band0.outdb_uri(), Some("s3://bucket/a.tif")); + assert_eq!(band0.outdb_format(), Some("GTiff")); + assert_eq!(band0.nodata(), Some(&[0xFFu8, 0xFE][..])); + + // bands() view: 1-based band(N), len, is_empty, iter — same shape as + // pre-N-D callers expect. Exercise via the concrete type and via a + // `&dyn RasterRef` to confirm both dispatch paths work. + let bands = r.bands(); + assert_eq!(bands.len(), 2); + assert!(!bands.is_empty()); + assert_eq!(bands.band(1).unwrap().data_type(), BandDataType::UInt16); + assert_eq!(bands.band(2).unwrap().data_type(), BandDataType::Float32); + assert!(bands.band(0).is_err()); // 0 is invalid (1-based) + assert!(bands.band(3).is_err()); // out of range + assert_eq!(bands.iter().count(), 2); + let dyn_r: &dyn RasterRef = &r; + assert_eq!(dyn_r.bands().len(), 2); + + // metadata() shim: concrete RasterMetadata/BandMetadata values. 
+ let m = r.metadata(); + assert_eq!(m.width(), 3); + assert_eq!(m.height(), 2); + assert_eq!(m.upper_left_x(), 0.0); + assert_eq!(m.scale_x(), 1.0); + let b0 = r.band(0).unwrap(); + let bm0 = b0.metadata(); + assert_eq!(bm0.data_type().unwrap(), BandDataType::UInt16); + assert_eq!( + bm0.storage_type().unwrap(), + sedona_schema::raster::StorageType::InDb ); + assert_eq!(bm0.nodata_value(), Some(&[0xFFu8, 0xFE][..])); + // Band 0 is InDb (has bytes), so outdb_* are hidden via the shim + // even though the row carries an outdb_uri hint. + assert!(bm0.outdb_url().is_none()); + assert!(bm0.outdb_band_id().is_none()); + } - // Read back and verify that data_type() and storage_type() return Err - let rasters = RasterStructArray::new(&tampered_raster); - let raster = rasters.get(0).unwrap(); - let band = raster.bands().band(1).unwrap(); - let band_meta = band.metadata(); + // ---- Important #9: multi-band, multi-raster mixed identity/explicit ---- - let storage_err = band_meta.storage_type().unwrap_err(); - assert!(storage_err.to_string().contains("Unknown storage type: 99")); + #[test] + fn multi_raster_mixed_identity_and_explicit_views() { + // Two rasters. Raster 0 has 3 bands (identity, explicit slice, + // identity). Raster 1 has 2 bands (explicit broadcast, identity). + // bands_list.value_offsets() must correctly route each band. 
+ let mut builder = RasterBuilder::new(2); + let transform = [0.0, 1.0, 0.0, 0.0, 0.0, -1.0]; + + // Raster 0 + builder + .start_raster_nd(&transform, &["x"], &[3], None) + .unwrap(); + builder + .start_band_nd(None, &["x"], &[3], BandDataType::UInt8, None, None, None) + .unwrap(); + builder.band_data_writer().append_value(vec![10u8, 20, 30]); + builder.finish_band().unwrap(); + builder + .start_band_with_view( + None, + &["x"], + &[8], + &[ViewEntry { + source_axis: 0, + start: 1, + step: 2, + steps: 3, + }], + BandDataType::UInt8, + None, + None, + None, + ) + .unwrap(); + builder + .band_data_writer() + .append_value(vec![0u8, 1, 2, 3, 4, 5, 6, 7]); + builder.finish_band().unwrap(); + builder + .start_band_nd(None, &["x"], &[3], BandDataType::UInt8, None, None, None) + .unwrap(); + builder + .band_data_writer() + .append_value(vec![100u8, 101, 102]); + builder.finish_band().unwrap(); + builder.finish_raster().unwrap(); - let data_type_err = band_meta.data_type().unwrap_err(); - assert!(data_type_err - .to_string() - .contains("Unknown band data type: 99")); + // Raster 1 + builder + .start_raster_nd(&transform, &["x"], &[4], None) + .unwrap(); + builder + .start_band_with_view( + None, + &["x"], + &[1], + &[ViewEntry { + source_axis: 0, + start: 0, + step: 0, + steps: 4, + }], + BandDataType::UInt8, + None, + None, + None, + ) + .unwrap(); + builder.band_data_writer().append_value(vec![42u8]); + builder.finish_band().unwrap(); + builder + .start_band_nd(None, &["x"], &[4], BandDataType::UInt8, None, None, None) + .unwrap(); + builder.band_data_writer().append_value(vec![1u8, 2, 3, 4]); + builder.finish_band().unwrap(); + builder.finish_raster().unwrap(); + + let array = builder.finish().unwrap(); + let rasters = RasterStructArray::new(&array); + + // Raster 0 bands: identity (3), slice (3), identity (3). 
+ let r0 = rasters.get(0).unwrap(); + assert_eq!(r0.num_bands(), 3); + assert_eq!(r0.band(0).unwrap().shape(), &[3]); + assert_eq!( + &*r0.band(0).unwrap().contiguous_data().unwrap(), + &[10u8, 20, 30] + ); + assert_eq!(r0.band(1).unwrap().shape(), &[3]); + assert_eq!( + &*r0.band(1).unwrap().contiguous_data().unwrap(), + &[1u8, 3, 5] + ); + assert_eq!(r0.band(2).unwrap().shape(), &[3]); + assert_eq!( + &*r0.band(2).unwrap().contiguous_data().unwrap(), + &[100u8, 101, 102] + ); + + // Raster 1 bands: broadcast (4 copies of 42), identity (4). + let r1 = rasters.get(1).unwrap(); + assert_eq!(r1.num_bands(), 2); + assert_eq!(r1.band(0).unwrap().shape(), &[4]); + assert_eq!( + &*r1.band(0).unwrap().contiguous_data().unwrap(), + &[42u8, 42, 42, 42] + ); + assert_eq!(r1.band(1).unwrap().shape(), &[4]); + assert_eq!( + &*r1.band(1).unwrap().contiguous_data().unwrap(), + &[1u8, 2, 3, 4] + ); + + // Fast paths must honour the same offsets. + assert_eq!(r0.band_data_type(1), Some(BandDataType::UInt8)); + assert_eq!(r1.band_data_type(0), Some(BandDataType::UInt8)); + assert_eq!(r1.band_data_type(1), Some(BandDataType::UInt8)); + } + + // null raster row, fast path + + #[test] + fn null_raster_row_fast_paths_return_none_after_non_null() { + // A non-null raster precedes the null one, so the underlying flat + // band arrays are non-empty. A naive fast path that forgets the + // bands_list.value_offsets() routing would return *raster 0's* + // band 0 metadata when asked for raster 1's band 0 — a real bug + // that a single-null-raster fixture cannot detect. 
+ let mut builder = RasterBuilder::new(2); + let transform = [0.0, 1.0, 0.0, 0.0, 0.0, -1.0]; + builder + .start_raster_nd(&transform, &["x"], &[3], None) + .unwrap(); + builder + .start_band_nd( + Some("a"), + &["x"], + &[3], + BandDataType::UInt16, + Some(&[0xFFu8, 0xFE]), + Some("s3://bucket/a.tif"), + Some("GTiff"), + ) + .unwrap(); + builder.band_data_writer().append_value(vec![0u8; 6]); + builder.finish_band().unwrap(); + builder.finish_raster().unwrap(); + builder.append_null().unwrap(); + let array = builder.finish().unwrap(); + let rasters = RasterStructArray::new(&array); + + // Sanity: raster 0 still resolves correctly. + let r0 = rasters.get(0).unwrap(); + assert_eq!(r0.band_data_type(0), Some(BandDataType::UInt16)); + assert_eq!(r0.band_outdb_uri(0), Some("s3://bucket/a.tif")); + + // Raster 1 is null with zero bands. Every per-band lookup is + // out of range and must return None even though the flat + // underlying arrays still hold raster 0's data. + assert!(rasters.is_null(1)); + let r1 = rasters.get(1).unwrap(); + assert_eq!(r1.num_bands(), 0); + assert!(r1.band(0).is_none()); + assert!(r1.band_data_type(0).is_none()); + assert!(r1.band_outdb_uri(0).is_none()); + assert!(r1.band_outdb_format(0).is_none()); + assert!(r1.band_nodata(0).is_none()); + } + + // ---- Fast-path / band(i) divergence on a corrupt view ---- + + #[test] + fn fast_paths_return_columnar_values_when_band_is_corrupt() { + // band(i) goes through validate_view and returns None for a + // malformed view; the columnar fast paths read their fields + // directly without consulting the view at all. Pin down that + // contract so a future reader doesn't accidentally couple them + // (or "fix" the divergence in either direction without us + // noticing). Also catches a regression where a fast path would + // panic instead of returning the underlying value. 
+ let mut builder = RasterBuilder::new(1); + let transform = [0.0, 1.0, 0.0, 0.0, 0.0, -1.0]; + builder + .start_raster_nd(&transform, &["x"], &[3], None) + .unwrap(); + builder + .start_band_with_view( + Some("a"), + &["x"], + &[8], + &[ViewEntry { + source_axis: 0, + start: 1, + step: 2, + steps: 3, + }], + BandDataType::UInt32, + Some(&[0u8, 0, 0, 0]), + Some("s3://bucket/a.tif"), + Some("GTiff"), + ) + .unwrap(); + builder.band_data_writer().append_value(vec![0u8; 32]); + builder.finish_band().unwrap(); + builder.finish_raster().unwrap(); + let array = builder.finish().unwrap(); + + let bad_view = make_band_view_list(vec![vec![(0, 0, 1, -1)]], None); + let mutated = replace_band_column(&array, band_indices::VIEW, bad_view); + let rasters = RasterStructArray::new(&mutated); + let r = rasters.get(0).unwrap(); + + // band(i) rejects on validate_view. + assert!(r.band(0).is_none()); + + // Fast paths still surface the underlying columnar values — + // they don't validate the view, by design. Locking that in. 
+ assert_eq!(r.band_data_type(0), Some(BandDataType::UInt32)); + assert_eq!(r.band_outdb_uri(0), Some("s3://bucket/a.tif")); + assert_eq!(r.band_outdb_format(0), Some("GTiff")); + assert_eq!(r.band_nodata(0), Some(&[0u8, 0, 0, 0][..])); } } diff --git a/rust/sedona-raster/src/builder.rs b/rust/sedona-raster/src/builder.rs index 3db236cb4..369ddfff1 100644 --- a/rust/sedona-raster/src/builder.rs +++ b/rust/sedona-raster/src/builder.rs @@ -17,18 +17,18 @@ use arrow_array::{ builder::{ - BinaryBuilder, BinaryViewBuilder, BooleanBuilder, Float64Builder, StringBuilder, - StringViewBuilder, UInt32Builder, UInt64Builder, + ArrayBuilder, BinaryBuilder, BinaryViewBuilder, BooleanBuilder, Float64Builder, + Int64Builder, StringBuilder, StringViewBuilder, UInt32Builder, UInt64Builder, }, Array, ArrayRef, ListArray, StructArray, }; -use arrow_buffer::{OffsetBuffer, ScalarBuffer}; +use arrow_buffer::{NullBuffer, OffsetBuffer, ScalarBuffer}; use arrow_schema::{ArrowError, DataType}; use std::sync::Arc; -use sedona_schema::raster::RasterSchema; +use sedona_schema::raster::{BandDataType, RasterSchema}; -use crate::traits::{BandMetadata, MetadataRef}; +use crate::traits::{validate_view, visible_shape_from_view, BandMetadata, MetadataRef, ViewEntry}; /// Builder for constructing raster arrays with zero-copy band data writing /// @@ -75,267 +75,700 @@ use crate::traits::{BandMetadata, MetadataRef}; /// // Finish the raster /// builder.finish_raster().unwrap(); /// -/// // Finish building and get the StructArray +/// // Get the final StructArray /// let raster_array = builder.finish().unwrap(); /// ``` pub struct RasterBuilder { - // Metadata fields - width: UInt64Builder, - height: UInt64Builder, - upper_left_x: Float64Builder, - upper_left_y: Float64Builder, - scale_x: Float64Builder, - scale_y: Float64Builder, - skew_x: Float64Builder, - skew_y: Float64Builder, - - // CRS field + // Top-level raster fields crs: StringViewBuilder, - - // Band metadata fields - band_nodata: 
BinaryBuilder, - band_storage_type: UInt32Builder, + transform_values: Float64Builder, + transform_offsets: Vec, + spatial_dims_values: StringViewBuilder, + spatial_dims_offsets: Vec, + spatial_shape_values: Int64Builder, + spatial_shape_offsets: Vec, + + // Band fields (flattened across all bands) + band_name: StringBuilder, + band_dim_names_values: StringBuilder, + band_dim_names_offsets: Vec, + band_shape_values: UInt64Builder, + band_shape_offsets: Vec, band_datatype: UInt32Builder, - band_outdb_url: StringBuilder, - band_outdb_band_id: UInt32Builder, - - // Band data field + band_nodata: BinaryBuilder, + // VIEW field — one entry per visible dimension per band. Stored as four + // parallel Int64 columns + a List offset vector; assembled into a + // `ListArray>` in `finish()`. + band_view_source_axis_values: Int64Builder, + band_view_start_values: Int64Builder, + band_view_step_values: Int64Builder, + band_view_steps_values: Int64Builder, + band_view_offsets: Vec, + // Per-band validity for the view list. `false` means the row is null — + // the canonical representation of an identity view. `true` means the row + // carries an explicit view in the four parallel value builders. + band_view_validity: Vec, + band_outdb_uri: StringBuilder, + band_outdb_format: StringViewBuilder, band_data: BinaryViewBuilder, // List structure tracking band_offsets: Vec, // Track where each raster's bands start/end current_band_count: i32, // Track bands in current raster - raster_validity: BooleanBuilder, // Track which rasters are null + // Current raster state (needed for start_band_2d) + current_width: u64, + current_height: u64, + + // Per-raster validation state: spatial dims/shape and recorded bands so + // finish_raster can check every band matches the top-level spatial grid. 
+ current_spatial_dims: Vec, + current_spatial_shape: Vec, + current_raster_bands: Vec<(Vec, Vec)>, + + // Track band_data count at the start of each band for finish_band validation + band_data_count_at_start: usize, + + raster_validity: BooleanBuilder, } impl RasterBuilder { - /// Create a new raster builder with the specified capacity + /// Create a new raster builder with the specified capacity. pub fn new(capacity: usize) -> Self { Self { - // Metadata builders - width: UInt64Builder::with_capacity(capacity), - height: UInt64Builder::with_capacity(capacity), - upper_left_x: Float64Builder::with_capacity(capacity), - upper_left_y: Float64Builder::with_capacity(capacity), - scale_x: Float64Builder::with_capacity(capacity), - scale_y: Float64Builder::with_capacity(capacity), - skew_x: Float64Builder::with_capacity(capacity), - skew_y: Float64Builder::with_capacity(capacity), - - // CRS builder crs: StringViewBuilder::with_capacity(capacity), - - // Band builders - estimate some bands per raster - // The capacity is at raster level, but each raster has multiple bands and - // are large. 
We may want to add an optional parameter to control expected - // bands per raster or even band size in the future - band_nodata: BinaryBuilder::with_capacity(capacity, capacity), - band_storage_type: UInt32Builder::with_capacity(capacity), + transform_values: Float64Builder::with_capacity(capacity * 6), + transform_offsets: vec![0], + spatial_dims_values: StringViewBuilder::with_capacity(capacity * 2), + spatial_dims_offsets: vec![0], + spatial_shape_values: Int64Builder::with_capacity(capacity * 2), + spatial_shape_offsets: vec![0], + + band_name: StringBuilder::with_capacity(capacity, capacity), + band_dim_names_values: StringBuilder::with_capacity(capacity * 2, capacity * 4), + band_dim_names_offsets: vec![0], + band_shape_values: UInt64Builder::with_capacity(capacity * 2), + band_shape_offsets: vec![0], band_datatype: UInt32Builder::with_capacity(capacity), - band_outdb_url: StringBuilder::with_capacity(capacity, capacity), - band_outdb_band_id: UInt32Builder::with_capacity(capacity), + band_nodata: BinaryBuilder::with_capacity(capacity, capacity), + band_view_source_axis_values: Int64Builder::with_capacity(capacity * 2), + band_view_start_values: Int64Builder::with_capacity(capacity * 2), + band_view_step_values: Int64Builder::with_capacity(capacity * 2), + band_view_steps_values: Int64Builder::with_capacity(capacity * 2), + band_view_offsets: vec![0], + band_view_validity: Vec::with_capacity(capacity), + band_outdb_uri: StringBuilder::with_capacity(capacity, capacity), + band_outdb_format: StringViewBuilder::with_capacity(capacity), band_data: BinaryViewBuilder::with_capacity(capacity), - // List tracking band_offsets: vec![0], current_band_count: 0, + current_width: 0, + current_height: 0, + + current_spatial_dims: Vec::new(), + current_spatial_shape: Vec::new(), + current_raster_bands: Vec::new(), + + band_data_count_at_start: 0, - // Raster-level validity (keeps track of null rasters) raster_validity: BooleanBuilder::with_capacity(capacity), } } - /// 
Start a new raster with metadata and optional CRS - pub fn start_raster( + /// Start a new raster with explicit N-D parameters. + /// + /// `transform` must be a 6-element GDAL GeoTransform: + /// `[origin_x, scale_x, skew_x, origin_y, skew_y, scale_y]` + /// + /// `spatial_dims` names the raster-level spatial dimensions (today always + /// length 2, e.g. `["x","y"]`). `spatial_shape` gives their sizes in the + /// same order. Every band added to this raster must contain each name in + /// `spatial_dims` within its own `dim_names`, with matching size. + pub fn start_raster_nd( &mut self, - metadata: &dyn MetadataRef, + transform: &[f64; 6], + spatial_dims: &[&str], + spatial_shape: &[i64], crs: Option<&str>, ) -> Result<(), ArrowError> { - self.append_metadata_from_ref(metadata)?; - self.append_crs(crs)?; + if spatial_dims.len() != spatial_shape.len() { + return Err(ArrowError::InvalidArgumentError(format!( + "spatial_dims.len() ({}) must equal spatial_shape.len() ({})", + spatial_dims.len(), + spatial_shape.len() + ))); + } + + // Transform + for &v in transform { + self.transform_values.append_value(v); + } + let next = *self.transform_offsets.last().unwrap() + 6; + self.transform_offsets.push(next); + + // Spatial dims + shape + for d in spatial_dims { + self.spatial_dims_values.append_value(d); + } + let next = *self.spatial_dims_offsets.last().unwrap() + spatial_dims.len() as i32; + self.spatial_dims_offsets.push(next); + + for &s in spatial_shape { + self.spatial_shape_values.append_value(s); + } + let next = *self.spatial_shape_offsets.last().unwrap() + spatial_shape.len() as i32; + self.spatial_shape_offsets.push(next); + + // CRS + match crs { + Some(crs_data) => self.crs.append_value(crs_data), + None => self.crs.append_null(), + } - // Reset band count for this raster self.current_band_count = 0; + self.current_spatial_dims = spatial_dims.iter().map(|s| s.to_string()).collect(); + self.current_spatial_shape = spatial_shape.to_vec(); + 
self.current_raster_bands.clear(); + // Reset legacy current_width/current_height (read by start_band_2d and + // start_band; start_raster_2d sets them after this call). Callers using + // this direct entry point drive their own shapes via start_band_nd. + self.current_width = 0; + self.current_height = 0; + + Ok(()) + } + /// Convenience: start a 2-D raster with positional geotransform parameters. + /// Sets `spatial_dims=["x","y"]` and `spatial_shape=[width, height]` and + /// builds the 6-element GDAL transform internally. The N-D entry point is + /// [`Self::start_raster_nd`]; the metadata-taking entry is + /// [`Self::start_raster`]. + #[allow(clippy::too_many_arguments)] + pub fn start_raster_2d( + &mut self, + width: u64, + height: u64, + origin_x: f64, + origin_y: f64, + scale_x: f64, + scale_y: f64, + skew_x: f64, + skew_y: f64, + crs: Option<&str>, + ) -> Result<(), ArrowError> { + let transform = [origin_x, scale_x, skew_x, origin_y, skew_y, scale_y]; + self.start_raster_nd(&transform, &["x", "y"], &[width as i64, height as i64], crs)?; + self.current_width = width; + self.current_height = height; Ok(()) } - /// Start a new band - this must be called before writing band data - pub fn start_band(&mut self, band_metadata: BandMetadata) -> Result<(), ArrowError> { - // Append band metadata - match band_metadata.nodata_value { - Some(nodata) => self.band_nodata.append_value(&nodata), + /// Start a 2-D raster from a `&dyn MetadataRef`. Matches the pre-N-D + /// signature so callers from before the refactor keep compiling without + /// changing argument lists. + pub fn start_raster( + &mut self, + metadata: &dyn MetadataRef, + crs: Option<&str>, + ) -> Result<(), ArrowError> { + self.start_raster_2d( + metadata.width(), + metadata.height(), + metadata.upper_left_x(), + metadata.upper_left_y(), + metadata.scale_x(), + metadata.scale_y(), + metadata.skew_x(), + metadata.skew_y(), + crs, + ) + } + + /// Start a new band with explicit N-D parameters.
+ /// + /// `outdb_uri` is the *location* of the external resource (scheme is + /// resolved by an `ObjectStoreRegistry`). `outdb_format` is the *format* + /// used to interpret the bytes at that location (e.g. `"geotiff"`, + /// `"zarr"`). A null `outdb_format` means the band is in-memory — the + /// band's `data` buffer is authoritative. + #[allow(clippy::too_many_arguments)] + pub fn start_band_nd( + &mut self, + name: Option<&str>, + dim_names: &[&str], + shape: &[u64], + data_type: BandDataType, + nodata: Option<&[u8]>, + outdb_uri: Option<&str>, + outdb_format: Option<&str>, + ) -> Result<(), ArrowError> { + if dim_names.is_empty() { + return Err(ArrowError::InvalidArgumentError( + "start_band_nd: 0-dimensional bands are not supported".into(), + )); + } + if dim_names.len() != shape.len() { + return Err(ArrowError::InvalidArgumentError(format!( + "start_band_nd: dim_names ({}) and shape ({}) must have the same length", + dim_names.len(), + shape.len(), + ))); + } + // Name + match name { + Some(n) => self.band_name.append_value(n), + None => self.band_name.append_null(), + } + + // Dim names + for dn in dim_names { + self.band_dim_names_values.append_value(dn); + } + let next = *self.band_dim_names_offsets.last().unwrap() + dim_names.len() as i32; + self.band_dim_names_offsets.push(next); + + // Shape + for &s in shape { + self.band_shape_values.append_value(s); + } + let next = *self.band_shape_offsets.last().unwrap() + shape.len() as i32; + self.band_shape_offsets.push(next); + + // Data type + self.band_datatype.append_value(data_type as u32); + + // Nodata + match nodata { + Some(nodata_bytes) => self.band_nodata.append_value(nodata_bytes), None => self.band_nodata.append_null(), } - self.band_storage_type - .append_value(band_metadata.storage_type as u32); - self.band_datatype - .append_value(band_metadata.datatype as u32); + // VIEW: canonical identity is encoded as a null list entry — no + // values appended, offset unchanged, validity bit cleared. 
+ let next = *self.band_view_offsets.last().unwrap(); + self.band_view_offsets.push(next); + self.band_view_validity.push(false); + + // OutDb URI + match outdb_uri { + Some(uri) => self.band_outdb_uri.append_value(uri), + None => self.band_outdb_uri.append_null(), + } + + // OutDb format + match outdb_format { + Some(format) => self.band_outdb_format.append_value(format), + None => self.band_outdb_format.append_null(), + } + + self.current_band_count += 1; + self.band_data_count_at_start = self.band_data.len(); + + // Record this band's dims/shape for strict validation at finish_raster. + self.current_raster_bands.push(( + dim_names.iter().map(|s| s.to_string()).collect(), + shape.to_vec(), + )); + + Ok(()) + } + + /// Start a band with an explicit non-identity view over `source_shape`. + /// + /// Each `ViewEntry` describes one *visible* axis in `dim_names` order: + /// `(source_axis, start, step, steps)`. Validates that: + /// - `dim_names`, `source_shape`, and `view` have equal length. + /// - Across `view`, `source_axis` values form a permutation of + /// `0..ndim` (no axis duplicated, none missing). + /// - For each entry with `steps > 0`: `start` and (when `step != 0`) + /// `start + (steps - 1) * step` are in `[0, source_shape[source_axis])`. + /// - `steps >= 0`. + /// + /// On success, the band's `view` field is written verbatim and its + /// `source_shape` is written from `source_shape`. The visible shape + /// (== `[v.steps for v in view]`) is what `finish_raster` will compare + /// against `spatial_shape`. 
+ #[allow(clippy::too_many_arguments)] + pub fn start_band_with_view( + &mut self, + name: Option<&str>, + dim_names: &[&str], + source_shape: &[u64], + view: &[ViewEntry], + data_type: BandDataType, + nodata: Option<&[u8]>, + outdb_uri: Option<&str>, + outdb_format: Option<&str>, + ) -> Result<(), ArrowError> { + let ndim = dim_names.len(); + if ndim == 0 { + return Err(ArrowError::InvalidArgumentError( + "start_band_with_view: 0-dimensional bands are not supported".into(), + )); + } + if source_shape.len() != ndim || view.len() != ndim { + return Err(ArrowError::InvalidArgumentError(format!( + "start_band_with_view: dim_names ({}), source_shape ({}), and view ({}) \ + must all have the same length", + ndim, + source_shape.len(), + view.len() + ))); + } + + validate_view(view, source_shape)?; + + // Write fields. + match name { + Some(n) => self.band_name.append_value(n), + None => self.band_name.append_null(), + } + + for dn in dim_names { + self.band_dim_names_values.append_value(dn); + } + let next = *self.band_dim_names_offsets.last().unwrap() + ndim as i32; + self.band_dim_names_offsets.push(next); + + for &s in source_shape { + self.band_shape_values.append_value(s); + } + let next = *self.band_shape_offsets.last().unwrap() + ndim as i32; + self.band_shape_offsets.push(next); + + self.band_datatype.append_value(data_type as u32); + + match nodata { + Some(b) => self.band_nodata.append_value(b), + None => self.band_nodata.append_null(), + } - match band_metadata.outdb_url { - Some(url) => self.band_outdb_url.append_value(&url), - None => self.band_outdb_url.append_null(), + for v in view { + self.band_view_source_axis_values + .append_value(v.source_axis); + self.band_view_start_values.append_value(v.start); + self.band_view_step_values.append_value(v.step); + self.band_view_steps_values.append_value(v.steps); } + let next = *self.band_view_offsets.last().unwrap() + ndim as i32; + self.band_view_offsets.push(next); + self.band_view_validity.push(true); - 
match band_metadata.outdb_band_id { - Some(band_id) => self.band_outdb_band_id.append_value(band_id), - None => self.band_outdb_band_id.append_null(), + match outdb_uri { + Some(uri) => self.band_outdb_uri.append_value(uri), + None => self.band_outdb_uri.append_null(), + } + match outdb_format { + Some(format) => self.band_outdb_format.append_value(format), + None => self.band_outdb_format.append_null(), } self.current_band_count += 1; + self.band_data_count_at_start = self.band_data.len(); + + // finish_raster compares visible shape against spatial_shape. + self.current_raster_bands.push(( + dim_names.iter().map(|s| s.to_string()).collect(), + visible_shape_from_view(view), + )); Ok(()) } - /// Get direct access to the BinaryViewBuilder for writing the current band's data - /// Must be called after start_band() to write data to the current band + /// Convenience: start a 2D band with `dim_names=["y","x"]` and `shape=[height, width]`. + /// + /// Must be called after `start_raster` / `start_raster_2d` which sets + /// the current width/height. + pub fn start_band_2d( + &mut self, + data_type: BandDataType, + nodata: Option<&[u8]>, + ) -> Result<(), ArrowError> { + if self.current_width == 0 && self.current_height == 0 { + return Err(ArrowError::InvalidArgumentError( + "start_band_2d requires prior start_raster_2d (width and height are 0)".into(), + )); + } + self.start_band_nd( + None, + &["y", "x"], + &[self.current_height, self.current_width], + data_type, + nodata, + None, + None, + ) + } + + /// Start a 2-D band from a concrete [`BandMetadata`] struct. Matches + /// the pre-N-D signature so callers from before the refactor keep + /// compiling. For OutDb bands the `outdb_url` + `outdb_band_id` are + /// recombined into the SedonaDB `#band=N` URI convention.
+ pub fn start_band(&mut self, metadata: BandMetadata) -> Result<(), ArrowError> { + if self.current_width == 0 && self.current_height == 0 { + return Err(ArrowError::InvalidArgumentError( + "start_band requires prior start_raster / start_raster_2d (width and height are 0)" + .into(), + )); + } + let outdb_uri = match (metadata.outdb_url.as_deref(), metadata.outdb_band_id) { + (Some(url), Some(band_id)) => Some(format!("{url}#band={band_id}")), + (Some(url), None) => Some(url.to_string()), + _ => None, + }; + self.start_band_nd( + None, + &["y", "x"], + &[self.current_height, self.current_width], + metadata.datatype, + metadata.nodata_value.as_deref(), + outdb_uri.as_deref(), + None, + ) + } + + /// Get direct access to the BinaryViewBuilder for writing the current band's data. pub fn band_data_writer(&mut self) -> &mut BinaryViewBuilder { &mut self.band_data } - /// Finish writing the current band + /// Finish writing the current band. + /// + /// Validates that exactly one data value was appended since `start_band_nd()`. pub fn finish_band(&mut self) -> Result<(), ArrowError> { - // Band data should already be written via band_data_writer - // Nothing additional needed here since we're building flat + let current_count = self.band_data.len(); + if current_count != self.band_data_count_at_start + 1 { + return Err(ArrowError::InvalidArgumentError( + format!( + "Expected exactly one band data value per band, but got {} appended since start_band_nd()", + current_count - self.band_data_count_at_start + ), + )); + } Ok(()) } - /// Finish all bands for the current raster + /// Finish all bands for the current raster. + /// + /// Strictly validates every band added since `start_raster_nd`: each name in + /// the top-level `spatial_dims` must appear in the band's own `dim_names` + /// with a size matching the corresponding entry in `spatial_shape`. 
pub fn finish_raster(&mut self) -> Result<(), ArrowError> { - // Record the end offset for this raster's bands + for (band_idx, (band_dims, band_shape)) in self.current_raster_bands.iter().enumerate() { + for (spatial_idx, spatial_dim) in self.current_spatial_dims.iter().enumerate() { + let pos = band_dims + .iter() + .position(|d| d == spatial_dim) + .ok_or_else(|| { + ArrowError::InvalidArgumentError(format!( + "Band {band_idx} is missing spatial dimension {spatial_dim:?} \ + (band dim_names = {band_dims:?})" + )) + })?; + let expected = self.current_spatial_shape[spatial_idx]; + let actual = band_shape[pos] as i64; + if actual != expected { + return Err(ArrowError::InvalidArgumentError(format!( + "Band {band_idx} dimension {spatial_dim:?} has size {actual}, \ + expected {expected} from top-level spatial_shape" + ))); + } + } + } + let next_offset = self.band_offsets.last().unwrap() + self.current_band_count; self.band_offsets.push(next_offset); - self.raster_validity.append_value(true); - - Ok(()) - } - - /// Append raster metadata from a MetadataRef trait object - fn append_metadata_from_ref(&mut self, metadata: &dyn MetadataRef) -> Result<(), ArrowError> { - self.width.append_value(metadata.width()); - self.height.append_value(metadata.height()); - self.upper_left_x.append_value(metadata.upper_left_x()); - self.upper_left_y.append_value(metadata.upper_left_y()); - self.scale_x.append_value(metadata.scale_x()); - self.scale_y.append_value(metadata.scale_y()); - self.skew_x.append_value(metadata.skew_x()); - self.skew_y.append_value(metadata.skew_y()); - + self.current_raster_bands.clear(); + self.current_spatial_dims.clear(); + self.current_spatial_shape.clear(); Ok(()) } - /// Set the CRS for the current raster - pub fn append_crs(&mut self, crs: Option<&str>) -> Result<(), ArrowError> { - match crs { - Some(crs_data) => self.crs.append_value(crs_data), - None => self.crs.append_null(), + /// Append a null raster. 
+ pub fn append_null(&mut self) -> Result<(), ArrowError> { + // Transform: append 6 zeros + for _ in 0..6 { + self.transform_values.append_value(0.0); } - Ok(()) - } + let next = *self.transform_offsets.last().unwrap() + 6; + self.transform_offsets.push(next); - /// Append a null raster - pub fn append_null(&mut self) -> Result<(), ArrowError> { - // Since metadata fields are non-nullable, provide default values - self.width.append_value(0u64); - self.height.append_value(0u64); - self.upper_left_x.append_value(0.0f64); - self.upper_left_y.append_value(0.0f64); - self.scale_x.append_value(0.0f64); - self.scale_y.append_value(0.0f64); - self.skew_x.append_value(0.0f64); - self.skew_y.append_value(0.0f64); - - // Append null CRS + // Spatial dims + shape: empty list for null rasters. + let next = *self.spatial_dims_offsets.last().unwrap(); + self.spatial_dims_offsets.push(next); + let next = *self.spatial_shape_offsets.last().unwrap(); + self.spatial_shape_offsets.push(next); + + // CRS: null self.crs.append_null(); - // No bands for null raster + // No bands let current_offset = *self.band_offsets.last().unwrap(); self.band_offsets.push(current_offset); - // Mark raster as null + // Mark null self.raster_validity.append_null(); Ok(()) } - /// Finish building and return the constructed StructArray + /// Finish building and return the constructed StructArray. 
pub fn finish(mut self) -> Result { - // Build the metadata struct using the schema - let metadata_fields = if let DataType::Struct(fields) = RasterSchema::metadata_type() { - fields - } else { + // Build transform list + let transform_values = self.transform_values.finish(); + let transform_offsets = OffsetBuffer::new(ScalarBuffer::from(self.transform_offsets)); + let DataType::List(transform_field) = RasterSchema::transform_type() else { return Err(ArrowError::SchemaError( - "Expected struct type for metadata".to_string(), + "Expected list type for transform".to_string(), )); }; + let transform_list = ListArray::new( + transform_field, + transform_offsets, + Arc::new(transform_values), + None, + ); - let metadata_arrays: Vec = vec![ - Arc::new(self.width.finish()), - Arc::new(self.height.finish()), - Arc::new(self.upper_left_x.finish()), - Arc::new(self.upper_left_y.finish()), - Arc::new(self.scale_x.finish()), - Arc::new(self.scale_y.finish()), - Arc::new(self.skew_x.finish()), - Arc::new(self.skew_y.finish()), - ]; - let metadata_array = StructArray::new(metadata_fields, metadata_arrays, None); - - // Build the band metadata struct using the schema - let band_metadata_fields = - if let DataType::Struct(fields) = RasterSchema::band_metadata_type() { - fields - } else { - return Err(ArrowError::SchemaError( - "Expected struct type for band metadata".to_string(), - )); - }; + // Build spatial_dims list + let spatial_dims_values = self.spatial_dims_values.finish(); + let spatial_dims_offsets = OffsetBuffer::new(ScalarBuffer::from(self.spatial_dims_offsets)); + let DataType::List(spatial_dims_field) = RasterSchema::spatial_dims_type() else { + return Err(ArrowError::SchemaError( + "Expected list type for spatial_dims".to_string(), + )); + }; + let spatial_dims_list = ListArray::new( + spatial_dims_field, + spatial_dims_offsets, + Arc::new(spatial_dims_values), + None, + ); - let band_metadata_arrays: Vec = vec![ - Arc::new(self.band_nodata.finish()), - 
Arc::new(self.band_storage_type.finish()), - Arc::new(self.band_datatype.finish()), - Arc::new(self.band_outdb_url.finish()), - Arc::new(self.band_outdb_band_id.finish()), - ]; - let band_metadata_array = - StructArray::new(band_metadata_fields, band_metadata_arrays, None); + // Build spatial_shape list + let spatial_shape_values = self.spatial_shape_values.finish(); + let spatial_shape_offsets = + OffsetBuffer::new(ScalarBuffer::from(self.spatial_shape_offsets)); + let DataType::List(spatial_shape_field) = RasterSchema::spatial_shape_type() else { + return Err(ArrowError::SchemaError( + "Expected list type for spatial_shape".to_string(), + )); + }; + let spatial_shape_list = ListArray::new( + spatial_shape_field, + spatial_shape_offsets, + Arc::new(spatial_shape_values), + None, + ); + + // Build band dim_names nested list + let dim_names_values = self.band_dim_names_values.finish(); + let dim_names_offsets = OffsetBuffer::new(ScalarBuffer::from(self.band_dim_names_offsets)); + let DataType::List(dim_names_field) = RasterSchema::dim_names_type() else { + return Err(ArrowError::SchemaError( + "Expected list type for dim_names".to_string(), + )); + }; + let dim_names_list = ListArray::new( + dim_names_field, + dim_names_offsets, + Arc::new(dim_names_values), + None, + ); + + // Build band source_shape nested list + let source_shape_values = self.band_shape_values.finish(); + let source_shape_offsets = OffsetBuffer::new(ScalarBuffer::from(self.band_shape_offsets)); + let DataType::List(source_shape_field) = RasterSchema::source_shape_type() else { + return Err(ArrowError::SchemaError( + "Expected list type for source_shape".to_string(), + )); + }; + let source_shape_list = ListArray::new( + source_shape_field, + source_shape_offsets, + Arc::new(source_shape_values), + None, + ); - // Build the band struct using the schema - let band_fields = if let DataType::Struct(fields) = RasterSchema::band_type() { - fields + // Build band view nested list (List<Struct>): one struct of (source_axis, start, step, steps) per visible axis. 
+ let view_source_axis = self.band_view_source_axis_values.finish(); + let view_start = self.band_view_start_values.finish(); + let view_step = self.band_view_step_values.finish(); + let view_steps = self.band_view_steps_values.finish(); + let view_offsets = OffsetBuffer::new(ScalarBuffer::from(self.band_view_offsets)); + let DataType::List(view_list_field) = RasterSchema::view_type() else { + return Err(ArrowError::SchemaError( + "Expected list type for view".to_string(), + )); + }; + let DataType::Struct(view_struct_fields) = view_list_field.data_type().clone() else { + return Err(ArrowError::SchemaError( + "Expected struct type inside view list".to_string(), + )); + }; + let view_struct = StructArray::new( + view_struct_fields, + vec![ + Arc::new(view_source_axis) as ArrayRef, + Arc::new(view_start) as ArrayRef, + Arc::new(view_step) as ArrayRef, + Arc::new(view_steps) as ArrayRef, + ], + None, + ); + let view_nulls = if self.band_view_validity.iter().all(|&b| b) { + None } else { + Some(NullBuffer::from_iter( + self.band_view_validity.iter().copied(), + )) + }; + let view_list = ListArray::new( + view_list_field, + view_offsets, + Arc::new(view_struct), + view_nulls, + ); + + // Build band struct + let DataType::Struct(band_fields) = RasterSchema::band_type() else { return Err(ArrowError::SchemaError( "Expected struct type for band".to_string(), )); }; let band_arrays: Vec = vec![ - Arc::new(band_metadata_array), + Arc::new(self.band_name.finish()), + Arc::new(dim_names_list), + Arc::new(source_shape_list), + Arc::new(self.band_datatype.finish()), + Arc::new(self.band_nodata.finish()), + Arc::new(view_list), + Arc::new(self.band_outdb_uri.finish()), + Arc::new(self.band_outdb_format.finish()), Arc::new(self.band_data.finish()), ]; - let band_struct_array = StructArray::new(band_fields, band_arrays, None); + let band_struct = StructArray::new(band_fields, band_arrays, None); - // Build the bands list array using the schema - let band_field = if let 
DataType::List(field) = RasterSchema::bands_type() { - field - } else { + // Build bands list + let DataType::List(bands_field) = RasterSchema::bands_type() else { return Err(ArrowError::SchemaError( "Expected list type for bands".to_string(), )); }; + let band_list_offsets = OffsetBuffer::new(ScalarBuffer::from(self.band_offsets)); + let bands_list = + ListArray::new(bands_field, band_list_offsets, Arc::new(band_struct), None); - let offsets = OffsetBuffer::new(ScalarBuffer::from(self.band_offsets)); - let bands_list = ListArray::new(band_field, offsets, Arc::new(band_struct_array), None); - - // Build the final raster struct using the schema + // Build top-level raster struct let raster_fields = RasterSchema::fields(); let raster_arrays: Vec = vec![ - Arc::new(metadata_array), Arc::new(self.crs.finish()), + Arc::new(transform_list), + Arc::new(spatial_dims_list), + Arc::new(spatial_shape_list), Arc::new(bands_list), ]; @@ -351,7 +784,13 @@ mod tests { use super::*; use crate::array::RasterStructArray; use crate::traits::{RasterMetadata, RasterRef}; - use sedona_schema::raster::{BandDataType, StorageType}; + use arrow_array::RecordBatch; + use arrow_ipc::reader::StreamReader; + use arrow_ipc::writer::StreamWriter; + use arrow_schema::Schema; + use sedona_schema::raster::StorageType; + use std::borrow::Cow; + use std::io::Cursor; #[test] fn test_iterator_basic_functionality() { @@ -533,7 +972,7 @@ mod tests { let source_raster = iterator.get(0).unwrap(); target_builder - .start_raster(source_raster.metadata(), source_raster.crs()) + .start_raster(&source_raster.metadata(), source_raster.crs()) .unwrap(); // Add new band data while preserving original metadata @@ -846,4 +1285,1755 @@ mod tests { let band = result.unwrap(); assert_eq!(band.data().len(), 100); } + + #[test] + fn test_roundtrip_2d_raster() { + let mut builder = RasterBuilder::new(1); + builder + .start_raster_2d( + 10, + 20, + 100.0, + 200.0, + 1.0, + -2.0, + 0.25, + 0.5, + Some("EPSG:4326"), + ) + 
.unwrap(); + builder + .start_band_2d(BandDataType::UInt8, Some(&[255u8])) + .unwrap(); + builder.band_data_writer().append_value(vec![1u8; 200]); + builder.finish_band().unwrap(); + builder.finish_raster().unwrap(); + + let array = builder.finish().unwrap(); + let rasters = RasterStructArray::new(&array); + assert_eq!(rasters.len(), 1); + + let r = rasters.get(0).unwrap(); + assert_eq!(r.width().unwrap(), 10); + assert_eq!(r.height().unwrap(), 20); + assert_eq!(r.transform(), &[100.0, 1.0, 0.25, 200.0, 0.5, -2.0]); + assert_eq!(r.x_dim(), "x"); + assert_eq!(r.y_dim(), "y"); + assert_eq!(r.crs(), Some("EPSG:4326")); + assert_eq!(r.num_bands(), 1); + + let band = r.band(0).unwrap(); + assert_eq!(band.ndim(), 2); + assert_eq!(band.dim_names(), vec!["y", "x"]); + assert_eq!(band.shape(), &[20, 10]); + assert_eq!(band.data_type(), BandDataType::UInt8); + assert_eq!(band.nodata(), Some(&[255u8][..])); + assert_eq!(band.contiguous_data().unwrap().len(), 200); + } + + #[test] + fn test_roundtrip_multi_band() { + let mut builder = RasterBuilder::new(1); + builder + .start_raster_2d(2, 2, 0.0, 0.0, 1.0, -1.0, 0.0, 0.0, None) + .unwrap(); + + // Band 0: UInt8 + builder + .start_band_2d(BandDataType::UInt8, Some(&[255u8])) + .unwrap(); + builder.band_data_writer().append_value([1u8, 2, 3, 4]); + builder.finish_band().unwrap(); + + // Band 1: Float32 + builder.start_band_2d(BandDataType::Float32, None).unwrap(); + let f32_data: Vec = [1.5f32, 2.5, 3.5, 4.5] + .iter() + .flat_map(|v| v.to_le_bytes()) + .collect(); + builder.band_data_writer().append_value(&f32_data); + builder.finish_band().unwrap(); + + builder.finish_raster().unwrap(); + let array = builder.finish().unwrap(); + let rasters = RasterStructArray::new(&array); + let r = rasters.get(0).unwrap(); + + assert_eq!(r.num_bands(), 2); + + let b0 = r.band(0).unwrap(); + assert_eq!(b0.data_type(), BandDataType::UInt8); + assert_eq!(b0.nodata(), Some(&[255u8][..])); + + let b1 = r.band(1).unwrap(); + 
assert_eq!(b1.data_type(), BandDataType::Float32); + assert_eq!(b1.nodata(), None); + } + + #[test] + fn test_null_raster() { + let mut builder = RasterBuilder::new(2); + builder + .start_raster_2d(1, 1, 0.0, 0.0, 1.0, -1.0, 0.0, 0.0, None) + .unwrap(); + builder.start_band_2d(BandDataType::UInt8, None).unwrap(); + builder.band_data_writer().append_value([0u8]); + builder.finish_band().unwrap(); + builder.finish_raster().unwrap(); + + builder.append_null().unwrap(); + + let array = builder.finish().unwrap(); + let rasters = RasterStructArray::new(&array); + assert_eq!(rasters.len(), 2); + assert!(!rasters.is_null(0)); + assert!(rasters.is_null(1)); + } + + #[test] + fn test_nd_band() { + let mut builder = RasterBuilder::new(1); + let transform = [0.0, 1.0, 0.0, 0.0, 0.0, -1.0]; + builder + .start_raster_nd(&transform, &["x", "y"], &[5, 4], None) + .unwrap(); + + // 3D band: [time=3, y=4, x=5] + builder + .start_band_nd( + Some("temperature"), + &["time", "y", "x"], + &[3, 4, 5], + BandDataType::Float32, + None, + None, + None, + ) + .unwrap(); + let data = vec![0u8; 3 * 4 * 5 * 4]; // 3*4*5 Float32 elements + builder.band_data_writer().append_value(&data); + builder.finish_band().unwrap(); + builder.finish_raster().unwrap(); + + let array = builder.finish().unwrap(); + let rasters = RasterStructArray::new(&array); + let r = rasters.get(0).unwrap(); + + assert_eq!(r.band_name(0), Some("temperature")); + let band = r.band(0).unwrap(); + assert_eq!(band.ndim(), 3); + assert_eq!(band.dim_names(), vec!["time", "y", "x"]); + assert_eq!(band.shape(), &[3, 4, 5]); + assert_eq!(band.dim_size("time"), Some(3)); + assert_eq!(band.dim_size("y"), Some(4)); + assert_eq!(band.dim_size("x"), Some(5)); + assert_eq!(band.dim_size("z"), None); + + // Verify strides are standard C-order: [4*5*4, 5*4, 4] = [80, 20, 4] + let buf = band.nd_buffer().unwrap(); + assert_eq!(buf.strides, &[80, 20, 4]); + assert_eq!(buf.offset, 0); + } + + #[test] + fn test_nonstandard_spatial_dim_names() { + 
// Zarr-style dataset with lat/lon instead of y/x + let mut builder = RasterBuilder::new(1); + let transform = [10.0, 0.01, 0.0, 50.0, 0.0, -0.01]; + builder + .start_raster_nd( + &transform, + &["longitude", "latitude"], + &[360, 180], + Some("EPSG:4326"), + ) + .unwrap(); + builder + .start_band_nd( + Some("sst"), + &["latitude", "longitude"], + &[180, 360], + BandDataType::Float32, + None, + None, + None, + ) + .unwrap(); + let data = vec![0u8; 180 * 360 * 4]; + builder.band_data_writer().append_value(&data); + builder.finish_band().unwrap(); + builder.finish_raster().unwrap(); + + let array = builder.finish().unwrap(); + let rasters = RasterStructArray::new(&array); + let r = rasters.get(0).unwrap(); + + assert_eq!(r.x_dim(), "longitude"); + assert_eq!(r.y_dim(), "latitude"); + // width = size of "longitude" dim, height = size of "latitude" dim + assert_eq!(r.width().unwrap(), 360); + assert_eq!(r.height().unwrap(), 180); + } + + #[test] + fn test_mixed_dimensionality_bands() { + // One 3D band and one 2D band in the same raster + let mut builder = RasterBuilder::new(1); + let transform = [0.0, 1.0, 0.0, 0.0, 0.0, -1.0]; + builder + .start_raster_nd(&transform, &["x", "y"], &[64, 64], None) + .unwrap(); + + // Band 0: 3D [time=12, y=64, x=64] + builder + .start_band_nd( + Some("temperature"), + &["time", "y", "x"], + &[12, 64, 64], + BandDataType::Float32, + None, + None, + None, + ) + .unwrap(); + let data_3d = vec![0u8; 12 * 64 * 64 * 4]; + builder.band_data_writer().append_value(&data_3d); + builder.finish_band().unwrap(); + + // Band 1: 2D [y=64, x=64] + builder + .start_band_nd( + Some("elevation"), + &["y", "x"], + &[64, 64], + BandDataType::Float64, + None, + None, + None, + ) + .unwrap(); + let data_2d = vec![0u8; 64 * 64 * 8]; + builder.band_data_writer().append_value(&data_2d); + builder.finish_band().unwrap(); + + builder.finish_raster().unwrap(); + let array = builder.finish().unwrap(); + let rasters = RasterStructArray::new(&array); + let r = 
rasters.get(0).unwrap(); + + assert_eq!(r.num_bands(), 2); + // width/height match the 64x64 spatial grid even though band(0) is 3D + assert_eq!(r.width().unwrap(), 64); + assert_eq!(r.height().unwrap(), 64); + + let b0 = r.band(0).unwrap(); + assert_eq!(b0.ndim(), 3); + assert_eq!(b0.dim_names(), vec!["time", "y", "x"]); + assert_eq!(b0.shape(), &[12, 64, 64]); + assert_eq!(b0.dim_size("time"), Some(12)); + + let b1 = r.band(1).unwrap(); + assert_eq!(b1.ndim(), 2); + assert_eq!(b1.dim_names(), vec!["y", "x"]); + assert_eq!(b1.shape(), &[64, 64]); + assert_eq!(b1.dim_size("time"), None); + } + + #[test] + fn test_dim_index_lookup() { + let mut builder = RasterBuilder::new(1); + let transform = [0.0, 1.0, 0.0, 0.0, 0.0, -1.0]; + builder + .start_raster_nd(&transform, &["x", "y"], &[32, 32], None) + .unwrap(); + builder + .start_band_nd( + None, + &["time", "pressure", "y", "x"], + &[6, 10, 32, 32], + BandDataType::Float32, + None, + None, + None, + ) + .unwrap(); + let data = vec![0u8; 6 * 10 * 32 * 32 * 4]; + builder.band_data_writer().append_value(&data); + builder.finish_band().unwrap(); + builder.finish_raster().unwrap(); + + let array = builder.finish().unwrap(); + let rasters = RasterStructArray::new(&array); + let r = rasters.get(0).unwrap(); + let band = r.band(0).unwrap(); + + assert_eq!(band.dim_index("time"), Some(0)); + assert_eq!(band.dim_index("pressure"), Some(1)); + assert_eq!(band.dim_index("y"), Some(2)); + assert_eq!(band.dim_index("x"), Some(3)); + assert_eq!(band.dim_index("wavelength"), None); + + assert_eq!(band.dim_size("time"), Some(6)); + assert_eq!(band.dim_size("pressure"), Some(10)); + assert_eq!(band.dim_size("wavelength"), None); + } + + #[test] + fn test_contiguous_data_is_borrowed() { + let mut builder = RasterBuilder::new(1); + builder + .start_raster_2d(4, 4, 0.0, 0.0, 1.0, -1.0, 0.0, 0.0, None) + .unwrap(); + builder.start_band_2d(BandDataType::UInt8, None).unwrap(); + builder.band_data_writer().append_value([1u8; 16]); + 
builder.finish_band().unwrap(); + builder.finish_raster().unwrap(); + + let array = builder.finish().unwrap(); + let rasters = RasterStructArray::new(&array); + let r = rasters.get(0).unwrap(); + let band = r.band(0).unwrap(); + + let data = band.contiguous_data().unwrap(); + // Identity-view bands are always contiguous, so should be Cow::Borrowed + assert!(matches!(data, Cow::Borrowed(_))); + assert_eq!(data.len(), 16); + } + + #[test] + fn test_nd_buffer_strides_various_types() { + // Each raster exercises a different shape; strict spatial-grid + // validation forbids mixing bands of disagreeing spatial sizes within + // one raster. + let mut builder = RasterBuilder::new(3); + let transform = [0.0, 1.0, 0.0, 0.0, 0.0, -1.0]; + + // Raster 0 — UInt8: element size = 1, shape [3, 4] → strides [4, 1] + builder + .start_raster_nd(&transform, &["x", "y"], &[4, 3], None) + .unwrap(); + builder + .start_band_nd( + None, + &["y", "x"], + &[3, 4], + BandDataType::UInt8, + None, + None, + None, + ) + .unwrap(); + builder.band_data_writer().append_value(vec![0u8; 12]); + builder.finish_band().unwrap(); + builder.finish_raster().unwrap(); + + // Raster 1 — Float64: element size = 8, shape [2, 3, 5] → strides [120, 40, 8] + builder + .start_raster_nd(&transform, &["x", "y"], &[5, 3], None) + .unwrap(); + builder + .start_band_nd( + None, + &["z", "y", "x"], + &[2, 3, 5], + BandDataType::Float64, + None, + None, + None, + ) + .unwrap(); + builder + .band_data_writer() + .append_value(vec![0u8; 2 * 3 * 5 * 8]); + builder.finish_band().unwrap(); + builder.finish_raster().unwrap(); + + // Raster 2 — UInt16: element size = 2, shape [10] → strides [2]. + // Only has an "x" dim, so declare spatial_dims=["x"]. 
+ builder + .start_raster_nd(&transform, &["x"], &[10], None) + .unwrap(); + builder + .start_band_nd(None, &["x"], &[10], BandDataType::UInt16, None, None, None) + .unwrap(); + builder.band_data_writer().append_value(vec![0u8; 20]); + builder.finish_band().unwrap(); + builder.finish_raster().unwrap(); + + let array = builder.finish().unwrap(); + let rasters = RasterStructArray::new(&array); + + let r0 = rasters.get(0).unwrap(); + let b0 = r0.band(0).unwrap(); + assert_eq!(b0.nd_buffer().unwrap().strides, &[4, 1]); // UInt8 [3, 4] + + let r1 = rasters.get(1).unwrap(); + let b1 = r1.band(0).unwrap(); + assert_eq!(b1.nd_buffer().unwrap().strides, &[120, 40, 8]); // Float64 [2, 3, 5] + + let r2 = rasters.get(2).unwrap(); + let b2 = r2.band(0).unwrap(); + assert_eq!(b2.nd_buffer().unwrap().strides, &[2]); // UInt16 [10] + } + + #[test] + fn test_width_height_no_bands() { + // Zero-band raster — used as a "target grid" specification (GDAL warp + // pattern). Width/height come from the top-level spatial_shape, not + // band(0). 
+ let mut builder = RasterBuilder::new(1); + let transform = [0.0, 1.0, 0.0, 0.0, 0.0, -1.0]; + builder + .start_raster_nd(&transform, &["x", "y"], &[64, 32], None) + .unwrap(); + builder.finish_raster().unwrap(); + + let array = builder.finish().unwrap(); + let rasters = RasterStructArray::new(&array); + let r = rasters.get(0).unwrap(); + + assert_eq!(r.num_bands(), 0); + assert_eq!(r.width().unwrap(), 64); + assert_eq!(r.height().unwrap(), 32); + } + + #[test] + fn test_band_name_nullable() { + let mut builder = RasterBuilder::new(1); + let transform = [0.0, 1.0, 0.0, 0.0, 0.0, -1.0]; + builder + .start_raster_nd(&transform, &["x", "y"], &[4, 4], None) + .unwrap(); + + // Named band + builder + .start_band_nd( + Some("temperature"), + &["y", "x"], + &[4, 4], + BandDataType::Float32, + None, + None, + None, + ) + .unwrap(); + builder.band_data_writer().append_value(vec![0u8; 64]); + builder.finish_band().unwrap(); + + // Unnamed band (via start_band_2d which passes None for name) + builder.current_width = 4; + builder.current_height = 4; + builder.start_band_2d(BandDataType::UInt8, None).unwrap(); + builder.band_data_writer().append_value(vec![0u8; 16]); + builder.finish_band().unwrap(); + + builder.finish_raster().unwrap(); + let array = builder.finish().unwrap(); + let rasters = RasterStructArray::new(&array); + let r = rasters.get(0).unwrap(); + + assert_eq!(r.band_name(0), Some("temperature")); + assert_eq!(r.band_name(1), None); // unnamed + assert_eq!(r.band_name(99), None); // out of range + } + + #[test] + fn test_spatial_dims_shape_roundtrip() { + let mut builder = RasterBuilder::new(1); + let transform = [0.0, 1.0, 0.0, 0.0, 0.0, -1.0]; + builder + .start_raster_nd(&transform, &["longitude", "latitude"], &[360, 180], None) + .unwrap(); + builder + .start_band_nd( + None, + &["latitude", "longitude"], + &[180, 360], + BandDataType::UInt8, + None, + None, + None, + ) + .unwrap(); + builder + .band_data_writer() + .append_value(vec![0u8; 360 * 180]); + 
builder.finish_band().unwrap(); + builder.finish_raster().unwrap(); + + let array = builder.finish().unwrap(); + let rasters = RasterStructArray::new(&array); + let r = rasters.get(0).unwrap(); + + assert_eq!(r.spatial_dims(), vec!["longitude", "latitude"]); + assert_eq!(r.spatial_shape(), &[360, 180]); + assert_eq!(r.x_dim(), "longitude"); + assert_eq!(r.y_dim(), "latitude"); + assert_eq!(r.width().unwrap(), 360); + assert_eq!(r.height().unwrap(), 180); + } + + #[test] + fn test_zero_band_raster_roundtrip() { + // Zero-band rasters double as "target grid" specifications. They must + // round-trip through the builder cleanly. + let mut builder = RasterBuilder::new(1); + let transform = [10.0, 1.0, 0.0, 20.0, 0.0, -1.0]; + builder + .start_raster_nd(&transform, &["x", "y"], &[128, 64], Some("EPSG:3857")) + .unwrap(); + builder.finish_raster().unwrap(); + + let array = builder.finish().unwrap(); + let rasters = RasterStructArray::new(&array); + let r = rasters.get(0).unwrap(); + + assert_eq!(r.num_bands(), 0); + assert_eq!(r.spatial_dims(), vec!["x", "y"]); + assert_eq!(r.spatial_shape(), &[128, 64]); + assert_eq!(r.width().unwrap(), 128); + assert_eq!(r.height().unwrap(), 64); + assert_eq!(r.crs(), Some("EPSG:3857")); + } + + #[test] + fn test_band_missing_spatial_dim_errors() { + let mut builder = RasterBuilder::new(1); + let transform = [0.0, 1.0, 0.0, 0.0, 0.0, -1.0]; + builder + .start_raster_nd(&transform, &["x", "y"], &[4, 4], None) + .unwrap(); + // Band is missing "y" entirely. 
+ builder + .start_band_nd(None, &["x"], &[4], BandDataType::UInt8, None, None, None) + .unwrap(); + builder.band_data_writer().append_value(vec![0u8; 4]); + builder.finish_band().unwrap(); + + let err = builder.finish_raster().unwrap_err(); + assert!( + err.to_string().contains("missing spatial dimension"), + "unexpected error: {err}" + ); + } + + #[test] + fn test_start_band_with_view_identity_matches_start_band() { + // Identity view through start_band_with_view should produce the same + // visible shape and byte strides as the convenience start_band path. + let mut builder = RasterBuilder::new(1); + let transform = [0.0, 1.0, 0.0, 0.0, 0.0, -1.0]; + builder + .start_raster_nd(&transform, &["x", "y"], &[5, 4], None) + .unwrap(); + + let view = [ + ViewEntry { + source_axis: 0, + start: 0, + step: 1, + steps: 4, + }, + ViewEntry { + source_axis: 1, + start: 0, + step: 1, + steps: 5, + }, + ]; + builder + .start_band_with_view( + None, + &["y", "x"], + &[4, 5], + &view, + BandDataType::UInt8, + None, + None, + None, + ) + .unwrap(); + builder.band_data_writer().append_value(vec![0u8; 20]); + builder.finish_band().unwrap(); + builder.finish_raster().unwrap(); + + let array = builder.finish().unwrap(); + let rasters = RasterStructArray::new(&array); + let r = rasters.get(0).unwrap(); + let band = r.band(0).unwrap(); + assert_eq!(band.shape(), &[4, 5]); + assert_eq!(band.raw_source_shape(), &[4, 5]); + let buf = band.nd_buffer().unwrap(); + assert_eq!(buf.strides, &[5, 1]); + assert_eq!(buf.offset, 0); + } + + #[test] + fn test_view_slice_nd_buffer_and_contiguous_data() { + // 1D source of size 8 (UInt8), view (start=1, step=2, steps=3) selects + // elements at byte offsets 1, 3, 5. Source: 0..8. 
+ let mut builder = RasterBuilder::new(1); + let transform = [0.0, 1.0, 0.0, 0.0, 0.0, -1.0]; + builder + .start_raster_nd(&transform, &["x"], &[3], None) + .unwrap(); + + let view = [ViewEntry { + source_axis: 0, + start: 1, + step: 2, + steps: 3, + }]; + builder + .start_band_with_view( + None, + &["x"], + &[8], + &view, + BandDataType::UInt8, + None, + None, + None, + ) + .unwrap(); + builder + .band_data_writer() + .append_value(vec![0u8, 1, 2, 3, 4, 5, 6, 7]); + builder.finish_band().unwrap(); + builder.finish_raster().unwrap(); + + let array = builder.finish().unwrap(); + let rasters = RasterStructArray::new(&array); + let r = rasters.get(0).unwrap(); + let band = r.band(0).unwrap(); + + assert_eq!(band.shape(), &[3]); + assert_eq!(band.raw_source_shape(), &[8]); + let buf = band.nd_buffer().unwrap(); + assert_eq!(buf.shape, &[3]); + assert_eq!(buf.strides, &[2]); + assert_eq!(buf.offset, 1); + + // Materialised contiguous bytes should be [1, 3, 5]. + let bytes = band.contiguous_data().unwrap(); + assert_eq!(&*bytes, &[1u8, 3, 5]); + assert!(matches!(bytes, std::borrow::Cow::Owned(_))); + } + + #[test] + fn test_view_broadcast() { + // Broadcast: source size 1, step=0 → expose the same byte 4 times. 
+ let mut builder = RasterBuilder::new(1); + let transform = [0.0, 1.0, 0.0, 0.0, 0.0, -1.0]; + builder + .start_raster_nd(&transform, &["x"], &[4], None) + .unwrap(); + + let view = [ViewEntry { + source_axis: 0, + start: 0, + step: 0, + steps: 4, + }]; + builder + .start_band_with_view( + None, + &["x"], + &[1], + &view, + BandDataType::UInt8, + None, + None, + None, + ) + .unwrap(); + builder.band_data_writer().append_value(vec![42u8]); + builder.finish_band().unwrap(); + builder.finish_raster().unwrap(); + + let array = builder.finish().unwrap(); + let rasters = RasterStructArray::new(&array); + let r = rasters.get(0).unwrap(); + let band = r.band(0).unwrap(); + + let buf = band.nd_buffer().unwrap(); + assert_eq!(buf.shape, &[4]); + assert_eq!(buf.strides, &[0]); + assert_eq!(buf.offset, 0); + + let bytes = band.contiguous_data().unwrap(); + assert_eq!(&*bytes, &[42u8, 42, 42, 42]); + } + + #[test] + fn test_view_permutation_transpose() { + // 2×3 source (UInt8), values 0..6 in C-order: + // row 0: [0, 1, 2] + // row 1: [3, 4, 5] + // Transposed view exposes axes (cols, rows) → 3×2: + // row 0: [0, 3] + // row 1: [1, 4] + // row 2: [2, 5] + let mut builder = RasterBuilder::new(1); + let transform = [0.0, 1.0, 0.0, 0.0, 0.0, -1.0]; + // The transposed visible shape on the spatial axes would conflict with + // a 2D spatial grid; declare no spatial dims (band dims are "a"/"b") so + // the strict spatial check is trivially satisfied. 
+ builder.start_raster_nd(&transform, &[], &[], None).unwrap(); + + let view = [ + // visible axis 0 reads source axis 1 (cols), full extent 3 + ViewEntry { + source_axis: 1, + start: 0, + step: 1, + steps: 3, + }, + // visible axis 1 reads source axis 0 (rows), full extent 2 + ViewEntry { + source_axis: 0, + start: 0, + step: 1, + steps: 2, + }, + ]; + builder + .start_band_with_view( + None, + &["a", "b"], + &[2, 3], + &view, + BandDataType::UInt8, + None, + None, + None, + ) + .unwrap(); + builder + .band_data_writer() + .append_value(vec![0u8, 1, 2, 3, 4, 5]); + builder.finish_band().unwrap(); + builder.finish_raster().unwrap(); + + let array = builder.finish().unwrap(); + let rasters = RasterStructArray::new(&array); + let r = rasters.get(0).unwrap(); + let band = r.band(0).unwrap(); + + assert_eq!(band.shape(), &[3, 2]); + assert_eq!(band.raw_source_shape(), &[2, 3]); + let buf = band.nd_buffer().unwrap(); + assert_eq!(buf.strides, &[1, 3]); // visible axis 0 → source col stride; visible axis 1 → source row stride + + let bytes = band.contiguous_data().unwrap(); + assert_eq!(&*bytes, &[0u8, 3, 1, 4, 2, 5]); + } + + #[test] + fn test_view_empty_axis() { + // steps=0 → empty visible axis. contiguous_data must succeed and + // return an empty buffer. 
+ let mut builder = RasterBuilder::new(1); + let transform = [0.0, 1.0, 0.0, 0.0, 0.0, -1.0]; + builder.start_raster_nd(&transform, &[], &[], None).unwrap(); + + let view = [ViewEntry { + source_axis: 0, + start: 0, + step: 1, + steps: 0, + }]; + builder + .start_band_with_view( + None, + &["x"], + &[8], + &view, + BandDataType::UInt8, + None, + None, + None, + ) + .unwrap(); + builder + .band_data_writer() + .append_value(vec![0u8, 1, 2, 3, 4, 5, 6, 7]); + builder.finish_band().unwrap(); + builder.finish_raster().unwrap(); + + let array = builder.finish().unwrap(); + let rasters = RasterStructArray::new(&array); + let r = rasters.get(0).unwrap(); + let band = r.band(0).unwrap(); + assert_eq!(band.shape(), &[0]); + let buf = band.nd_buffer().unwrap(); + assert_eq!(buf.shape, &[0]); + let bytes = band.contiguous_data().unwrap(); + assert!(bytes.is_empty()); + } + + #[test] + fn test_start_band_rejects_zero_dim() { + // 0-D bands carry no spatial extent and no caller has a use for + // them. start_band_nd must reject an empty dim_names slice eagerly so + // the malformed band never reaches the buffer layer. + let mut builder = RasterBuilder::new(1); + let transform = [0.0, 1.0, 0.0, 0.0, 0.0, -1.0]; + builder.start_raster_nd(&transform, &[], &[], None).unwrap(); + let err = builder + .start_band_nd(None, &[], &[], BandDataType::UInt8, None, None, None) + .unwrap_err(); + assert!( + err.to_string().contains("0-dimensional"), + "unexpected error: {err}" + ); + } + + #[test] + fn test_start_band_with_view_rejects_zero_dim() { + // start_band_with_view must apply the same 0-D guard as start_band + // — accepting empty dim_names would otherwise bypass it via the + // explicit-view path. 
+ let mut builder = RasterBuilder::new(1); + let transform = [0.0, 1.0, 0.0, 0.0, 0.0, -1.0]; + builder.start_raster_nd(&transform, &[], &[], None).unwrap(); + let err = builder + .start_band_with_view(None, &[], &[], &[], BandDataType::UInt8, None, None, None) + .unwrap_err(); + assert!( + err.to_string().contains("0-dimensional"), + "unexpected error: {err}" + ); + } + + #[test] + fn test_view_validation_rejects_out_of_range_start() { + let mut builder = RasterBuilder::new(1); + let transform = [0.0, 1.0, 0.0, 0.0, 0.0, -1.0]; + builder.start_raster_nd(&transform, &[], &[], None).unwrap(); + let view = [ViewEntry { + source_axis: 0, + start: 8, + step: 1, + steps: 1, + }]; + let err = builder + .start_band_with_view( + None, + &["x"], + &[8], + &view, + BandDataType::UInt8, + None, + None, + None, + ) + .unwrap_err(); + assert!( + err.to_string().contains("out of range"), + "unexpected error: {err}" + ); + } + + #[test] + fn test_view_validation_rejects_step_overrun() { + let mut builder = RasterBuilder::new(1); + let transform = [0.0, 1.0, 0.0, 0.0, 0.0, -1.0]; + builder.start_raster_nd(&transform, &[], &[], None).unwrap(); + // start=1, step=2, steps=4 → addresses element 1+(4-1)*2 = 7 which is + // out of range for a source size of 7. 
+ let view = [ViewEntry { + source_axis: 0, + start: 1, + step: 2, + steps: 4, + }]; + let err = builder + .start_band_with_view( + None, + &["x"], + &[7], + &view, + BandDataType::UInt8, + None, + None, + None, + ) + .unwrap_err(); + assert!( + err.to_string().contains("out of range"), + "unexpected error: {err}" + ); + } + + #[test] + fn test_view_validation_rejects_duplicate_source_axis() { + let mut builder = RasterBuilder::new(1); + let transform = [0.0, 1.0, 0.0, 0.0, 0.0, -1.0]; + builder.start_raster_nd(&transform, &[], &[], None).unwrap(); + let view = [ + ViewEntry { + source_axis: 0, + start: 0, + step: 1, + steps: 2, + }, + ViewEntry { + source_axis: 0, + start: 0, + step: 1, + steps: 2, + }, + ]; + let err = builder + .start_band_with_view( + None, + &["a", "b"], + &[2, 3], + &view, + BandDataType::UInt8, + None, + None, + None, + ) + .unwrap_err(); + assert!( + err.to_string().contains("permutation"), + "unexpected error: {err}" + ); + } + + #[test] + fn test_contiguous_data_identity_via_start_band_is_borrowed() { + // Canonical identity: the row's view list is null, and the read path + // synthesises the identity view. Should still hand the underlying + // bytes back without copying. + let mut builder = RasterBuilder::new(1); + let transform = [0.0, 1.0, 0.0, 0.0, 0.0, -1.0]; + builder + .start_raster_nd(&transform, &["x", "y"], &[3, 2], None) + .unwrap(); + builder + .start_band_nd( + None, + &["y", "x"], + &[2, 3], + BandDataType::UInt8, + None, + None, + None, + ) + .unwrap(); + let pixels: Vec = (0..6).collect(); + builder.band_data_writer().append_value(pixels.clone()); + builder.finish_band().unwrap(); + builder.finish_raster().unwrap(); + + let array = builder.finish().unwrap(); + let rasters = RasterStructArray::new(&array); + let r = rasters.get(0).unwrap(); + let band = r.band(0).unwrap(); + + // Visible shape comes from the synthesised identity view. 
+ assert_eq!(band.shape(), &[2, 3]); + assert_eq!(band.raw_source_shape(), &[2, 3]); + + let buf = band.nd_buffer().unwrap(); + assert_eq!(buf.strides, &[3, 1]); + assert_eq!(buf.offset, 0); + + let bytes = band.contiguous_data().unwrap(); + assert!(matches!(bytes, Cow::Borrowed(_))); + assert_eq!(&*bytes, pixels.as_slice()); + } + + #[test] + fn test_contiguous_data_explicit_identity_view_is_borrowed() { + // Identity expressed *explicitly* through start_band_with_view must be + // indistinguishable to consumers from the null-row identity above — + // same visible shape, same byte strides, same Cow::Borrowed fast path. + use std::borrow::Cow; + + let mut builder = RasterBuilder::new(1); + let transform = [0.0, 1.0, 0.0, 0.0, 0.0, -1.0]; + builder + .start_raster_nd(&transform, &["x", "y"], &[3, 2], None) + .unwrap(); + let view = [ + ViewEntry { + source_axis: 0, + start: 0, + step: 1, + steps: 2, + }, + ViewEntry { + source_axis: 1, + start: 0, + step: 1, + steps: 3, + }, + ]; + builder + .start_band_with_view( + None, + &["y", "x"], + &[2, 3], + &view, + BandDataType::UInt8, + None, + None, + None, + ) + .unwrap(); + let pixels: Vec = (0..6).collect(); + builder.band_data_writer().append_value(pixels.clone()); + builder.finish_band().unwrap(); + builder.finish_raster().unwrap(); + + let array = builder.finish().unwrap(); + let rasters = RasterStructArray::new(&array); + let r = rasters.get(0).unwrap(); + let band = r.band(0).unwrap(); + + assert_eq!(band.shape(), &[2, 3]); + let buf = band.nd_buffer().unwrap(); + assert_eq!(buf.strides, &[3, 1]); + assert_eq!(buf.offset, 0); + + let bytes = band.contiguous_data().unwrap(); + assert!(matches!(bytes, Cow::Borrowed(_))); + assert_eq!(&*bytes, pixels.as_slice()); + } + + #[test] + fn test_contiguous_data_zero_step_broadcast_2d() { + // 2D broadcast: source shape [1, 3], view broadcasts axis 0 four + // times so the visible region is 4×3. Each visible row must equal the + // source's only row. 
+ let mut builder = RasterBuilder::new(1); + let transform = [0.0, 1.0, 0.0, 0.0, 0.0, -1.0]; + builder.start_raster_nd(&transform, &[], &[], None).unwrap(); + let view = [ + ViewEntry { + source_axis: 0, + start: 0, + step: 0, + steps: 4, + }, + ViewEntry { + source_axis: 1, + start: 0, + step: 1, + steps: 3, + }, + ]; + builder + .start_band_with_view( + None, + &["row", "col"], + &[1, 3], + &view, + BandDataType::UInt8, + None, + None, + None, + ) + .unwrap(); + builder.band_data_writer().append_value(vec![10u8, 20, 30]); + builder.finish_band().unwrap(); + builder.finish_raster().unwrap(); + + let array = builder.finish().unwrap(); + let rasters = RasterStructArray::new(&array); + let r = rasters.get(0).unwrap(); + let band = r.band(0).unwrap(); + + let buf = band.nd_buffer().unwrap(); + assert_eq!(buf.shape, &[4, 3]); + // Broadcast row stride is 0; column stride is 1 byte per UInt8. + assert_eq!(buf.strides, &[0, 1]); + assert_eq!(buf.offset, 0); + + let bytes = band.contiguous_data().unwrap(); + assert_eq!(&*bytes, &[10u8, 20, 30, 10, 20, 30, 10, 20, 30, 10, 20, 30]); + } + + #[test] + fn test_contiguous_data_negative_step_full_reverse() { + // 1D source [0..8] with start=7, step=-1, steps=8 walks the source + // backwards. Byte stride must be negative; offset lands on the last + // source element. 
+ let mut builder = RasterBuilder::new(1); + let transform = [0.0, 1.0, 0.0, 0.0, 0.0, -1.0]; + builder.start_raster_nd(&transform, &[], &[], None).unwrap(); + let view = [ViewEntry { + source_axis: 0, + start: 7, + step: -1, + steps: 8, + }]; + builder + .start_band_with_view( + None, + &["x"], + &[8], + &view, + BandDataType::UInt8, + None, + None, + None, + ) + .unwrap(); + builder + .band_data_writer() + .append_value(vec![0u8, 1, 2, 3, 4, 5, 6, 7]); + builder.finish_band().unwrap(); + builder.finish_raster().unwrap(); + + let array = builder.finish().unwrap(); + let rasters = RasterStructArray::new(&array); + let r = rasters.get(0).unwrap(); + let band = r.band(0).unwrap(); + + let buf = band.nd_buffer().unwrap(); + assert_eq!(buf.shape, &[8]); + assert_eq!(buf.strides, &[-1]); + assert_eq!(buf.offset, 7); + + let bytes = band.contiguous_data().unwrap(); + assert_eq!(&*bytes, &[7u8, 6, 5, 4, 3, 2, 1, 0]); + } + + #[test] + fn test_contiguous_data_negative_step_strided_reverse() { + // 1D source [0..8] with start=6, step=-2, steps=3 picks every other + // element walking backwards: {6, 4, 2}. 
+ let mut builder = RasterBuilder::new(1); + let transform = [0.0, 1.0, 0.0, 0.0, 0.0, -1.0]; + builder.start_raster_nd(&transform, &[], &[], None).unwrap(); + let view = [ViewEntry { + source_axis: 0, + start: 6, + step: -2, + steps: 3, + }]; + builder + .start_band_with_view( + None, + &["x"], + &[8], + &view, + BandDataType::UInt8, + None, + None, + None, + ) + .unwrap(); + builder + .band_data_writer() + .append_value(vec![0u8, 1, 2, 3, 4, 5, 6, 7]); + builder.finish_band().unwrap(); + builder.finish_raster().unwrap(); + + let array = builder.finish().unwrap(); + let rasters = RasterStructArray::new(&array); + let r = rasters.get(0).unwrap(); + let band = r.band(0).unwrap(); + + let buf = band.nd_buffer().unwrap(); + assert_eq!(buf.shape, &[3]); + assert_eq!(buf.strides, &[-2]); + assert_eq!(buf.offset, 6); + + let bytes = band.contiguous_data().unwrap(); + assert_eq!(&*bytes, &[6u8, 4, 2]); + } + + #[test] + fn test_view_field_is_null_for_identity_band() { + // Schema invariant: identity views are stored as null list rows so + // the canonical "no slice" case costs no Arrow space. Confirm by + // poking the raw column. 
+ use arrow_array::Array; + + let mut builder = RasterBuilder::new(1); + let transform = [0.0, 1.0, 0.0, 0.0, 0.0, -1.0]; + builder + .start_raster_nd(&transform, &["x", "y"], &[2, 2], None) + .unwrap(); + builder + .start_band_nd( + None, + &["y", "x"], + &[2, 2], + BandDataType::UInt8, + None, + None, + None, + ) + .unwrap(); + builder.band_data_writer().append_value(vec![0u8; 4]); + builder.finish_band().unwrap(); + builder.finish_raster().unwrap(); + + let array = builder.finish().unwrap(); + let bands_list = array + .column(sedona_schema::raster::raster_indices::BANDS) + .as_any() + .downcast_ref::() + .unwrap(); + let bands_struct = bands_list + .values() + .as_any() + .downcast_ref::() + .unwrap(); + let view_list = bands_struct + .column(sedona_schema::raster::band_indices::VIEW) + .as_any() + .downcast_ref::() + .unwrap(); + assert_eq!(view_list.len(), 1); + assert!( + view_list.is_null(0), + "identity-view band should serialise as a null view row" + ); + } + + #[test] + fn test_band_spatial_dim_size_mismatch_errors() { + let mut builder = RasterBuilder::new(1); + let transform = [0.0, 1.0, 0.0, 0.0, 0.0, -1.0]; + builder + .start_raster_nd(&transform, &["x", "y"], &[4, 4], None) + .unwrap(); + // Band has "x" and "y" but x-size disagrees with top-level shape. + builder + .start_band_nd( + None, + &["y", "x"], + &[4, 8], + BandDataType::UInt8, + None, + None, + None, + ) + .unwrap(); + builder.band_data_writer().append_value(vec![0u8; 32]); + builder.finish_band().unwrap(); + + let err = builder.finish_raster().unwrap_err(); + let msg = err.to_string(); + assert!( + msg.contains("has size 8") && msg.contains("expected 4"), + "unexpected error: {msg}" + ); + } + + #[test] + fn test_contiguous_data_float32_fast_path() { + // Multi-byte dtype on the contiguous innermost-axis fast path: + // a 2D outer-axis-sliced view over Float32 should still emit + // bytes by `extend_from_slice` and produce the matching source + // bytes back. 
Catches a regression where the fast path + // assumed dtype_size == 1. + let mut builder = RasterBuilder::new(1); + let transform = [0.0, 1.0, 0.0, 0.0, 0.0, -1.0]; + builder + .start_raster_nd(&transform, &["x", "y"], &[3, 2], None) + .unwrap(); + // Slice the outer axis: take rows 0 and 1 of a 3-row source. With + // start=0, step=1, steps=2 over an axis of size 3, the view is + // not identity, so contiguous_data() materialises through the + // fast path. Inner stride = dtype_size = 4 → fast path is taken. + let view = [ + ViewEntry { + source_axis: 0, + start: 0, + step: 1, + steps: 2, + }, + ViewEntry { + source_axis: 1, + start: 0, + step: 1, + steps: 3, + }, + ]; + builder + .start_band_with_view( + None, + &["y", "x"], + &[3, 3], // 3x3 source + &view, + BandDataType::Float32, + None, + None, + None, + ) + .unwrap(); + let source: Vec = (0..9).map(|i| i as f32).collect(); + let source_bytes: Vec = source.iter().flat_map(|f| f.to_le_bytes()).collect(); + builder + .band_data_writer() + .append_value(source_bytes.clone()); + builder.finish_band().unwrap(); + builder.finish_raster().unwrap(); + let array = builder.finish().unwrap(); + let rasters = RasterStructArray::new(&array); + let r = rasters.get(0).unwrap(); + let band = r.band(0).unwrap(); + + // Visible shape is [2, 3]; the first 6 source floats (rows 0,1) are + // exactly the visible pixels — i.e. the first 24 source bytes. + let bytes = band.contiguous_data().unwrap(); + assert!(matches!(bytes, std::borrow::Cow::Owned(_))); + assert_eq!(&*bytes, &source_bytes[0..24]); + } + + #[test] + fn test_contiguous_data_outer_axis_slice_3d() { + // 3D source [T=3, Y=2, X=3] of UInt8. View slices T to T=1..3 + // (start=1, step=1, steps=2), keeps Y and X identity. Innermost + // axis is contiguous (step=1, dtype=1) so the fast path emits 6 + // bytes per outer iteration via extend_from_slice. 
+ let mut builder = RasterBuilder::new(1); + let transform = [0.0, 1.0, 0.0, 0.0, 0.0, -1.0]; + builder + .start_raster_nd(&transform, &["x", "y"], &[3, 2], None) + .unwrap(); + let view = [ + ViewEntry { + source_axis: 0, + start: 1, + step: 1, + steps: 2, + }, + ViewEntry { + source_axis: 1, + start: 0, + step: 1, + steps: 2, + }, + ViewEntry { + source_axis: 2, + start: 0, + step: 1, + steps: 3, + }, + ]; + builder + .start_band_with_view( + None, + &["t", "y", "x"], + &[3, 2, 3], + &view, + BandDataType::UInt8, + None, + None, + None, + ) + .unwrap(); + let source: Vec = (0..18).collect(); + builder.band_data_writer().append_value(source.clone()); + builder.finish_band().unwrap(); + builder.finish_raster().unwrap(); + let array = builder.finish().unwrap(); + let rasters = RasterStructArray::new(&array); + let r = rasters.get(0).unwrap(); + let band = r.band(0).unwrap(); + + // Visible region = source[6..18] (T=1 and T=2 planes). + assert_eq!(band.shape(), &[2, 2, 3]); + let bytes = band.contiguous_data().unwrap(); + assert_eq!(&*bytes, &source[6..18]); + } + + #[test] + fn test_contiguous_data_strided_inner_falls_back() { + // Inner stride != dtype_size forces the elementwise fallback. View + // takes every other column on a 1D UInt16 source. Verifies the + // slow path still emits correct bytes. 
+ let mut builder = RasterBuilder::new(1); + let transform = [0.0, 1.0, 0.0, 0.0, 0.0, -1.0]; + builder.start_raster_nd(&transform, &[], &[], None).unwrap(); + let view = [ViewEntry { + source_axis: 0, + start: 0, + step: 2, + steps: 3, + }]; + builder + .start_band_with_view( + None, + &["x"], + &[6], + &view, + BandDataType::UInt16, + None, + None, + None, + ) + .unwrap(); + let source: Vec = vec![10, 20, 30, 40, 50, 60]; + let source_bytes: Vec = source.iter().flat_map(|v| v.to_le_bytes()).collect(); + builder.band_data_writer().append_value(source_bytes); + builder.finish_band().unwrap(); + builder.finish_raster().unwrap(); + let array = builder.finish().unwrap(); + let rasters = RasterStructArray::new(&array); + let r = rasters.get(0).unwrap(); + let band = r.band(0).unwrap(); + + let bytes = band.contiguous_data().unwrap(); + let expected: Vec = [10u16, 30, 50] + .iter() + .flat_map(|v| v.to_le_bytes()) + .collect(); + assert_eq!(&*bytes, expected.as_slice()); + } + + #[test] + fn test_nd_buffer_multidim_non_zero_starts() { + // 3D source [T=4, Y=3, X=5], slice T from 1, Y from 1, X identity. + // visible = [3, 2, 5]. byte_offset must equal 1*Y*X + 1*X = 1*15 + 1*5 = 20. 
+ let mut builder = RasterBuilder::new(1); + let transform = [0.0, 1.0, 0.0, 0.0, 0.0, -1.0]; + builder + .start_raster_nd(&transform, &["x", "y"], &[5, 2], None) + .unwrap(); + let view = [ + ViewEntry { + source_axis: 0, + start: 1, + step: 1, + steps: 3, + }, + ViewEntry { + source_axis: 1, + start: 1, + step: 1, + steps: 2, + }, + ViewEntry { + source_axis: 2, + start: 0, + step: 1, + steps: 5, + }, + ]; + builder + .start_band_with_view( + None, + &["t", "y", "x"], + &[4, 3, 5], + &view, + BandDataType::UInt8, + None, + None, + None, + ) + .unwrap(); + builder.band_data_writer().append_value(vec![0u8; 60]); + builder.finish_band().unwrap(); + builder.finish_raster().unwrap(); + let array = builder.finish().unwrap(); + let rasters = RasterStructArray::new(&array); + let r = rasters.get(0).unwrap(); + let band = r.band(0).unwrap(); + let buf = band.nd_buffer().unwrap(); + assert_eq!(buf.shape, &[3, 2, 5]); + assert_eq!(buf.strides, &[15, 5, 1]); + assert_eq!(buf.offset, 20); + } + + #[test] + fn test_nd_buffer_permutation_and_slice_combined() { + // 2D source [Y=4, X=3]. View permutes (visible order [X, Y]) and + // slices Y from 1, step 2, steps 2. 
Expected: + // visible_shape = [3, 2] + // byte_strides = [step_X * stride_X_src, step_Y * stride_Y_src] + // = [1 * 1, 2 * 3] = [1, 6] + // byte_offset = start_X * stride_X_src + start_Y * stride_Y_src + // = 0 * 1 + 1 * 3 = 3 + let mut builder = RasterBuilder::new(1); + let transform = [0.0, 1.0, 0.0, 0.0, 0.0, -1.0]; + builder.start_raster_nd(&transform, &[], &[], None).unwrap(); + let view = [ + ViewEntry { + source_axis: 1, + start: 0, + step: 1, + steps: 3, + }, // X + ViewEntry { + source_axis: 0, + start: 1, + step: 2, + steps: 2, + }, // Y + ]; + builder + .start_band_with_view( + None, + &["x", "y"], + &[4, 3], + &view, + BandDataType::UInt8, + None, + None, + None, + ) + .unwrap(); + builder + .band_data_writer() + .append_value((0u8..12).collect::>()); + builder.finish_band().unwrap(); + builder.finish_raster().unwrap(); + let array = builder.finish().unwrap(); + let rasters = RasterStructArray::new(&array); + let r = rasters.get(0).unwrap(); + let band = r.band(0).unwrap(); + let buf = band.nd_buffer().unwrap(); + assert_eq!(buf.shape, &[3, 2]); + assert_eq!(buf.strides, &[1, 6]); + assert_eq!(buf.offset, 3); + + // contiguous_data must walk the permuted+strided view correctly. + // Visible (X, Y) → source row Y is 0..3=[0,1,2], 3..6=[3,4,5], 6..9=[6,7,8], 9..12=[9,10,11]. + // We expose Y rows 1 and 3 (start=1 step=2 steps=2). At visible index + // (x, y), source value = source[(start_Y + y*2) * 3 + x] = source[(1 + y*2)*3 + x]. + // Expected, in C-order with X outer and Y inner: + // (x=0,y=0)=src[3]=3, (x=0,y=1)=src[9]=9, + // (x=1,y=0)=src[4]=4, (x=1,y=1)=src[10]=10, + // (x=2,y=0)=src[5]=5, (x=2,y=1)=src[11]=11 + let bytes = band.contiguous_data().unwrap(); + assert_eq!(&*bytes, &[3u8, 9, 4, 10, 5, 11]); + } + + #[test] + fn test_nd_buffer_steps_one_view() { + // Degenerate inner view: pick a single element on each axis. 
+ // visible_shape == [1, 1]; byte_strides retain their per-axis values + // because they're step * source_stride, not skipped on steps==1. + let mut builder = RasterBuilder::new(1); + let transform = [0.0, 1.0, 0.0, 0.0, 0.0, -1.0]; + builder.start_raster_nd(&transform, &[], &[], None).unwrap(); + let view = [ + ViewEntry { + source_axis: 0, + start: 1, + step: 1, + steps: 1, + }, + ViewEntry { + source_axis: 1, + start: 2, + step: 1, + steps: 1, + }, + ]; + builder + .start_band_with_view( + None, + &["a", "b"], + &[3, 4], + &view, + BandDataType::UInt8, + None, + None, + None, + ) + .unwrap(); + builder + .band_data_writer() + .append_value((0u8..12).collect::>()); + builder.finish_band().unwrap(); + builder.finish_raster().unwrap(); + let array = builder.finish().unwrap(); + let rasters = RasterStructArray::new(&array); + let r = rasters.get(0).unwrap(); + let band = r.band(0).unwrap(); + assert_eq!(band.shape(), &[1, 1]); + let buf = band.nd_buffer().unwrap(); + assert_eq!(buf.shape, &[1, 1]); + // step * source_stride per axis: source_stride_a = 4, source_stride_b = 1. + // A regression that zeroed out strides when steps==1 would still pass + // the shape/offset/data assertions, so pin the strides explicitly. + assert_eq!(buf.strides, &[4, 1]); + // start_a * source_stride_a + start_b * source_stride_b + // = 1 * 4 + 2 * 1 = 6 + assert_eq!(buf.offset, 6); + let bytes = band.contiguous_data().unwrap(); + assert_eq!(&*bytes, &[6u8]); + } + + #[test] + fn test_nd_buffer_multidim_with_zero_axis() { + // visible_shape contains a zero axis somewhere in the middle. + // contiguous_data returns an empty buffer; nd_buffer returns the + // zero-element shape. 
+ let mut builder = RasterBuilder::new(1); + let transform = [0.0, 1.0, 0.0, 0.0, 0.0, -1.0]; + builder.start_raster_nd(&transform, &[], &[], None).unwrap(); + let view = [ + ViewEntry { + source_axis: 0, + start: 0, + step: 1, + steps: 3, + }, + ViewEntry { + source_axis: 1, + start: 0, + step: 1, + steps: 0, + }, + ViewEntry { + source_axis: 2, + start: 0, + step: 1, + steps: 5, + }, + ]; + builder + .start_band_with_view( + None, + &["a", "b", "c"], + &[3, 4, 5], + &view, + BandDataType::UInt8, + None, + None, + None, + ) + .unwrap(); + builder.band_data_writer().append_value(vec![0u8; 60]); + builder.finish_band().unwrap(); + builder.finish_raster().unwrap(); + let array = builder.finish().unwrap(); + let rasters = RasterStructArray::new(&array); + let r = rasters.get(0).unwrap(); + let band = r.band(0).unwrap(); + assert_eq!(band.shape(), &[3, 0, 5]); + let buf = band.nd_buffer().unwrap(); + assert_eq!(buf.shape, &[3, 0, 5]); + let bytes = band.contiguous_data().unwrap(); + assert!(bytes.is_empty()); + } + + #[test] + fn test_view_null_round_trips_through_arrow_ipc() { + // Schema invariant: a band built via start_band_nd serialises with a + // null view row, and the null must survive an Arrow IPC round-trip. + // If a future change accidentally writes a non-null empty list + // instead, downstream readers (DuckDB, PyArrow, sedona-py) will + // disagree about whether the view is identity. + + let mut builder = RasterBuilder::new(2); + let transform = [0.0, 1.0, 0.0, 0.0, 0.0, -1.0]; + // Raster 0: identity-view band → null view row. + builder + .start_raster_nd(&transform, &["x", "y"], &[3, 2], None) + .unwrap(); + builder + .start_band_nd( + None, + &["y", "x"], + &[2, 3], + BandDataType::UInt8, + None, + None, + None, + ) + .unwrap(); + builder.band_data_writer().append_value(vec![0u8; 6]); + builder.finish_band().unwrap(); + builder.finish_raster().unwrap(); + // Raster 1: explicit non-identity view → non-null view row. 
+ builder + .start_raster_nd(&transform, &["x"], &[3], None) + .unwrap(); + let view = [ViewEntry { + source_axis: 0, + start: 1, + step: 2, + steps: 3, + }]; + builder + .start_band_with_view( + None, + &["x"], + &[8], + &view, + BandDataType::UInt8, + None, + None, + None, + ) + .unwrap(); + builder + .band_data_writer() + .append_value(vec![0u8, 1, 2, 3, 4, 5, 6, 7]); + builder.finish_band().unwrap(); + builder.finish_raster().unwrap(); + + let array = builder.finish().unwrap(); + let schema = Arc::new(Schema::new(vec![Arc::new(arrow_schema::Field::new( + "raster", + array.data_type().clone(), + true, + )) as arrow_schema::FieldRef])); + let batch = RecordBatch::try_new(schema.clone(), vec![Arc::new(array.clone())]).unwrap(); + + let mut buf: Vec = Vec::new(); + { + let mut writer = StreamWriter::try_new(&mut buf, schema.as_ref()).unwrap(); + writer.write(&batch).unwrap(); + writer.finish().unwrap(); + } + + let cursor = Cursor::new(buf); + let reader = StreamReader::try_new(cursor, None).unwrap(); + let batches: Vec<_> = reader.collect::, _>>().unwrap(); + assert_eq!(batches.len(), 1); + let restored_struct = batches[0] + .column(0) + .as_any() + .downcast_ref::() + .unwrap(); + + // Reach into the restored bands list and confirm the view list + // preserves null/non-null per row. + let bands_list = restored_struct + .column(sedona_schema::raster::raster_indices::BANDS) + .as_any() + .downcast_ref::() + .unwrap(); + let bands_struct = bands_list + .values() + .as_any() + .downcast_ref::() + .unwrap(); + let view_list = bands_struct + .column(sedona_schema::raster::band_indices::VIEW) + .as_any() + .downcast_ref::() + .unwrap(); + assert_eq!(view_list.len(), 2); + assert!( + view_list.is_null(0), + "identity-view band must remain a null view row after IPC round-trip" + ); + assert!( + !view_list.is_null(1), + "explicit-view band must remain non-null after IPC round-trip" + ); + + // Sanity: read paths still produce the expected visible shapes. 
+ let rasters = RasterStructArray::new(restored_struct); + let r0 = rasters.get(0).unwrap(); + assert_eq!(r0.band(0).unwrap().shape(), &[2, 3]); + let r1 = rasters.get(1).unwrap(); + assert_eq!(r1.band(0).unwrap().shape(), &[3]); + } } diff --git a/rust/sedona-raster/src/traits.rs b/rust/sedona-raster/src/traits.rs index f8541ff33..b2035b0a6 100644 --- a/rust/sedona-raster/src/traits.rs +++ b/rust/sedona-raster/src/traits.rs @@ -15,11 +15,65 @@ // specific language governing permissions and limitations // under the License. +use std::borrow::Cow; + use arrow_schema::ArrowError; +use sedona_schema::raster::BandDataType; + +/// View into a band's N-D data buffer with layout metadata. +/// +/// `shape`, `strides`, and `offset` describe the *visible* region in +/// byte-stride terms — they are computed by composing the band's +/// `source_shape` (the natural extent of `buffer`) with its `view` +/// (the per-axis `(source_axis, start, step, steps)` slice spec). Stride +/// can be zero (broadcast) or negative (reverse iteration), and may not be +/// C-order. Consumers that need a flat row-major buffer should use +/// `BandRef::contiguous_data()` instead. +/// +/// Only `buffer` is tied to the producer's lifetime `'a` (it can be tens of +/// MBs of pixel data and must not be copied). `shape` and `strides` are +/// owned `Vec`s — they're tiny (ndim ≤ a handful) so an allocation here is +/// negligible, and owning them lets an `NdBuffer` outlive the producer's +/// internal layout cache (e.g. cross-thread, return-by-value). +#[derive(Debug)] +pub struct NdBuffer<'a> { + pub buffer: &'a [u8], + pub shape: Vec, + pub strides: Vec, + pub offset: u64, + pub data_type: BandDataType, +} -use sedona_schema::raster::{BandDataType, StorageType}; +/// One per-dimension entry of a band's logical view. Describes how a +/// visible axis maps onto an axis of the underlying source buffer. 
+/// +/// - `source_axis`: index into the band's `source_shape` that this visible +/// axis reads from. Across a band's full view, `source_axis` values must +/// form a permutation of `0..ndim` — axis-dropping and axis-introducing +/// views are not supported today. +/// - `start`: starting index along the source axis (in elements, not bytes). +/// - `step`: stride between consecutive visible elements along the source +/// axis. `step == 0` means broadcast (the same source element is +/// exposed `steps` times); negative `step` means reverse iteration. +/// - `steps`: number of visible elements along this axis. `steps == 0` is +/// allowed (empty axis). +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub struct ViewEntry { + pub source_axis: i64, + pub start: i64, + pub step: i64, + pub steps: i64, +} -/// Metadata for a raster +/// Concrete raster metadata returned by `RasterRef::metadata()`. +/// +/// Restored from the pre-N-D schema to keep callers that pattern-match on +/// `metadata.width`, `metadata.upperleft_x`, etc. compiling. Computed +/// eagerly from `RasterRef::transform()` and `RasterRef::spatial_shape()`. +/// +/// Panics on construction (`metadata()`) if the raster lacks width or +/// height — corrupt schemas error through the `width()`/`height()` trait +/// methods directly; the metadata accessor is the convenience surface. 
#[derive(Debug, Clone)] pub struct RasterMetadata { pub width: u64, @@ -32,29 +86,10 @@ pub struct RasterMetadata { pub skew_y: f64, } -/// Metadata for a single band -#[derive(Debug, Clone)] -pub struct BandMetadata { - pub nodata_value: Option>, - pub storage_type: StorageType, - pub datatype: BandDataType, - /// URL for OutDb reference (only used when storage_type == OutDbRef) - pub outdb_url: Option, - /// Band ID within the OutDb resource (only used when storage_type == OutDbRef) - pub outdb_band_id: Option, -} - -/// Trait for accessing complete raster data -pub trait RasterRef { - /// Raster metadata accessor - fn metadata(&self) -> &dyn MetadataRef; - /// CRS accessor - fn crs(&self) -> Option<&str>; - /// Bands accessor - fn bands(&self) -> &dyn BandsRef; -} - -/// Trait for accessing raster metadata (dimensions, geotransform, bounding box, etc.) +/// Pre-N-D metadata-accessor trait. Restored so callers from before the +/// N-D refactor that write `fn foo(metadata: &dyn MetadataRef)` keep +/// compiling. `RasterMetadata` is the canonical implementer; new code +/// should reach for `RasterRef::width()? / height()?` instead. 
pub trait MetadataRef { /// Width of the raster in pixels fn width(&self) -> u64; @@ -73,61 +108,656 @@ pub trait MetadataRef { /// Y-direction skew/rotation fn skew_y(&self) -> f64; } -/// Trait for accessing all bands in a raster -pub trait BandsRef { - /// Number of bands in the raster - fn len(&self) -> usize; - /// Check if no bands are present - fn is_empty(&self) -> bool { + +impl MetadataRef for RasterMetadata { + fn width(&self) -> u64 { + self.width + } + fn height(&self) -> u64 { + self.height + } + fn upper_left_x(&self) -> f64 { + self.upperleft_x + } + fn upper_left_y(&self) -> f64 { + self.upperleft_y + } + fn scale_x(&self) -> f64 { + self.scale_x + } + fn scale_y(&self) -> f64 { + self.scale_y + } + fn skew_x(&self) -> f64 { + self.skew_x + } + fn skew_y(&self) -> f64 { + self.skew_y + } +} + +impl RasterMetadata { + pub fn width(&self) -> u64 { + self.width + } + pub fn height(&self) -> u64 { + self.height + } + pub fn upper_left_x(&self) -> f64 { + self.upperleft_x + } + pub fn upper_left_y(&self) -> f64 { + self.upperleft_y + } + pub fn scale_x(&self) -> f64 { + self.scale_x + } + pub fn scale_y(&self) -> f64 { + self.scale_y + } + pub fn skew_x(&self) -> f64 { + self.skew_x + } + pub fn skew_y(&self) -> f64 { + self.skew_y + } +} + +/// Concrete band metadata returned by `BandRef::metadata()`. +/// +/// Restored from the pre-N-D schema. The `outdb_url` and `outdb_band_id` +/// fields are eagerly parsed from the N-D `outdb_uri` (which carries a +/// `#band=N` fragment in the SedonaDB convention) so callers from the +/// pre-N-D era keep compiling against the same field names. +#[derive(Debug, Clone)] +pub struct BandMetadata { + pub nodata_value: Option>, + pub storage_type: sedona_schema::raster::StorageType, + pub datatype: BandDataType, + pub outdb_url: Option, + pub outdb_band_id: Option, +} + +impl BandMetadata { + pub fn nodata_value(&self) -> Option<&[u8]> { + self.nodata_value.as_deref() + } + /// Returns the storage type. 
Wrapped in `Result` to match main's + /// `BandMetadataRef::storage_type()` signature — our shim + /// implementation never errors, but the signature is preserved so + /// existing `matches!(band.metadata().storage_type(), Ok(...))` + /// patterns from before the N-D refactor keep compiling. + pub fn storage_type(&self) -> Result { + Ok(self.storage_type) + } + /// Returns the band data type. Wrapped in `Result` to match main's + /// `BandMetadataRef::data_type()` signature — see `storage_type()`. + pub fn data_type(&self) -> Result { + Ok(self.datatype) + } + pub fn outdb_url(&self) -> Option<&str> { + self.outdb_url.as_deref() + } + pub fn outdb_band_id(&self) -> Option { + self.outdb_band_id + } + /// Nodata value interpreted as f64. Mirrors the pre-N-D + /// `BandMetadataRef::nodata_value_as_f64()`. Uses the lossless + /// conversion (errors on i64/u64 magnitudes > 2^53) so the shim + /// surface picks up the same correctness fix as + /// `BandRef::nodata_as_f64()`. + pub fn nodata_value_as_f64(&self) -> Result, ArrowError> { + let bytes = match self.nodata_value.as_deref() { + Some(b) => b, + None => return Ok(None), + }; + nodata_bytes_to_f64_lossless(bytes, &self.datatype).map(Some) + } +} + +/// Parse the SedonaDB `#band=N` fragment out of an out-DB URI. +/// Returns `(base_url, band_id)`; band_id defaults to 1 if absent. +/// Duplicated (intentionally — and minimally) from +/// `sedona-raster-gdal::source_uri` because the shim lives in +/// `sedona-raster` and can't reach across the crate boundary. +fn split_outdb_band_fragment(uri: &str) -> (String, u32) { + if let Some(hash_pos) = uri.rfind('#') { + let (base, fragment) = uri.split_at(hash_pos); + let fragment = &fragment[1..]; // skip the '#' + if let Some(rest) = fragment.strip_prefix("band=") { + if let Ok(n) = rest.parse::() { + return (base.to_string(), n); + } + } + } + (uri.to_string(), 1) +} + +/// Iteration view over a raster's bands. Returned by `RasterRef::bands()`. 
+/// +/// Wraps a borrowed `&dyn RasterRef` and offers the `len()` / `band(1-based)` +/// / `iter()` shape that callers used before the N-D refactor. New code can +/// equivalently use `RasterRef::num_bands()` and `RasterRef::band(0-based)` +/// directly; both call patterns coexist. +pub struct Bands<'a> { + raster: &'a dyn RasterRef, +} + +impl<'a> Bands<'a> { + /// Wrap a `&dyn RasterRef` for the legacy 1-based band-access surface. + pub fn new(raster: &'a dyn RasterRef) -> Self { + Self { raster } + } +} + +impl<'a> Bands<'a> { + /// Number of bands in the raster. + pub fn len(&self) -> usize { + self.raster.num_bands() + } + + /// True iff the raster has zero bands. + pub fn is_empty(&self) -> bool { self.len() == 0 } - /// Get a specific band by number (returns Error if out of bounds) - /// By convention, band numbers are 1-based - fn band(&self, number: usize) -> Result, ArrowError>; - /// Iterator over all bands - fn iter(&self) -> Box + '_>; + + /// Look up a band by **1-based** number. Returns an error rather than + /// `None` so callers can use `?`. For 0-based access, use + /// `RasterRef::band` directly. + pub fn band(&self, number: usize) -> Result, ArrowError> { + if number == 0 { + return Err(ArrowError::InvalidArgumentError(format!( + "Invalid band number {number}: band numbers must be 1-based" + ))); + } + self.raster.band(number - 1).ok_or_else(|| { + ArrowError::InvalidArgumentError(format!( + "Band number {} is out of range: this raster has {} bands", + number, + self.raster.num_bands() + )) + }) + } + + /// Iterate in 0-based order, skipping any index where `RasterRef::band` returns `None`. + pub fn iter(&self) -> impl Iterator> + 'a { + let raster = self.raster; + (0..raster.num_bands()).filter_map(move |i| raster.band(i)) + } } -/// Trait for accessing individual band data +/// Trait for accessing an N-dimensional raster (top level). +/// +/// Replaces the legacy `RasterRef` + `MetadataRef` + `BandsRef` hierarchy with +/// a single flat interface. Bands are 0-indexed.
+pub trait RasterRef { + /// Number of bands/variables + fn num_bands(&self) -> usize; + + /// Access a band by 0-based index + fn band(&self, index: usize) -> Option>; + + /// 1-based band-access view used by callers from before the N-D + /// refactor. Implementers typically write `Bands::new(self)`. + fn bands(&self) -> Bands<'_>; + + /// Band name (e.g., Zarr variable name). None for unnamed bands. + fn band_name(&self, index: usize) -> Option<&str>; + + /// Fast path for band data type — reads the scalar `data_type` column + /// without materialising a full `BandRef`. UDFs that only need this + /// metadata field should prefer this over `band(i)?.data_type()`. + /// Returns None if `index` is out of range or the discriminant is invalid. + /// + /// The default implementation delegates to `band(i)`. Backends with a + /// flat columnar layout should override for the no-allocation fast path. + fn band_data_type(&self, index: usize) -> Option { + self.band(index).map(|b| b.data_type()) + } + + /// Fast path for band outdb URI — reads the `outdb_uri` column without + /// materialising a `BandRef`. Returns None if the band has no URI or + /// if `index` is out of range. + /// + /// A default built on `band(index)` would have to allocate a boxed band, + /// and a `&str` borrowed from that box can't outlive it, so the default + /// returns None. The raster-array backend overrides it to read the column directly. + fn band_outdb_uri(&self, index: usize) -> Option<&str> { + let _ = index; + None + } + + /// Fast path for band outdb format — reads the `outdb_format` column + /// without materialising a `BandRef`. Default returns None for the + /// same lifetime reason as `band_outdb_uri`. + fn band_outdb_format(&self, index: usize) -> Option<&str> { + let _ = index; + None + } + + /// Fast path for band nodata bytes — reads the `nodata` column without + /// materialising a `BandRef`. Default returns None for the same + /// lifetime reason as `band_outdb_uri`.
+ fn band_nodata(&self, index: usize) -> Option<&[u8]> { + let _ = index; + None + } + + /// CRS string (PROJJSON, WKT, or authority code). None if not set. + fn crs(&self) -> Option<&str>; + + /// 6-element affine transform in GDAL GeoTransform order: + /// `[origin_x, scale_x, skew_x, origin_y, skew_y, scale_y]` + fn transform(&self) -> &[f64]; + + /// Eagerly-computed concrete metadata view (width, height, geotransform + /// scalars). Mirrors the pre-N-D `RasterRef::metadata()` accessor. + /// + /// Panics if `spatial_shape` lacks width/height or `transform` is the + /// wrong length — those are corrupt-schema cases that error cleanly + /// through the `width()`/`height()` trait methods, but the metadata + /// accessor predates that contract and is kept infallible for caller + /// ergonomics. + fn metadata(&self) -> RasterMetadata { + let width = self + .width() + .expect("raster has no width (spatial_shape missing); use width()? for error handling"); + let height = self + .height() + .expect("raster has no height; use height()? for error handling"); + let t = self.transform(); + if t.len() != 6 { + panic!("transform must be 6 elements, got {}", t.len()); + } + RasterMetadata { + width, + height, + upperleft_x: t[0], + scale_x: t[1], + skew_x: t[2], + upperleft_y: t[3], + skew_y: t[4], + scale_y: t[5], + } + } + + /// Spatial dimension names, in order (today `["x","y"]`; a future Z phase + /// would extend to `["x","y","z"]`). Every band must contain each of these + /// names in its own `dim_names`, with matching sizes. + fn spatial_dims(&self) -> Vec<&str>; + + /// Spatial dimension sizes, in the same order as `spatial_dims`. Today + /// `[width, height]`. + fn spatial_shape(&self) -> &[i64]; + + /// Name of the X spatial dimension (e.g., "x", "lon", "easting"). + fn x_dim(&self) -> &str { + let dims = self.spatial_dims(); + dims.into_iter().next().unwrap_or("x") + } + + /// Name of the Y spatial dimension (e.g., "y", "lat", "northing"). 
+ fn y_dim(&self) -> &str { + let dims = self.spatial_dims(); + dims.into_iter().nth(1).unwrap_or("y") + } + + /// Width in pixels — size of the X spatial dimension from the top-level + /// `spatial_shape`. Errors if `spatial_shape` is empty or the X size is + /// negative; both are invariant violations rather than legitimate "no + /// value" states. + fn width(&self) -> Result { + let shape = self.spatial_shape(); + let Some(&v) = shape.first() else { + return Err(ArrowError::InvalidArgumentError( + "raster has no width (spatial_shape is empty)".to_string(), + )); + }; + if v < 0 { + return Err(ArrowError::InvalidArgumentError(format!( + "raster width must be non-negative, got {v}" + ))); + } + Ok(v as u64) + } + + /// Height in pixels — size of the Y spatial dimension from the top-level + /// `spatial_shape`. Errors if `spatial_shape` has fewer than two entries + /// or the Y size is negative. + fn height(&self) -> Result { + let shape = self.spatial_shape(); + let Some(&v) = shape.get(1) else { + return Err(ArrowError::InvalidArgumentError(format!( + "raster has no height (spatial_shape has {} entries, need >= 2)", + shape.len() + ))); + }; + if v < 0 { + return Err(ArrowError::InvalidArgumentError(format!( + "raster height must be non-negative, got {v}" + ))); + } + Ok(v as u64) + } + + /// Look up a band by name. Returns None if no band has that name. + fn band_by_name(&self, name: &str) -> Option> { + (0..self.num_bands()) + .find(|&i| self.band_name(i) == Some(name)) + .and_then(|i| self.band(i)) + } +} + +/// Trait for accessing a single band/variable within an N-D raster. +/// +/// This is the consumer interface. Implementations handle storage details. +/// Two data access paths: +/// - `contiguous_data()` — flat row-major bytes for consumers that don't need +/// stride awareness (most RS_* functions, GDAL boundary, serialization).
+/// - `nd_buffer()` — raw buffer + shape + strides + offset for stride-aware +/// consumers (numpy zero-copy views, Arrow FFI) that want to avoid copies. pub trait BandRef { - /// Band metadata accessor - fn metadata(&self) -> &dyn BandMetadataRef; - /// Raw band data as bytes (zero-copy access) - fn data(&self) -> &[u8]; -} - -/// Trait for accessing individual band metadata -pub trait BandMetadataRef { - /// No-data value as raw bytes (None if null) - fn nodata_value(&self) -> Option<&[u8]>; - /// Storage type (InDb, OutDbRef, etc) - fn storage_type(&self) -> Result; - /// Band data type (UInt8, Float32, etc.) - fn data_type(&self) -> Result; - /// OutDb URL (only used when storage_type == OutDbRef) - fn outdb_url(&self) -> Option<&str>; - /// OutDb band ID (only used when storage_type == OutDbRef) - fn outdb_band_id(&self) -> Option; - - /// No-data value interpreted as f64. + // -- Dimension metadata -- + + /// Number of dimensions in this band + fn ndim(&self) -> usize; + + /// Dimension names in order (e.g., `["time", "y", "x"]`) + fn dim_names(&self) -> Vec<&str>; + + /// Visible shape — size of each dimension in the band's view, in + /// `dim_names` order. Derived from `view`: `[v.steps for v in view]`. + /// This is what almost all consumers want; use `raw_source_shape()` only + /// when you need to address into the raw `data` buffer (e.g. FFI). + fn shape(&self) -> &[u64]; + + /// **Internal/FFI-only.** Natural C-order extent of the band's + /// underlying `data` buffer, indexed by *source* axis (not visible + /// axis). Almost every consumer wants `shape()` instead — that is the + /// region the band exposes, and is what you compare against + /// `spatial_shape`, iterate over for pixels, and compose further views + /// against. The two only agree when the band's view is the identity; + /// any slice, broadcast, or permutation makes them diverge. + /// + /// Use this only when you need to index directly into the raw `data` + /// bytes (e.g. 
Arrow C Data Interface, numpy zero-copy views) and you + /// also handle `view()` and the byte-stride layout from `nd_buffer()`. + fn raw_source_shape(&self) -> &[u64]; + + /// Per-visible-dimension view entries describing how the band's + /// visible axes map onto its `source_shape`. `view().len() == ndim()`. + /// See `ViewEntry` for per-entry semantics. + fn view(&self) -> &[ViewEntry]; + + /// Size of a named dimension (None if doesn't exist) + fn dim_size(&self, name: &str) -> Option { + let idx = self.dim_index(name)?; + Some(self.shape()[idx]) + } + + /// Index of a named dimension (None if doesn't exist) + fn dim_index(&self, name: &str) -> Option { + self.dim_names().iter().position(|n| *n == name) + } + + /// True iff this band is shaped exactly like a legacy 2-D raster band: + /// `dim_names == ["y", "x"]` and the view is the identity over the + /// band's `raw_source_shape` (no slice, no broadcast, no permutation). + /// + /// GDAL-backed SQL functions use this to refuse N-D bands cleanly while + /// they wait for an MDArray-aware port. + fn is_2d(&self) -> bool { + let dims = self.dim_names(); + if dims.len() != 2 || dims[0] != "y" || dims[1] != "x" { + return false; + } + let view = self.view(); + let source_shape = self.raw_source_shape(); + if view.len() != 2 || source_shape.len() != 2 { + return false; + } + view.iter().enumerate().all(|(i, v)| { + v.source_axis as usize == i + && v.start == 0 + && v.step == 1 + && v.steps >= 0 + && v.steps as u64 == source_shape[i] + }) + } + + // -- Band metadata -- + + /// Data type for all elements in this band + fn data_type(&self) -> BandDataType; + + /// Nodata value as raw bytes (None if not set) + fn nodata(&self) -> Option<&[u8]>; + + /// OutDb URI — location of the external resource (e.g. + /// `"s3://bucket/file.tif"`, `"file:///…"`, `"mem://…"`). None for + /// in-memory bands. Scheme resolution is delegated to an + /// `ObjectStoreRegistry`; it does *not* imply a format. 
+ fn outdb_uri(&self) -> Option<&str> { + None + } + + /// OutDb format — how to interpret the bytes at `outdb_uri` + /// (e.g. `"geotiff"`, `"zarr"`). None means in-memory — the band's + /// `contiguous_data()` / `nd_buffer()` is authoritative. + fn outdb_format(&self) -> Option<&str> { + None + } + + /// True if this band's bytes live in the `data` buffer (in-database). + /// False if the bytes must be fetched from `outdb_uri` (out-of-database). + /// + /// The discriminator is whether the `data` buffer is non-empty — + /// `outdb_uri` and `outdb_format` are orthogonal location/format hints + /// that may be set on either kind of band. + fn is_indb(&self) -> bool { + // Default: materialize via nd_buffer and check buffer emptiness. + // Concrete impls should override with a direct buffer check. + self.nd_buffer().is_ok_and(|b| !b.buffer.is_empty()) + } + + /// Eagerly-computed concrete band metadata. Mirrors the pre-N-D + /// `BandRef::metadata()` accessor. + /// + /// `outdb_url` and `outdb_band_id` are parsed from `outdb_uri()`'s + /// SedonaDB `#band=N` fragment convention so callers that pattern-match + /// on those fields keep compiling. + fn metadata(&self) -> BandMetadata { + let is_indb = self.is_indb(); + // Match the pre-N-D contract: outdb_url / outdb_band_id are only + // populated when storage_type is OutDbRef. The current schema lets + // the URI hint coexist with InDb data; this surface hides that. 
+ let (outdb_url, outdb_band_id) = if !is_indb { + match self.outdb_uri() { + Some(uri) => { + let (base, band) = split_outdb_band_fragment(uri); + (Some(base), Some(band)) + } + None => (None, None), + } + } else { + (None, None) + }; + BandMetadata { + nodata_value: self.nodata().map(|b| b.to_vec()), + storage_type: if is_indb { + sedona_schema::raster::StorageType::InDb + } else { + sedona_schema::raster::StorageType::OutDbRef + }, + datatype: self.data_type(), + outdb_url, + outdb_band_id, + } + } + + // -- Data access -- + + /// Raw backing buffer + visible-region layout. Triggers load for lazy + /// impls. The returned `NdBuffer` describes the band's view in + /// byte-stride terms — `shape` is the visible shape, `strides` and + /// `offset` are computed by composing the view with the source's + /// natural C-order byte strides. Strides may be zero (broadcast) or + /// negative (reverse iteration). + fn nd_buffer(&self) -> Result, ArrowError>; + + /// Contiguous row-major bytes covering the *visible* region. Zero-copy + /// (`Cow::Borrowed`) when the view is full identity over a C-order + /// source buffer; copies into a new buffer when the view slices, + /// broadcasts, or permutes. Most RS_* functions use this. + fn contiguous_data(&self) -> Result, ArrowError>; + + /// Pre-N-D compatibility shim: raw row-major bytes for InDb, + /// identity-view bands. Panics on anything else (OutDb, non-identity + /// view, or a `contiguous_data` error) — corresponds to main's + /// infallible `BandRef::data() -> &[u8]` which only ever ran against + /// identity-view InDb bands. + fn data(&self) -> &[u8] { + // Compatibility shim: returns the same bytes pre-N-D callers expect + // from `BandRef::data() -> &[u8]`. Delegates to `contiguous_data()` + // so identity-view bands surface the borrowed in-line bytes, + // matching the pre-N-D behavior exactly. 
View-materialized + // (`Cow::Owned`) bands can't be returned through `&[u8]` because + // the owned `Vec` would die at the end of this call — implementors + // that need view-materialized bytes via `data()` must override and + // anchor the materialized buffer on `Self`; other consumers should + // reach for `contiguous_data()` directly. + match self + .contiguous_data() + .expect("BandRef::data() requires an in-db band with bytes") + { + Cow::Borrowed(b) => b, + Cow::Owned(_) => panic!( + "BandRef::data() can't return view-materialized bytes; \ + use contiguous_data() for sliced/permuted bands" + ), + } + } + + /// Nodata value interpreted as f64. /// /// Returns `Ok(None)` when no nodata value is defined, `Ok(Some(f64))` on - /// success, or an error when the raw bytes have an unexpected length for - /// the band's data type. - fn nodata_value_as_f64(&self) -> Result, ArrowError> { - let bytes = match self.nodata_value() { + /// success, or an error when the raw bytes have an unexpected length **or** + /// when the nodata value cannot be represented exactly in `f64`. + /// + /// 64-bit integer bands (`Int64`, `UInt64`) error rather than silently + /// rounding when the magnitude exceeds 2^53 — values outside + /// `[-9_007_199_254_740_992, 9_007_199_254_740_992]` can't round-trip + /// through `f64` and a rounded sentinel can collide with a real pixel + /// value. Use `nodata()` directly to recover the exact bytes when full + /// integer precision matters (e.g. when nodata is the type's extreme + /// value like `0xFF…FF`). + fn nodata_as_f64(&self) -> Result, ArrowError> { + let bytes = match self.nodata() { Some(b) => b, None => return Ok(None), }; - let dt = self.data_type()?; - nodata_bytes_to_f64(bytes, &dt).map(Some) + nodata_bytes_to_f64_lossless(bytes, &self.data_type()).map(Some) + } +} + +/// Derive the visible (per-axis) shape from a view. +/// +/// `view[k].steps` is the visible extent along view axis `k` after slicing / +/// broadcasting. 
Callers should treat the returned shape as authoritative for +/// the visible region; `source_shape` is only meaningful in conjunction with +/// the per-entry `source_axis`. +/// +/// `validate_view` guarantees `steps >= 0`, so the `as u64` cast is lossless +/// when the input has already been validated. Callers that haven't validated +/// yet should still call `validate_view` first. +pub(crate) fn visible_shape_from_view(view: &[ViewEntry]) -> Vec { + view.iter().map(|v| v.steps as u64).collect() +} + +/// Validate a `[ViewEntry]` against a band's `source_shape`. +/// +/// Returns `Ok(())` if the view is well-formed under the rules: +/// - `view.len() == source_shape.len()`. +/// - `source_axis` values across `view` form a permutation of +/// `0..source_shape.len()` (no axis duplicated, none missing). +/// - `steps >= 0`. +/// - When `steps > 0`: `start ∈ [0, source_shape[source_axis])`, and when +/// `step != 0` the last addressed element +/// `start + (steps - 1) * step` is also in that range. +/// +/// Runs implicitly inside `start_band_with_view` (writer) and +/// `RasterRef::band` (reader); external callers don't need to invoke it. 
+pub(crate) fn validate_view(view: &[ViewEntry], source_shape: &[u64]) -> Result<(), ArrowError> { + let ndim = source_shape.len(); + if view.len() != ndim { + return Err(ArrowError::InvalidArgumentError(format!( + "view length ({}) must equal source_shape length ({ndim})", + view.len() + ))); + } + let mut seen = vec![false; ndim]; + for (k, v) in view.iter().enumerate() { + if v.source_axis < 0 || (v.source_axis as usize) >= ndim { + return Err(ArrowError::InvalidArgumentError(format!( + "view[{k}].source_axis = {} is out of range [0, {ndim})", + v.source_axis + ))); + } + let sa = v.source_axis as usize; + if seen[sa] { + return Err(ArrowError::InvalidArgumentError(format!( + "view source_axis values must be a permutation of 0..{ndim}; \ + axis {sa} appears more than once" + ))); + } + seen[sa] = true; + + if v.steps < 0 { + return Err(ArrowError::InvalidArgumentError(format!( + "view[{k}].steps = {} must be >= 0", + v.steps + ))); + } + if v.steps > 0 { + let s = source_shape[sa] as i64; + if v.start < 0 || v.start >= s { + return Err(ArrowError::InvalidArgumentError(format!( + "view[{k}].start = {} is out of range [0, {s}) for source axis {sa}", + v.start + ))); + } + if v.step != 0 { + // Use checked arithmetic so a malicious or corrupted view + // can't silently wrap (steps-1)*step or start+… into an + // in-range value and bypass the bound check. Any overflow + // is reported as a normal validation error. 
+ let last = (v.steps - 1) + .checked_mul(v.step) + .and_then(|d| v.start.checked_add(d)) + .ok_or_else(|| { + ArrowError::InvalidArgumentError(format!( + "view[{k}] last-element index overflows i64 for \ + start={}, step={}, steps={} on source axis {sa}", + v.start, v.step, v.steps + )) + })?; + if last < 0 || last >= s { + return Err(ArrowError::InvalidArgumentError(format!( + "view[{k}] addresses element {last} which is out of range \ + [0, {s}) for source axis {sa}" + ))); + } + } + } } + Ok(()) } /// Convert raw nodata bytes to f64 given a [`BandDataType`]. /// /// The bytes are expected to be in little-endian order and exactly match the -/// byte size of the data type. +/// byte size of the data type. Internal helper: `nodata_bytes_to_f64_lossless` +/// delegates here for every type except `Int64`/`UInt64`; prefer that wrapper. fn nodata_bytes_to_f64(bytes: &[u8], dt: &BandDataType) -> Result { macro_rules! read_le { ($t:ty, $n:expr) => {{ @@ -173,12 +803,49 @@ fn nodata_bytes_to_f64(bytes: &[u8], dt: &BandDataType) -> Result: Iterator> { - fn len(&self) -> usize; - /// Check if there are no more bands - fn is_empty(&self) -> bool { - self.len() == 0 +/// Convert raw nodata bytes to f64, erroring on lossy conversion. +/// +/// Like [`nodata_bytes_to_f64`] but rejects 64-bit integer values whose +/// magnitude exceeds 2^53, since they can't round-trip through `f64`. +/// Callers that interpret nodata as a sentinel (e.g. UDFs that compare +/// pixel == nodata) should prefer this over the lossy variant — a rounded +/// `0xFFFF_FFFF_FFFF_FFFE` sentinel can silently collide with a real +/// pixel value.
+pub fn nodata_bytes_to_f64_lossless(bytes: &[u8], dt: &BandDataType) -> Result { + match dt { + BandDataType::UInt64 => { + let arr: [u8; 8] = bytes.try_into().map_err(|_| { + ArrowError::InvalidArgumentError(format!( + "Invalid nodata byte length for UInt64: expected 8, got {}", + bytes.len() + )) + })?; + let v = u64::from_le_bytes(arr); + if v > (1u64 << 53) { + return Err(ArrowError::InvalidArgumentError(format!( + "UInt64 nodata value {v} cannot be represented exactly as f64 \ + (magnitude > 2^53); use the raw nodata bytes instead" + ))); + } + Ok(v as f64) + } + BandDataType::Int64 => { + let arr: [u8; 8] = bytes.try_into().map_err(|_| { + ArrowError::InvalidArgumentError(format!( + "Invalid nodata byte length for Int64: expected 8, got {}", + bytes.len() + )) + })?; + let v = i64::from_le_bytes(arr); + if v.unsigned_abs() > (1u64 << 53) { + return Err(ArrowError::InvalidArgumentError(format!( + "Int64 nodata value {v} cannot be represented exactly as f64 \ + (magnitude > 2^53); use the raw nodata bytes instead" + ))); + } + Ok(v as f64) + } + _ => nodata_bytes_to_f64(bytes, dt), } } @@ -217,4 +884,329 @@ mod tests { let result = nodata_bytes_to_f64(&[1, 2, 3], &BandDataType::Float64); assert!(result.is_err()); } + + #[test] + fn test_nodata_bytes_to_f64_lossless_int64_within_mantissa() { + // Boundary: 2^53 is the largest magnitude that round-trips exactly. 
+ let safe = 1i64 << 53; + let val = nodata_bytes_to_f64_lossless(&safe.to_le_bytes(), &BandDataType::Int64).unwrap(); + assert_eq!(val as i64, safe); + + let neg_safe = -(1i64 << 53); + let val = + nodata_bytes_to_f64_lossless(&neg_safe.to_le_bytes(), &BandDataType::Int64).unwrap(); + assert_eq!(val as i64, neg_safe); + } + + #[test] + fn test_nodata_bytes_to_f64_lossless_int64_errors_above_mantissa() { + let big = (1i64 << 53) + 1; + let err = + nodata_bytes_to_f64_lossless(&big.to_le_bytes(), &BandDataType::Int64).unwrap_err(); + assert!( + err.to_string().contains("Int64 nodata value"), + "unexpected error: {err}" + ); + } + + #[test] + fn test_nodata_bytes_to_f64_lossless_uint64_sentinel_errors() { + // The common sentinel 0xFFFF_FFFF_FFFF_FFFF is exactly the case the + // review flagged: lossy variant silently rounds to a value that can + // collide with a real pixel; lossless variant errors. + let sentinel = u64::MAX; + let err = nodata_bytes_to_f64_lossless(&sentinel.to_le_bytes(), &BandDataType::UInt64) + .unwrap_err(); + assert!( + err.to_string().contains("UInt64 nodata value"), + "unexpected error: {err}" + ); + } + + #[test] + fn test_nodata_bytes_to_f64_lossless_delegates_for_smaller_types() { + // Non-64-bit types pass through to nodata_bytes_to_f64 unchanged. 
+ let val = nodata_bytes_to_f64_lossless(&[42], &BandDataType::UInt8).unwrap(); + assert_eq!(val, 42.0); + let val = nodata_bytes_to_f64_lossless(&[0xFE], &BandDataType::Int8).unwrap(); + assert_eq!(val, -2.0); + } + + #[test] + fn test_split_outdb_band_fragment_with_band() { + let (base, n) = split_outdb_band_fragment("s3://bucket/file.tif#band=42"); + assert_eq!(base, "s3://bucket/file.tif"); + assert_eq!(n, 42); + } + + #[test] + fn test_split_outdb_band_fragment_without_band_defaults_to_1() { + let (base, n) = split_outdb_band_fragment("s3://bucket/file.tif"); + assert_eq!(base, "s3://bucket/file.tif"); + assert_eq!(n, 1); + } + + #[test] + fn test_split_outdb_band_fragment_malformed_fragment_defaults_to_1() { + // `#band=abc` is malformed; treat the whole string as the base URL. + let (base, n) = split_outdb_band_fragment("s3://bucket/file.tif#band=abc"); + assert_eq!(base, "s3://bucket/file.tif#band=abc"); + assert_eq!(n, 1); + } + + fn ve(source_axis: i64, start: i64, step: i64, steps: i64) -> ViewEntry { + ViewEntry { + source_axis, + start, + step, + steps, + } + } + + #[test] + fn validate_view_accepts_identity() { + let v = [ve(0, 0, 1, 4), ve(1, 0, 1, 5)]; + validate_view(&v, &[4, 5]).unwrap(); + } + + #[test] + fn validate_view_rejects_length_mismatch() { + let v = [ve(0, 0, 1, 4)]; + let err = validate_view(&v, &[4, 5]).unwrap_err(); + assert!(err.to_string().contains("must equal"), "got {err}"); + } + + #[test] + fn validate_view_rejects_negative_source_axis() { + let v = [ve(-1, 0, 1, 4)]; + let err = validate_view(&v, &[4]).unwrap_err(); + assert!(err.to_string().contains("source_axis"), "got {err}"); + } + + #[test] + fn validate_view_rejects_oob_source_axis() { + let v = [ve(2, 0, 1, 4)]; + let err = validate_view(&v, &[4]).unwrap_err(); + assert!(err.to_string().contains("source_axis"), "got {err}"); + } + + #[test] + fn validate_view_rejects_duplicate_source_axis() { + let v = [ve(0, 0, 1, 2), ve(0, 0, 1, 2)]; + let err = validate_view(&v, &[2, 
3]).unwrap_err(); + assert!(err.to_string().contains("permutation"), "got {err}"); + } + + #[test] + fn validate_view_rejects_negative_steps() { + let v = [ve(0, 0, 1, -1)]; + let err = validate_view(&v, &[4]).unwrap_err(); + assert!(err.to_string().contains("steps"), "got {err}"); + } + + #[test] + fn validate_view_rejects_negative_start() { + let v = [ve(0, -1, 1, 1)]; + let err = validate_view(&v, &[4]).unwrap_err(); + assert!(err.to_string().contains("start"), "got {err}"); + } + + #[test] + fn validate_view_rejects_start_at_source_size() { + // start == S is one past the end. Forbidden even with steps=1. + let v = [ve(0, 4, 1, 1)]; + let err = validate_view(&v, &[4]).unwrap_err(); + assert!(err.to_string().contains("start"), "got {err}"); + } + + #[test] + fn validate_view_rejects_negative_step_underrun() { + // start=0, step=-1, steps=2 addresses element 0 then -1 → underrun. + // The most likely real bug in step != 0 arithmetic. + let v = [ve(0, 0, -1, 2)]; + let err = validate_view(&v, &[4]).unwrap_err(); + assert!(err.to_string().contains("out of range"), "got {err}"); + } + + #[test] + fn validate_view_accepts_negative_step_full_reverse() { + // start=3, step=-1, steps=4 addresses 3,2,1,0 — all in range. + let v = [ve(0, 3, -1, 4)]; + validate_view(&v, &[4]).unwrap(); + } + + #[test] + fn validate_view_accepts_steps_zero_with_unconstrained_start() { + // Empty axis short-circuits the bound check on start. + let v = [ve(0, 999, 1, 0)]; + validate_view(&v, &[4]).unwrap(); + } + + #[test] + fn validate_view_steps_one_only_checks_start() { + // steps=1, step=999 — only `start` matters; the would-be next index + // (start + 1*999) is never addressed and must not be checked. + let v = [ve(0, 3, 999, 1)]; + validate_view(&v, &[4]).unwrap(); + } + + #[test] + fn validate_view_step_zero_broadcast_within_bounds() { + // step=0 broadcasts. start ∈ [0, S) is the only check. 
+ let v_ok = [ve(0, 3, 0, 100)]; + validate_view(&v_ok, &[4]).unwrap(); + let v_bad = [ve(0, 4, 0, 1)]; + let err = validate_view(&v_bad, &[4]).unwrap_err(); + assert!(err.to_string().contains("start"), "got {err}"); + } + + #[test] + fn validate_view_permutation_with_slice_ok() { + // Mix permutation and slicing — both legal as long as source_axis + // values are a permutation and bounds hold per axis. + let v = [ve(1, 0, 1, 3), ve(0, 1, 1, 1)]; + validate_view(&v, &[2, 3]).unwrap(); + } + + #[test] + fn validate_view_rejects_i64_overflow_in_last_element() { + // start=10, step=i64::MAX, steps=3 wraps `(steps-1)*step` to a + // small negative i64; without checked arithmetic the naive sum + // becomes 8 — falsely "in range" for a source of size 100. With + // checked arithmetic, validate_view must reject it as overflow. + // This was a real bug: in release the wrapped value passed all + // bounds; in debug, the multiply would panic. + let v = [ve(0, 10, i64::MAX, 3)]; + let err = validate_view(&v, &[100]).unwrap_err(); + assert!( + err.to_string().contains("overflow"), + "expected overflow error, got: {err}" + ); + } + + #[test] + fn validate_view_rejects_i64_overflow_in_start_plus_offset() { + // (steps-1)*step = i64::MAX - 1 fits in i64. Adding a small, + // in-range start of 2 then overflows i64::MAX. The start bound + // check passes (2 < 100), so this exercises the checked_add arm + // specifically, not the start guard or the checked_mul arm. + let v = [ve(0, 2, 1, i64::MAX)]; + let err = validate_view(&v, &[100]).unwrap_err(); + assert!( + err.to_string().contains("overflow"), + "expected overflow error, got: {err}" + ); + } + + /// Minimal `BandRef` stub: only the inputs `is_2d` actually inspects + /// (`dim_names`, `view`, `raw_source_shape`) carry meaningful values; + /// every other method returns a placeholder we never read. 
+ struct StubBand { + dim_names: Vec, + source_shape: Vec, + shape: Vec, + view: Vec, + } + + impl BandRef for StubBand { + fn ndim(&self) -> usize { + self.dim_names.len() + } + fn dim_names(&self) -> Vec<&str> { + self.dim_names.iter().map(String::as_str).collect() + } + fn shape(&self) -> &[u64] { + &self.shape + } + fn raw_source_shape(&self) -> &[u64] { + &self.source_shape + } + fn view(&self) -> &[ViewEntry] { + &self.view + } + fn data_type(&self) -> BandDataType { + BandDataType::UInt8 + } + fn nodata(&self) -> Option<&[u8]> { + None + } + fn nd_buffer(&self) -> Result, ArrowError> { + unimplemented!("not used in is_2d tests") + } + fn contiguous_data(&self) -> Result, ArrowError> { + unimplemented!("not used in is_2d tests") + } + } + + fn band(dims: &[&str], source_shape: &[u64], view: &[ViewEntry]) -> StubBand { + StubBand { + dim_names: dims.iter().map(|s| (*s).to_string()).collect(), + source_shape: source_shape.to_vec(), + shape: visible_shape_from_view(view), + view: view.to_vec(), + } + } + + #[test] + fn is_2d_identity_yx_is_true() { + let b = band(&["y", "x"], &[4, 5], &[ve(0, 0, 1, 4), ve(1, 0, 1, 5)]); + assert!(b.is_2d()); + } + + #[test] + fn is_2d_identity_3d_is_false() { + let b = band( + &["time", "y", "x"], + &[3, 4, 5], + &[ve(0, 0, 1, 3), ve(1, 0, 1, 4), ve(2, 0, 1, 5)], + ); + assert!(!b.is_2d()); + } + + #[test] + fn is_2d_identity_1d_is_false() { + let b = band(&["x"], &[5], &[ve(0, 0, 1, 5)]); + assert!(!b.is_2d()); + } + + #[test] + fn is_2d_yx_with_slice_view_is_false() { + // Same dim_names but the y-axis is sliced — view is not the identity. 
+ let b = band(&["y", "x"], &[4, 5], &[ve(0, 1, 1, 2), ve(1, 0, 1, 5)]); + assert!(!b.is_2d()); + } + + #[test] + fn is_2d_yx_with_step_two_is_false() { + let b = band(&["y", "x"], &[4, 5], &[ve(0, 0, 2, 2), ve(1, 0, 1, 5)]); + assert!(!b.is_2d()); + } + + #[test] + fn is_2d_yx_with_broadcast_is_false() { + let b = band(&["y", "x"], &[4, 5], &[ve(0, 0, 0, 4), ve(1, 0, 1, 5)]); + assert!(!b.is_2d()); + } + + #[test] + fn is_2d_permuted_xy_is_false() { + // dim_names are swapped — not the legacy 2D shape, even though the + // view per-axis is the identity. + let b = band(&["x", "y"], &[5, 4], &[ve(0, 0, 1, 5), ve(1, 0, 1, 4)]); + assert!(!b.is_2d()); + } + + #[test] + fn is_2d_yx_with_transposed_source_axes_is_false() { + // dim_names are ["y","x"] but the view permutes the source axes, + // so the band exposes y-then-x out of an x-then-y source. + let b = band(&["y", "x"], &[5, 4], &[ve(1, 0, 1, 4), ve(0, 0, 1, 5)]); + assert!(!b.is_2d()); + } + + #[test] + fn is_2d_yx_other_dim_names_is_false() { + let b = band(&["lat", "lon"], &[4, 5], &[ve(0, 0, 1, 4), ve(1, 0, 1, 5)]); + assert!(!b.is_2d()); + } } diff --git a/rust/sedona-schema/src/raster.rs b/rust/sedona-schema/src/raster.rs index b5b8745c4..436baf8dd 100644 --- a/rust/sedona-schema/src/raster.rs +++ b/rust/sedona-schema/src/raster.rs @@ -16,34 +16,54 @@ // under the License. use arrow_schema::{DataType, Field, FieldRef, Fields}; -/// Schema for storing raster data in Apache Arrow format. -/// Utilizing nested structs and lists to represent raster metadata and bands. +/// Schema for storing N-dimensional raster data in Apache Arrow format. +/// +/// Each raster has a CRS, an affine transform, a list of spatial dimension +/// names (`spatial_dims`) and sizes (`spatial_shape`), and a list of bands. +/// Each band is an N-D chunk with named dimensions, a `source_shape` +/// describing the natural extent of its underlying buffer, and a `view` +/// describing the visible region of that buffer. 
+/// +/// `spatial_dims` + `spatial_shape` are the raster-level source of truth for +/// the spatial grid — today length 2 (`["x","y"]`, `[width, height]`), +/// Z-ready for a future 3D phase. All bands must contain every name in +/// `spatial_dims` in their own `dim_names`, with the band's *visible* size +/// for that dim matching `spatial_shape`. +/// +/// 2D rasters are represented as bands with `dim_names=["y","x"]` and +/// `source_shape=[height, width]`. #[derive(Debug, PartialEq, Clone)] pub struct RasterSchema; + impl RasterSchema { /// Returns the top-level fields for the raster schema structure. pub fn fields() -> Fields { Fields::from(vec![ - Field::new(column::METADATA, Self::metadata_type(), false), Field::new(column::CRS, Self::crs_type(), true), // Optional: may be inferred from data + Field::new(column::TRANSFORM, Self::transform_type(), false), + Field::new(column::SPATIAL_DIMS, Self::spatial_dims_type(), false), + Field::new(column::SPATIAL_SHAPE, Self::spatial_shape_type(), false), Field::new(column::BANDS, Self::bands_type(), true), ]) } - /// Raster metadata schema - pub fn metadata_type() -> DataType { - DataType::Struct(Fields::from(vec![ - // Raster dimensions - Field::new(column::WIDTH, DataType::UInt64, false), - Field::new(column::HEIGHT, DataType::UInt64, false), - // Geospatial transformation parameters - Field::new(column::UPPERLEFT_X, DataType::Float64, false), - Field::new(column::UPPERLEFT_Y, DataType::Float64, false), - Field::new(column::SCALE_X, DataType::Float64, false), - Field::new(column::SCALE_Y, DataType::Float64, false), - Field::new(column::SKEW_X, DataType::Float64, false), - Field::new(column::SKEW_Y, DataType::Float64, false), - ])) + /// Affine transform schema — 6-element GDAL GeoTransform: + /// `[origin_x, scale_x, skew_x, origin_y, skew_y, scale_y]` + pub fn transform_type() -> DataType { + DataType::List(FieldRef::new(Field::new("item", DataType::Float64, false))) + } + + /// Spatial dimension names schema — list 
of `Utf8View` strings, one per + /// spatial axis. Today always `["x","y"]`; becomes `["x","y","z"]` if a + /// future phase adds Z support. + pub fn spatial_dims_type() -> DataType { + DataType::List(FieldRef::new(Field::new("item", DataType::Utf8View, false))) + } + + /// Spatial shape schema — list of `Int64` sizes in the same order as + /// `spatial_dims`. Today `[width, height]`. + pub fn spatial_shape_type() -> DataType { + DataType::List(FieldRef::new(Field::new("item", DataType::Int64, false))) } /// Bands list schema @@ -55,29 +75,59 @@ impl RasterSchema { ))) } - /// Individual band schema + /// Individual band schema — flattened N-D band with dimension metadata. + /// + /// Out-of-band ("outdb") bands carry two orthogonal identifiers: + /// - `outdb_uri` is the *location* (what scheme/registry to dispatch to, + /// e.g. `s3://bucket/file.tif`, `file:///…`, `mem://…`). + /// - `outdb_format` is the *format* (how to interpret the bytes, e.g. + /// `"geotiff"`, `"zarr"`). Null format means in-memory — the band's + /// `data` buffer is authoritative. 
pub fn band_type() -> DataType { DataType::Struct(Fields::from(vec![ - Field::new(column::METADATA, Self::band_metadata_type(), false), - Field::new(column::DATA, Self::band_data_type(), false), + Field::new(column::NAME, DataType::Utf8, true), + Field::new(column::DIM_NAMES, Self::dim_names_type(), false), + Field::new(column::SOURCE_SHAPE, Self::source_shape_type(), false), + Field::new(column::DATATYPE, DataType::UInt32, false), + Field::new(column::NODATA, DataType::Binary, true), + Field::new(column::VIEW, Self::view_type(), true), + Field::new(column::OUTDB_URI, DataType::Utf8, true), + Field::new(column::OUTDB_FORMAT, DataType::Utf8View, true), + Field::new(column::DATA, DataType::BinaryView, false), ])) } - /// Band metadata schema - pub fn band_metadata_type() -> DataType { - DataType::Struct(Fields::from(vec![ - Field::new(column::NODATAVALUE, DataType::Binary, true), // Optional: null means no nodata value specified - Field::new(column::STORAGE_TYPE, DataType::UInt32, false), - Field::new(column::DATATYPE, DataType::UInt32, false), - // OutDb reference fields - only used when storage_type == OutDbRef - Field::new(column::OUTDB_URL, DataType::Utf8, true), - Field::new(column::OUTDB_BAND_ID, DataType::UInt32, true), - ])) + /// Dimension names list type + pub fn dim_names_type() -> DataType { + DataType::List(FieldRef::new(Field::new("item", DataType::Utf8, false))) } - /// Band data schema - stores the actual raster pixel data as a binary blob - pub fn band_data_type() -> DataType { - DataType::BinaryView + /// Source shape list type — the natural C-order extent of the band's + /// `data` buffer (or outdb-resolved source) per dimension. The *visible* + /// shape exposed to consumers is derived from `view`: + /// `[entry.steps for entry in view]`. + pub fn source_shape_type() -> DataType { + DataType::List(FieldRef::new(Field::new("item", DataType::UInt64, false))) + } + + /// View list type — one entry per dimension in the band's *visible* + /// order. 
Each entry is a `(source_axis, start, step, steps)` quadruple + /// describing how the visible axis maps onto the band's source shape. + /// The field is nullable: a null view denotes the identity view + /// `[(i, 0, 1, source_shape[i]) for i in 0..ndim]` and is the canonical + /// representation for any band whose data has not been sliced. See + /// `RasterSchema` doc for full semantics. + pub fn view_type() -> DataType { + DataType::List(FieldRef::new(Field::new( + "item", + DataType::Struct(Fields::from(vec![ + Field::new("source_axis", DataType::Int64, false), + Field::new("start", DataType::Int64, false), + Field::new("step", DataType::Int64, false), + Field::new("steps", DataType::Int64, false), + ])), + false, + ))) } /// Coordinate Reference System (CRS) schema - stores CRS as JSON string (PROJ or WKT format) @@ -102,6 +152,10 @@ pub enum BandDataType { Float64 = 7, UInt64 = 8, Int64 = 9, + // Int8 was added after the original 1-7 set (PR #589) and after the + // 64-bit additions at 8-9. The discriminants are an Arrow-column + // contract for the `band.data_type` UInt32 column — reordering would + // silently misinterpret existing raster data, so new variants append. Int8 = 10, } @@ -116,6 +170,23 @@ impl BandDataType { } } + /// Try to convert from a u32 discriminant value. + pub fn try_from_u32(value: u32) -> Option { + match value { + 1 => Some(BandDataType::UInt8), + 2 => Some(BandDataType::UInt16), + 3 => Some(BandDataType::Int16), + 4 => Some(BandDataType::UInt32), + 5 => Some(BandDataType::Int32), + 6 => Some(BandDataType::Float32), + 7 => Some(BandDataType::Float64), + 8 => Some(BandDataType::UInt64), + 9 => Some(BandDataType::Int64), + 10 => Some(BandDataType::Int8), + _ => None, + } + } + /// Java/Sedona-compatible pixel type name (e.g. `"UNSIGNED_8BITS"`). pub fn pixel_type_name(&self) -> &'static str { match self { @@ -134,24 +205,18 @@ impl BandDataType { } } -/// Storage strategy for raster band data within Apache Arrow arrays. 
-/// -/// This enum defines how raster data is physically stored and accessed: +/// Where a band's pixel data lives. /// -/// **InDb**: Raster data is embedded directly in the Arrow array as binary blobs. -/// - Self-contained, no external dependencies, fast access for small-medium rasters -/// - Increases Arrow array size, memory usage grows and copy times increase with raster size -/// - Best for: Tiles, thumbnails, processed results, small rasters (<10MB per band) -/// -/// **OutDbRef**: Raster data is stored externally with references in the Arrow array. -/// - Keeps Arrow arrays lightweight, supports massive rasters, enables lazy loading -/// - Requires external storage management, potential for broken references -/// - Best for: Large satellite imagery, time series data, cloud-native workflows -/// - Supported backends: S3, GCS, Azure Blob, local filesystem, HTTP endpoints +/// Restored from the pre-N-D schema to keep downstream code that pattern- +/// matches on `StorageType::InDb` / `StorageType::OutDbRef` compiling. +/// The current N-D schema discriminates via `BandRef::is_indb()` (true ↔ +/// `InDb`, false ↔ `OutDbRef`); this enum is the shim over that. #[repr(u16)] #[derive(Clone, Debug, PartialEq, Eq, Hash, Copy)] pub enum StorageType { + /// Band data is materialized into the raster row's `data` Arrow column. InDb = 0, + /// Band data lives outside the row and is referenced by `outdb_uri`. OutDbRef = 1, } @@ -160,62 +225,55 @@ pub enum StorageType { /// /// Using compile-time constants avoids string lookups and provides type safety /// when accessing nested struct fields in Arrow arrays. 
-pub mod metadata_indices { - pub const WIDTH: usize = 0; - pub const HEIGHT: usize = 1; - pub const UPPERLEFT_X: usize = 2; - pub const UPPERLEFT_Y: usize = 3; - pub const SCALE_X: usize = 4; - pub const SCALE_Y: usize = 5; - pub const SKEW_X: usize = 6; - pub const SKEW_Y: usize = 7; -} - -pub mod band_metadata_indices { - pub const NODATAVALUE: usize = 0; - pub const STORAGE_TYPE: usize = 1; - pub const DATATYPE: usize = 2; - pub const OUTDB_URL: usize = 3; - pub const OUTDB_BAND_ID: usize = 4; +pub mod raster_indices { + pub const CRS: usize = 0; + pub const TRANSFORM: usize = 1; + pub const SPATIAL_DIMS: usize = 2; + pub const SPATIAL_SHAPE: usize = 3; + pub const BANDS: usize = 4; } pub mod band_indices { - pub const METADATA: usize = 0; - pub const DATA: usize = 1; + pub const NAME: usize = 0; + pub const DIM_NAMES: usize = 1; + pub const SOURCE_SHAPE: usize = 2; + pub const DATA_TYPE: usize = 3; + pub const NODATA: usize = 4; + pub const VIEW: usize = 5; + pub const OUTDB_URI: usize = 6; + pub const OUTDB_FORMAT: usize = 7; + pub const DATA: usize = 8; } -pub mod raster_indices { - pub const METADATA: usize = 0; - pub const CRS: usize = 1; - pub const BANDS: usize = 2; +/// Field indices within the `view` struct (`(source_axis, start, step, steps)`). +pub mod band_view_indices { + pub const SOURCE_AXIS: usize = 0; + pub const START: usize = 1; + pub const STEP: usize = 2; + pub const STEPS: usize = 3; } /// Column name constants used throughout the raster schema definition. /// These string constants ensure consistency across schema creation and field access. 
pub mod column { - pub const METADATA: &str = "metadata"; + // Top-level raster fields + pub const CRS: &str = "crs"; + pub const TRANSFORM: &str = "transform"; + pub const SPATIAL_DIMS: &str = "spatial_dims"; + pub const SPATIAL_SHAPE: &str = "spatial_shape"; pub const BANDS: &str = "bands"; pub const BAND: &str = "band"; - pub const DATA: &str = "data"; - - // Raster metadata fields - pub const WIDTH: &str = "width"; - pub const HEIGHT: &str = "height"; - pub const UPPERLEFT_X: &str = "upperleft_x"; - pub const UPPERLEFT_Y: &str = "upperleft_y"; - pub const SCALE_X: &str = "scale_x"; - pub const SCALE_Y: &str = "scale_y"; - pub const SKEW_X: &str = "skew_x"; - pub const SKEW_Y: &str = "skew_y"; - // Raster CRS field - pub const CRS: &str = "crs"; - // Band metadata fields - pub const NODATAVALUE: &str = "nodata_value"; - pub const STORAGE_TYPE: &str = "storage_type"; + // Band fields + pub const NAME: &str = "name"; + pub const DIM_NAMES: &str = "dim_names"; + pub const SOURCE_SHAPE: &str = "source_shape"; pub const DATATYPE: &str = "data_type"; - pub const OUTDB_URL: &str = "outdb_url"; - pub const OUTDB_BAND_ID: &str = "outdb_band_id"; + pub const NODATA: &str = "nodata"; + pub const VIEW: &str = "view"; + pub const OUTDB_URI: &str = "outdb_uri"; + pub const OUTDB_FORMAT: &str = "outdb_format"; + pub const DATA: &str = "data"; } #[cfg(test)] @@ -225,10 +283,12 @@ mod tests { #[test] fn test_raster_schema_fields() { let fields = RasterSchema::fields(); - assert_eq!(fields.len(), 3); - assert_eq!(fields[0].name(), column::METADATA); - assert_eq!(fields[1].name(), column::CRS); - assert_eq!(fields[2].name(), column::BANDS); + assert_eq!(fields.len(), 5); + assert_eq!(fields[0].name(), column::CRS); + assert_eq!(fields[1].name(), column::TRANSFORM); + assert_eq!(fields[2].name(), column::SPATIAL_DIMS); + assert_eq!(fields[3].name(), column::SPATIAL_SHAPE); + assert_eq!(fields[4].name(), column::BANDS); } /// Comprehensive test to verify all hard-coded indices match 
the actual schema. @@ -238,128 +298,90 @@ mod tests { fn test_hardcoded_indices_match_schema() { // Test raster-level indices let raster_fields = RasterSchema::fields(); - assert_eq!(raster_fields.len(), 3, "Expected exactly 3 raster fields"); - assert_eq!( - raster_fields[raster_indices::METADATA].name(), - column::METADATA, - "Raster metadata index mismatch" - ); + assert_eq!(raster_fields.len(), 5, "Expected exactly 5 raster fields"); assert_eq!( raster_fields[raster_indices::CRS].name(), column::CRS, "Raster CRS index mismatch" ); + assert_eq!( + raster_fields[raster_indices::TRANSFORM].name(), + column::TRANSFORM, + "Raster TRANSFORM index mismatch" + ); + assert_eq!( + raster_fields[raster_indices::SPATIAL_DIMS].name(), + column::SPATIAL_DIMS, + "Raster SPATIAL_DIMS index mismatch" + ); + assert_eq!( + raster_fields[raster_indices::SPATIAL_SHAPE].name(), + column::SPATIAL_SHAPE, + "Raster SPATIAL_SHAPE index mismatch" + ); assert_eq!( raster_fields[raster_indices::BANDS].name(), column::BANDS, "Raster BANDS index mismatch" ); - // Test metadata indices - let metadata_type = RasterSchema::metadata_type(); - if let DataType::Struct(metadata_fields) = metadata_type { - assert_eq!( - metadata_fields.len(), - 8, - "Expected exactly 8 metadata fields" - ); - assert_eq!( - metadata_fields[metadata_indices::WIDTH].name(), - column::WIDTH, - "Metadata width index mismatch" - ); - assert_eq!( - metadata_fields[metadata_indices::HEIGHT].name(), - column::HEIGHT, - "Metadata height index mismatch" - ); - assert_eq!( - metadata_fields[metadata_indices::UPPERLEFT_X].name(), - column::UPPERLEFT_X, - "Metadata upperleft_x index mismatch" - ); - assert_eq!( - metadata_fields[metadata_indices::UPPERLEFT_Y].name(), - column::UPPERLEFT_Y, - "Metadata upperleft_y index mismatch" - ); - assert_eq!( - metadata_fields[metadata_indices::SCALE_X].name(), - column::SCALE_X, - "Metadata scale_x index mismatch" - ); - assert_eq!( - metadata_fields[metadata_indices::SCALE_Y].name(), - 
column::SCALE_Y, - "Metadata scale_y index mismatch" - ); - assert_eq!( - metadata_fields[metadata_indices::SKEW_X].name(), - column::SKEW_X, - "Metadata skew_x index mismatch" - ); - assert_eq!( - metadata_fields[metadata_indices::SKEW_Y].name(), - column::SKEW_Y, - "Metadata skew_y index mismatch" - ); - } else { - panic!("Expected Struct type for metadata"); - } - - // Test band metadata indices - let band_metadata_type = RasterSchema::band_metadata_type(); - if let DataType::Struct(band_metadata_fields) = band_metadata_type { + // Test band indices + let band_type = RasterSchema::band_type(); + if let DataType::Struct(band_fields) = band_type { + assert_eq!(band_fields.len(), 9, "Expected exactly 9 band fields"); + assert_eq!(band_fields[band_indices::NAME].name(), column::NAME); assert_eq!( - band_metadata_fields.len(), - 5, - "Expected exactly 5 band metadata fields" + band_fields[band_indices::DIM_NAMES].name(), + column::DIM_NAMES ); assert_eq!( - band_metadata_fields[band_metadata_indices::NODATAVALUE].name(), - column::NODATAVALUE, - "Band metadata nodatavalue index mismatch" + band_fields[band_indices::SOURCE_SHAPE].name(), + column::SOURCE_SHAPE ); assert_eq!( - band_metadata_fields[band_metadata_indices::STORAGE_TYPE].name(), - column::STORAGE_TYPE, - "Band metadata storage_type index mismatch" + band_fields[band_indices::DATA_TYPE].name(), + column::DATATYPE ); - assert_eq!( - band_metadata_fields[band_metadata_indices::DATATYPE].name(), - column::DATATYPE, - "Band metadata datatype index mismatch" + assert_eq!(band_fields[band_indices::NODATA].name(), column::NODATA); + assert_eq!(band_fields[band_indices::VIEW].name(), column::VIEW); + assert!( + band_fields[band_indices::VIEW].is_nullable(), + "view field must be nullable — null encodes the identity view" ); assert_eq!( - band_metadata_fields[band_metadata_indices::OUTDB_URL].name(), - column::OUTDB_URL, - "Band metadata outdb_url index mismatch" + band_fields[band_indices::OUTDB_URI].name(), + 
column::OUTDB_URI ); assert_eq!( - band_metadata_fields[band_metadata_indices::OUTDB_BAND_ID].name(), - column::OUTDB_BAND_ID, - "Band metadata outdb_band_id index mismatch" + band_fields[band_indices::OUTDB_FORMAT].name(), + column::OUTDB_FORMAT ); + assert_eq!(band_fields[band_indices::DATA].name(), column::DATA); } else { - panic!("Expected Struct type for band metadata"); + panic!("Expected Struct type for band"); } + } - // Test band indices - let band_type = RasterSchema::band_type(); - if let DataType::Struct(band_fields) = band_type { - assert_eq!(band_fields.len(), 2, "Expected exactly 2 band fields"); - assert_eq!( - band_fields[band_indices::METADATA].name(), - column::METADATA, - "Band metadata index mismatch" - ); - assert_eq!( - band_fields[band_indices::DATA].name(), - column::DATA, - "Band data index mismatch" - ); - } else { - panic!("Expected Struct type for band"); + #[test] + fn test_view_type_struct_shape() { + // The view struct must have exactly 4 Int64 fields in the order + // expected by band_view_indices. 
+ let DataType::List(item_field) = RasterSchema::view_type() else { + panic!("Expected List type for view"); + }; + let DataType::Struct(view_fields) = item_field.data_type() else { + panic!("Expected Struct type inside view list"); + }; + assert_eq!(view_fields.len(), 4); + assert_eq!( + view_fields[band_view_indices::SOURCE_AXIS].name(), + "source_axis" + ); + assert_eq!(view_fields[band_view_indices::START].name(), "start"); + assert_eq!(view_fields[band_view_indices::STEP].name(), "step"); + assert_eq!(view_fields[band_view_indices::STEPS].name(), "steps"); + for f in view_fields.iter() { + assert_eq!(f.data_type(), &DataType::Int64); } } @@ -377,6 +399,48 @@ mod tests { assert_eq!(BandDataType::Float64.byte_size(), 8); } + #[test] + fn test_band_data_type_try_from_u32() { + assert_eq!(BandDataType::try_from_u32(1), Some(BandDataType::UInt8)); + assert_eq!(BandDataType::try_from_u32(2), Some(BandDataType::UInt16)); + assert_eq!(BandDataType::try_from_u32(3), Some(BandDataType::Int16)); + assert_eq!(BandDataType::try_from_u32(4), Some(BandDataType::UInt32)); + assert_eq!(BandDataType::try_from_u32(5), Some(BandDataType::Int32)); + assert_eq!(BandDataType::try_from_u32(6), Some(BandDataType::Float32)); + assert_eq!(BandDataType::try_from_u32(7), Some(BandDataType::Float64)); + assert_eq!(BandDataType::try_from_u32(8), Some(BandDataType::UInt64)); + assert_eq!(BandDataType::try_from_u32(9), Some(BandDataType::Int64)); + assert_eq!(BandDataType::try_from_u32(10), Some(BandDataType::Int8)); + assert_eq!(BandDataType::try_from_u32(0), None); + assert_eq!(BandDataType::try_from_u32(11), None); + assert_eq!(BandDataType::try_from_u32(u32::MAX), None); + } + + #[test] + fn test_band_data_type_roundtrip_u32() { + // Verify that discriminant → try_from_u32 round-trips for all variants + let all_types = [ + BandDataType::UInt8, + BandDataType::UInt16, + BandDataType::Int16, + BandDataType::UInt32, + BandDataType::Int32, + BandDataType::Float32, + BandDataType::Float64, + 
BandDataType::UInt64, + BandDataType::Int64, + BandDataType::Int8, + ]; + for dt in all_types { + let value = dt as u32; + assert_eq!( + BandDataType::try_from_u32(value), + Some(dt), + "Round-trip failed for {dt:?} (discriminant {value})" + ); + } + } + #[test] fn test_band_data_type_pixel_type_name() { assert_eq!(BandDataType::UInt8.pixel_type_name(), "UNSIGNED_8BITS");