NVIDIA · felipeblazing · Mar 3, 2026 · Mar 4, 2026
diff --git a/include/cucascade/data/data_repository.hpp b/include/cucascade/data/data_repository.hpp
@@ -389,7 +389,7 @@ class idata_repository {
 
  private:
   /**
-   * @brief Helper function to attempt state transition on a batch when poping.
+   * @brief Helper function to attempt state transition on a batch when popping.
    *
    * @param batch_ptr Pointer to the batch to transition
    * @param target_state The state to transition to

diff --git a/include/cucascade/memory/fixed_size_host_memory_resource.hpp b/include/cucascade/memory/fixed_size_host_memory_resource.hpp
@@ -29,6 +29,9 @@
 #include <rmm/detail/nvtx/ranges.hpp>
 #include <rmm/mr/device_memory_resource.hpp>
 #include <rmm/mr/pinned_host_memory_resource.hpp>
+#include <rmm/resource_ref.hpp>
+
+#include <cuda/memory_resource>
 
 #include <cstddef>
 #include <memory>
@@ -216,7 +219,7 @@ class fixed_size_host_memory_resource : public rmm::mr::device_memory_resource {
    */
   explicit fixed_size_host_memory_resource(
     int device_id,
-    rmm::mr::device_memory_resource& upstream_mr,
+    rmm::device_async_resource_ref upstream_mr,
     std::size_t mem_limit,
     std::size_t capacity,
     std::size_t block_size    = default_block_size,
@@ -268,10 +271,9 @@ class fixed_size_host_memory_resource : public rmm::mr::device_memory_resource {
   /**
    * @brief Get the upstream memory resource.
    *
-   * @return rmm::mr::host_memory_resource* Pointer to upstream resource (nullptr if using pinned
-   * host)
+   * @return rmm::device_async_resource_ref Reference to the upstream resource
    */
-  [[nodiscard]] rmm::mr::device_memory_resource* get_upstream_resource() const noexcept;
+  [[nodiscard]] rmm::device_async_resource_ref get_upstream_resource() noexcept;
 
   /**
    * @brief Get total reserved bytes.
@@ -413,11 +415,11 @@ class fixed_size_host_memory_resource : public rmm::mr::device_memory_resource {
   memory_space_id _space_id;
   std::size_t _memory_limit;
   std::size_t _memory_capacity;
-  std::size_t _block_size;                        ///< Size of each block
-  std::size_t _pool_size;                         ///< Number of blocks in pool
-  rmm::mr::device_memory_resource* _upstream_mr;  ///< Upstream memory resource (optional)
-  std::vector<void*> _allocated_blocks;           ///< All allocated blocks
-  std::vector<void*> _free_blocks;                ///< Currently free blocks
+  std::size_t _block_size;                      ///< Size of each block
+  std::size_t _pool_size;                       ///< Number of blocks in pool
+  rmm::device_async_resource_ref _upstream_mr;  ///< Upstream memory resource
+  std::vector<void*> _allocated_blocks;         ///< All allocated blocks
+  std::vector<void*> _free_blocks;              ///< Currently free blocks
   mutable std::mutex _mutex;
   utils::atomic_bounded_counter<size_t> _allocated_bytes{0};
   utils::atomic_peak_tracker<size_t> _peak_allocated_bytes{0};

diff --git a/src/data/representation_converter.cpp b/src/data/representation_converter.cpp
@@ -689,8 +689,9 @@ static std::unique_ptr<cudf::column> reconstruct_column(
       meta.num_rows, std::move(fields), null_count, std::move(null_mask));
   }
 
-  // Fixed-width types and DECIMAL (scale stored in meta.scale)
-  const cudf::data_type dtype{meta.type_id, meta.scale};
+  const cudf::data_type dtype = cudf::is_fixed_point(cudf::data_type{meta.type_id})
+                                  ? cudf::data_type{meta.type_id, meta.scale}
+                                  : cudf::data_type{meta.type_id};
   return std::make_unique<cudf::column>(
     dtype,
     meta.num_rows,

diff --git a/src/memory/fixed_size_host_memory_resource.cpp b/src/memory/fixed_size_host_memory_resource.cpp
@@ -23,6 +23,7 @@
 #include <rmm/aligned.hpp>
 #include <rmm/error.hpp>
 #include <rmm/mr/device_memory_resource.hpp>
+#include <rmm/resource_ref.hpp>
 
 #include <memory>
 #include <mutex>
@@ -31,9 +32,13 @@
 namespace cucascade {
 namespace memory {
 
+// cccl_async_resource_ref operations (copy-construction here, allocate/deallocate below) go
+// through __basic_any virtual dispatch, which GCC wrongly flags as a null dereference.
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wnull-dereference"
 fixed_size_host_memory_resource::fixed_size_host_memory_resource(
   int device_id,
-  rmm::mr::device_memory_resource& upstream_mr,
+  rmm::device_async_resource_ref upstream_mr,
   std::size_t memory_limit,
   std::size_t memory_capacity,
   std::size_t block_size,
@@ -44,21 +49,25 @@ fixed_size_host_memory_resource::fixed_size_host_memory_resource(
     _memory_capacity(memory_capacity),
     _block_size(rmm::align_up(block_size, alignof(std::max_align_t))),
     _pool_size(pool_size),
-    _upstream_mr(&upstream_mr)
+    _upstream_mr(upstream_mr)
 {
-  assert(_upstream_mr);
   for (std::size_t i = 0; i < initial_pools; ++i) {
     expand_pool();
   }
 }
+#pragma GCC diagnostic pop
 
 fixed_size_host_memory_resource::~fixed_size_host_memory_resource()
 {
   std::lock_guard<std::mutex> lock(_mutex);
+// See constructor for explanation of this suppression.
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wnull-dereference"
   for (auto& block : _allocated_blocks) {
     const std::size_t dealloc_size = _block_size * _pool_size;
-    _upstream_mr->deallocate(rmm::cuda_stream_view{}, block, dealloc_size);
+    _upstream_mr.deallocate(rmm::cuda_stream_view{}, block, dealloc_size);
   }
+#pragma GCC diagnostic pop
   _allocated_blocks.clear();
   _free_blocks.clear();
 }
@@ -93,8 +102,7 @@ std::size_t fixed_size_host_memory_resource::get_total_blocks() const noexcept
   return _allocated_blocks.size() * _pool_size;
 }
 
-rmm::mr::device_memory_resource* fixed_size_host_memory_resource::get_upstream_resource()
-  const noexcept
+rmm::device_async_resource_ref fixed_size_host_memory_resource::get_upstream_resource() noexcept
 {
   return _upstream_mr;
 }
@@ -267,7 +275,11 @@ void fixed_size_host_memory_resource::expand_pool()
 {
   const std::size_t total_size = _block_size * _pool_size;
 
-  void* large_allocation = _upstream_mr->allocate(rmm::cuda_stream_view{}, total_size);
+  // See constructor for explanation of this suppression.
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wnull-dereference"
+  void* large_allocation = _upstream_mr.allocate(rmm::cuda_stream_view{}, total_size);
+#pragma GCC diagnostic pop
 
   _allocated_blocks.push_back(large_allocation);
 

diff --git a/test/data/test_data_representation.cpp b/test/data/test_data_representation.cpp
@@ -1778,7 +1778,7 @@ TEST_CASE("Round-trip fast: INT32 column data preserved", "[fast][roundtrip]")
   rmm::cuda_stream stream;
 
   constexpr int N = 64;
-  auto col        = cudf::make_numeric_column(cudf::data_type{cudf::type_id::INT32},
+  auto col        = cudf::make_numeric_column(cudf::data_type(cudf::type_id::INT32),
                                        N,
                                        cudf::mask_state::UNALLOCATED,
                                        stream.view(),